Read Journal of the American Society of Nephrology on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Language: en

Requires Subscription: Yes, requires a Journal of the American Society of Nephrology subscription

Schedule: Every morning

			  # -*- coding: utf-8 -*-

import time

from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class JASN(BasicNewsRecipe):
    """Calibre recipe for the Journal of the American Society of Nephrology.

    The recipe signs in with the subscriber's credentials, builds its feed
    list from the sections of the current-issue table of contents, and
    replaces each "[in this window]" figure table with the figure image
    itself.
    """

    title          = u'Journal of the American Society of Nephrology'
    language       = 'en'
    __author__     = 'Krittika Goyal'
    oldest_article = 31  # days
    max_articles_per_feed = 25
    delay = 5
    needs_subscription = True

    # Site root used to turn site-relative hrefs into absolute URLs.
    BASE_URL = 'http://jasn.asnjournals.org'
    INDEX = BASE_URL + '/current.shtml'
    no_stylesheets = True
    remove_tags_before = dict(name='h2')
    #remove_tags_after  = dict(name='th', attrs={'align':'left'})
    remove_tags = [
        dict(name='iframe'),
       #dict(name='div', attrs={'class':'related-articles'}),
        dict(name='td', attrs={'id':['jasnFooter']}),
        dict(name='table', attrs={'id':"jasnNavBar"}),
        dict(name='table', attrs={'class':'content_box_outer_table'}),
        dict(name='th', attrs={'align':'left'})
       ]

    def _absolute_url(self, url):
        """Return *url* prefixed with BASE_URL when it is site-relative."""
        if url.startswith('/'):
            return self.BASE_URL + url
        return url

    def _fetch_soup_with_retry(self, url, retries=1, pause=5):
        """Fetch *url* as soup, retrying after *pause* seconds on failure.

        Returns None when every attempt fails, so that a single broken
        figure page does not abort processing of the whole article.
        """
        for attempt in range(retries + 1):
            try:
                return self.index_to_soup(url)
            except Exception:
                if attempt < retries:
                    time.sleep(pause)
        self.log('\tFailed to fetch figure page:', url)
        return None

    # TO LOGIN
    def get_browser(self):
        """Return a browser that has been signed in to the journal site.

        The sign-in form is reached by opening the first '[Full Text]' link
        of the current table of contents. The parsed table of contents is
        kept on ``self.kidney_toc_soup``.

        Raises:
            ValueError: if the table of contents or a '[Full Text]' link
                cannot be found, or if the page returned after submitting
                the credentials does not contain 'Sign Out'.
        """
        # Pass the instance explicitly: calling the base method without it
        # raises TypeError when get_browser is an instance method.
        br = BasicNewsRecipe.get_browser(self)
        self.kidney_toc_soup = BeautifulSoup(br.open(self.INDEX).read())
        toc = self.kidney_toc_soup.find(id='tocTable')
        if toc is None:
            raise ValueError(
                'Could not find the table of contents at ' + self.INDEX)
        t = toc.find(text=lambda x: x and '[Full Text]' in x)
        a = t.findParent('a', href=True) if t is not None else None
        if a is None:
            raise ValueError(
                'Could not find a [Full Text] link to log in through')
        br.open(self._absolute_url(a.get('href')))
        br.select_form(name='UserSignIn')
        br['username'] = self.username
        br['code'] = self.password
        raw = br.submit().read()
        # The response body may be bytes or text depending on the Python
        # version; compare against a marker of the matching type.
        signed_in = b'Sign Out' if isinstance(raw, bytes) else u'Sign Out'
        if signed_in not in raw:
            raise ValueError('Failed to log in, is your account expired?')
        return br

    #feeds          = [
        #('JASN',
        #'http://jasn.asnjournals.org/rss/current.xml'),
    #]

    # TO GET ARTICLE TOC
    def jasn_get_index(self):
        """Return the parsed current-issue table of contents page."""
        return self.index_to_soup(self.INDEX)

    # To parse article toc
    def parse_index(self):
        """Build the feed list from the current table of contents.

        Every ``h2`` inside the ``tocBody`` element starts a new section;
        every ``strong`` element after a section heading is taken as an
        article title, and its URL is the first link containing '/full/'
        found under the title's grandparent element.

        Returns:
            A list of ``(section_title, articles)`` tuples, where each
            article is a dict with 'title', 'url', 'description' and 'date'.

        Raises:
            ValueError: if the page has no ``tocBody`` element.
        """
        parse_soup = self.jasn_get_index()

        div = parse_soup.find(id='tocBody')
        if div is None:
            raise ValueError(
                'Could not find the article list at ' + self.INDEX)

        current_section = None
        current_articles = []
        feeds = []
        for x in div.findAll(True):
            if x.name == 'h2':
                # Section heading found: flush the previous section first.
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                current_section = self.tag_to_string(x)
                current_articles = []
                self.log('\tFound section:', current_section)
            if current_section is not None and x.name == 'strong':
                title = self.tag_to_string(x)
                container = x.parent.parent if x.parent is not None else None
                if container is None:
                    continue
                a = container.find(
                    'a', href=lambda href: href and '/full/' in href)
                if a is None:
                    continue
                url = a.get('href', False)
                if not url or not title:
                    continue
                url = self._absolute_url(url)
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url': url,
                    'description': '', 'date': ''})

        # Flush the last section, which has no following heading.
        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds

    def preprocess_html(self, soup):
        """Replace '[in this window]' figure tables with the figure image.

        For every '[in this window]' link, the linked page is fetched, the
        first image whose ``src`` starts with '/content/' is taken from it,
        and the table enclosing the link is replaced by that image. Links
        whose page or image cannot be obtained are left untouched.

        Returns:
            The same *soup* object, modified in place.
        """
        markers = soup.findAll(text=lambda x: x and '[in this window]' in x)
        for marker in markers:
            a = marker.findParent('a')
            if a is None:
                continue
            url = a.get('href', None)
            if not url:
                continue

            # Fetch the figure page for relative and absolute links alike,
            # so the image looked up below always belongs to this link.
            isoup = self._fetch_soup_with_retry(self._absolute_url(url))
            if isoup is None:
                continue
            img = isoup.find(
                'img', src=lambda x: x and x.startswith('/content/'))
            if img is None:
                continue

            table = a.findParent('table')
            # Skip links with no enclosing table, or whose table was already
            # detached from the document by an earlier replacement.
            if table is None or table.parent is None:
                continue
            img.extract()
            table.replaceWith(img)
        return soup