Read Journal of the American Society of Nephrology on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Language: en
Requires Subscription: Yes, requires a Journal of the American Society of Nephrology subscription
Schedule: Every morning
# -*- coding: utf-8 -*-
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class JASN(BasicNewsRecipe):
    """Recipe for the Journal of the American Society of Nephrology.

    The current issue's table of contents is scraped from ``INDEX`` and
    grouped into one feed per ``<h2>`` section heading.  The site requires
    a subscription, so ``get_browser`` signs in before anything is fetched.
    """

    title = u'Journal of the American Society of Nephrology'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 31  # days
    max_articles_per_feed = 25
    delay = 5
    needs_subscription = True
    # Site root used to turn root-relative hrefs ("/cgi/...") into full URLs.
    BASE_URL = 'http://jasn.asnjournals.org'
    INDEX = 'http://jasn.asnjournals.org/current.shtml'
    no_stylesheets = True
    remove_tags_before = dict(name='h2')
    remove_tags = [
        dict(name='iframe'),
        dict(name='td', attrs={'id': ['jasnFooter']}),
        dict(name='table', attrs={'id': "jasnNavBar"}),
        dict(name='table', attrs={'class': 'content_box_outer_table'}),
        dict(name='th', attrs={'align': 'left'}),
    ]

    def _absolute_url(self, url):
        """Return *url* with the site root prepended if it starts with '/'."""
        if url.startswith('/'):
            return self.BASE_URL + url
        return url

    def _fetch_soup_with_retry(self, url, retry_delay=5):
        """Fetch *url* as soup, retrying once after *retry_delay* seconds.

        Returns the parsed soup, or None if both attempts fail.
        """
        try:
            return self.index_to_soup(url)
        except Exception:
            # Best effort: wait briefly, then try exactly once more.
            time.sleep(retry_delay)
        try:
            return self.index_to_soup(url)
        except Exception:
            self.log('\t\tFailed to fetch:', url)
            return None

    def get_browser(self):
        """Return a browser that has signed in to the JASN site.

        The sign-in form is reached by opening the first '[Full Text]' link
        found in the ``tocTable`` element of the current-issue index page.

        Raises:
            ValueError: if no '[Full Text]' link can be found on the index
                page, or if the page returned after submitting the
                credentials does not contain 'Sign Out'.
        """
        # Pass self explicitly: get_browser is an instance method on the
        # base class, so the unbound call needs the instance.
        br = BasicNewsRecipe.get_browser(self)
        self.kidney_toc_soup = BeautifulSoup(br.open(self.INDEX).read())

        toc = self.kidney_toc_soup.find(id='tocTable')
        marker = None
        if toc is not None:
            marker = toc.find(text=lambda s: s and '[Full Text]' in s)
        link = None
        if marker is not None:
            link = marker.findParent('a', href=True)
        if link is None:
            raise ValueError(
                'Could not find a [Full Text] link on %s to reach the '
                'login form' % self.INDEX)

        br.open(self._absolute_url(link.get('href')))
        br.select_form(name='UserSignIn')
        br['username'] = self.username
        br['code'] = self.password
        raw = br.submit().read()
        # The response body may be bytes; decode so the substring test
        # below works regardless of the Python version.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        if 'Sign Out' not in raw:
            raise ValueError('Failed to log in, is your account expired?')
        return br

    # The site's RSS feed (http://jasn.asnjournals.org/rss/current.xml) is
    # not used; the issue table of contents is parsed in parse_index.

    def jasn_get_index(self):
        """Return the parsed soup of the current-issue index page."""
        return self.index_to_soup(self.INDEX)

    def parse_index(self):
        """Build the list of feeds from the current-issue table of contents.

        Every ``<h2>`` inside the ``tocBody`` element starts a new section;
        every ``<strong>`` seen after a section heading is taken as an
        article title, and its URL is the first link containing '/full/'
        found under the title's grandparent element.

        Returns:
            A list of ``(section_title, articles)`` tuples, where each
            article is a dict with 'title', 'url', 'description' and 'date'.

        Raises:
            ValueError: if the index page has no ``tocBody`` element.
        """
        parse_soup = self.jasn_get_index()
        div = parse_soup.find(id='tocBody')
        if div is None:
            raise ValueError(
                'Could not find the table of contents (tocBody) on %s'
                % self.INDEX)

        current_section = None
        current_articles = []
        feeds = []
        for tag in div.findAll(True):
            if tag.name == 'h2':
                # Section heading found: flush the previous section first.
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                current_section = self.tag_to_string(tag)
                current_articles = []
                self.log('\tFound section:', current_section)
            elif current_section is not None and tag.name == 'strong':
                title = self.tag_to_string(tag)
                a = tag.parent.parent.find(
                    'a', href=lambda href: href and '/full/' in href)
                if a is None:
                    continue
                url = a.get('href', False)
                if not url or not title:
                    continue
                url = self._absolute_url(url)
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url': url,
                                         'description': '', 'date': ''})

        # Flush the last section, which has no following <h2> to trigger it.
        if current_articles and current_section:
            feeds.append((current_section, current_articles))
        return feeds

    def preprocess_html(self, soup):
        """Replace '[in this window]' link tables with the linked image.

        For each '[in this window]' link, the linked page is fetched and
        its first ``<img>`` whose src starts with '/content/' replaces the
        ``<table>`` enclosing the link.  Links that cannot be resolved or
        fetched are left untouched.

        Returns:
            The same *soup* object, modified in place.
        """
        markers = soup.findAll(text=lambda s: s and '[in this window]' in s)
        for marker in markers:
            link = marker.findParent('a')
            if link is None:
                continue
            url = link.get('href', None)
            if not url:
                continue

            isoup = self._fetch_soup_with_retry(self._absolute_url(url))
            if isoup is None:
                continue

            img = isoup.find(
                'img', src=lambda src: src and src.startswith('/content/'))
            table = link.findParent('table')
            if img is None or table is None:
                continue
            img.extract()
            table.replaceWith(img)
        return soup