Read Harvard Business Review Blogs on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Paid content. To subscribe go to http://hbr.harvardbusiness.org
Language: en
Requires Subscription: Yes, requires a Harvard Business Review Blogs subscription
Schedule: Every morning
from calibre.web.feeds.news import BasicNewsRecipe
import re
class HBR(BasicNewsRecipe):
    """Recipe that fetches the Harvard Business Review blog feed.

    Articles come from the RSS feed listed in ``feeds``; the cover image is
    scraped from the page at ``INDEX``.  Logging in is only attempted when
    ``needs_subscription`` is true, which it is not by default.
    """

    title = 'Harvard Business Review Blogs'
    description = 'To subscribe go to http://hbr.harvardbusiness.org'
    __author__ = 'Kovid Goyal'
    language = 'en'
    no_stylesheets = True

    # Historical note, kept from the original recipe:
    # recipe_disabled = ('hbr.org has started requiring the use of javascript'
    #         ' to log into their website. This is unsupported in calibre, so'
    #         ' this recipe has been disabled. If you would like to see '
    #         ' HBR supported in calibre, contact hbr.org and ask them'
    #         ' to provide a javascript free login method.')

    # Login is switched off; see get_browser().
    needs_subscription = False

    LOGIN_URL = 'http://hbr.org/login?request_url=/'
    LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
    INDEX = 'http://hbr.org/current'

    # Class-level default so cleanup() is safe even if get_browser() was
    # never called on this instance.
    logout_url = None

    remove_tags_after = dict(id='articleBody')
    remove_tags_before = dict(id='pageFeature')
    feeds = [('Blog', 'http://feeds.harvardbusiness.org/harvardbusiness')]
    oldest_article = 30
    max_articles_per_feed = 100
    use_embedded_content = False

    keep_only_tags = [dict(name='div', id='pageContainer')]

    remove_tags = [
        dict(id=[
            'mastheadContainer', 'magazineHeadline',
            'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
            'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
            'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD',
            'mailingListTout', 'partnerCenter', 'pageFooter', 'shareWidgetTop',
        ]),
        dict(name=['iframe', 'style']),
    ]

    def get_browser(self):
        """Return the browser used for downloads, logging in if required.

        The original implementation returned before its login code, leaving
        that code unreachable.  The login is now guarded by
        ``needs_subscription`` instead, so with the default value (False) the
        behaviour is unchanged: a plain browser is returned and
        ``logout_url`` is None.

        Raises:
            Exception: if a login is attempted and the response page does not
                contain the text 'My Account'.
        """
        br = BasicNewsRecipe.get_browser(self)
        self.logout_url = None
        if self.needs_subscription:
            self._login(br)
        return br

    def _login(self, br):
        """Submit the sign-in form on ``br`` and record the logout URL."""
        br.open(self.LOGIN_URL)
        br.select_form(name='signin-form')
        br['signin-form:username'] = self.username
        br['signin-form:password'] = self.password
        raw = br.submit().read()
        # Bytes literal: identical to the original str test on Python 2.
        # NOTE(review): assumes read() returns bytes on Python 3 -- confirm.
        if b'My Account' not in raw:
            raise Exception('Failed to login, are you sure your username and password are correct?')
        try:
            link = br.find_link(text='Sign out')
        except Exception:
            # No 'Sign out' link could be located; use the known logout URL.
            self.logout_url = self.LOGOUT_URL
        else:
            if link:
                self.logout_url = link.absolute_url

    def cleanup(self):
        """Open the logout URL if a login recorded one; otherwise do nothing."""
        if self.logout_url is not None:
            self.browser.open(self.logout_url)

    def map_url(self, url):
        """Rewrite a URL ending in '/ar/1' so that it ends in '/ar/pr'.

        Returns:
            The rewritten URL, or None when ``url`` does not end in '/ar/1'.
        """
        if url.endswith('/ar/1'):
            return url[:-1] + 'pr'
        return None

    def get_cover_url(self):
        """Return the cover image URL found on the ``INDEX`` page.

        Looks for the first ``<img>`` that has a ``src`` and whose ``alt``
        matches "Current Issue".

        Returns:
            The image URL, or None when no such image is present.
        """
        soup = self.index_to_soup(self.INDEX)
        link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)
        if not link_item:
            return None
        src = link_item['src']
        # An already-absolute src must not get the site prefix prepended.
        if src.startswith(('http://', 'https://')):
            return src
        return 'http://hbr.org' + src