Read Harvard Business Review on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Print issue - paid content. Harvard Business Review is a general management magazine. A monthly research-based magazine written for business practitioners, it claims a high-ranking business readership among academics, executives, and management consultants. It has been the frequent publishing home for scholars and management thinkers such as Clayton M. Christensen, Peter F. Drucker, Michael E. Porter, Rosabeth Moss Kanter, John Hagel III, Thomas H. Davenport, Gary Hamel, C.K. Prahalad, Vijay Govindarajan, Robert S. Kaplan, Robert H. Schaffer and others.
Language: en
Requires Subscription: Yes, requires a Harvard Business Review subscription
Schedule: Every morning
from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import date, timedelta
class HBR(BasicNewsRecipe):
    """calibre recipe that builds an e-book from the current HBR print issue.

    The recipe signs in with the subscriber's credentials, locates the table
    of contents of the newest available issue, and turns every section of
    that table of contents into a feed of print-view article URLs.
    """

    title = 'Harvard Business Review'
    description = 'To subscribe go to http://hbr.harvardbusiness.org'
    needs_subscription = True
    __author__ = 'Kovid Goyal and Sujata Raman'
    timefmt = ' [%B %Y]'
    language = 'en'
    no_stylesheets = True

    # Explanation attached to the recipe for why it is switched off.
    recipe_disabled = ('hbr.org has started requiring the use of javascript'
            ' to log into their website. This is unsupported in calibre, so'
            ' this recipe has been disabled. If you would like to see '
            ' HBR supported in calibre, contact hbr.org and ask them'
            ' to provide a javascript free login method.')

    LOGIN_URL = 'https://hbr.org/login?request_url=/'
    LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
    # Prefix of the issue table of contents; a two-digit year and two-digit
    # month ('%y%m') are appended by hbr_get_toc().
    INDEX = 'http://hbr.org/archive-toc/BR'

    keep_only_tags = [dict(name='div', id='pageContainer')]
    remove_tags = [
        dict(id=['mastheadContainer', 'magazineHeadline',
                 'articleToolbarTopRD', 'pageRightSubColumn',
                 'pageRightColumn', 'todayOnHBRListWidget', 'mostWidget',
                 'keepUpWithHBR', 'mailingListTout', 'partnerCenter',
                 'pageFooter', 'superNavHeadContainer', 'hbrDisqus',
                 'articleToolbarTop', 'articleToolbarBottom',
                 'articleToolbarRD']),
        dict(name='iframe'),
    ]

    # NOTE: the CSS text is kept flush-left so the string value is unchanged.
    extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
#articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
'''

    def get_browser(self):
        """Return a browser that is signed in to hbr.org.

        Also records the URL to visit for signing out in ``self.logout_url``
        so that cleanup() can end the session.

        Raises:
            Exception: if the response to the sign-in form does not contain
                the 'Sign out' marker, i.e. the login was not accepted.
        """
        br = BasicNewsRecipe.get_browser(self)
        self.logout_url = None

        br.open(self.LOGIN_URL)
        br.select_form(name='signin-form')
        br['signin-form:username'] = self.username
        br['signin-form:password'] = self.password
        raw = br.submit().read()
        # The response body is a byte string; decode it so the substring
        # test below also works where bytes and text are distinct types.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        if '>Sign out<' not in raw:
            raise Exception('Failed to login, are you sure your username and password are correct?')

        # Best effort: prefer the sign-out link from the page, fall back to
        # the well-known logout URL if the lookup fails for any reason.
        try:
            link = br.find_link(text='Sign out')
            if link:
                self.logout_url = link.absolute_url
        except Exception:
            self.logout_url = self.LOGOUT_URL
        return br

    def cleanup(self):
        """Sign out of hbr.org if get_browser() recorded a logout URL."""
        if self.logout_url is not None:
            self.browser.open(self.logout_url)

    def map_url(self, url):
        """Map a first-page article URL to its print-view URL.

        Returns the URL with the trailing '/ar/1' rewritten to '/ar/pr', or
        None when ``url`` does not end in '/ar/1'.
        """
        if url.endswith('/ar/1'):
            return url[:-1] + 'pr'
        return None

    def hbr_get_toc(self):
        """Fetch the table of contents page of the newest available issue.

        The issue dated 30 days ahead is tried first, then the issue for
        today's date.

        Raises:
            Exception: if every candidate page reports 'Issue Not Found'.
        """
        today = date.today()
        future = today + timedelta(days=30)
        issue_codes = []
        for when in (future, today):
            code = when.strftime('%y%m')
            # Both dates can fall in the same month; fetch each issue once.
            if code not in issue_codes:
                issue_codes.append(code)

        for code in issue_codes:
            soup = self.index_to_soup(self.INDEX + code)
            if not soup.find(text='Issue Not Found'):
                return soup
        raise Exception('Could not find current issue')

    def hbr_parse_toc(self, soup):
        """Convert a table of contents page into calibre feeds.

        Each 'h3' inside the element with id 'archiveToc' starts a section
        and each following 'h4' holds one article link. Links without
        '/ar/' in the href, and articles for which map_url() yields no
        print URL, are skipped. Articles that appear before the first 'h3'
        and sections without articles are dropped.

        Returns:
            A list of ``(section_title, articles)`` tuples where every
            article is a dict with 'title', 'url', 'description' and 'date'.

        Raises:
            Exception: if the page has no element with id 'archiveToc'.
        """
        toc = soup.find(id='archiveToc')
        if toc is None:
            raise Exception('Could not find the table of contents in the issue page')

        feeds = []
        current_section = None
        articles = []
        for x in toc.findAll(['h3', 'h4']):
            if x.name == 'h3':
                if current_section is not None and articles:
                    feeds.append((current_section, articles))
                current_section = self.tag_to_string(x).capitalize()
                articles = []
                self.log('\tFound section:', current_section)
                continue

            a = x.find('a', href=True)
            if a is None:
                continue
            title = self.tag_to_string(a)
            url = a['href']
            if '/ar/' not in url:
                continue
            if url.startswith('/'):
                url = 'http://hbr.org' + url
            url = self.map_url(url)
            if url is None:
                # Never queue an article without a URL to download.
                self.log('\t\tSkipping article without a print URL:', title)
                continue
            p = x.parent.find('p')
            desc = ''
            if p is not None:
                desc = self.tag_to_string(p)
            self.log('\t\tFound article:', title)
            self.log('\t\t\t', url)
            self.log('\t\t\t', desc)
            articles.append({'title': title, 'url': url, 'description': desc,
                             'date': ''})

        # Sections are only flushed when the next 'h3' shows up, so the last
        # one has to be flushed here or it would be lost.
        if current_section is not None and articles:
            feeds.append((current_section, articles))
        return feeds

    def parse_index(self):
        """Return the feeds for the newest available issue."""
        soup = self.hbr_get_toc()
        return self.hbr_parse_toc(soup)

    def get_cover_url(self):
        """Return the cover image URL of the current issue, or None.

        The cover is the first 'img' on http://hbr.org/current whose alt
        text matches 'Current Issue' and which has a 'src' attribute.
        """
        cover_url = None
        index = 'http://hbr.org/current'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)
        if link_item:
            cover_url = 'http://hbr.org' + link_item['src']
        return cover_url