Read Harvard Business Review on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Print issue - paid content. Harvard Business Review is a general management magazine. A monthly, research-based magazine written for business practitioners, it claims a high-ranking business readership among academics, executives, and management consultants. It has been the frequent publishing home for scholars and management thinkers such as Clayton M. Christensen, Peter F. Drucker, Michael E. Porter, Rosabeth Moss Kanter, John Hagel III, Thomas H. Davenport, Gary Hamel, C.K. Prahalad, Vijay Govindarajan, Robert S. Kaplan, Robert H. Schaffer and others.

Language: en

Requires Subscription: Yes, requires a Harvard Business Review subscription

Schedule: Every morning

			  from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import date, timedelta

class HBR(BasicNewsRecipe):
    """calibre recipe that builds an ebook from the current HBR issue.

    The recipe logs into hbr.org with the subscriber's credentials, fetches
    the archive table of contents for the current (or upcoming) issue and
    turns every section heading into a feed of articles.
    """

    title = 'Harvard Business Review'
    description = 'To subscribe go to http://hbr.harvardbusiness.org'
    needs_subscription = True
    __author__ = 'Kovid Goyal and Sujata Raman'
    timefmt                = ' [%B %Y]'
    language = 'en'
    no_stylesheets = True
    # Human-readable explanation of why this recipe is currently disabled.
    recipe_disabled = ('hbr.org has started requiring the use of javascript'
            ' to log into their website. This is unsupported in calibre, so'
            ' this recipe has been disabled. If you would like to see '
            ' HBR supported in calibre, contact hbr.org and ask them'
            ' to provide a javascript free login method.')

    LOGIN_URL = 'https://hbr.org/login?request_url=/'
    # Fallback used when no 'Sign out' link can be located after login.
    LOGOUT_URL = 'https://hbr.org/logout?request_url=/'

    # Base of the issue table-of-contents URL; hbr_get_toc() appends a
    # two-digit-year + two-digit-month suffix ('%y%m').
    INDEX = 'http://hbr.org/archive-toc/BR'

    keep_only_tags = [dict(name='div', id='pageContainer')]
    remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
        'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
        'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
        'mailingListTout', 'partnerCenter', 'pageFooter',
        'superNavHeadContainer', 'hbrDisqus',
        'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
        dict(name='iframe')]
    extra_css = '''
                a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
                .article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
                h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
                h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small;  }
                #articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
                #summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
                '''

    def get_browser(self):
        """Return a browser that is logged into hbr.org.

        Submits ``self.username`` / ``self.password`` through the
        'signin-form' form at LOGIN_URL and stores the URL to visit on
        cleanup in ``self.logout_url``.

        Raises:
            Exception: if the response to the login form does not contain a
                'Sign out' link, i.e. the login was rejected.
        """
        br = BasicNewsRecipe.get_browser(self)
        self.logout_url = None

        br.open(self.LOGIN_URL)
        br.select_form(name='signin-form')
        br['signin-form:username'] = self.username
        br['signin-form:password'] = self.password
        raw = br.submit().read()
        # The response body may be bytes; decode so that the substring test
        # below does not raise TypeError on Python 3.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        if '>Sign out<' not in raw:
            raise Exception('Failed to login, are you sure your username and password are correct?')
        # We are logged in at this point, so always end up with some logout
        # URL: the page's own link when available, the default otherwise.
        try:
            link = br.find_link(text='Sign out')
            self.logout_url = link.absolute_url if link else self.LOGOUT_URL
        except Exception:
            self.logout_url = self.LOGOUT_URL
        return br

    def cleanup(self):
        """Log out of hbr.org if get_browser() recorded a logout URL."""
        # logout_url is only set by get_browser(); tolerate it being absent.
        logout_url = getattr(self, 'logout_url', None)
        if logout_url is not None:
            self.browser.open(logout_url)

    def map_url(self, url):
        """Rewrite an article URL ending in '/ar/1' so it ends in '/ar/pr'.

        URLs with any other ending are returned unchanged.
        (Presumably '/ar/pr' is the single-page print view -- confirm.)
        """
        if url.endswith('/ar/1'):
            return url[:-1] + 'pr'
        return url

    def hbr_get_toc(self):
        """Fetch and return the parsed table of contents of the current issue.

        The issue for the month 30 days from now is tried first, then the
        issue for the current month; the first page that does not contain
        the text 'Issue Not Found' wins.

        Raises:
            Exception: if neither candidate issue exists.
        """
        today = date.today()
        future = today + timedelta(days=30)
        tried = []
        for day in (future, today):
            suffix = day.strftime('%y%m')
            if suffix in tried:
                # Both dates fall in the same month; don't refetch the page.
                continue
            tried.append(suffix)
            soup = self.index_to_soup(self.INDEX + suffix)
            if not soup.find(text='Issue Not Found'):
                return soup
        raise Exception('Could not find current issue')

    def hbr_parse_toc(self, soup):
        """Convert the table-of-contents soup into a list of feeds.

        Every <h3> inside the element with id 'archiveToc' starts a new
        section; every following <h4> that links to an '/ar/' URL becomes an
        article of that section.

        Returns:
            A list of ``(section_title, articles)`` tuples where ``articles``
            is a list of dicts with the keys 'title', 'url', 'description'
            and 'date'. Sections without articles are omitted.
        """
        feeds = []
        current_section = None
        articles = []
        for x in soup.find(id='archiveToc').findAll(['h3', 'h4']):
            if x.name == 'h3':
                if current_section is not None and articles:
                    feeds.append((current_section, articles))
                current_section = self.tag_to_string(x).capitalize()
                articles = []
                self.log('\tFound section:', current_section)
            else:
                a = x.find('a', href=True)
                if a is None:
                    continue
                title = self.tag_to_string(a)
                url = a['href']
                if '/ar/' not in url:
                    continue
                if url.startswith('/'):
                    url = 'http://hbr.org' + url
                url = self.map_url(url)
                p = x.parent.find('p')
                desc = ''
                if p is not None:
                    desc = self.tag_to_string(p)
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                self.log('\t\t\t', desc)

                articles.append({'title':title, 'url':url, 'description':desc,
                    'date':''})
        # Flush the section that was still being collected when the loop
        # ended; without this the last section of the issue is lost.
        if current_section is not None and articles:
            feeds.append((current_section, articles))
        return feeds

    def parse_index(self):
        """Return the feeds of the current issue (see hbr_parse_toc)."""
        soup = self.hbr_get_toc()
        feeds = self.hbr_parse_toc(soup)
        return feeds

    def get_cover_url(self):
        """Return the URL of the current issue's cover image, or None.

        The cover is the first <img> on http://hbr.org/current whose alt
        text matches "Current Issue" and that has a src attribute.
        """
        cover_url = None
        index = 'http://hbr.org/current'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)

        if link_item:
            cover_url = link_item['src']
            # Only prefix the host when src is not already an absolute URL.
            if not cover_url.startswith(('http://', 'https://')):
                cover_url = 'http://hbr.org' + cover_url

        return cover_url