Read Harper's Magazine Print on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Print issue - Paid content. Harper's Magazine: Founded June 1850.

Language: en

Requires Subscription: Yes, requires a Harper's Magazine Print subscription

Schedule: Every morning

			  __license__   = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
harpers.org - paid subscription/ printed issue articles
This recipe only gets articles published in text format;
images and PDFs are ignored
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Harpers_full(BasicNewsRecipe):
    '''
    Recipe for the text articles of the current Harper's Magazine print issue.

    The archive index and cover URLs are built from the current year and month
    when the class is defined. Only index entries marked with a "Text" icon are
    collected. When a username and password are supplied, they are submitted
    through a form on the site's front page before any download happens.
    '''

    title                 = "Harper's Magazine - articles from printed edition"
    __author__            = 'Darko Miletic'
    description           = "Harper's Magazine: Founded June 1850."
    publisher             = "Harper's"
    category              = 'news, politics, USA'
    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    delay                 = 1
    language              = 'en'
    needs_subscription    = True
    masthead_url          = 'http://www.harpers.org/media/image/Harpers_305x100.gif'
    publication_type      = 'magazine'
    # Archive page of the issue for the current year/month.
    INDEX                 = strftime('http://www.harpers.org/archive/%Y/%m')
    # Site root: used both as the login page and as the prefix for the
    # relative article links found on the index page.
    LOGIN                 = 'http://www.harpers.org'
    # First scanned page of the current issue serves as the cover.
    cover_url             = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
    extra_css             = ' body{font-family: "Georgia",serif} '

    # Metadata handed to the conversion pipeline; mirrors the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Keep only the article container and drop the listed tables and <link>
    # elements from it.
    keep_only_tags = [dict(name='div', attrs={'id': 'cached'})]
    remove_tags = [
        dict(name='table', attrs={'class': ['rcnt', 'rcnt topline']}),
        dict(name='link'),
    ]
    remove_attributes = ['xmlns']

    def get_browser(self):
        '''
        Return a browser, logged in when credentials are available.

        If either ``self.username`` or ``self.password`` is None the plain
        browser from the base class is returned without attempting a login.
        '''
        # get_browser is an instance method on BasicNewsRecipe, so the
        # instance has to be passed explicitly when calling it via the class.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            # Form index 1, i.e. the second form on the page; presumably the
            # login form -- verify against the live site if login breaks.
            br.select_form(nr=1)
            br['handle'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        '''
        Build the article list from the archive page at ``self.INDEX``.

        Returns a list holding a single ``(feed_title, articles)`` tuple.
        ``feed_title`` is the string of the index page's <title> element and
        every article is a dict with the keys 'title', 'date', 'url' and
        'description'.
        '''
        # Parenthesised single-argument form prints identically on Python 2
        # and Python 3.
        print('Processing ' + self.INDEX)
        soup = self.index_to_soup(self.INDEX)
        # Same for every article of the issue, so compute it only once.
        date = strftime(' %B %Y')
        articles = []
        for item in soup.findAll('div', attrs={'class': 'title'}):
            # Only entries carrying a "Text" icon are available as text;
            # everything else is skipped.
            if item.parent.find('img', attrs={'alt': 'Text'}) is None:
                continue
            link = item.a
            if link is None:
                # Title block without an anchor: nothing to download.
                continue
            articles.append({
                'title': link.contents[0],
                'date': date,
                'url': self.LOGIN + link['href'],
                'description': '',
            })
        return [(soup.head.title.string, articles)]