Read Time on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Weekly magazine

Language: en

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

			  #!/usr/bin/env  python

__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
'''
time.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from lxml import html

class Time(BasicNewsRecipe):
    '''
    calibre recipe that builds an ebook from the TIME magazine index page
    at http://www.time.com/time/magazine.

    parse_index() discovers the sections and articles (and, when available,
    the cover image); the class attributes below configure how the
    downloaded article pages are cleaned up.
    '''
    #recipe_disabled = ('This recipe has been disabled as TIME no longer'
    #        ' publish complete articles on the web.')
    title                 = u'Time'
    __author__            = 'Kovid Goyal'
    description           = ('Weekly US magazine.')
    encoding = 'utf-8'
    no_stylesheets        = True
    language = 'en'
    remove_javascript     = True
    # Login support is switched off: see the disabled branch in get_browser().
    #needs_subscription = 'optional'

    # Only elements carrying one of these CSS classes are kept from each
    # downloaded article page.
    keep_only_tags = [
            {
                'class':['artHd', 'articleContent',
                            'entry-title','entry-meta', 'entry-content', 'thumbnail']
            },
        ]
    # Elements matching any of these specs are stripped from the kept content.
    remove_tags = [
            {'class':['content-tools', 'quigo', 'see',
                'first-tier-social-tools', 'navigation', 'enlarge lightbox']},
            {'id':['share-tools']},
            {'rel':'lightbox'},
            ]

    # Follow links from article pages (up to this depth) when they match one
    # of match_regexps; the first pattern targets the numbered follow-up
    # pages (-2 ... -9) of a multi-page article.
    recursions = 10
    match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html',r'http://www.time.com/time/specials/packages/article/.*']

    # Drop self-closing <meta .../> tags before parsing.  The character
    # class stops at the first '>' so the match can never run past the end
    # of the meta tag and swallow neighbouring markup on the same line.
    preprocess_regexps = [(re.compile(
        r'<meta [^>]+/>'), lambda m:'')]

    def get_browser(self):
        '''
        Return the browser used to download pages.

        The login branch is deliberately disabled by the constant ``False``
        in its condition, so the stock browser is always returned unchanged.

        :raises ValueError: only from the (currently disabled) login branch,
            when the response does not contain the "Log Out" marker.
        '''
        br = BasicNewsRecipe.get_browser(self)
        if False and self.username and self.password:
            # This site uses javascript in its login process
            res = br.open('http://www.time.com/time/magazine')
            br.select_form(nr=1)
            br['username'] = self.username
            br['password'] = self.password
            res = br.submit()
            raw = res.read()
            if '>Log Out<' not in raw:
                raise ValueError('Failed to login to time.com, check'
                        ' your username and password')
        return br

    def parse_index(self):
        '''
        Build the list of feeds from the magazine index page.

        Also sets ``self.cover_url`` when a "View Large Cover" link leads to
        a page containing a cover image; failure to fetch the cover is
        logged and otherwise ignored.

        :return: list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of the dicts yielded by find_articles().
            Sections without a heading or without articles are skipped.
        :raises ValueError: if the index page has no
            ``div.content-main-aside`` container to read sections from.
        '''
        raw = self.index_to_soup('http://www.time.com/time/magazine', raw=True)
        root = html.fromstring(raw)

        cover_links = root.xpath('//a[.="View Large Cover" and @href]')
        if cover_links:
            cover_page = 'http://www.time.com' + cover_links[0].get('href')
            try:
                nsoup = self.index_to_soup(cover_page)
                cover_img = nsoup.find('img', src=re.compile('archive/covers'))
                if cover_img is not None:
                    self.cover_url = cover_img['src']
            except Exception:
                # The cover is optional: log the problem and carry on, but
                # do not swallow KeyboardInterrupt/SystemExit.
                self.log.exception('Failed to fetch cover')

        containers = root.xpath('//div[@class="content-main-aside"]')
        if not containers:
            raise ValueError('Could not find the magazine contents on'
                    ' http://www.time.com/time/magazine')
        parent = containers[0]

        feeds = []
        for sec in parent.xpath(
                'descendant::section[contains(@class, "sec-mag-section")]'):
            h3 = sec.xpath('./h3')
            if not h3:
                continue
            # encoding='unicode' makes lxml return text (not bytes) on both
            # Python 2 and Python 3.
            section = html.tostring(h3[0], encoding='unicode',
                    method='text').strip().capitalize()
            self.log('Found section', section)
            articles = list(self.find_articles(sec))
            if articles:
                feeds.append((section, articles))

        return feeds

    def find_articles(self, sec):
        '''
        Yield one article dict for every usable ``<article>`` child of *sec*.

        An article is skipped when it has no ``entry-title`` element, no
        link with an ``href`` inside that element, or an empty link text.

        :param sec: lxml element for one magazine section.
        :return: generator of dicts with the keys ``title``, ``url``,
            ``date`` (always empty) and ``description``.
        '''
        for article in sec.xpath('./article'):
            headings = article.xpath('./*[@class="entry-title"]')
            if not headings:
                continue
            links = headings[0].xpath('./a[@href]')
            if not links:
                continue
            link = links[0]
            title = html.tostring(link, encoding='unicode',
                        method='text').strip()
            if not title:
                continue
            url = link.get('href')
            if url.startswith('/'):
                # Site-relative link: make it absolute.
                url = 'http://www.time.com'+url
            desc = ''
            summary = article.xpath('./*[@class="entry-content"]')
            if summary:
                desc = html.tostring(summary[0], encoding='unicode',
                        method='text')
            self.log('\t', title, ':\n\t\t', desc)
            yield {
                    'title' : title,
                    'url'   : url,
                    'date'  : '',
                    'description' : desc
                    }

    def postprocess_html(self,soup,first):
        '''
        Remove pagination widgets (classes ``artPag`` and ``pagination``)
        from the parsed page and return the modified *soup*.

        *first* is accepted for interface compatibility and is not used.
        '''
        for tag in soup.findAll(attrs ={'class':['artPag','pagination']}):
            tag.extract()
        return soup