Read Time on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Weekly magazine
Language: en
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
'''
time.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from lxml import html
class Time(BasicNewsRecipe):
    """calibre news recipe for TIME magazine (time.com).

    The article list is scraped from the magazine index page at
    http://www.time.com/time/magazine and grouped into one feed per
    section found on that page. The cover image is looked up on a best
    effort basis; failing to find it does not abort the download.
    """

    # NOTE: kept for reference, the recipe was once disabled with:
    # recipe_disabled = ('This recipe has been disabled as TIME no longer'
    #        ' publish complete articles on the web.')
    title = u'Time'
    __author__ = 'Kovid Goyal'
    description = 'Weekly US magazine.'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'en'
    remove_javascript = True
    # NOTE: login support is currently switched off (see get_browser):
    # needs_subscription = 'optional'

    # Only elements carrying one of these classes are kept from a page.
    keep_only_tags = [
        {
            'class': ['artHd', 'articleContent',
                      'entry-title', 'entry-meta', 'entry-content',
                      'thumbnail']
        },
    ]

    # Sharing widgets, navigation and lightbox links stripped from pages.
    remove_tags = [
        {'class': ['content-tools', 'quigo', 'see',
                   'first-tier-social-tools', 'navigation',
                   'enlarge lightbox']},
        {'id': ['share-tools']},
        {'rel': 'lightbox'},
    ]

    # Links are followed recursively, but only when they match one of
    # match_regexps: URLs ending in "-2" .. "-9" (optionally ",<digits>")
    # plus ".html" -- presumably continuation pages of a multi-page
    # article -- and anything under the "specials" package path.
    # The dots are escaped so that they only match a literal '.'.
    recursions = 10
    match_regexps = [
        r'/[0-9,]+-[2-9](,\d+)?\.html',
        r'http://www\.time\.com/time/specials/packages/article/.*',
    ]

    # Drop self-closing <meta .../> tags before parsing. The character
    # class stops at the first '>' so a match never extends past the tag
    # itself (a greedy '.+' would eat everything up to the last '/>' on
    # the same line).
    preprocess_regexps = [
        (re.compile(r'<meta [^>]+/>'), lambda m: ''),
    ]

    def get_browser(self):
        """Return the browser object used to fetch pages.

        The login branch is intentionally disabled with ``if False``:
        the site uses javascript in its login process, so the form
        submission below is kept only for reference.

        Raises:
            ValueError: if the (currently disabled) login is attempted
                and the response does not contain a "Log Out" link.
        """
        br = BasicNewsRecipe.get_browser(self)
        if False and self.username and self.password:
            # This site uses javascript in its login process
            br.open('http://www.time.com/time/magazine')
            br.select_form(nr=1)
            br['username'] = self.username
            br['password'] = self.password
            res = br.submit()
            raw = res.read()
            if '>Log Out<' not in raw:
                raise ValueError('Failed to login to time.com, check'
                                 ' your username and password')
        return br

    @staticmethod
    def _text(node):
        """Return the text content of an lxml element as a unicode string."""
        # `unicode` is the Python 2 builtin, as used throughout this recipe.
        return html.tostring(node, encoding=unicode, method='text')

    def parse_index(self):
        """Build the list of feeds from the magazine index page.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a non-empty list of the dicts produced by
            :meth:`find_articles`. Sections without an ``<h3>`` heading
            or without any article are left out.

        Raises:
            IndexError: if the index page has no
                ``div.content-main-aside`` element.
        """
        raw = self.index_to_soup('http://www.time.com/time/magazine',
                                 raw=True)
        root = html.fromstring(raw)

        # The cover is optional: any failure is logged and ignored.
        cover_links = root.xpath('//a[.="View Large Cover" and @href]')
        if cover_links:
            cover_url = 'http://www.time.com' + cover_links[0].get('href')
            try:
                nsoup = self.index_to_soup(cover_url)
                img = nsoup.find('img', src=re.compile('archive/covers'))
                if img is not None:
                    self.cover_url = img['src']
            except Exception:
                # Not a bare except: Ctrl-C / SystemExit must still
                # propagate.
                self.log.exception('Failed to fetch cover')

        feeds = []
        parent = root.xpath('//div[@class="content-main-aside"]')[0]
        for sec in parent.xpath(
                'descendant::section[contains(@class, "sec-mag-section")]'):
            h3 = sec.xpath('./h3')
            if not h3:
                # No heading means no feed title to file articles under.
                continue
            section = self._text(h3[0]).strip().capitalize()
            self.log('Found section', section)
            articles = list(self.find_articles(sec))
            if articles:
                feeds.append((section, articles))
        return feeds

    def find_articles(self, sec):
        """Yield one article dict per usable ``<article>`` child of *sec*.

        An article is skipped when it has no child with class
        ``entry-title``, no link inside that title, or an empty title
        text. Site-relative URLs are made absolute.

        Args:
            sec: lxml element of one magazine section.

        Yields:
            Dicts with the keys ``title``, ``url``, ``date`` (always
            empty) and ``description``.
        """
        for article in sec.xpath('./article'):
            h2 = article.xpath('./*[@class="entry-title"]')
            if not h2:
                continue
            a = h2[0].xpath('./a[@href]')
            if not a:
                continue
            title = self._text(a[0]).strip()
            if not title:
                continue

            url = a[0].get('href')
            if url.startswith('/'):
                url = 'http://www.time.com' + url

            desc = ''
            p = article.xpath('./*[@class="entry-content"]')
            if p:
                desc = self._text(p[0])

            self.log('\t', title, ':\n\t\t', desc)
            yield {
                'title': title,
                'url': url,
                'date': '',
                'description': desc,
            }

    def postprocess_html(self, soup, first):
        """Remove pagination widgets from a downloaded page.

        Args:
            soup: parsed page, modified in place.
            first: unused here; part of the hook's signature.

        Returns:
            The same *soup* object with every element of class
            ``artPag`` or ``pagination`` removed.
        """
        for tag in soup.findAll(attrs={'class': ['artPag', 'pagination']}):
            tag.extract()
        return soup