Read The Economist on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Print issue - paid content. The Economist is an English-language weekly news and international affairs publication. It takes an editorial stance which is supportive of free trade, globalisation, government health and education spending, as well as other, more limited forms of governmental intervention.
Language: en
Requires Subscription: Yes, requires a The Economist subscription
Schedule Every morning
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
economist.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
from collections import OrderedDict
import re
class Economist(BasicNewsRecipe):
title = 'The Economist'
language = 'en'
__author__ = "Kovid Goyal"
INDEX = 'http://www.economist.com/printedition'
description = ('Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)')
extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
oldest_article = 7.0
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
'share_inline_header', 'related-items']}),
{'class': lambda x: x and 'share-links-header' in x},
]
keep_only_tags = [dict(id='ec-article-body')]
no_stylesheets = True
preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
lambda x:'</html>')]
# economist.com has started throttling after about 60% of the total has
# downloaded with connection reset by peer (104) errors.
delay = 1
needs_subscription = False
'''
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username and self.password:
br.open('http://www.economist.com/user/login')
br.select_form(nr=1)
br['name'] = self.username
br['pass'] = self.password
res = br.submit()
raw = res.read()
if '>Log out<' not in raw:
raise ValueError('Failed to login to economist.com. '
'Check your username and password.')
return br
'''
def get_cover_url(self):
soup = self.index_to_soup('http://www.economist.com/printedition/covers')
div = soup.find('div', attrs={'class':lambda x: x and
'print-cover-links' in x})
a = div.find('a', href=True)
url = a.get('href')
if url.startswith('/'):
url = 'http://www.economist.com' + url
soup = self.index_to_soup(url)
div = soup.find('div', attrs={'class':'cover-content'})
img = div.find('img', src=True)
return img.get('src')
def parse_index(self):
return self.economist_parse_index()
def economist_parse_index(self):
soup = self.index_to_soup(self.INDEX)
div = soup.find('div', attrs={'class':'issue-image'})
if div is not None:
img = div.find('img', src=True)
if img is not None:
self.cover_url = img['src']
feeds = OrderedDict()
for section in soup.findAll(attrs={'class':lambda x: x and 'section' in
x}):
h4 = section.find('h4')
if h4 is None:
continue
section_title = self.tag_to_string(h4).strip()
if not section_title:
continue
self.log('Found section: %s'%section_title)
articles = []
subsection = ''
for node in section.findAll(attrs={'class':'article'}):
subsec = node.findPreviousSibling('h5')
if subsec is not None:
subsection = self.tag_to_string(subsec)
prefix = (subsection+': ') if subsection else ''
a = node.find('a', href=True)
if a is not None:
url = a['href']
if url.startswith('/'): url = 'http://www.economist.com'+url
url += '/print'
title = self.tag_to_string(a)
if title:
title = prefix + title
self.log('\tFound article:', title)
articles.append({'title':title, 'url':url,
'description':'', 'date':''})
if articles:
if section_title not in feeds:
feeds[section_title] = []
feeds[section_title] += articles
ans = [(key, val) for key, val in feeds.iteritems()]
if not ans:
raise Exception('Could not find any articles, either the '
'economist.com server is having trouble and you should '
'try later or the website format has changed and the '
'recipe needs to be updated.')
return ans
def eco_find_image_tables(self, soup):
for x in soup.findAll('table', align=['right', 'center']):
if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
yield x
def postprocess_html(self, soup, first):
body = soup.find('body')
for name, val in body.attrs:
del body[name]
for table in list(self.eco_find_image_tables(soup)):
caption = table.find('font')
img = table.find('img')
div = Tag(soup, 'div')
div['style'] = 'text-align:left;font-size:70%'
ns = NavigableString(self.tag_to_string(caption))
div.insert(0, ns)
div.insert(1, Tag(soup, 'br'))
del img['width']
del img['height']
img.extract()
div.insert(2, img)
table.replaceWith(div)
return soup