Read The Economist on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Print issue - The Economist is an English-language weekly news and international affairs publication. It takes an editorial stance which is supportive of free trade, globalisation, government health and education spending, as well as other, more limited forms of governmental intervention.

Language: en

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

			  #!/usr/bin/env  python

__license__   = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
economist.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
from collections import OrderedDict

import time, re

class Economist(BasicNewsRecipe):
    '''
    Recipe that builds an ebook from the print edition of The Economist.

    The list of sections and articles is scraped from the print-edition
    index page (INDEX); each article is then fetched through its '/print'
    URL and reduced to the element with id 'ec-article-body'.
    '''

    title = 'The Economist'
    language = 'en'

    __author__ = "Kovid Goyal"
    # Page listing the sections and articles of the current print edition.
    INDEX = 'http://www.economist.com/printedition'
    description = ('Global news and current affairs from a European'
            ' perspective. Best downloaded on Friday mornings (GMT)')
    extra_css      = '.headline {font-size: x-large;} \n h2 { font-size: small;  } \n h1 { font-size: medium;  }'
    oldest_article = 7.0
    remove_tags = [
            dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
            dict(attrs={'class':['dblClkTrk', 'ec-article-info',
                'share_inline_header', 'related-items']}),
            {'class': lambda x: x and 'share-links-header' in x},
    ]
    keep_only_tags = [dict(id='ec-article-body')]
    needs_subscription = False
    no_stylesheets = True
    # Drop everything that follows the closing </html> tag.
    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
        lambda x:'</html>')]

    # economist.com has started throttling after about 60% of the total has
    # downloaded with connection reset by peer (104) errors.
    delay = 1

    def _eco_absolute_url(self, url):
        '''Prefix site-relative URLs (starting with '/') with the site root.'''
        if url.startswith('/'):
            url = 'http://www.economist.com' + url
        return url

    def get_cover_url(self):
        '''
        Return the URL of the cover image of the current issue.

        The covers page is followed to the first linked cover page and the
        image inside its 'cover-content' div is returned. If any of the
        expected elements is missing (for example after a site redesign),
        a warning is logged and the value of ``self.cover_url`` is returned
        instead (economist_parse_index stores one when the index page shows
        an issue image); that value may be None.
        '''
        fallback = getattr(self, 'cover_url', None)

        soup = self.index_to_soup('http://www.economist.com/printedition/covers')
        links = soup.find('div', attrs={'class':lambda x: x and
            'print-cover-links' in x})
        anchor = links.find('a', href=True) if links is not None else None
        if anchor is None:
            self.log.warn('Could not find the cover link on the covers page')
            return fallback

        soup = self.index_to_soup(self._eco_absolute_url(anchor.get('href')))
        content = soup.find('div', attrs={'class':'cover-content'})
        img = content.find('img', src=True) if content is not None else None
        if img is None:
            self.log.warn('Could not find the cover image on the cover page')
            return fallback
        return img.get('src')

    def parse_index(self):
        '''
        Return the issue contents as a list of (section title, articles).

        If the first attempt fails, a warning is logged and the index is
        parsed once more after a 30 second pause. An exception raised by the
        second attempt propagates to the caller.
        '''
        try:
            return self.economist_parse_index()
        except Exception:
            # Previously a stray `raise` here made the retry unreachable.
            self.log.warn(
                'Initial attempt to parse index failed, retrying in 30 seconds')
            time.sleep(30)
            return self.economist_parse_index()

    def economist_parse_index(self):
        '''
        Scrape INDEX and return a list of (section title, articles) pairs.

        Each article is a dict with the keys 'title', 'url', 'description'
        and 'date' (the last two are always empty strings). Sections keep the
        order in which they are first seen on the page; articles of sections
        sharing a title are merged. As a side effect ``self.cover_url`` is
        set when the page contains an 'issue-image' div with an image.

        Raises Exception when no article at all could be found.
        '''
        soup = self.index_to_soup(self.INDEX)
        div = soup.find('div', attrs={'class':'issue-image'})
        if div is not None:
            img = div.find('img', src=True)
            if img is not None:
                self.cover_url = img['src']

        feeds = OrderedDict()
        for section in soup.findAll(attrs={'class':lambda x: x and 'section' in
            x}):
            h4 = section.find('h4')
            if h4 is None:
                continue
            section_title = self.tag_to_string(h4).strip()
            if not section_title:
                continue
            self.log('Found section: %s'%section_title)

            articles = []
            # The most recent <h5> heading seen is used as a title prefix; it
            # carries over to following articles that have no heading sibling.
            subsection = ''
            for node in section.findAll(attrs={'class':'article'}):
                subsec = node.findPreviousSibling('h5')
                if subsec is not None:
                    subsection = self.tag_to_string(subsec)
                prefix = (subsection+': ') if subsection else ''
                a = node.find('a', href=True)
                if a is None:
                    continue
                url = self._eco_absolute_url(a['href']) + '/print'
                title = self.tag_to_string(a)
                if not title:
                    continue
                title = prefix + title
                self.log('\tFound article:', title)
                articles.append({'title':title, 'url':url,
                    'description':'', 'date':''})

            if articles:
                feeds.setdefault(section_title, []).extend(articles)

        # items() (rather than the Python 2 only iteritems()) works on both
        # Python 2 and Python 3.
        ans = list(feeds.items())
        if not ans:
            raise Exception('Could not find any articles, either the '
                    'economist.com server is having trouble and you should '
                    'try later or the website format has changed and the '
                    'recipe needs to be updated.')
        return ans

    def eco_find_image_tables(self, soup):
        '''
        Yield the tables that look like captioned images: aligned 'right' or
        'center', holding exactly one <img> and one or two <font> tags.
        '''
        for x in soup.findAll('table', align=['right', 'center']):
            if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
                yield x

    def postprocess_html(self, soup, first):
        '''
        Clean up a downloaded article and return the modified soup.

        All attributes are removed from <body> (when there is one), and every
        table found by eco_find_image_tables is replaced by a <div> holding
        the caption text, a line break and the image without its width and
        height attributes.
        '''
        body = soup.find('body')
        if body is not None:
            # Iterate over a snapshot of the attribute names: deleting from
            # the collection that is being iterated skips entries.
            for name in [attr[0] for attr in body.attrs]:
                del body[name]

        # The tables are collected first because the loop rewrites the tree.
        for table in list(self.eco_find_image_tables(soup)):
            caption = table.find('font')
            img = table.find('img')
            div = Tag(soup, 'div')
            div['style'] = 'text-align:left;font-size:70%'
            ns = NavigableString(self.tag_to_string(caption))
            div.insert(0, ns)
            div.insert(1, Tag(soup, 'br'))
            del img['width']
            del img['height']
            img.extract()
            div.insert(2, img)
            table.replaceWith(div)
        return soup

# The string literal below is never executed. It holds the source of an
# alternative version of this recipe ('The Economist (RSS)') that builds the
# article list from the feedburner full_print_edition feed instead of the
# print-edition index page.
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.threadpool import ThreadPool, makeRequests
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
import time, string, re
from datetime import datetime
from lxml import html

class Economist(BasicNewsRecipe):

    title = 'The Economist (RSS)'
    language = 'en'

    __author__ = "Kovid Goyal"
    description = ('Global news and current affairs from a European'
            ' perspective. Best downloaded on Friday mornings (GMT).'
            ' Much slower than the print edition based version.')
    extra_css      = '.headline {font-size: x-large;} \n h2 { font-size: small;  } \n h1 { font-size: medium;  }'
    oldest_article = 7.0
    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
    remove_tags = [
            dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
            dict(attrs={'class':['dblClkTrk', 'ec-article-info',
                'share_inline_header', 'related-items']}),
            {'class': lambda x: x and 'share-links-header' in x},
    ]
    keep_only_tags = [dict(id='ec-article-body')]
    no_stylesheets = True
    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL),
        lambda x:'</html>')]

    def parse_index(self):
        from calibre.web.feeds.feedparser import parse
        if self.test:
            self.oldest_article = 14.0
        raw = self.index_to_soup(
                'http://feeds.feedburner.com/economist/full_print_edition',
                raw=True)
        entries = parse(raw).entries
        pool = ThreadPool(10)
        self.feed_dict = {}
        requests = []
        for i, item in enumerate(entries):
            title       = item.get('title', _('Untitled article'))
            published = item.date_parsed
            if not published:
                published = time.gmtime()
            utctime = datetime(*published[:6])
            delta = datetime.utcnow() - utctime
            if delta.days*24*3600 + delta.seconds > 24*3600*self.oldest_article:
                self.log.debug('Skipping article %s as it is too old.'%title)
                continue
            link        = item.get('link', None)
            description = item.get('description', '')
            author      = item.get('author', '')

            requests.append([i, link, title, description, author, published])
        if self.test:
            requests = requests[:4]
        requests = makeRequests(self.process_eco_feed_article, requests, self.eco_article_found,
                self.eco_article_failed)
        for r in requests: pool.putRequest(r)
        pool.wait()

        return self.eco_sort_sections([(t, a) for t, a in
            self.feed_dict.items()])

    def eco_sort_sections(self, feeds):
        if not feeds:
            raise ValueError('No new articles found')
        order = {
            'The World This Week': 1,
            'Leaders': 2,
            'Letters': 3,
            'Briefing': 4,
            'Business': 5,
            'Finance And Economics': 6,
            'Science & Technology': 7,
            'Books & Arts': 8,
            'International': 9,
            'United States': 10,
            'Asia': 11,
            'Europe': 12,
            'The Americas': 13,
            'Middle East & Africa': 14,
            'Britain': 15,
            'Obituary': 16,
        }
        return sorted(feeds, cmp=lambda x,y:cmp(order.get(x[0], 100),
            order.get(y[0], 100)))

    def process_eco_feed_article(self, args):
        from calibre import browser
        i, url, title, description, author, published = args
        br = browser()
        ret = br.open(url)
        raw = ret.read()
        url = br.geturl().split('?')[0]+'/print'
        root = html.fromstring(raw)
        matches = root.xpath('//*[@class = "ec-article-info"]')
        feedtitle = 'Miscellaneous'
        if matches:
            feedtitle = string.capwords(html.tostring(matches[-1], method='text',
                    encoding=unicode).split('|')[-1].strip())
        return (i, feedtitle, url, title, description, author, published)

    def eco_article_found(self, req, result):
        from calibre.web.feeds import Article
        i, feedtitle, link, title, description, author, published = result
        self.log('Found print version for article:', title, 'in', feedtitle,
                'at', link)

        a = Article(i, title, link, author, description, published, '')

        article = dict(title=a.title, description=a.text_summary,
            date=time.strftime(self.timefmt, a.date), author=a.author, url=a.url)
        if feedtitle not in self.feed_dict:
            self.feed_dict[feedtitle] = []
        self.feed_dict[feedtitle].append(article)

    def eco_article_failed(self, req, tb):
        self.log.error('Failed to download %s with error:'%req.args[0][2])
        self.log.debug(tb)

    def eco_find_image_tables(self, soup):
        for x in soup.findAll('table', align=['right', 'center']):
            if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
                yield x

    def postprocess_html(self, soup, first):
        body = soup.find('body')
        for name, val in body.attrs:
            del body[name]
        for table in list(self.eco_find_image_tables(soup)):
            caption = table.find('font')
            img = table.find('img')
            div = Tag(soup, 'div')
            div['style'] = 'text-align:left;font-size:70%'
            ns = NavigableString(self.tag_to_string(caption))
            div.insert(0, ns)
            div.insert(1, Tag(soup, 'br'))
            img.extract()
            del img['width']
            del img['height']
            div.insert(2, img)
            table.replaceWith(div)
        return soup
'''