Read Washington Post Cartoons on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Cartoons from the Washington Post

Language: en

Requires Subscription: No, it's available as free ebook

Schedule Every morning

			  from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from datetime import date, timedelta

class WaPoCartoonsRecipe(BasicNewsRecipe):
    __license__   = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 2

    title = u'Washington Post Cartoons'
    publisher = u'Washington Post'
    category = u'News, Cartoons'
    description = u'Cartoons from the Washington Post'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    feeds = []
    feeds.append((u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/'))
    feeds.append((u'Auth', u'http://www.uclick.com/client/wpc/ta/'))
    feeds.append((u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb'))
    feeds.append((u'Carlson', u'http://www.uclick.com/client/wpc/sc/'))
    feeds.append((u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk'))
    feeds.append((u'McCoy', u'http://www.uclick.com/client/wpc/gm/'))
    feeds.append((u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/'))
    feeds.append((u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/'))
    feeds.append((u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/'))

    extra_css = '''
                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
                h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;}
                #name {margin-bottom: 0.2em}
                #copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;}
                '''

    def parse_index(self):
        index = []
        oldestDate = date.today() - timedelta(days = self.oldest_article)
        oldest = oldestDate.strftime('%Y%m%d')
        for feed in self.feeds:
            cartoons = []
            soup = self.index_to_soup(feed[1])

            cartoon = {'title': 'Current', 'date': None, 'url': feed[1], 'description' : ''}
            cartoons.append(cartoon)

            select = soup.find('select', attrs = {'name': ['url', 'dest']})
            if select:
                cartoonCandidates = []
                if select['name'] == 'url':
                    cartoonCandidates = self.cartoonCandidatesWaPo(select, oldest)
                else:
                    cartoonCandidates = self.cartoonCandidatesCreatorsCom(select, oldest)

                for cartoon in cartoonCandidates:
                    cartoons.append(cartoon)

            index.append([feed[0], cartoons])

        return index

    def preprocess_html(self, soup):
        freshSoup = self.getFreshSoup(soup)

        div = soup.find('div', attrs = {'id': 'name'})
        if div:
            freshSoup.body.append(div)
            comic = soup.find('div', attrs = {'id': 'comic_full'})

            img = comic.find('img')
            if '&' in img['src']:
                img['src'], sep, bad = img['src'].rpartition('&')

            freshSoup.body.append(comic)
            freshSoup.body.append(soup.find('div', attrs = {'id': 'copyright'}))
        else:
            span = soup.find('span', attrs = {'class': 'title'})
            if span:
                del span['class']
                span['id'] = 'name'
                span.name = 'div'
                freshSoup.body.append(span)

            img = soup.find('img', attrs = {'class': 'pic_big'})
            if img:
                td = img.parent
                if td.has_key('style'):
                    del td['style']
                td.name = 'div'
                td['id'] = 'comic_full'
                freshSoup.body.append(td)

            td = soup.find('td', attrs = {'class': 'copy'})
            if td:
                for a in td.find('a'):
                    a.extract()
                del td['class']
                td['id'] = 'copyright'
                td.name = 'div'
                freshSoup.body.append(td)

        return freshSoup

    def getFreshSoup(self, oldSoup):
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        if oldSoup.head.title:
            freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
        return freshSoup

    def cartoonCandidatesWaPo(self, select, oldest):
        opts = select.findAll('option')
        for i in range(1, len(opts)):
            url = opts[i]['value'].rstrip('/')
            dateparts = url.split('/')[-3:]
            datenum = str(dateparts[0]) + str(dateparts[1]) + str(dateparts[2])
            if datenum >= oldest:
                yield {'title': self.tag_to_string(opts[i]), 'date': None, 'url': url, 'description': ''}
            else:
                return

    def cartoonCandidatesCreatorsCom(self, select, oldest):
        monthNames = {'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05',
                      'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10',
                      'November': '11', 'December': '12'}

        opts = select.findAll('option')
        for i in range(1, len(opts)):
            if opts[i].has_key('selected'):
                continue

            dateString = self.tag_to_string(opts[i])
            rest, sep, year = dateString.rpartition(', ')
            parts = rest.split(' ')
            day = parts[2].rjust(2, '0')
            month = monthNames[parts[1]]
            datenum = str(year) + month + str(day)
            if datenum >= oldest:
                yield {'title': dateString, 'date': None, 'url': opts[i]['value'], 'description': ''}
            else:
                return