Read Washington Post Cartoons on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Cartoons from the Washington Post
Language: en
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from datetime import date, timedelta
class WaPoCartoonsRecipe(BasicNewsRecipe):
    """Recipe that collects the cartoons of the cartoonists listed in ``feeds``.

    The entries in ``feeds`` are fetched as HTML pages (not parsed as RSS):
    ``parse_index`` builds the article list from each page's ``<select>``
    element, and ``preprocess_html`` reduces every downloaded page to the
    cartoon's name, image and copyright notice.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 2

    title = u'Washington Post Cartoons'
    publisher = u'Washington Post'
    category = u'News, Cartoons'
    description = u'Cartoons from the Washington Post'

    # parse_index leaves out cartoons dated more than this many days ago.
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # (section title, page URL) pairs; one section per cartoonist.
    feeds = [
        (u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/'),
        (u'Auth', u'http://www.uclick.com/client/wpc/ta/'),
        (u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb'),
        (u'Carlson', u'http://www.uclick.com/client/wpc/sc/'),
        (u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk'),
        (u'McCoy', u'http://www.uclick.com/client/wpc/gm/'),
        (u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/'),
        (u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/'),
        (u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/'),
    ]

    extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;}
#name {margin-bottom: 0.2em}
#copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;}
'''

    # English month name -> two-digit month number, used to build sortable
    # 'YYYYMMDD' strings from the option labels of a 'dest' select.
    _MONTH_NUMBERS = {
        'January': '01', 'February': '02', 'March': '03', 'April': '04',
        'May': '05', 'June': '06', 'July': '07', 'August': '08',
        'September': '09', 'October': '10', 'November': '11', 'December': '12',
    }

    def parse_index(self):
        """Build the article index, one section per entry in ``feeds``.

        Every section starts with a 'Current' article that points at the feed
        page itself. When that page contains a ``<select>`` named ``url`` or
        ``dest``, its options contribute further articles that are no older
        than ``oldest_article`` days.

        Returns:
            A list of ``[section title, list of article dicts]`` pairs. Each
            article dict has the keys 'title', 'date', 'url' and 'description'.
        """
        cutoff = date.today() - timedelta(days=self.oldest_article)
        # 'YYYYMMDD' strings sort chronologically, so plain string comparison
        # is enough in the candidate generators.
        oldest = cutoff.strftime('%Y%m%d')

        index = []
        for feed in self.feeds:
            section, url = feed[0], feed[1]
            soup = self.index_to_soup(url)
            cartoons = [{'title': 'Current', 'date': None, 'url': url, 'description': ''}]

            select = soup.find('select', attrs={'name': ['url', 'dest']})
            if select is not None:
                # The name of the select decides which option format to parse.
                if select['name'] == 'url':
                    candidates = self.cartoonCandidatesWaPo(select, oldest)
                else:
                    candidates = self.cartoonCandidatesCreatorsCom(select, oldest)
                cartoons.extend(candidates)

            index.append([section, cartoons])

        return index

    def preprocess_html(self, soup):
        """Replace a downloaded page with a minimal document.

        Two page layouts are recognised. Pages containing ``div#name`` keep
        that div plus ``div#comic_full`` and ``div#copyright``. All other
        pages are searched for ``span.title``, ``img.pic_big`` (its parent is
        kept) and ``td.copy``; these are renamed to ``div`` elements with the
        ids ``name``, ``comic_full`` and ``copyright`` so both layouts end up
        with the same ids that ``extra_css`` targets.

        Elements that are absent from the page are simply left out.

        Args:
            soup: The parsed page as downloaded.

        Returns:
            A new soup holding only the retained elements.
        """
        freshSoup = self.getFreshSoup(soup)
        body = freshSoup.body

        name = soup.find('div', attrs={'id': 'name'})
        if name is not None:
            body.append(name)

            comic = soup.find('div', attrs={'id': 'comic_full'})
            if comic is not None:
                img = comic.find('img')
                src = img.get('src') if img is not None else None
                if src and '&' in src:
                    # Keep only the part of the image URL before the last '&'.
                    img['src'] = src.rpartition('&')[0]
                body.append(comic)

            notice = soup.find('div', attrs={'id': 'copyright'})
            if notice is not None:
                body.append(notice)

            return freshSoup

        heading = soup.find('span', attrs={'class': 'title'})
        if heading is not None:
            del heading['class']
            heading['id'] = 'name'
            heading.name = 'div'
            body.append(heading)

        img = soup.find('img', attrs={'class': 'pic_big'})
        if img is not None:
            holder = img.parent
            # .get() instead of has_key(): the latter is the legacy
            # BeautifulSoup 3 spelling and is deprecated in BeautifulSoup 4.
            if holder.get('style') is not None:
                del holder['style']
            holder.name = 'div'
            holder['id'] = 'comic_full'
            body.append(holder)

        notice = soup.find('td', attrs={'class': 'copy'})
        if notice is not None:
            # findAll, not find: find() returns a single tag (or None), and
            # looping over that would walk the first link's children instead
            # of removing every link from the notice.
            for anchor in notice.findAll('a'):
                anchor.extract()
            del notice['class']
            notice['id'] = 'copyright'
            notice.name = 'div'
            body.append(notice)

        return freshSoup

    def getFreshSoup(self, oldSoup):
        """Return an otherwise empty HTML document carrying ``oldSoup``'s title.

        Args:
            oldSoup: The soup whose ``<title>`` text should be copied.

        Returns:
            A new soup with an empty ``<body>``; its ``<title>`` holds the old
            title text when ``oldSoup`` has one.
        """
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        head = oldSoup.head
        if head is not None and head.title:
            freshSoup.head.title.append(self.tag_to_string(head.title))

        return freshSoup

    def cartoonCandidatesWaPo(self, select, oldest):
        """Yield article dicts for the options of a select named ``url``.

        The last three path segments of each option's value are concatenated
        and compared, as a string, with ``oldest``. Iteration stops at the
        first option that compares lower, so the options are assumed to be
        listed newest first (NOTE(review): confirm against the live page).

        Args:
            select: The ``<select>`` tag whose options are inspected.
            oldest: Cutoff as a 'YYYYMMDD' string.

        Yields:
            Dicts with the keys 'title', 'date', 'url' and 'description'.
        """
        # The first option is never considered a cartoon.
        for opt in select.findAll('option')[1:]:
            value = opt.get('value')
            if not value:
                continue

            url = value.rstrip('/')
            dateparts = url.split('/')[-3:]
            if len(dateparts) < 3:
                # Too short to end in three date segments; skip it rather
                # than abort the whole index.
                continue

            datenum = ''.join(dateparts)
            if datenum < oldest:
                return

            yield {'title': self.tag_to_string(opt), 'date': None, 'url': url, 'description': ''}

    def cartoonCandidatesCreatorsCom(self, select, oldest):
        """Yield article dicts for the options of a select named ``dest``.

        Each option's label is split at its last ', ' into a leading part and
        a year; the second and third space-separated words of the leading part
        are read as an English month name and the day. Options whose label
        does not have that shape, or that have no ``value``, are skipped.
        Iteration stops at the first option dated before ``oldest``, so the
        options are assumed to be listed newest first (NOTE(review): confirm
        against the live page).

        Args:
            select: The ``<select>`` tag whose options are inspected.
            oldest: Cutoff as a 'YYYYMMDD' string.

        Yields:
            Dicts with the keys 'title', 'date', 'url' and 'description'.
        """
        # The first option is never considered a cartoon.
        for opt in select.findAll('option')[1:]:
            # The pre-selected option is skipped; presumably it is the page
            # parse_index already lists as 'Current'.
            if opt.get('selected') is not None:
                continue

            dateString = self.tag_to_string(opt)
            rest, _sep, year = dateString.rpartition(', ')
            parts = rest.split(' ')
            if len(parts) < 3 or parts[1] not in self._MONTH_NUMBERS:
                continue

            url = opt.get('value')
            if not url:
                continue

            day = parts[2].rjust(2, '0')
            datenum = str(year) + self._MONTH_NUMBERS[parts[1]] + day
            if datenum < oldest:
                return

            yield {'title': dateString, 'date': None, 'url': url, 'description': ''}