Read New York Review of Books on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Book reviews
Language: en
Requires Subscription: No, it's available as free ebook
Schedule Every morning
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
nybooks.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class NewYorkReviewOfBooks(BasicNewsRecipe):
title = u'New York Review of Books (no subscription)'
description = u'Book reviews'
language = 'en'
__author__ = 'Kovid Goyal'
no_stylesheets = True
no_javascript = True
keep_only_tags = [dict(id=['article-body', 'page-title'])]
remove_tags = [dict(attrs={'class':['article-tools', 'article-links',
'center advertisement']})]
preprocess_regexps = [(re.compile(r'<head>.*?</head>', re.DOTALL), lambda
m:'<head></head>')]
def print_version(self, url):
return url+'?pagination=false'
def parse_index(self):
soup = self.index_to_soup('http://www.nybooks.com/current-issue')
# Find cover
sidebar = soup.find(id='sidebar')
if sidebar is not None:
a = sidebar.find('a', href=lambda x: x and 'view-photo' in x)
if a is not None:
psoup = self.index_to_soup('http://www.nybooks.com'+a['href'])
cover = psoup.find('img', src=True)
self.cover_url = cover['src']
self.log('Found cover at:', self.cover_url)
# Find date
div = soup.find(id='page-title')
if div is not None:
h5 = div.find('h5')
if h5 is not None:
text = self.tag_to_string(h5)
date = text.partition(u'\u2022')[0].strip()
self.timefmt = u' [%s]'%date
self.log('Issue date:', date)
# Find TOC
toc = soup.find('ul', attrs={'class':'issue-article-list'})
articles = []
for li in toc.findAll('li'):
h3 = li.find('h3')
title = self.tag_to_string(h3)
author = self.tag_to_string(li.find('h4'))
title = title + u' (%s)'%author
url = 'http://www.nybooks.com'+h3.find('a', href=True)['href']
desc = ''
for p in li.findAll('p'):
desc += self.tag_to_string(p)
self.log('Found article:', title)
self.log('\t', url)
self.log('\t', desc)
articles.append({'title':title, 'url':url, 'date':'',
'description':desc})
return [('Current Issue', articles)]