Read Sports Illustrated Columnists on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Language: en
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
#from random import randint
from urllib import quote
class SportsIllustratedColumnistsRecipe(BasicNewsRecipe):
    """Recipe that gathers the Sports Illustrated columnist RSS feeds.

    Each article URL is rewritten to its clickability print version, and the
    downloaded page is then reduced to its headline and story paragraphs.
    """

    title = u'Sports Illustrated Columnists'
    __author__ = u'kwetal'
    __license__ = u'GPL v3'
    language = 'en'
    version = 2

    # Recipe settings.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    # RSS sources found at http://sportsillustrated.cnn.com/services/rss/
    # Each entry is a (columnist name, feed URL) pair.
    feeds = [
        (u'Jon Heyman', u'http://rss.cnn.com/rss/si_jon_heyman.rss'),
        (u'Austin Murphy', u'http://rss.cnn.com/rss/si_austin_murphy.rss'),
        (u'Lars Anderson', u'http://rss.cnn.com/rss/si_lars_anderson.rss'),
        (u'Melissa Segura', u'http://rss.cnn.com/rss/si_melissa_segura.rss'),
        (u'Peter King', u'http://rss.cnn.com/rss/si_peter_king.rss'),
        (u'Scott Wraight', u'http://rss.cnn.com/rss/si_scott_wraight.rss'),
    ]

    def print_version(self, url):
        """Return the print-version URL for the article at ``url``.

        The article URL is percent-quoted and appended as the ``url`` query
        parameter of the clickability "printThis" endpoint.
        """
        # These are the endpoint and the parameters that work to get the
        # print version.
        #
        # NOTE: the site's own Javascript (function PT(), defined in the
        # file 48.js) sends a few more parameters that can be left out:
        #   title  : can be some random string
        #   random : some random number; the number of digits may matter
        #   expire : unknown what value to use
        endpoint = 'http://si.printthis.clickability.com/pt/printThis?clickMap=printThis'
        return endpoint + '&fb=Y&partnerID=2356&url=' + quote(url)

    def preprocess_html(self, soup):
        """Reduce an article page to its headline and story paragraphs.

        When ``soup`` has no ``div`` with class ``cnnstoryheadline`` it is
        treated as a table of contents and returned untouched. Otherwise a
        new document is returned whose body holds the headline's ``h1``
        (when there is one) followed by every ``p`` found under each ``td``
        with class ``cnnstorycontentarea``.
        """
        story_header = soup.find('div', attrs={'class': 'cnnstoryheadline'})
        if not story_header:
            # It's a TOC, just hand back the whole lot.
            return soup

        # It's an article: build a valid, empty content container.
        article = BeautifulSoup('<html><head></head><body></body></html>')
        container = article.find('body')

        title_tag = story_header.find('h1')
        if title_tag:
            container.append(title_tag)

        # Move the story paragraphs over, in document order.
        content_cells = soup.findAll('td', attrs={'class': 'cnnstorycontentarea'})
        for cell in content_cells:
            for paragraph in cell.findAll('p'):
                container.append(paragraph)

        return article