Read Cracked.com on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
America's Only HumorSite since 1958
Language: en
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
from calibre.web.feeds.news import BasicNewsRecipe
class Cracked(BasicNewsRecipe):
    """Calibre news recipe that builds an ebook from the Cracked.com RSS feed.

    Articles that the site splits over several web pages are stitched back
    into one document by ``appendPage`` (driven from ``preprocess_html``).
    """

    # --- Recipe metadata -------------------------------------------------
    title = u'Cracked.com'
    __author__ = 'UnWeave'
    language = 'en'
    description = "America's Only HumorSite since 1958"
    publisher = 'Cracked'
    category = 'comedy, lists'

    # --- Download / parsing settings -------------------------------------
    oldest_article = 3  # days
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'ascii'
    remove_javascript = True
    use_embedded_content = False

    feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS/')]

    # Metadata forwarded to the conversion step; reuses the values above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Page cleanup rules ----------------------------------------------
    remove_tags_before = dict(id='PrimaryContent')
    remove_tags_after = dict(name='div', attrs={'class': 'shareBar'})
    remove_tags = [
        dict(name='div', attrs={'class': ['social', 'FacebookLike', 'shareBar']}),
        dict(name='div', attrs={'id': ['inline-share-buttons']}),
        dict(name='span', attrs={'class': ['views', 'KonaFilter']}),
        # dict(name='img'),  # disabled: images are currently kept
    ]

    # Zero-based index of the <section> tag that holds the article content
    # on a continuation page (i.e. the 8th <section>).
    _ARTICLE_SECTION_INDEX = 7

    def appendPage(self, soup, appendTag, position):
        """Recursively pull the following pages of an article into ``appendTag``.

        Args:
            soup: Parsed page to inspect for a pagination block.
            appendTag: Tag that receives the content of the next page.
            position: Child index in ``appendTag`` at which that content is
                inserted.

        Returns:
            None. ``soup`` / ``appendTag`` are modified in place. Nothing is
            changed when ``soup`` has no pagination block.
        """
        pageNav = soup.find('nav', attrs={'class': 'PaginationContent'})
        if not pageNav:
            # Article has a single page.
            return

        # Look up the "next" link before detaching the navigation block.
        nextPage = pageNav.find('a', attrs={'class': 'next'})

        # Pagination links are useless in the merged document, so drop them
        # on every page -- including the last one, which has no "next" link.
        pageNav.extract()
        if not nextPage:
            return

        nextPageURL = nextPage.get('href')
        if not nextPageURL:
            # A "next" anchor without a target: nothing to follow.
            return

        nextPageSoup = self.index_to_soup(nextPageURL)
        sections = nextPageSoup.findAll('section')
        if len(sections) <= self._ARTICLE_SECTION_INDEX:
            # The page layout is not what this recipe expects. Keep the pages
            # collected so far instead of failing the whole article.
            self.log.warn(
                'Cracked: article section not found on %s' % nextPageURL)
            return
        nextPageContent = sections[self._ARTICLE_SECTION_INDEX]

        # Resolve the rest of the chain first: later pages are appended at
        # the end of this page's content before it is moved into appendTag.
        newPosition = len(nextPageContent.contents)
        self.appendPage(nextPageSoup, nextPageContent, newPosition)

        nextPageContent.extract()
        appendTag.insert(position, nextPageContent)

    def preprocess_html(self, soup):
        """Merge the remaining pages of a multi-page article into ``soup``.

        Args:
            soup: Parsed HTML of a downloaded article page.

        Returns:
            The same ``soup`` object, modified in place.
        """
        # Continuation pages are inserted at child index 3 of <body>.
        self.appendPage(soup, soup.body, 3)
        return soup