Read MalaysianMirror on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
The Pulse of the Nation
Language: en
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
#!/usr/bin/env python
# Module-level metadata for this recipe file: license, author, copyright
# notice, version, date and docformat strings.
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__ = '1'
__date__ = '16, October 2010'
__docformat__ = 'English'
try:
    from urllib.parse import urljoin
except ImportError:  # Python 2
    from urlparse import urljoin

from calibre.web.feeds.news import BasicNewsRecipe
class MalaysianMirror(BasicNewsRecipe):
    """Recipe that collects MalaysianMirror articles from section pages.

    The article list is scraped from a fixed set of section front pages
    (see ``parse_index``) rather than taken from a feed list.
    """

    title = 'MalaysianMirror'
    __author__ = 'Tonythebookworm'
    description = 'The Pulse of the Nation'
    language = 'en'
    publisher = 'Tonythebookworm'
    category = 'news'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    max_articles_per_feed = 10
    remove_javascript = True
    remove_empty_feeds = True
    conversion_options = {'linearize_tables': True}
    extra_css = '''
#content_heading{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
td{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
#content_body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # Only the article table is kept; the table holding the page buttons
    # is removed from it.
    keep_only_tags = [dict(name='table', attrs={'class': ['contentpaneopen']})]
    remove_tags = [dict(name='table', attrs={'class': ['buttonheading']})]

    # Root URL of the site. Article links on the section pages do not
    # include it, so it is used as the base when building article URLs.
    INDEX = 'http://www.malaysianmirror.com'

    def parse_index(self):
        """Build the list of feeds from the section front pages.

        :return: list of ``(section title, articles)`` tuples, where
            ``articles`` is the list produced by ``make_links``. Sections
            for which no article was found are left out.
        """
        sections = [
            (u"Media Buzz", u"http://www.malaysianmirror.com/media-buzz-front"),
            (u"Life Style", u"http://www.malaysianmirror.com/lifestylefront"),
            (u"Features", u"http://www.malaysianmirror.com/featurefront"),
        ]
        feeds = []
        for title, url in sections:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Collect the articles linked from one section front page.

        :param url: address of the section page to scrape.
        :return: list of dicts with ``title``, ``url``, ``description`` and
            ``date`` keys, one per ``contentheading`` div that contains a
            link with an ``href``.
        """
        current_articles = []
        soup = self.index_to_soup(url)
        for item in soup.findAll('div', attrs={'class': 'contentheading'}):
            link = item.find('a')
            href = link.get('href') if link else None
            if not href:
                # Heading without a usable link: there is nothing to fetch.
                continue
            current_articles.append({
                'title': self.tag_to_string(link),
                # urljoin resolves site-relative links against INDEX and
                # leaves links that are already absolute untouched.
                'url': urljoin(self.INDEX, href),
                'description': '',
                'date': '',
            })
        return current_articles

    def preprocess_html(self, soup):
        """Remove the inline ``style`` attribute from every tag that has one.

        :param soup: parsed article page.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(attrs={'style': True}):
            del item['style']
        return soup