Read MalaysianMirror on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

The Pulse of the Nation

Language: en

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

			  #!/usr/bin/env  python
# Module-level metadata describing the recipe's license, authorship and revision.
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__ = '1'
__date__ = '16, October 2010'
__docformat__ = 'English'



from calibre.web.feeds.news import BasicNewsRecipe

class MalaysianMirror(BasicNewsRecipe):
    """Recipe that builds an ebook from section front pages of malaysianmirror.com.

    The article index is scraped by hand in ``parse_index`` (no RSS feeds are
    used): each section front page is fetched and every
    ``<div class="contentheading">`` that contains a link becomes one article.
    """

    title = 'MalaysianMirror'
    __author__ = 'Tonythebookworm'
    description = 'The Pulse of the Nation'
    language = 'en'
    publisher = 'Tonythebookworm'
    category = 'news'
    use_embedded_content = False
    no_stylesheets = True
    # NOTE(review): calibre documents ``oldest_article`` in days; confirm that
    # 24 (rather than e.g. 1) is really the intended value.
    oldest_article = 24

    remove_javascript = True
    remove_empty_feeds = True
    conversion_options = {'linearize_tables' : True}
    extra_css = '''
                    #content_heading{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}

                    td{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}

                    #content_body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
                '''

    # Keep only the article table and drop the button bar table.
    keep_only_tags = [dict(name='table', attrs={'class':['contentpaneopen']})]
    remove_tags = [dict(name='table', attrs={'class':['buttonheading']})]

    max_articles_per_feed = 10

    # Base URL of the site; prepended to the links scraped from the section
    # pages because those links do not include the host.
    INDEX = 'http://www.malaysianmirror.com'

    def parse_index(self):
        """Return the list of ``(section title, articles)`` pairs for the issue.

        Sections for which ``make_links`` finds no articles are left out.
        """
        sections = [
            (u"Media Buzz", u"http://www.malaysianmirror.com/media-buzz-front"),
            (u"Life Style", u"http://www.malaysianmirror.com/lifestylefront"),
            (u"Features", u"http://www.malaysianmirror.com/featurefront"),
        ]
        feeds = []
        for title, url in sections:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Scrape one section front page and return its articles.

        :param url: address of the section front page to fetch.
        :return: list of dicts with the keys ``title``, ``url``,
                 ``description`` and ``date`` (the last two are always empty
                 strings), one per linked ``contentheading`` div.
        """
        current_articles = []
        soup = self.index_to_soup(url)
        for item in soup.findAll('div', attrs={'class':'contentheading'}):
            link = item.find('a')
            if link is None:
                continue
            # An anchor without an href cannot be turned into an article;
            # skip it instead of failing the whole section with a KeyError.
            href = link.get('href')
            if not href:
                continue
            if href.startswith(('http://', 'https://')):
                # Already absolute: prefixing INDEX would corrupt the URL.
                article_url = href
            else:
                article_url = self.INDEX + href
            current_articles.append({
                'title': self.tag_to_string(link),
                'url': article_url,
                'description': '',
                'date': '',
            })
        return current_articles

    def preprocess_html(self, soup):
        """Strip every inline ``style`` attribute from the page and return it."""
        for item in soup.findAll(attrs={'style':True}):
            del item['style']
        return soup