Read National Post on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Canadian national newspaper

Language: en

Requires Subscription: No, it's available as free ebook

Schedule Every morning

			  from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class NYTimes(BasicNewsRecipe):

    title       = 'National Post'
    __author__  = 'Krittika Goyal'
    description = 'Canadian national newspaper'
    timefmt = ' [%d %b, %Y]'
    language = 'en_CA'
    needs_subscription = False

    no_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
    remove_tags_after  = dict(name='div', attrs={'class':'npStoryTools npWidth1-6 npRight npTxtStrong'})
    remove_tags = [
       dict(name='iframe'),
       dict(name='div', attrs={'class':['story-tools', 'npStoryTools npWidth1-6 npRight npTxtStrong']}),
       #dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}),
       #dict(name='form', attrs={'onsubmit':''}),
       dict(name='ul', attrs={'class':'npTxtAlt npGroup npTxtCentre npStoryShare npTxtStrong npTxtDim'}),
    ]

   # def preprocess_html(self, soup):
        # table = soup.find('table')
        # if table is not None:
            # table.extract()
        # return soup



    #TO GET ARTICLE TOC
    def nejm_get_index(self):
            return self.index_to_soup('http://www.nationalpost.com/todays-paper/index.html')

    # To parse artice toc
    def parse_index(self):
            soup = self.nejm_get_index()

            div = soup.find(id='npContentMain')

            current_section = None
            current_articles = []
            feeds = []
            for x in div.findAll(True):
                if x.name == 'h4':
                    # Section found
                    if current_articles and current_section:
                        feeds.append((current_section, current_articles))
                    current_section = self.tag_to_string(x)
                    current_articles = []
                    self.log('\tFound section:', current_section)
                if current_section is not None and x.name == 'h5':
                    # Article found
                    title = self.tag_to_string(x)
                    a = x.find('a', href=lambda x: x and 'story' in x)
                    if a is None:
                        continue
                    url = a.get('href', False)
                    if not url or not title:
                        continue
                    #if url.startswith('story'):
                    url = 'http://www.nationalpost.com/todays-paper/'+url
                    self.log('\t\tFound article:', title)
                    self.log('\t\t\t', url)
                    current_articles.append({'title': title, 'url':url,
                        'description':'', 'date':''})

            if current_articles and current_section:
                feeds.append((current_section, current_articles))

            return feeds
    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'npContentMain'})
        ##td = heading.findParent(name='td')
        ##td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup