Read Vancouver Province on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

News from Vancouver, BC

Language: en

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

			  #!/usr/bin/env  python

__license__   = 'GPL v3'

'''
www.canada.com
'''

from calibre.web.feeds.recipes import BasicNewsRecipe


class CanWestPaper(BasicNewsRecipe):
    """Recipe that builds an ebook from a newspaper's "today's paper" index.

    The newspaper is selected by the ``title``, ``url_prefix`` and
    ``description`` attributes below.  Exactly one group of those three
    attributes should be active; the others are kept as comments so the
    recipe can be switched to a different paper by hand.
    """

    # Active configuration: the Vancouver Province.  To build another paper,
    # comment these three lines out and un-comment one of the groups below.
    title = u'Vancouver Province'
    url_prefix = 'http://www.theprovince.com'
    description = u'News from Vancouver, BC'

    # un-comment the following three lines for the Vancouver Sun
    #title = u'Vancouver Sun'
    #url_prefix = 'http://www.vancouversun.com'
    #description = u'News from Vancouver, BC'

    # un-comment the following three lines for the Edmonton Journal
    #title = u'Edmonton Journal'
    #url_prefix = 'http://www.edmontonjournal.com'
    #description = u'News from Edmonton, AB'

    # un-comment the following three lines for the Calgary Herald
    #title = u'Calgary Herald'
    #url_prefix = 'http://www.calgaryherald.com'
    #description = u'News from Calgary, AB'

    # un-comment the following three lines for the Regina Leader-Post
    #title = u'Regina Leader-Post'
    #url_prefix = 'http://www.leaderpost.com'
    #description = u'News from Regina, SK'

    # un-comment the following three lines for the Saskatoon Star-Phoenix
    #title = u'Saskatoon Star-Phoenix'
    #url_prefix = 'http://www.thestarphoenix.com'
    #description = u'News from Saskatoon, SK'

    # un-comment the following three lines for the Windsor Star
    #title = u'Windsor Star'
    #url_prefix = 'http://www.windsorstar.com'
    #description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''

    # Only the story header and story body are kept from each article page.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'storyheader'}),
        dict(name='div', attrs={'id': 'storycontent'}),
    ]

    # Page furniture (comments, navigation, sharing tools, ...) stripped from
    # whatever survives keep_only_tags.
    remove_tags = [
        {'class': 'comments'},
        dict(name='div', attrs={'class': 'navbar'}),
        dict(name='div', attrs={'class': 'morelinks'}),
        dict(name='div', attrs={'class': 'viewmore'}),
        dict(name='li', attrs={'class': 'email'}),
        dict(name='div', attrs={'class': 'story_tool_hr'}),
        dict(name='div', attrs={'class': 'clear'}),
        dict(name='div', attrs={'class': 'story_tool'}),
        dict(name='div', attrs={'class': 'copyright'}),
        dict(name='div', attrs={'class': 'rule_grey_solid'}),
        dict(name='li', attrs={'class': 'print'}),
        dict(name='li', attrs={'class': 'share'}),
        dict(name='ul', attrs={'class': 'bullet'}),
    ]

    def preprocess_html(self, soup):
        """Strip empty ``id`` attributes from every ``<div>`` and return soup.

        Empty id attributes screw up the generated TOC (for unknown reasons),
        so they are removed before the article is processed further.
        """
        for div in soup.findAll('div', attrs={'id': ''}):
            del div['id']
        return soup

    @staticmethod
    def _is_section_title(divtag):
        """Return True if one of divtag's CSS classes starts with 'section_title'.

        Depending on the Beautiful Soup version the ``class`` attribute is
        either a single string or a list of class names; both are accepted.
        """
        classes = divtag.get('class') or ''
        if isinstance(classes, (list, tuple)):
            return any(name.startswith('section_title') for name in classes)
        return classes.startswith('section_title')

    def parse_index(self):
        """Build the list of sections and articles from the index page.

        Fetches ``<url_prefix>/news/todays-paper/index.html`` and walks its
        ``section_title02`` / ``featurecontent`` divs in document order.  A
        section-title div switches the current section; every other matched
        div contributes one article to the current section.  Articles that
        appear before the first section title are filed under 'News'.

        Returns a list of ``(section_title, articles)`` tuples in order of
        first appearance, where each article is a dict with the keys
        ``title``, ``url``, ``date``, ``description``, ``author`` and
        ``content``.  Sections without any article are omitted.
        """
        soup = self.index_to_soup(self.url_prefix + '/news/todays-paper/index.html')

        articles = {}
        key = 'News'
        sections = ['News']

        # Find each instance of class="section_title02", class="featurecontent"
        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
            if self._is_section_title(divtag):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3, False)
                # A title that shows up more than once must not produce a
                # duplicate section in the result.
                if key not in sections:
                    sections.append(key)
                self.log("Section name %s" % key)
                continue

            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a', href=True)
            if not atag:
                continue
            url = self.url_prefix + '/news/todays-paper/' + atag['href']
            title = self.tag_to_string(atag, False)

            # The index page offers no publication date; leave it empty.
            pubdate = ''

            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag, False)

            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag, False)

            articles.setdefault(key, []).append(dict(
                title=title, url=url, date=pubdate,
                description=description, author=author, content=''))

        return [(section, articles[section])
                for section in sections if section in articles]