Read Windsor Star on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

News from Windsor, ON

Language: en

Requires Subscription: No, it's available as free ebook

Schedule Every morning

			  #!/usr/bin/env  python

__license__   = 'GPL v3'

'''
www.canada.com
'''

from calibre.web.feeds.recipes import BasicNewsRecipe


class CanWestPaper(BasicNewsRecipe):

    # un-comment the following three lines for the Windsor Star
    title = u'Windsor Star'
    url_prefix = 'http://www.windsorstar.com'
    description = u'News from Windsor, ON'

    # un-comment the following three lines for the Ottawa Citizen
    #title = u'Ottawa Citizen'
    #url_prefix = 'http://www.ottawacitizen.com'
    #description = u'News from Ottawa, ON'

    # un-comment the following three lines for the Montreal Gazette
    #title = u'Montreal Gazette'
    #url_prefix = 'http://www.montrealgazette.com'
    #description = u'News from Montreal, QC'


    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp {  font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large;  font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

    def preprocess_html(self,soup):
        #delete iempty id attributes--they screw up the TOC for unknow reasons
        divtags = soup.findAll('div',attrs={'id':''})
        if divtags:
            for div in divtags:
                del(div['id'])
        return soup


    def parse_index(self):
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        key = 'News'
        ans = ['News']

        # Find each instance of class="sectiontitle", class="featurecontent"
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
                #self.log(" div class = %s" % divtag['class'])
                if divtag['class'].startswith('section_title'):
                    # div contains section title
                    if not divtag.h3:
                        continue
                    key = self.tag_to_string(divtag.h3,False)
                    ans.append(key)
                    self.log("Section name %s" % key)
                    continue
                # div contains article data
                h1tag = divtag.find('h1')
                if not h1tag:
                    continue
                atag = h1tag.find('a',href=True)
                if not atag:
                    continue
                url = self.url_prefix+'/news/todays-paper/'+atag['href']
                #self.log("Section %s" % key)
                #self.log("url %s" % url)
                title = self.tag_to_string(atag,False)
                #self.log("title %s" % title)
                pubdate = ''
                description = ''
                ptag = divtag.find('p');
                if ptag:
                    description = self.tag_to_string(ptag,False)
                    #self.log("description %s" % description)
                author = ''
                autag = divtag.find('h4')
                if autag:
                    author = self.tag_to_string(autag,False)
                    #self.log("author %s" % author)
                if not articles.has_key(key):
                    articles[key] = []
                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans