Read Dawn on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Leading English Newspaper of Pakistan covering national & international news

Language: en

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

			  from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class DawnRecipe(BasicNewsRecipe):
    """Calibre news recipe for the Dawn newspaper.

    Articles are taken from the RSS feeds listed in ``feeds``, fetched via
    the print-friendly page design (see ``print_version``) and reduced to
    the headline spans plus the ``news_story`` table cells (see
    ``preprocess_html``).
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_PK'
    version = 1

    title = u'Dawn'
    publisher = u'Dawn Media Group'
    category = u'News, Pakistan'
    description = u'Leading English Newspaper of Pakistan covering national & international news'

    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 2
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Feeds from http://www.dawn.com/wps/wcm/connect/dawn-content-library/dawn/services/rss
    feeds = [
        (u'Latest News', u'http://feedproxy.google.com/Dawn-All-News'),
        (u'Pakistan News', u'http://feeds2.feedburner.com/dawn/news/pakistan'),
        (u'World News', u'http://feeds2.feedburner.com/dawn/news/world'),
        (u'Business News', u'http://feeds2.feedburner.com/dawn/news/business'),
        (u'Sport News', u'http://feeds2.feedburner.com/dawn/news/sport'),
        (u'Cricket News', u'http://feeds2.feedburner.com/dawn/news/cricket'),
        (u'Sci-tech News', u'http://feeds2.feedburner.com/dawn/news/technology'),
        (u'Entertainment News', u'http://feeds2.feedburner.com/dawn/news/entertainment'),
        (u'Columnists', u'http://feeds2.feedburner.com/dawn/news/columnists'),
    ]

    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
                          'publisher': publisher}

    extra_css = '''
                body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
                center {font-size: xx-small; color: #666666;}
                strong {font-size: small; font-weight: bold;}
                span.news_headline {font-size: xx-large; font-weight: bold; margin: 0em; padding: 0em}
                span.news_byline {font-size: x-small; color: #696969; margin-top: 1em;}
                '''

    def print_version(self, url):
        """Return the URL of the print-friendly rendering of an article.

        The ``pagedesign`` parameter is joined with ``&`` when *url* already
        carries a query string, and with ``?`` otherwise, so the result is
        always a well-formed URL.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'pagedesign=Dawn_PrintlyFriendlyPage'

    def preprocess_html(self, soup):
        """Replace the page body with the headline spans and story cells.

        The new body holds, in order, the first ``page_title``,
        ``news_headline`` and ``news_byline`` spans that exist, followed by
        every ``news_story`` cell of the story table (each renamed to a
        ``div``).

        The soup is always returned. When the story table cannot be found
        the page is handed back unmodified instead of returning ``None``.
        """
        # Locate the story table first: if the page does not have the expected
        # layout nothing is moved out of the soup and it is returned intact.
        # NOTE(review): the id value contains a space; confirm it against the
        # markup of the print-friendly page.
        table = soup.find('table', attrs={'id': 'body table'})
        if table is None:
            return soup

        new_body = Tag(soup, 'body')

        for css_class in ('page_title', 'news_headline', 'news_byline'):
            span = soup.find('span', attrs={'class': css_class})
            if span is None:
                continue

            # They like their <br> tags; I don't: does not work well on small screens.
            if span['class'] == 'news_byline':
                for br in span.findAll('br'):
                    br.extract()

            new_body.append(span)

        for td in table.findAll('td', attrs={'class': 'news_story'}):
            for tag in td.findAll(True):
                is_banner = tag.has_key('id') and tag['id'] == 'banner-img_slide'
                # Removal is checked before style stripping so that a script
                # tag carrying a style attribute is still dropped.
                if is_banner or tag.name == 'script':
                    tag.extract()
                elif tag.has_key('style'):
                    del tag['style']

            # They like their <br> tags; I don't: does not work well on small screens.
            center = td.find('center')
            if center is not None:
                for br in center.findNextSiblings('br'):
                    br.extract()
                for br in center.findPreviousSiblings('br'):
                    br.extract()

            for attr in ('align', 'valign'):
                if td.has_key(attr):
                    del td[attr]

            # A table cell makes no sense outside its table; turn it into a div.
            td.name = 'div'
            new_body.append(td)

        soup.body.replaceWith(new_body)

        return soup