Read Common Dreams on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Progressive news and views

Language: en

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

#!/usr/bin/env python
##
## Title:        Common Dreams
##
## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html

# Feb 2012: Cleaned up the output to have only the main article

# License under which this recipe is distributed.
__license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
# NOTE: this string literal follows an assignment, so Python does not treat
# it as the module docstring; it is a no-op expression that simply names the
# site this recipe targets.
'''
commondreams.org
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class CommonDreams(BasicNewsRecipe):
    """calibre news recipe for Common Dreams (commondreams.org).

    Reads the four RSS feeds listed in ``feeds``, requests the ``print``
    variant of every article URL and keeps only the ``node-*`` article
    container.  Line breaks, hyperlinks and inline float / pixel-size
    styling are stripped from the raw HTML before it is parsed.
    """

    # Identify the recipe
    title = u'Common Dreams'
    description = u'Breaking News & Views for the Progressive Community.'
    cover_url = 'https://s3.amazonaws.com/s3.commondreams.org/images/common-dreams.png'
    __author__ = u'XanthanGum'
    language = 'en'

    # Download limits (calibre recipe API: age in days, count per feed).
    oldest_article = 7
    max_articles_per_feed = 100

    # Drop the site's own styling and scripts; extra_css below replaces them.
    no_stylesheets = True
    remove_javascript = True

    # Flattens all the tables to make it compatible with Nook
    conversion_options = {'linearize_tables': True}

    # Presentational HTML attributes that are removed from every tag.
    remove_attributes = [
        'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
        'valign', 'vspace', 'hspace', 'alt', 'width', 'height',
    ]

    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
    # Built from adjacent literals so no indentation whitespace leaks into
    # the stylesheet text.
    extra_css = (
        'body { font-family: verdana, helvetica, sans-serif; } '
        '.introduction, .first { font-weight: bold; } '
        '.cross-head { font-weight: bold; font-size: 125%; } '
        '.cap, .caption { display: block; font-size: 80%; font-style: italic; } '
        '.cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } '
        '.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, '
        '.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; '
        'font-size: 80%; font-style: italic; margin: 1px auto; } '
        '.story-date, .published { font-size: 80%; } '
        'table { width: 100%; } '
        'td img { display: block; margin: 5px auto; } '
        'ul { padding-top: 10px; } '
        'ol { padding-top: 10px; } '
        'li { padding-top: 5px; padding-bottom: 5px; } '
        'h1 { font-size: 175%; font-weight: bold; } '
        'h2 { font-size: 150%; font-weight: bold; } '
        'h3 { font-size: 125%; font-weight: bold; } '
        'h4, h5, h6 { font-size: 100%; font-weight: bold; }'
    )

    # Remove the line breaks and float left/right and picture width/height.
    # Each entry is (compiled pattern, replacement callable); every match is
    # replaced with the empty string.
    preprocess_regexps = [
        # Every <br> variant (<br>, <br/>, <br />, <br clear="all" />).
        # [^>]* cannot run past the end of the tag, unlike a greedy '.*'.
        (re.compile(r'<br\b[^>]*>', re.IGNORECASE), lambda m: ''),
        # The whole float declaration, value included, so no orphaned
        # 'left;' / 'right;' is left behind in the style attribute.
        (re.compile(r'(?<![\w-])float:\s*(?:left|right|none|inherit)\s*;?',
                    re.IGNORECASE), lambda m: ''),
        # Pixel width/height (and their min-/max- forms).  The lookbehind
        # keeps 'line-height', 'border-width' etc. intact, and the numeric
        # value pattern stops the match from spanning other declarations.
        (re.compile(r'(?<![\w-])(?:(?:max|min)-)?(?:width|height):'
                    r'\s*[\d.]+\s*px\s*;?', re.IGNORECASE), lambda m: ''),
        # Opening and closing anchor tags only; requiring whitespace or '>'
        # right after 'a' leaves <abbr>, <article>, <aside>, ... untouched.
        (re.compile(r'</?a(?:\s[^>]*)?>', re.IGNORECASE), lambda m: ''),
    ]

    # Main article is inside this tag.  The 'x and' guard skips <div>s that
    # have no id attribute at all.
    keep_only_tags = [
        dict(name='div', attrs={'id': lambda x: x and 'node-' in x}),
    ]

    remove_tags = [
        # remove Share options
        dict(name='div', attrs={'class': 'node-links clear-block'}),
    ]

    # Identify the news feeds
    feeds = [
        (u'Headlines', u'https://www.commondreams.org/feed/headlines_rss'),
        (u'Further News Articles', u'https://www.commondreams.org/feed/further_rss'),
        (u'Views', u'https://www.commondreams.org/feed/views_rss'),
        (u'Progressive Newswire', u'https://www.commondreams.org/feed/newswire_rss'),
    ]

    def print_version(self, url):
        """Return the URL of the print-friendly page for the article at *url*.

        The ``print`` flag is appended with ``?`` when *url* has no query
        string and with ``&`` when it already has one, so existing
        parameters (e.g. feed tracking tags) are not corrupted.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'print'