Read Barron's on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Barron's is an American weekly newspaper covering U.S. financial information, market developments, and relevant statistics. Each issue provides a wrap-up of the previous week's market activity, news reports, and an informative outlook on the week to come.

Language: en

Requires Subscription: Yes, requires a Barron's subscription

Schedule: Every morning

			  ##
##    web2lrf profile to download articles from Barrons.com
##    can download subscriber-only content if username and
##    password are supplied.
##
'''
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe

class Barrons(BasicNewsRecipe):
        """calibre news recipe that downloads articles from Barrons.com.

        When a username and password are supplied, the recipe logs in
        through the Barron's login form before fetching, so that
        subscriber-only content can be downloaded.
        """

        title = 'Barron\'s'
        max_articles_per_feed = 50
        needs_subscription    = True
        language = 'en'

        __author__ = 'Kovid Goyal and Sujata Raman'
        description = 'Weekly publication for investors from the publisher of the Wall Street Journal'
        timefmt  = ' [%a, %b %d, %Y]'
        use_embedded_content   = False
        no_stylesheets = True
        ## Raw string: '\?' is a regex escape, not a Python string escape.
        match_regexps = [r'http://online.barrons.com/.*?html\?mod=.*?|file:.*']
        conversion_options = {'linearize_tables': True}
        ##delay = 1

        ## Don't grab articles more than 7 days old
        oldest_article = 7

        extra_css = '''
                    .datestamp{font-family:Verdana,Geneva,Kalimati,sans-serif; font-size:x-small;}
                    h3{font-family:Georgia,"Times New Roman",Times,serif; }
                    h2{font-family:Georgia,"Times New Roman",Times,serif; }
                    h1{ font-family:Georgia,"Times New Roman",Times,serif; }
                    .byline{font-family:Verdana,Geneva,Kalimati,sans-serif; font-size:x-small;}
                    .subhead{font-family:Georgia,"Times New Roman",Times,serif; font-size: small;}
                    .articlePage{ font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif;}
                    .insettipUnit{font-size: x-small;}
                    '''

        ## Page furniture (tab navigation, RSS/article toolboxes, inset close
        ## links) that is stripped from every downloaded page.
        remove_tags = [
            dict(name='div', attrs={'class': ['tabContainer artTabbedNav', 'rssToolBox hidden', 'articleToolbox']}),
            dict(name='a', attrs={'class': 'insetClose'}),
        ]

        ## (compiled pattern, replacement callable) pairs applied to the raw
        ## HTML. Every pattern is case-insensitive and lets '.' span newlines.
        preprocess_regexps = [
            ## Remove anything before the body of the article.
            (re.compile(r'<body.*?<!-- article start', re.IGNORECASE | re.DOTALL),
             lambda match: '<body><!-- article start'),

            ## Remove any insets from the body of the article.
            (re.compile(r'<div id="inset".*?</div>.?</div>.?<p', re.IGNORECASE | re.DOTALL),
             lambda match: '<p'),

            ## Remove any reprint info from the body of the article.
            (re.compile(r'<hr size.*?<p', re.IGNORECASE | re.DOTALL),
             lambda match: '<p'),

            ## Remove anything after the end of the article.
            (re.compile(r'<!-- article end.*?</body>', re.IGNORECASE | re.DOTALL),
             lambda match: '</body>'),
        ]

        def get_browser(self, *args, **kwargs):
            """Return a browser, logged in to Barron's when credentials are set.

            Any extra arguments are forwarded unchanged to
            ``BasicNewsRecipe.get_browser``. The login form is only
            submitted when both ``self.username`` and ``self.password``
            are not None.
            """
            ## The base method is called through the class, so the recipe
            ## instance has to be passed explicitly.
            br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
            if self.username is not None and self.password is not None:
                br.open('http://commerce.barrons.com/auth/login')
                br.select_form(name='login_form')
                br['user'] = self.username
                br['password'] = self.password
                br.submit()
            return br

        def print_version(self, url):
            """Return ``url`` without its query string, with '#printmode' appended.

            Use the print version of a page when available.
            """
            main, sep, _rest = url.rpartition('?')
            if not sep:
                ## No '?' in the URL: rpartition() returns ('', '', url), so
                ## keep the whole URL instead of an empty prefix.
                main = url
            return main + '#printmode'

        def postprocess_html(self, soup, first):
            """Turn list tags into divs and drop the article thumbnail.

            Returns the same ``soup`` object after modifying it in place.
            """
            for tag in soup.findAll(name=['ul', 'li']):
                tag.name = 'div'
            for tag in soup.findAll(name='div', attrs={'id': 'articleThumbnail_1'}):
                tag.extract()
            return soup

        def get_feeds(self):
            """Return the (title, RSS URL) pairs to download."""
            ## Comment out the feeds you don't want retrieved.
            ## Because these feeds are sorted alphabetically when converted to
            ## LRF, you may want to number them to put them in the order you
            ## desire.
            return [
                ('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'),
                ('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'),
                ('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'),
                ('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'),
                ('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'),
                ('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
            ]

        def get_article_url(self, article):
            """Return the feed entry's 'link' value, or None when it has none."""
            return article.get('link', None)

        def get_cover_url(self):
            """Return the cover image URL taken from the Barron's home page.

            Returns None when the magazine block, its image, or the image's
            ``src`` attribute cannot be found.
            """
            soup = self.index_to_soup('http://online.barrons.com/home-page')
            link_item = soup.find('ul', attrs={'class': 'newsItem barronsMag'})
            if link_item is None or link_item.img is None:
                ## Page layout changed or the block has no image: no cover.
                return None
            return link_item.img.get('src')


        ## Logout of website
        ## NOT CURRENTLY WORKING
        # def cleanup(self):
            # try:
                # self.browser.set_debug_responses(True)
                # import sys, logging
                # logger = logging.getLogger("mechanize")
                # logger.addHandler(logging.StreamHandler(sys.stdout))
                # logger.setLevel(logging.INFO)

                # res = self.browser.open('http://online.barrons.com/logout')
            # except:
                # import traceback
                # traceback.print_exc()