Read LWN.net Weekly Edition on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Weekly summary of what has happened in the free software world.

Language: en

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

			  #!/usr/bin/env python

# Module metadata: license and copyright notice for this recipe.
__license__   = 'GPL v3'
__copyright__ = '2011, Davide Cavalca <davide125 at tiscali.it>'
# Bare string naming the site this recipe targets. It follows other
# statements, so it is not picked up as the module docstring.
'''
lwn.net
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class WeeklyLWN(BasicNewsRecipe):
    """Recipe that builds an ebook from the LWN.net Weekly Edition.

    The index is read from the single-page ("bigpage") view of the edition.
    When a username and password are configured, the recipe logs in and
    reads ``/current/bigpage``; otherwise it reads ``/free/bigpage``.
    """

    title = 'LWN.net Weekly Edition'
    description = 'Weekly summary of what has happened in the free software world.'
    __author__ = 'Davide Cavalca'
    language = 'en'
    site_url = 'http://lwn.net'

    extra_css = 'pre,code,samp,kbd,tt { font-size: 80% }\nblockquote {margin-left:0 }\n* { color: black }\n'

    cover_url = site_url + '/images/lcorner.png'
    #masthead_url = 'http://lwn.net/images/lcorner.png'
    publication_type = 'magazine'

    # Keep only the part of each page between the headline and the article
    # text, and drop secondary headings and forms inside it.
    remove_tags_before = dict(attrs={'class': 'PageHeadline'})
    remove_tags_after = dict(attrs={'class': 'ArticleText'})
    remove_tags = [dict(name=['h2', 'form'])]

    preprocess_regexps = [
        # Remove the <hr> and "Log in to post comments"
        (re.compile(r'<hr.*?comments[)]', re.DOTALL), lambda m: ''),
    ]

    conversion_options = {'linearize_tables': True}

    oldest_article = 7.0
    # Credentials are optional: get_browser() and parse_index() only use them
    # when both username and password are set.
    needs_subscription = 'optional'

    def get_browser(self):
        """Return a browser, logged in to lwn.net when credentials are set.

        The login form is only submitted when both ``self.username`` and
        ``self.password`` are not None.
        """
        # The base-class method is called unbound, so ``self`` must be passed
        # explicitly; omitting it raises a TypeError.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('https://lwn.net/login')
            br.select_form(name='loginform')
            br['Username'] = self.username
            br['Password'] = self.password
            br.submit()
        return br

    def print_version(self, url):
        """Return the printable-format URL for ``url``.

        The fragment (everything from the first ``#``) is dropped, ``site_url``
        is prepended unless the URL already starts with it, and
        ``?format=printable`` is appended unless the URL already ends with it.
        """
        # Strip off anchor
        url = url.split('#')[0]

        # Prepend site_url
        if not url.startswith(self.site_url):
            url = self.site_url + url

        # Append printable URL parameter
        print_param = '?format=printable'
        if not url.endswith(print_param):
            url += print_param

        return url

    def _section_name(self, tag_title):
        """Return the section label for the article headed by ``tag_title``.

        The label is the text of the closest preceding ``Cat1HL`` element, or
        'Front Page' when there is none. If the closest preceding ``Cat2HL``
        element sits under that same ``Cat1HL`` element, its text is appended
        as "<section>: <subsection>".
        """
        tag_section = tag_title.findPrevious(attrs={'class': 'Cat1HL'})
        if tag_section is None:
            section = 'Front Page'
        else:
            section = tag_section.string

        tag_section2 = tag_title.findPrevious(attrs={'class': 'Cat2HL'})
        if tag_section2 is not None:
            # Only use the sub-heading when it belongs to the same top-level
            # heading as the article (tag comparison, hence ``==``).
            if tag_section2.findPrevious(attrs={'class': 'Cat1HL'}) == tag_section:
                section = "%s: %s" % (section, tag_section2.string)
        return section

    def _find_article_link(self, start, url_re):
        """Return the first article link after ``start``, or None.

        Walks forward over ``<a>`` tags whose href matches ``url_re`` and
        returns the first one whose text is 'Full Story' or starts with
        'Comments ('. Returns None when the document runs out of such links.
        """
        tag = start
        while True:
            tag = tag.findNext(name='a', attrs={'href': url_re})
            if tag is None:
                return None
            text = tag.string
            if text is None:
                continue
            if text == 'Full Story' or text.startswith('Comments ('):
                return tag

    def parse_index(self):
        """Build the list of (section title, articles) pairs for the edition.

        Returns a list of ``(section, [article, ...])`` tuples in page order,
        where each article is a dict with ``title``, ``url``, ``description``,
        ``content`` and ``date`` keys. Sections that ended up without any
        article are left out.

        Raises:
            Exception: if no article could be found on the index page.
        """
        if self.username is not None and self.password is not None:
            index_url = self.print_version('/current/bigpage')
        else:
            index_url = self.print_version('/free/bigpage')
        soup = self.index_to_soup(index_url)

        articles = {}        # section name -> list of article dicts
        section_order = []   # section names in order of first appearance
        url_re = re.compile(r'^/Articles/')

        # ``cursor`` is the position in the document from which the next
        # headline is searched; it advances past each article link found.
        cursor = soup.body
        while True:
            tag_title = cursor.findNext(attrs={'class': 'SummaryHL'})
            if tag_title is None:
                break

            section = self._section_name(tag_title)
            if section not in articles:
                articles[section] = []
                section_order.append(section)

            tag_url = self._find_article_link(tag_title, url_re)
            if tag_url is None:
                # Headline without a usable link: nothing more to collect.
                break
            cursor = tag_url

            articles[section].append(dict(
                title=self.tag_to_string(tag_title),
                url=tag_url['href'],
                description='', content='', date=''))

        # Drop sections with no articles so the emptiness check below really
        # means "no articles", not merely "no section headings".
        feeds = [(name, articles[name]) for name in section_order
                 if articles[name]]
        if not feeds:
            raise Exception('Could not find any articles.')

        return feeds

# vim: expandtab:ts=4:sw=4