Read Polityka on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Weekly magazine. Last archive issue
Language: pl
Requires Subscription: No, it's available as free ebook
Schedule Every morning
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
class Polityka(BasicNewsRecipe):
title = u'Polityka'
__author__ = 'matek09'
description = 'Weekly magazine. Last archive issue'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
remove_javascript = True
remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
remove_tags_after = dict(dict(name = 'div', attrs = {'class' : 'box_footer'}))
remove_tags =[]
remove_tags.append(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'box_footer'}))
extra_css = '''
h1 {font-size: x-large; font-weight: bold}
'''
def parse_index(self):
soup = self.index_to_soup('http://archiwum.polityka.pl/')
box_img3 = soup.findAll(attrs={'class' : 'box_img3'})
feeds = []
last = 0
self.cover_url = 'http://archiwum.polityka.pl' + box_img3[-1].find('img')['src']
last_edition = 'http://archiwum.polityka.pl' + box_img3[-1].find('a')['href']
while True:
index = self.index_to_soup(last_edition)
box_list = index.findAll('div', attrs={'class' : 'box_list'})
if len(box_list) == 0:
break
articles = {}
for box in box_list:
for div in box.findAll('div', attrs={'class': 'list_tresc'}):
article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
if not articles.has_key(section):
articles[section] = []
articles[section].append( {
'title' : self.tag_to_string(div.a),
'url' : 'http://archiwum.polityka.pl' + div.a['href'],
'date' : '',
'description' : ''
})
for section in articles:
feeds.append((section, articles[section]))
last_edition = last_edition.replace('http://archiwum.polityka.pl/wydanie/' + str(last), 'http://archiwum.polityka.pl/wydanie/' + str(last + 1))
last = last + 1
return feeds