Read Kopalnia Wiedzy on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Ciekawostki ze świata nauki i techniki
Language: pl
Requires Subscription: No, it's available as free ebook
Schedule Every morning
__license__ = 'GPL v3'
__copyright__ = '2011 Attis <attis@attis.one.pl>, 2012 Tomasz Długosz <tomek3d@gmail.com>'
__version__ = 'v. 0.1'
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class KopalniaWiedzy(BasicNewsRecipe):
title = u'Kopalnia Wiedzy'
publisher = u'Kopalnia Wiedzy'
description = u'Ciekawostki ze świata nauki i techniki'
encoding = 'utf-8'
__author__ = 'Attis & Tomasz Długosz'
language = 'pl'
oldest_article = 7
max_articles_per_feed = 100
INDEX = u'http://kopalniawiedzy.pl/'
remove_javascript = True
no_stylesheets = True
remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
remove_tags_after = dict(attrs={'class':'ad-square'})
keep_only_tags = [dict(name="div", attrs={'class':'article-text text-small'})]
extra_css = '.topimage {margin-top: 30px}'
preprocess_regexps = [
(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
(re.compile(u'<br /><br />'),
lambda match: '<br\/>')
]
feeds = [
(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
(u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
(u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
]
def is_link_wanted(self, url, tag):
return tag['class'] == 'next'
def remove_beyond(self, tag, next):
while tag is not None and getattr(tag, 'name', None) != 'body':
after = getattr(tag, next)
while after is not None:
ns = getattr(tag, next)
after.extract()
after = ns
tag = tag.parent
def append_page(self, soup, appendtag, position):
pager = soup.find('a',attrs={'class':'next'})
if pager:
nexturl = self.INDEX + pager['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'id':'articleContent'})
tag = texttag.find(attrs={'class':'pages'})
self.remove_beyond(tag, 'nextSibling')
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
self.append_page(soup, soup.body, 3)
for item in soup.findAll('div',attrs={'class':'pages'}):
item.extract()
for item in soup.findAll('p', attrs={'class':'wykop'}):
item.extract()
return soup