Read Der Tagesspiegel on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Der Tagesspiegel ist eine 1945 gegründete Berliner Abonnementzeitung. Er habe einen höheren Qualitätsanspruch als die beiden anderen Berliner Abonnementzeitungen Berliner Zeitung und Morgenpost.
Language: de
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
__license__ = 'GPL v3'
__copyright__ = '2010 Ingo Paschke <ipaschke@gmail.com>'
'''
Fetch Tagesspiegel.
'''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class TagesspiegelRSS(BasicNewsRecipe):
    """Calibre news recipe for the newspaper "Der Tagesspiegel".

    The article list is scraped from the newspaper overview page by
    ``parse_index`` and every article URL is rewritten to its print layout
    by ``print_version``.
    """

    title = u'Der Tagesspiegel'
    __author__ = 'Ingo Paschke'
    language = 'de'
    oldest_article = 7
    max_articles_per_feed = 100
    publication_type = 'newspaper'

    # Styling for the site's "hcf-*" classes. The CSS lines are kept
    # flush-left on purpose so the contents of the string stay unchanged.
    extra_css = '''
.hcf-overline{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;display:block}
.hcf-teaser{font-family:Verdana,Arial,Helvetica;font-size:x-small;margin-top:0}
h1{font-family:Arial,Helvetica,sans-serif;font-size:large;clear:right;}
.hcf-caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
.hcf-copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
.hcf-article{font-family:Arial,Helvetica;font-size:x-small}
.quote{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
.quote .cite{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small}
.hcf-inline-left{float:left;margin-right:15px;position:relative;}
.hcf-inline-right{float:right;margin-right:15px;position:relative;}
.hcf-smart-box{font-family: Arial, Helvetica, sans-serif; font-size: xx-small; margin: 0px 15px 8px 0px; width: 300px;}
'''

    no_stylesheets = True
    no_javascript = True
    remove_empty_feeds = True
    encoding = 'utf-8'
    remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-date hcf-separate'}]

    def print_version(self, url):
        """Return the print-layout URL for the article at ``url``.

        The last path segment ``X`` is replaced by ``v_print,X?p=``; all
        other segments are left untouched.
        """
        head, sep, last = url.rpartition('/')
        return head + sep + 'v_print,%s?p=' % last

    def get_masthead_url(self):
        """Return the URL of the newspaper logo used as masthead image."""
        return 'http://www.tagesspiegel.de/images/tsp_logo/3114/6.png'

    @staticmethod
    def _feed_title(tag):
        """Return the stripped direct text of ``tag``, or None if it is None."""
        if tag is None:
            return None
        return ''.join(tag.findAll(text=True, recursive=False)).strip()

    @staticmethod
    def _class_string(tag):
        """Return the ``class`` attribute of ``tag`` as one string.

        NOTE(review): some Beautiful Soup versions hand back ``class`` as a
        list of names instead of a string - confirm which one the calibre in
        use provides. Both shapes are accepted here.
        """
        value = tag.get('class', '')
        if isinstance(value, (list, tuple)):
            return ' '.join(value)
        return value

    def parse_index(self):
        """Build the list of sections and articles from the overview page.

        The main column of http://www.tagesspiegel.de/zeitung/ is walked in
        document order. Every ``hcf-header`` element starts a new section;
        every following teaser whose first child is an ``<h2>`` becomes an
        article of the current section. Teasers found before the first
        section header are collected under 'Uncategorized'. Links whose URL
        contains 'podcasts' are skipped.

        Returns:
            A list of ``(section title, articles)`` tuples in page order,
            where ``articles`` is a list of dicts with the keys ``title``,
            ``url``, ``date``, ``description`` and ``content``.

        Raises:
            ValueError: if the page has no ``hcf-main-col`` container.
        """
        soup = self.index_to_soup('http://www.tagesspiegel.de/zeitung/')
        maincol = soup.find('div', attrs={'class':re.compile('hcf-main-col')})
        if maincol is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError('Could not find the main column (hcf-main-col) on the index page')

        articles = {}        # section title -> list of article dicts
        section_order = []   # section titles in order of first appearance
        key = None           # title of the section currently being filled
        pubdate = strftime('%a, %d %b')

        wanted = ['hcf-teaser', 'hcf-header', 'story headline', 'hcf-teaser hcf-last']
        for div in maincol.findAll(True, attrs={'class':wanted}):
            css_class = self._class_string(div)

            if css_class == 'hcf-header':
                try:
                    section = string.capwords(self._feed_title(div.em.a))
                except (AttributeError, TypeError):
                    # Header without the expected <em><a> title: keep
                    # filling the previous section.
                    continue
                key = section
                if key not in articles:
                    # A repeated header must neither wipe the articles
                    # collected so far nor list the section twice.
                    articles[key] = []
                    section_order.append(key)
                continue

            if css_class not in ('hcf-teaser', 'hcf-teaser hcf-last'):
                continue
            # Only teasers that start with a headline are articles.
            if not div.contents or getattr(div.contents[0], 'name', '') != 'h2':
                continue

            a = div.find('a', href=True)
            if a is None:
                continue
            url = 'http://www.tagesspiegel.de' + a['href']
            if 'podcasts' in url:
                continue

            title = self.tag_to_string(a, use_alt=True).strip()
            description = ''
            summary = div.find('p', attrs={'class':'hcf-teaser'})
            if summary:
                description = self.tag_to_string(summary, use_alt=False)

            feed = key if key is not None else 'Uncategorized'
            if feed not in articles:
                # Register the fallback section as well, otherwise its
                # articles would never make it into the result.
                articles[feed] = []
                section_order.append(feed)
            articles[feed].append(
                dict(title=title, url=url, date=pubdate,
                     # Drop the trailing "mehr" of the teaser text.
                     description=re.sub('mehr$', '', description),
                     content=''))

        return [(name, articles[name]) for name in section_order]