Read Der Tagesspiegel on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Der Tagesspiegel ist eine 1945 gegründete Berliner Abonnementzeitung. Er habe einen höheren Qualitätsanspruch als die beiden anderen Berliner Abonnementzeitungen Berliner Zeitung und Morgenpost.

Language: de

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

			  __license__   = 'GPL v3'
__copyright__ = '2010 Ingo Paschke <ipaschke@gmail.com>'

'''
Fetch Tagesspiegel.
'''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class TagesspiegelRSS(BasicNewsRecipe):
    """calibre news recipe that builds an ebook from Der Tagesspiegel.

    The article list is scraped from the newspaper's index page in
    ``parse_index`` and every article URL is rewritten to its print layout
    by ``print_version``.
    """

    # Recipe metadata and download limits consumed by the calibre framework.
    title          = u'Der Tagesspiegel'
    __author__ = 'Ingo Paschke'
    language = 'de'
    oldest_article = 7
    max_articles_per_feed = 100
    publication_type = 'newspaper'

    # Styling for the site's "hcf-*" markup; the site's own stylesheets are
    # dropped via ``no_stylesheets`` below.
    extra_css = '''
                .hcf-overline{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;display:block}
                .hcf-teaser{font-family:Verdana,Arial,Helvetica;font-size:x-small;margin-top:0}
                h1{font-family:Arial,Helvetica,sans-serif;font-size:large;clear:right;}
                .hcf-caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
                .hcf-copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
                .hcf-article{font-family:Arial,Helvetica;font-size:x-small}
                .quote{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
                .quote .cite{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small}
                .hcf-inline-left{float:left;margin-right:15px;position:relative;}
                .hcf-inline-right{float:right;margin-right:15px;position:relative;}
                .hcf-smart-box{font-family: Arial, Helvetica, sans-serif; font-size: xx-small; margin: 0px 15px 8px 0px; width: 300px;}
                '''

    no_stylesheets = True
    no_javascript = True
    remove_empty_feeds = True
    encoding = 'utf-8'
    remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-date hcf-separate'}]

    def print_version(self, url):
        """Return the print-layout URL for the article at ``url``.

        The last path segment ``X`` is replaced by ``v_print,X?p=``; the
        rest of the URL is left untouched.
        """
        parts = url.split('/')
        parts[-1] = 'v_print,%s?p=' % parts[-1]
        return '/'.join(parts)

    def get_masthead_url(self):
        """Return the fixed URL of the newspaper logo used as masthead."""
        return 'http://www.tagesspiegel.de/images/tsp_logo/3114/6.png'

    @staticmethod
    def _class_string(tag):
        """Return the ``class`` attribute of ``tag`` as one string.

        Depending on the BeautifulSoup version the attribute is either the
        raw string or a list of class tokens; joining the list form lets the
        callers compare against literals such as ``'hcf-teaser hcf-last'``
        in both cases.
        """
        value = tag.get('class') or ''
        if isinstance(value, (list, tuple)):
            value = ' '.join(value)
        return value

    def parse_index(self):
        """Scrape the index page and group the teasers by section.

        Section headers (class ``hcf-header``) start a new feed; each
        following teaser whose first child is an ``<h2>`` becomes an article
        of the current feed. Teasers seen before any header are collected
        under ``'Uncategorized'``. URLs containing ``podcasts`` are skipped.

        Returns a list of ``(feed_title, articles)`` tuples in page order,
        where every article is a dict with the keys ``title``, ``url``,
        ``date``, ``description`` and ``content``.
        """
        soup = self.index_to_soup('http://www.tagesspiegel.de/zeitung/')

        def feed_title(node):
            # Only the node's own text, not that of nested tags.
            if node is None:
                return None
            return ''.join(node.findAll(text=True, recursive=False)).strip()

        articles = {}   # feed title -> list of article dicts
        order = []      # feed titles in order of first appearance
        key = None      # title of the section currently being filled
        pubdate = strftime('%a, %d %b')  # same for every article, compute once
        maincol = soup.find('div', attrs={'class':re.compile('hcf-main-col')})

        for div in maincol.findAll(True, attrs={'class':['hcf-teaser', 'hcf-header', 'story headline', 'hcf-teaser hcf-last']}):
            css_class = self._class_string(div)

            if css_class == 'hcf-header':
                try:
                    new_key = string.capwords(feed_title(div.em.a))
                except (AttributeError, TypeError):
                    # Header without the expected <em><a>title</a></em>
                    # structure: keep filling the previous section.
                    continue
                key = new_key
                # A repeated header must not wipe the articles already
                # collected for that section nor list the feed twice.
                if key not in articles:
                    articles[key] = []
                    order.append(key)
                continue

            if css_class not in ('hcf-teaser', 'hcf-teaser hcf-last'):
                continue

            # Only teasers that start with a headline are real articles;
            # guard against tags without any children.
            first_child = div.contents[0] if div.contents else None
            if getattr(first_child, 'name', '') != 'h2':
                continue

            a = div.find('a', href=True)
            if not a:
                continue
            url = 'http://www.tagesspiegel.de' + a['href']
            if 'podcasts' in url:
                continue
            title = self.tag_to_string(a, use_alt=True).strip()

            description = ''
            summary = div.find('p', attrs={'class':'hcf-teaser'})
            if summary:
                description = self.tag_to_string(summary, use_alt=False)

            feed = key if key is not None else 'Uncategorized'
            if feed not in articles:
                # Register the fallback feed in the output order as well,
                # otherwise its articles would never be returned.
                articles[feed] = []
                order.append(feed)

            articles[feed].append(
                dict(title=title, url=url, date=pubdate,
                     # Drop the trailing "mehr" link text from the summary.
                     description=re.sub('mehr$', '', description),
                     content=''))

        return [(feed, articles[feed]) for feed in order]