Read Süddeutsche Zeitung Print on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Printausgabe - Kostenpflichtig. Die Süddeutsche Zeitung (Abkürzung SZ) ist die größte deutsche überregionale Abonnement-Tageszeitung. sueddeutsche.de ist das Internetportal der Süddeutschen Zeitung. Die Artikel setzen sich zusammen aus eigenen Beiträgen der sueddeutsche.de-Redaktion, aus Texten, die von der Süddeutschen Zeitung übernommen werden und aus Agenturmeldungen.

Language: de

Requires Subscription: Yes, requires a Süddeutsche Zeitung Print subscription

Schedule Every morning

			  # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.sueddeutsche.de/sz/
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime

class SueddeutcheZeitung(BasicNewsRecipe):
    title                  = u'Süddeutsche Zeitung'
    __author__             = 'Darko Miletic'
    description            = 'News from Germany. Access to paid content.'
    publisher              = u'Süddeutsche Zeitung'
    category               = 'news, politics, Germany'
    no_stylesheets         = True
    oldest_article         = 2
    encoding               = 'iso-8859-1'
    needs_subscription     = True
    remove_empty_feeds     = True
    delay                  = 1
    cover_source           = 'http://www.sueddeutsche.de/verlag'
    PREFIX                 = 'http://www.sueddeutsche.de'
    INDEX                  = PREFIX + '/app/epaper/textversion/'
    use_embedded_content   = False
    masthead_url           = 'http://pix.sueddeutsche.de/img/layout/header/SZ_solo288x31.gif'
    language               = 'de'
    publication_type       = 'newspaper'
    extra_css              = ' body{font-family: Arial,Helvetica,sans-serif} '

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }

    remove_attributes = ['height','width','style']

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open(self.INDEX)
            br.select_form(name='lbox')
            br['login_name'    ] = self.username
            br['login_passwort'] = self.password
            br.submit()
        return br

    remove_tags        =[
                         dict(attrs={'class':'hidePrint'})
                        ,dict(name=['link','object','embed','base','iframe','br'])
                        ]
    keep_only_tags     = [dict(attrs={'class':'artikelBox'})]
    remove_tags_before =  dict(attrs={'class':'artikelTitel'})
    remove_tags_after  =  dict(attrs={'class':'author'})

    feeds = [
               (u'Politik'       , INDEX + 'Politik/'      )
              ,(u'Seite drei'    , INDEX + 'Seite+drei/'   )
              ,(u'Thema des Tages'    , INDEX + 'Thema+des+Tages/'   )
              ,(u'Meinungsseite' , INDEX + 'Meinungsseite/')
              ,(u'Wissen'        , INDEX + 'Wissen/'       )
              ,(u'Panorama'      , INDEX + 'Panorama/'     )
              ,(u'Feuilleton'    , INDEX + 'Feuilleton/'   )
              ,(u'Medien'        , INDEX + 'Medien/'       )
              ,(u'Wirtschaft'    , INDEX + 'Wirtschaft/'   )
              ,(u'Sport'         , INDEX + 'Sport/'        )
              ,(u'Bayern'        , INDEX + 'Bayern/'       )
              ,(u'Muenchen'      , INDEX + 'M%FCnchen/'    )
              ,(u'Muenchen City' , INDEX + 'M%FCnchen+City/' )
              ,(u'Jetzt.de'         , INDEX + 'Jetzt.de/'        )
              ,(u'Reise'         , INDEX + 'Reise/'        )
              ,(u'SZ Extra'         , INDEX + 'SZ+Extra/'        )
              ,(u'Wochenende'    , INDEX + 'SZ+am+Wochenende/' )
              ,(u'Stellen-Markt'  , INDEX + 'Stellen-Markt/')
              ,(u'Motormarkt'  , INDEX + 'Motormarkt/')
              ,(u'Immobilien-Markt', INDEX + 'Immobilien-Markt/')
              ,(u'Thema'         , INDEX + 'Thema/'        )
              ,(u'Forum'         , INDEX + 'Forum/'        )
              ,(u'Leute'         , INDEX + 'Leute/'        )
              ,(u'Jugend'         , INDEX + 'Jugend/'        )
              ,(u'Beilage'         , INDEX + 'Beilage/'        )
            ]

    def get_cover_url(self):
        cover_source_soup = self.index_to_soup(self.cover_source)
        preview_image_div = cover_source_soup.find(attrs={'class':'preview-image'})
        return preview_image_div.div.img['src']

    def parse_index(self):
        src = self.index_to_soup(self.INDEX)
        id = ''
        for itt in src.findAll('a',href=True):
            if itt['href'].startswith('/app/epaper/textversion/inhalt/'):
               id = itt['href'].rpartition('/inhalt/')[2]
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl + id)
            tbl = soup.find(attrs={'class':'szprintd'})
            for item in tbl.findAll(name='td',attrs={'class':'topthema'}):
                atag    = item.find(attrs={'class':'Titel'}).a
                ptag    = item.find('p')
                stag    = ptag.find('script')
                if stag:
                   stag.extract()
                url           = self.PREFIX + atag['href']
                title         = self.tag_to_string(atag)
                description   = self.tag_to_string(ptag)
                articles.append({
                                      'title'      :title
                                     ,'date'       :strftime(self.timefmt)
                                     ,'url'        :url
                                     ,'description':description
                                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds