Read Revista El Cultural on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

Revista de cultura

Language: es

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

			  from calibre.web.feeds.recipes import BasicNewsRecipe

class RevistaElCultural(BasicNewsRecipe):
    """calibre recipe for the print-edition summary of 'Revista El Cultural'.

    ``parse_index`` downloads the summary page once and, for each configured
    section, ``ec_parse_section`` collects the article links found after the
    element with id ``gallery``.
    """

    title       = 'Revista El Cultural'
    __author__  = 'Jefferson Frantz'
    description = 'Revista de cultura'
    timefmt = ' [%d %b, %Y]'
    language = 'es'

    no_stylesheets = True
    remove_javascript = True

    # NOTE: the article container carries class="column span-13 last" (see
    # keep_only_tags), so the matching CSS selector chains the three classes
    # with dots; a space-separated selector would never match it.
    extra_css = (
        'h1{ font-family: sans-serif; font-size: large; font-weight: bolder; text-align: justify } '
        'h2{ font-family: sans-serif; font-size: small; font-weight: 500; text-align: justify } '
        'h3{ font-family: sans-serif; font-size: small; font-weight: 500; text-align: justify } '
        'h4{ font-family: sans-serif; font-weight: lighter; font-size: medium; font-style: italic; text-align: justify } '
        '.rtsArticuloFirma{ font-family: sans-serif; font-size: small; text-align: justify } '
        '.column.span-13.last{ font-family: sans-serif; font-size: medium; text-align: justify } '
        '.rtsImgArticulo{font-family: serif; font-size: small; color: #000000; text-align: justify}'
    )

    def preprocess_html(self, soup):
        """Remove every inline ``style`` attribute from *soup* and return it."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    keep_only_tags = [
        dict(name='div', attrs={'class': ['column span-13 last']}),
        dict(name='div', attrs={'class': ['rtsImgArticulo']}),
    ]

    remove_tags = [
        dict(name=['object', 'link', 'script', 'ul']),
        dict(name='div', attrs={'class': ['rtsRating']}),
    ]

    def ec_parse_section(self, url, titleSection, soup=None):
        """Return the list of articles that belong to one section.

        :param url: address of the summary page; fetched only when *soup*
            is not supplied.
        :param titleSection: section name as it appears in article links
            (``/version_papel/<titleSection>/...``).
        :param soup: optional already-parsed summary page, so callers that
            scan several sections of the same page can download it once.
        :return: list of dicts with the keys ``title``, ``url``,
            ``description`` and ``date``; empty when the page has no
            element with id ``gallery``.
        """
        self.log('Section: ' + titleSection)
        if soup is None:
            soup = self.index_to_soup(url)

        gallery = soup.find(attrs={'id': 'gallery'})
        if gallery is None:
            # Without the anchor element there is nothing to scan from.
            self.log('\tNo element with id "gallery" found in', url)
            return []

        section_prefix = '/version_papel/' + titleSection + '/'
        current_articles = []

        for a in gallery.findAllNext('a', href=True):
            title = self.tag_to_string(a)
            href = a.get('href', False)
            if not href or not title:
                continue

            if not href.startswith(section_prefix):
                # Once at least one article has been collected, the first
                # link that is neither in this section nor under
                # '/secciones/' ends the scan.
                if current_articles and not href.startswith('/secciones/'):
                    break
                continue

            # Keep only the text before the first '|'. partition() leaves a
            # title without '|' intact instead of chopping its last characters.
            headline = title.partition('|')[0].strip()
            if not headline:
                headline = title.strip()
            article_url = 'http://www.elcultural.es' + href

            self.log('\t\tFound article:', headline)
            self.log('\t\t\t', article_url)
            current_articles.append({'title': headline, 'url': article_url,
                                     'description': '', 'date': ''})

        return current_articles

    def parse_index(self):
        """Build the feed list: one ``(section title, articles)`` pair per
        section that yielded at least one article."""
        index_url = 'http://www.elcultural.es/pdf_sumario/cultural/Sumario_El_Cultural_en_PDF'
        # 'OPINION' was commented out in the original section list.
        sections = ['LETRAS', 'ARTE', 'CINE', 'CIENCIA', 'ESCENARIOS']

        # Every section lives on the same summary page: download it once.
        soup = self.index_to_soup(index_url)

        feeds = []
        for title in sections:
            articles = self.ec_parse_section(index_url, title, soup)
            if articles:
                feeds.append((title, articles))

        return feeds