Read El Universal (Edición Impresa) on your iPad or Kindle in no time. Click download to load the free ebook on your reader.

Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.

Download for free »

News from Mexico

Language: es

Requires Subscription: No, it's available as a free ebook

Schedule: Every morning

			  from calibre.web.feeds.news import BasicNewsRecipe

class ElUniversalImpresaRecipe(BasicNewsRecipe):
    """calibre news recipe for the print edition of El Universal (Mexico).

    ``parse_index`` scrapes the print-edition index page into a front-page
    feed plus one feed per section, ``print_version`` rewrites article URLs
    and ``preprocess_html`` tidies each downloaded article.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'es_MX'
    version = 1

    title = u'El Universal (Edici\u00F3n Impresa)'
    publisher = u'El Universal'
    category = u'News, Mexico'
    description = u'News from Mexico'

    remove_empty_feeds = True
    remove_javascript = True

    # Site root; relative hrefs scraped from the index page are appended to it.
    INDEX = 'http://www.eluniversal.com.mx'

    extra_css = '''
                body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
                '''

    # 'language' reuses the recipe language declared above: the content is
    # Spanish, so a hard-coded 'en' would mislabel the generated book.
    conversion_options = {'comments': description, 'tags': category, 'language': language,
                          'publisher': publisher, 'linearize_tables': True}

    def _article_url(self, anchor):
        """Return the absolute URL for *anchor*, or None when it has no href.

        Relative hrefs are appended to ``INDEX``; hrefs that already carry an
        http(s) scheme are returned unchanged so they are not prefixed twice.
        """
        if anchor is None:
            return None
        href = anchor.get('href')
        if not href:
            return None
        if href.startswith(('http://', 'https://')):
            return href
        return self.INDEX + href

    def parse_index(self):
        """Build the feed list from the print-edition index page.

        Returns:
            A list of ``(feed_title, articles)`` tuples. Each article is a
            dict with the keys ``title``, ``date`` (always None), ``url`` and
            ``description``. The first feed is always 'Primera Plana'; the
            others are named after the first link of each attribute-less
            table cell. The list is empty when the layout table is missing.
        """
        soup = self.index_to_soup('http://www.eluniversal.com.mx/edicion_impresa.html')
        index = []

        table = soup.find('table', attrs={'width': '500'})
        if table is None:
            # Without the layout table there is nothing to scrape; report it
            # instead of failing with an AttributeError on None.
            self.log.warn('El Universal: index table (width=500) not found')
            return index

        # Front page: each 'arnegro12' cell holds a headline link followed by
        # the teaser text.
        front_page = []
        for td in table.findAll('td', attrs={'class': 'arnegro12'}):
            a = td.a
            url = self._article_url(a)
            if url is None:
                # Cell without a usable link: skip it, keep the other articles.
                continue
            # Detach the link so that the text left in the cell is the teaser.
            a.extract()
            front_page.append({'title': self.tag_to_string(a), 'date': None,
                               'url': url, 'description': self.tag_to_string(td)})

        index.append(('Primera Plana', front_page))

        # Sections: in each cell without attributes the first link with text
        # names the section and the following links are its articles.
        for td in table.findAll(lambda tag: tag.name == 'td' and not tag.attrs):
            section_articles = []
            feed_title = None
            for a in td.findAll('a'):
                if not feed_title:
                    feed_title = self.tag_to_string(a)
                    continue

                url = self._article_url(a)
                if url is None:
                    continue
                section_articles.append({'title': self.tag_to_string(a), 'date': None,
                                         'url': url, 'description': ''})

            if not feed_title:
                # Cell without any titled link: there is no feed to emit.
                continue
            index.append((feed_title, section_articles))

        return index

    def print_version(self, url):
        """Return the URL of the print-friendly page for *url*.

        The last path component is prefixed with ``vi_``. URLs containing
        'wcarton' yield None (presumably cartoon pages that have no print
        version, so that they get skipped - TODO confirm).
        """
        if 'wcarton' in url:
            return None

        main, _sep, page = url.rpartition('/')

        return main + '/vi_' + page

    def preprocess_html(self, soup):
        """Clean up a downloaded article and return the modified soup.

        Removes the first table of the page (if any), drops paragraphs with
        no text, and turns the ``<font color="#0F046A">`` element into an
        ``<h1>`` stripped of its presentational attributes.
        """
        table = soup.find('table')
        if table is not None:
            table.extract()

        for p in soup.findAll('p'):
            if self.tag_to_string(p).strip() == '':
                p.extract()

        tag = soup.find('font', attrs={'color': '#0F046A'})
        if tag is not None:
            # dict() accepts both the list of pairs used by BeautifulSoup 3
            # and the dict used by bs4, avoiding the BS3-only has_key().
            present = dict(tag.attrs)
            # 'helvetica,' and 'sans-serif' are kept from the original list;
            # presumably stray attributes from an unquoted face="..." value
            # in the site's markup - TODO confirm.
            for attr in ['color', 'face', 'helvetica,', 'sans-serif', 'size']:
                if attr in present:
                    del tag[attr]
            tag.name = 'h1'

        return soup