Read Discover Magazine on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Discover is an American science magazine that publishes articles about science for a general audience. The monthly magazine was launched by Time Inc.
Language: en
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
discovermagazine.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DiscoverMagazine(BasicNewsRecipe):
    """News recipe for discovermagazine.com built from its topic/column RSS feeds.

    On top of the declarative settings below, the recipe stitches multi-page
    articles into one document by following the "next" pager link, and strips
    page furniture (menus, teasers, adverts, layout tables) from the result.
    """

    title = u'Discover Magazine'
    description = u'Science, Technology and the Future'
    __author__ = 'Starson17'
    language = 'en'
    oldest_article = 33
    max_articles_per_feed = 20
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    encoding = 'utf-8'
    extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'

    # Elements dropped from every page: navigation, subscription teasers,
    # side columns and the 1x1 spacer image.
    remove_tags = [
        dict(name='div', attrs={'id': ['searchModule', 'mainMenu', 'tool-box']}),
        dict(name='div', attrs={'id': ['footer', 'teaser', 'already-subscriber', 'teaser-suite', 'related-articles']}),
        dict(name='div', attrs={'class': ['column']}),
        dict(name='img', attrs={'src': 'http://discovermagazine.com/onebyone.gif'}),
    ]

    # The pager bar is the cut-off marker; preprocess_html() removes the bar
    # itself once the follow-up pages have been merged.
    remove_tags_after = [dict(name='div', attrs={'class': 'listingBar'})]

    def append_page(self, soup, appendtag, position):
        """Fetch the pages that follow ``soup`` and splice them into ``appendtag``.

        Looks for a ``<span class="next">`` pager in ``soup``. If it has a
        link, the linked page is downloaded, its own follow-up pages are
        merged into it recursively, and its ``<div class="articlebody">`` is
        inserted into ``appendtag``.

        Pagination stops quietly (instead of raising) when the pager has no
        usable link or the next page has no article body, so one malformed
        page does not discard what was already collected.

        :param soup: parsed page to search for the "next" pager.
        :param appendtag: tag that receives the next page's article body.
        :param position: child index in ``appendtag`` at which to insert.
        """
        pager = soup.find('span', attrs={'class': 'next'})
        if pager is None:
            return

        link = pager.a
        nexturl = link.get('href') if link is not None else None
        if not nexturl:
            # Pager present but without an <a href=...>: nothing to follow.
            return

        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div', attrs={'class': 'articlebody'})
        if texttag is None:
            # Next page has no recognisable article body; stop here.
            return

        # Pull in page N+2, N+3, ... at the end of this page's body before
        # this body is itself moved into the caller's document.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Add language/charset meta tags, merge follow-up pages, drop the pager.

        :param soup: parsed article page; modified in place.
        :return: the same ``soup`` object.
        """
        mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0, mtag)
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div', attrs={'class': 'listingBar'})
        if pager:
            pager.extract()
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Strip sample notices, adverts, rules and line breaks; flatten tables.

        :param soup: parsed article page; modified in place.
        :param first_fetch: not used by this method.
        :return: the same ``soup`` object.
        """
        # Remove the element containing the "This article is a sample" notice.
        for tag in soup.findAll(text=re.compile(r'^This article is a sample')):
            tag.parent.extract()
        # Turn layout tables into plain divs.
        for tag in soup.findAll(['table', 'tr', 'td']):
            tag.name = 'div'
        for tag in soup.findAll('div', attrs={'class': 'discreet advert'}):
            tag.extract()
        for tag in soup.findAll('hr', attrs={'size': '1'}):
            tag.extract()
        for tag in soup.findAll('br'):
            tag.extract()
        return soup

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
        (u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
        (u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'),
        (u'Space', u'http://discovermagazine.com/topics/space/rss.xml'),
        (u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'),
        (u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'),
        (u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'),
        (u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'),
        (u"20 Things you didn't know about...", u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'),
        (u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'),
        (u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'),
        (u'What is This', u'http://discovermagazine.com/columns/what-is-this/rss.xml'),
        (u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'),
        (u'Think Tech', u'http://discovermagazine.com/columns/think-tech/rss.xml'),
        (u'Future Tech', u'http://discovermagazine.com/columns/future-tech/rss.xml'),
        (u'Discover Interview', u'http://discovermagazine.com/columns/discover-interview/rss.xml'),
    ]