Read Honolulu Star Advertiser on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Latest national and local Hawaii sports news
Language: en
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
__license__ = 'GPL v3'
__copyright__ = '2011, M. Ching modified from work 2009-2011 Darko Miletic <darko.miletic at gmail.com>'
'''
staradvertiser.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Starbulletin(BasicNewsRecipe):
    """Recipe that downloads the Honolulu Star-Advertiser from staradvertiser.com.

    Articles are collected from the RSS feeds listed in ``feeds``. When a
    username and password are configured, ``get_browser`` logs in to the site
    before anything is fetched.
    """

    title = 'Honolulu Star-Advertiser'
    __author__ = 'Darko Miletic'
    description = 'Latest national and local Hawaii sports news'
    publisher = 'Honolulu Star-Advertiser'
    category = 'news, Honolulu, Hawaii'
    oldest_article = 2
    needs_subscription = True
    max_articles_per_feed = 100
    language = 'en'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    publication_type = 'newspaper'
    masthead_url = 'http://media.staradvertiser.com/designimages/star-advertiser-logo-small.gif'

    # Custom stylesheet, currently disabled.
    # extra_css = """
    # body{font-family: Verdana,Arial,Helvetica,sans-serif}
    # h1,.brown,.hsa_postCredit{color: #663300}
    # .storyDeck{font-size: 1.2em; font-weight: bold}
    # img{display: block}
    # """

    # Metadata handed to the conversion step; reuses the class-level values
    # defined above so the two cannot drift apart.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # Elements to retain: the story title, the date stamp / credit / deck
    # lines and the story text, plus their "article-important" variants.
    keep_only_tags = [
        dict(attrs={'id': 'hsa_storyTitle'}),
        dict(attrs={'id': 'hsa_storyTitle article-important'}),
        dict(attrs={'class': ['hsa_dateStamp', 'hsa_postCredit', 'storyDeck']}),
        dict(name='span', attrs={'class': ['hsa_dateStamp', 'hsa_postCredit']}),
        dict(name='span', attrs={'class': ['hsa_dateStamp article-important', 'hsa_postCredit article-important']}),
        dict(name='div', attrs={'class': 'storytext article-important'}),
        dict(name='div', attrs={'class': 'storytext'}),
    ]

    remove_tags = [
        # 'span' is deliberately absent from this list so that the author and
        # timestamp spans selected by keep_only_tags survive.
        dict(name=['object', 'link', 'script', 'meta', 'base', 'iframe']),
        dict(attrs={'class': ['insideStoryImage', 'insideStoryAd']}),
        dict(attrs={'name': 'fb_share'}),
    ]

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Breaking News', u'http://www.staradvertiser.com/news/breaking/index.rss'),
        (u'News', u'http://www.staradvertiser.com/newspremium/index.rss'),
        (u'Business', u'http://www.staradvertiser.com/businesspremium/index.rss'),
        (u'Sports', u'http://www.staradvertiser.com/sportspremium/index.rss'),
        (u'Features', u'http://www.staradvertiser.com/featurespremium/index.rss'),
    ]

    def get_browser(self):
        """Return a browser, logged in to the site when credentials are set.

        The login form is only submitted when both ``self.username`` and
        ``self.password`` are not ``None``; otherwise the plain browser from
        the base class is returned unchanged.
        """
        # The base implementation is called through the class, so the
        # instance has to be passed explicitly.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://www.staradvertiser.com/manage/Login/')
            br.select_form(name='loginForm')
            br['email'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def preprocess_html(self, soup):
        """Clean up a parsed article and return the same ``soup`` object.

        * Inline ``style`` attributes are dropped from every element.
        * Links are flattened: a link with a single string child is replaced
          by that string, a link wrapping an image becomes a bare ``div``,
          and any other link is replaced by its text content.
        * Images without an ``alt`` attribute get ``alt="image"``.
        """
        for item in soup.findAll(style=True):
            del item['style']

        for item in soup.findAll('a'):
            if item.string is not None:
                item.replaceWith(item.string)
            elif item.find('img'):
                # Keep the image but drop the hyperlink around it.
                item.name = 'div'
                # Reset to an empty container of whatever type the parser
                # uses for attributes, rather than hard-coding a list.
                item.attrs = type(item.attrs)()
            else:
                item.replaceWith(self.tag_to_string(item))

        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup