Read Yomiuri Online (Latest) on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Japanese traditional newspaper Yomiuri Online News
Language: ja
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.yomiuri.co.jp
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class YOLNews(BasicNewsRecipe):
    """calibre news recipe for the "latest news" page of Yomiuri Online.

    The article list is scraped from the HTML page at ``index`` by
    ``parse_index``; this class declares no RSS feed list of its own.
    """

    # --- Metadata describing the generated publication ---
    title = u'Yomiuri Online (Latest)'
    __author__ = 'Hiroshi Miura'
    description = 'Japanese traditional newspaper Yomiuri Online News'
    publisher = 'Yomiuri Online News'
    category = 'news, japan'
    language = 'ja'
    masthead_title = u'YOMIURI ONLINE'

    # --- Download limits (oldest_article is expressed in days by calibre) ---
    oldest_article = 1
    max_articles_per_feed = 50

    # --- Fetching ---
    # Encoding override used when decoding the site's pages.
    encoding = 'Shift_JIS'
    # Page scraped by parse_index() to discover articles.
    index = 'http://www.yomiuri.co.jp/latestnews/'

    # --- Article clean-up rules ---
    remove_javascript = True
    # Keep only the article container ...
    keep_only_tags = [{'class': "article-def"}]
    # ... drop the related-articles box and the "sbtns" element
    # (presumably share buttons -- confirm against the live markup) ...
    remove_tags = [
        {'class': "RelatedArticle"},
        {'class': "sbtns"},
    ]
    # ... and discard everything that follows the date line.
    remove_tags_after = {'class': "date-def"}

    def parse_feeds(self):
        """Return the base class's feeds with advertisement entries removed.

        An article is dropped when its URL contains the literal host
        fragment ``rssad.jp``.

        NOTE(review): calibre appears to prefer ``parse_index`` whenever a
        recipe defines it, so this override may never be invoked -- confirm
        against the calibre version in use.

        Returns:
            The list of feed objects produced by
            ``BasicNewsRecipe.parse_feeds``, each with its ``articles`` list
            filtered in place.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            # Slice assignment keeps the same list object on the feed, in
            # case anything else already holds a reference to it.
            curfeed.articles[:] = [
                article for article in curfeed.articles
                if 'rssad.jp' not in article.url
            ]
        return feeds

    @staticmethod
    def _trailing_text(tag, limit=3):
        """Collect up to *limit* text nodes chained after *tag*.

        Each lookup starts from the previously found node, mirroring a chain
        of ``findNextSibling(text=True)`` calls. The walk stops early instead
        of failing when fewer than *limit* text siblings exist.
        """
        pieces = []
        node = tag
        for _ in range(limit):
            node = node.findNextSibling(text=True)
            if node is None:
                break
            pieces.append(node)
        return pieces

    def parse_index(self):
        """Scrape the latest-news page into a single feed named ``latest``.

        Every ``<li>`` inside the first ``<ul class="list-def">`` that
        contains a link becomes one article. The article's ``date`` is the
        concatenation of up to three text nodes following the link.

        Returns:
            An empty list when the page has no ``<ul class="list-def">``;
            otherwise a one-element list ``[('latest', articles)]`` where
            each article is a dict with the keys ``title``, ``date``,
            ``url`` and ``description``.
        """
        feeds = []
        soup = self.index_to_soup(self.index)
        topstories = soup.find('ul', attrs={'class': 'list-def'})
        if not topstories:
            return feeds

        newsarticles = []
        for item in topstories.findAll('li'):
            link = item.find('a', href=True)
            if not link:
                continue
            newsarticles.append({
                'title': link.string,
                # Tolerates list items with fewer than three trailing text
                # nodes rather than raising AttributeError on None.
                'date': ''.join(self._trailing_text(link)),
                # hrefs are treated as site-relative paths.
                'url': 'http://www.yomiuri.co.jp' + link['href'],
                'description': '',
            })
        feeds.append(('latest', newsarticles))
        return feeds