Read Outlook India on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Weekly news and current affairs in India
Language: en
Requires Subscription: No, it's available as a free ebook
Schedule: Every morning
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class OutlookIndia(BasicNewsRecipe):
    """calibre news recipe that collects the articles of the current
    Outlook India issue.

    The issue is located by scraping the site's issue listing
    (``issues.aspx``); the articles linked from the issue page are returned
    as a single feed named ``'Current Issue'``.
    """

    title = 'Outlook India'
    __author__ = 'Kovid Goyal and Sujata Raman'
    description = 'Weekly news and current affairs in India'
    no_stylesheets = True
    encoding = 'utf-8'
    language = 'en_IN'

    # Prefix prepended to every href scraped from the site.
    _SITE_ROOT = 'http://www.outlookindia.com/'

    extra_css = '''
        body{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .fspheading{color:#AF0E25 ; font-family:"Times New Roman",Times,serif; font-weight:bold ; font-size:large; }
        .fspauthor{color:#AF0E25; font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .fspintro{color:#666666; font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .fspchannelhome{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
        .fspphotocredit{color:#999999; font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        '''

    # Only the <div> elements carrying one of these ids are kept from each
    # downloaded article page.
    keep_only_tags = [
        dict(name='div', attrs={'id': [
            "ctl00_cphpagemiddle_reparticle_ctl00_divfullstorytext",
            "ctl00_cphpagemiddle_reparticle_ctl00_divartpic",
            "ctl00_cphpagemiddle_reparticle_ctl00_divfspheading",
            "ctl00_cphpagemiddle_reparticle_ctl00_divartpiccaption",
            "ctl00_cphpagemiddle_reparticle_ctl00_divartpiccredit",
            "ctl00_cphpagemiddle_reparticle_ctl00_divfspintro",
            "ctl00_cphpagemiddle_reparticle_ctl00_divartbyline",
            "ctl00_cphpagemiddle_divglitteratiregulars",
            "ctl00_cphpagemiddle_divcartoon",
            "feedbackslatestfirst",
            "ctl00_cphpagemiddle_divregulars",
            "ctl00_cphpagemiddle_divquotes",
        ]}),
    ]

    remove_tags = [dict(name=['script', 'object', 'hr'])]

    def get_browser(self):
        """Return the default recipe browser with cookie handling disabled."""
        br = BasicNewsRecipe.get_browser(self)
        # This site sends article titles in the cookie which occasionally
        # contain non ascii characters causing httplib to fail. Instead just
        # disable cookies as they're not needed for download. Proper solution
        # would be to implement a unicode aware cookie jar
        br.set_cookiejar(None)
        return br

    def _collect_articles(self, soup, attrs):
        """Build one article dict for every ``<a>`` in *soup* matching *attrs*.

        Anchors without an ``href`` attribute are still listed, with an
        empty ``'url'``. ``'date'`` and ``'description'`` are always empty
        strings.
        """
        articles = []
        for link in soup.findAll('a', attrs=attrs):
            # Tag.get returns None for a missing attribute; it replaces the
            # deprecated ``has_key`` lookup without changing the result.
            href = link.get('href')
            url = self._SITE_ROOT + href if href is not None else ''
            articles.append({
                'title': self.tag_to_string(link),
                'date': '',
                'url': url,
                'description': '',
            })
        return articles

    def parse_index(self):
        """Locate the current issue and list its articles.

        Sets ``self.cover_url`` when a cover image is found along the way.

        Returns a one-element list ``[('Current Issue', articles)]`` where
        *articles* is a list of dicts with the keys ``title``, ``date``,
        ``url`` and ``description``. Returns ``None`` when either expected
        container element is missing from the fetched page.
        """
        soup = self.index_to_soup(self._SITE_ROOT + 'issues.aspx')

        # Find the cover picture. The class name is matched exactly as
        # spelt here ('cententcellpadding').
        container = soup.find(
            'div', attrs={'class': re.compile('cententcellpadding')})
        if container is None:
            # NOTE(review): None is preserved from the original code;
            # confirm how the caller copes with a non-list result.
            return None
        # href=True skips anchors that carry no link target, which would
        # otherwise raise KeyError on the subscript below.
        link = container.find('a', href=True)
        if link is not None:
            soup = self.index_to_soup(self._SITE_ROOT + link['href'])
            cover = soup.find(
                'img',
                attrs={'id': "ctl00_cphpagemiddle_dlissues_ctl00_imgcoverpic"},
                src=True)
            if cover is not None:
                self.cover_url = cover['src']

        # Find the current issue: follow the first link inside the issues
        # table of whichever page is loaded at this point.
        issues = soup.find(
            'table', attrs={'id': re.compile('ctl00_cphpagemiddle_dlissues')})
        if issues is None:
            return None
        link = issues.find('a', href=True)
        if link is not None:
            soup = self.index_to_soup(self._SITE_ROOT + link['href'])

        # Find the articles in the current issue: first the links selected
        # by CSS class, then the two links selected by element id.
        articles = self._collect_articles(
            soup, {'class': ['contentpgsubheadinglink', "contentpgtext6"]})
        articles.extend(self._collect_articles(
            soup,
            {'id': ["ctl00_cphpageleft_hlglitterati",
                    "ctl00_cphpageleft_hlposcape"]}))
        return [('Current Issue', articles)]

    def preprocess_html(self, soup):
        """Strip every inline ``style`` attribute, then hand the soup to
        ``adeify_images`` and return its result."""
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first):
        """Flatten layout markup in the downloaded page.

        Removes ``align="left"`` attributes and renames table, list, font
        and span elements to ``div``. *first* is accepted for the hook
        signature but not used. Returns the modified *soup*.
        """
        for item in soup.findAll(align="left"):
            del item['align']
        for tag in soup.findAll(
                name=['table', 'tr', 'td', 'tbody', 'ul', 'li', 'font', 'span']):
            tag.name = 'div'
        return soup

    # The method was previously misspelt, so the framework hook was never
    # overridden. The old name is kept so existing references keep working.
    postrocess_html = postprocess_html