Read Financial Times Print (UK) on your iPad or Kindle in no time. Click download to load the free ebook on your reader.
Check out all the available public recipes or write your own with these quick start guides. ReadBeam is built on calibre, so everything in the docs and the fora applies here as well.
Print issue - Paid content. The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy.
Language: en
Requires Subscription: Yes, requires a Financial Times Print (UK) subscription
Schedule Every morning
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.ft.com/uk-edition
'''
import datetime
from calibre.ptempfile import PersistentTemporaryFile
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class FinancialTimes(BasicNewsRecipe):
title = 'Financial Times - UK printed edition'
__author__ = 'Darko Miletic'
description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
publisher = 'The Financial Times Ltd.'
category = 'news, finances, politics, UK, World'
oldest_article = 2
language = 'en_GB'
max_articles_per_feed = 250
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
encoding = 'utf8'
publication_type = 'newspaper'
articles_are_obfuscated = True
temp_files = []
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
LOGIN2 = 'http://media.ft.com/h/subs3.html'
INDEX = 'http://www.ft.com/uk-edition'
PREFIX = 'http://www.ft.com'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
if self.username is not None and self.password is not None:
br.open(self.LOGIN2)
br.select_form(name='loginForm')
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
keep_only_tags = [
dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']})
,dict(name='div', attrs={'class':'standfirst'})
,dict(name='div', attrs={'id' :'storyContent'})
,dict(name='div', attrs={'class':['ft-story-body','index-detail']})
]
remove_tags = [
dict(name='div', attrs={'id':'floating-con'})
,dict(name=['meta','iframe','base','object','embed','link'])
,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
]
remove_attributes = ['width','height','lang']
extra_css = """
body{font-family: Georgia,Times,"Times New Roman",serif}
h2{font-size:large}
.ft-story-header{font-size: x-small}
.container{font-size:x-small;}
h3{font-size:x-small;color:#003399;}
.copyright{font-size: x-small}
img{margin-top: 0.8em; display: block}
.lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
.byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
"""
def get_artlinks(self, elem):
articles = []
count = 0
for item in elem.findAll('a',href=True):
count = count + 1
if self.test and count > 2:
return articles
rawlink = item['href']
if rawlink.startswith('http://'):
url = rawlink
else:
url = self.PREFIX + rawlink
urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
title = self.tag_to_string(item)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :urlverified
,'description':''
})
return articles
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.INDEX)
wide = soup.find('div',attrs={'class':'wide'})
if not wide:
return feeds
strest = wide.findAll('h3', attrs={'class':'section'})
if not strest:
return feeds
st = wide.find('h4',attrs={'class':'section-no-arrow'})
if st:
strest.insert(0,st)
count = 0
for item in strest:
count = count + 1
if self.test and count > 2:
return feeds
ftitle = self.tag_to_string(item)
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
feedarts = self.get_artlinks(item.parent.ul)
feeds.append((ftitle,feedarts))
return feeds
def preprocess_html(self, soup):
items = ['promo-box','promo-title',
'promo-headline','promo-image',
'promo-intro','promo-link','subhead']
for item in items:
for it in soup.findAll(item):
it.name = 'div'
it.attrs = []
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup
def get_cover_url(self):
cdate = datetime.date.today()
if cdate.isoweekday() == 7:
cdate -= datetime.timedelta(days=1)
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
def get_obfuscated_article(self, url):
count = 0
while (count < 10):
try:
response = self.browser.open(url)
html = response.read()
count = 10
except:
print "Retrying download..."
count += 1
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name