#!/usr/bin/python -tt # -*- coding: utf-8 -*- from __future__ import print_function import codecs import os import sys import feedparser import re import sys feedparser._HTMLSanitizer.unacceptable_elements_with_end_tag.add('
') defenc = "utf-8" if sys.getdefaultencoding() == "ascii" else sys.getdefaultencoding() FedMag = ['http://fedoraplanet.org/rss20.xml'] if len(sys.argv) > 2: print ('Alone script or only one argument is allowed.') sys.exit(1) HTML = u""" """ for feed in map(feedparser.parse, FedMag): # We will parse last ten items HTML += u"""

Fedora Planet

""" cnt = 0 # Getting at least 6 items in case of some python exceptions. for item in feed["items"][:6]: if int(cnt) % 2 == 0: HTML += u"""
""" item.title = item.title.replace("&", "&") # If a blog post doesn't have a title for some reason, it breaks the way # we try to parse out the author and title. Let's say it's untitled in # order for it to appear on the page without breaking the script. if ":" in item.title: author, title = item.title.split(':', 1) else: author, title = item.title, "(untitled post)" link = item.links[0]['href'] # Remove image tag from beginning try: article_desc = '\n'.join(item.description.split('\n')[1:]) # remove html tags from description article_desc = re.sub('<[^<]+?>', '', article_desc) article_desc = re.sub('<', '<', article_desc) article_desc = re.sub('>', '>', article_desc) if len(article_desc) > 140: article_desc = ' '.join(article_desc.split()[0:25]) + '...' if not article_desc.startswith('

'): article_desc = '

%s

' % article_desc except AttributeError: print ('AttributeError. Going to next item') continue # we got # Tue, 20 Oct 2015 03:28:42 +0000 # But we expect # Tue, 20 Oct 2015 article_date = ' '.join(item.updated.split()[:4]) HTML += u""" """.format(article_url=link, article_title=title, article_desc=article_desc, article_date=article_date, author=author) cnt += 1 if int(cnt) % 2 == 0: HTML += u"""
""" # Condition if items were collected properly if int(cnt) > 3: break HTML += u"""
""" if len(sys.argv) == 1: INDEX_FILE = os.path.join('.', '_site', 'index.html') else: INDEX_FILE = sys.argv[1] with codecs.open(INDEX_FILE, 'r', 'utf8') as f: contents = [line for line in f.readlines()] if contents: with codecs.open(INDEX_FILE, 'w', 'utf8') as f: found_start = False for line in contents: if not found_start: f.write(line) if '' in line: f.write(HTML) found_start = True continue if '' in line: found_start = False f.write(line) continue #regexp = r'.*()(.*)().*' #print (re.search(regexp, contents, re.MULTILINE | re.DOTALL)) #contents = re.sub(regexp, r'\1 MYREPLACE \3', contents, re.DOTALL | re.MULTILINE)