commit 946691bf550e14df673577f455621d485b891ed9
parent c9ed84a8c38be5ba063638121ca23d0377fd887e
Author: Oscar Benedito <oscar@oscarbenedito.com>
Date:   Thu, 20 May 2021 17:18:09 +0200

TV2Feed to v0.2

Gracefully handle 429 HTTP responses and some code style changes.

Diffstat:
Mtv2feed.py | 86++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
1 file changed, 55 insertions(+), 31 deletions(-)

diff --git a/tv2feed.py b/tv2feed.py @@ -15,7 +15,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -# Follow TV shows using web feeds! +# Follow TV shows using Atom feeds! # # How to use: go to https://www.tvmaze.com and search the shows you want to # follow. Write down their IDs (the show number in the URL) and then run the @@ -23,71 +23,95 @@ # # tv2feed id1 id2 ... # -# Or run it multiple times to get one feed per TV show. +# Or run it multiple times to get one feed per TV show. The feeds are expected +# to go under: # -# Note 1: Keep in mind that each show will make two API requests, and there is a -# limit of 20 requests every 10 seconds (for contents that are not cached). If -# you are following many shows, you might want to spread out the API calls so -# you don't hit the rate limit. -# -# Note 2: The feeds are expected to go under: -# -# - https://<domain>/<path>/feed: if multiple feeds specified +# - https://<domain>/<path>/feed: if multiple shows specified # - https://<domain>/<path>/show/<show_id>: if only one show specified # # Also note that if only one feed is specified, TV2Feed will generate the feed # assuming there is one feed per show (personalizing the title as well). # -# All the data generated is gathered from https://www.tvmaze.com and its API. +# Keep in mind that each show will make two API requests, and there is a limit +# of 20 requests every 10 seconds (for contents that are not cached). If you are +# following many shows, this script will sleep for 10 seconds and try again if +# an API call returns a 429 error code, if it fails again (or the error code is +# not 429), it will raise an error and exit. +# +# The API where the data is gathered from caches results for one hour, so you +# can add cron jobs to run every hour: +# +# 0 * * * * /usr/local/bin/tv2feed 210 431 > /srv/www/tv2feed/feed +# +# or, alternatively (the following could also be scripted with just one cronjob): +# +# 0 * * * * /usr/local/bin/tv2feed 210 > /srv/www/tv2feed/show/1 +# 0 * * * * /usr/local/bin/tv2feed 431 > /srv/www/tv2feed/show/2 +# +# All data generated is gathered from https://www.tvmaze.com and its API. -# TODO allow empty path import sys import urllib.request import json import datetime +import time +# edit these variables domain = 'oscarbenedito.com' -path = 'tv2feed' # leave empty for content under https://domain/ +path = 'projects/tv2feed' # leave empty for content under https://domain/ entries_per_show = 10 shows = sys.argv[1:] # alternatively, hardcode them in the script +# until here! -version = '0.1' # TV2Feed version +version = '0.2' # TV2Feed version url_base = 'https://{}/{}'.format(domain, path + '/' if path != '' else '') id_base = 'tag:{},2021-05-19:/{}'.format(domain, path + '/' if path != '' else '') info_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}' episodes_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}/episodes?specials=1' -# basic sanitizing (just escaping XML) and convert to string if needed +# basic sanitizing: convert to string and escape XML def san(s): return str(s).replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;').replace('\'', '&apos;').replace('"', '&quot;') +def api_call(url): + try: + response = urllib.request.urlopen(url) + except urllib.error.HTTPError as e: + if e.code == 429: + print('Error 429. Sleeping for 10 seconds and retrying...', file=sys.stderr) + time.sleep(10) + response = urllib.request.urlopen(url) + else: + raise + + return json.load(response) + + if len(shows) < 1: sys.exit('Usage: tv2feed id1 [id2 [id3 ...]]') now = datetime.datetime.now(datetime.timezone.utc).isoformat() feed_data = [] for show in shows: - response = urllib.request.urlopen(info_endpoint_tmpl.format(show)) - info = json.load(response) - response = urllib.request.urlopen(episodes_endpoint_tmpl.format(show)) - episodes = json.load(response) + show_info = api_call(info_endpoint_tmpl.format(show)) + episodes = api_call(episodes_endpoint_tmpl.format(show)) - episodes.sort(reverse=True, key=lambda x : x['airstamp']) + episodes.sort(reverse=True, key=lambda x: x['airstamp']) countdown = entries_per_show - for episode in filter(lambda x : x['airstamp'] < now, episodes): + for episode in filter(lambda x: x['airstamp'] < now, episodes): feed_data.append({ 'airstamp': episode['airstamp'], 'id': episode['id'], 'name': episode['name'], 'number': episode['number'], 'season': episode['season'], - 'show_id': info['id'], - 'show_name': info['name'], + 'show_id': show_info['id'], + 'show_name': show_info['name'], 'summary': episode['summary'], 'url': episode['url'] }) @@ -95,17 +119,17 @@ for show in shows: if countdown == 0: break - if info['status'] != 'Running': + if show_info['status'] != 'Running': feed_data.append({ 'airstamp': now, 'id': 'status', - 'name': 'Show status: {}.'.format(info['status']), + 'name': 'Show status: {}'.format(show_info['status']), 'number': None, 'season': None, - 'show_id': info['id'], - 'show_name': info['name'], - 'summary': '<p>Show status: {}.</p>'.format(info['status']), - 'url': info['url'] + 'show_id': show_info['id'], + 'show_name': show_info['name'], + 'summary': '<p>Show status: {}.</p>'.format(show_info['status']), + 'url': show_info['url'] }) if len(shows) > 1: @@ -117,7 +141,7 @@ else: feed_id = id_base + 'show/' + san(feed_data[0]['show_id']) feed_url = url_base + 'show/' + san(feed_data[0]['show_id']) -ret = '<?xml version="1.0" encoding="utf-8"?>\n' +ret = '<?xml version="1.0" encoding="utf-8"?>\n' ret += '<feed xmlns="http://www.w3.org/2005/Atom">' ret += '<link href="{}" rel="self" />'.format(feed_url) ret += '<title>{}</title>'.format(feed_title) @@ -126,7 +150,7 @@ ret += '<updated>{}</updated>'.format(now) ret += '<id>' + feed_id + '.atom</id>' ret += '<generator uri="https://oscarbenedito.com/projects/tv2feed/" version="{}">TV2Feed</generator>'.format(version) -for episode in sorted(feed_data, reverse=True, key=lambda x : x['airstamp']): +for episode in sorted(feed_data, reverse=True, key=lambda x: x['airstamp']): season = 'S' + san(episode['season']) if episode['season'] is not None else '' number = 'E' + san(episode['number']) if episode['number'] is not None else '' sn = season + number + ' ' if season + number != '' else ''