commit 946691bf550e14df673577f455621d485b891ed9
parent c9ed84a8c38be5ba063638121ca23d0377fd887e
Author: Oscar Benedito <oscar@oscarbenedito.com>
Date: Thu, 20 May 2021 17:18:09 +0200
TV2Feed to v0.2
Gracefully handle 429 HTTP responses and some code style changes.
Diffstat:
M | tv2feed.py | | | 86 | ++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------- |
1 file changed, 55 insertions(+), 31 deletions(-)
diff --git a/tv2feed.py b/tv2feed.py
@@ -15,7 +15,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-# Follow TV shows using web feeds!
+# Follow TV shows using Atom feeds!
#
# How to use: go to https://www.tvmaze.com and search the shows you want to
# follow. Write down their IDs (the show number in the URL) and then run the
@@ -23,71 +23,95 @@
#
# tv2feed id1 id2 ...
#
-# Or run it multiple times to get one feed per TV show.
+# Or run it multiple times to get one feed per TV show. The feeds are expected
+# to go under:
#
-# Note 1: Keep in mind that each show will make two API requests, and there is a
-# limit of 20 requests every 10 seconds (for contents that are not cached). If
-# you are following many shows, you might want to spread out the API calls so
-# you don't hit the rate limit.
-#
-# Note 2: The feeds are expected to go under:
-#
-# - https://<domain>/<path>/feed: if multiple feeds specified
+# - https://<domain>/<path>/feed: if multiple shows specified
# - https://<domain>/<path>/show/<show_id>: if only one show specified
#
# Also note that if only one show is specified, TV2Feed will generate the feed
# assuming there is one feed per show (personalizing the title as well).
#
-# All the data generated is gathered from https://www.tvmaze.com and its API.
+# Keep in mind that each show will make two API requests, and there is a limit
+# of 20 requests every 10 seconds (for contents that are not cached). If you are
+# following many shows, this script will sleep for 10 seconds and try again if
+# an API call returns a 429 error code; if it fails again (or the error code is
+# not 429), it will raise an error and exit.
+#
+# The API where the data is gathered from caches results for one hour, so you
+# can add cron jobs to run every hour:
+#
+# 0 * * * * /usr/local/bin/tv2feed 210 431 > /srv/www/tv2feed/feed
+#
+# or, alternatively (the following could also be scripted with just one cronjob):
+#
+# 0 * * * * /usr/local/bin/tv2feed 210 > /srv/www/tv2feed/show/1
+# 0 * * * * /usr/local/bin/tv2feed 431 > /srv/www/tv2feed/show/2
+#
+# All data generated is gathered from https://www.tvmaze.com and its API.
-# TODO allow empty path
import sys
import urllib.request
import json
import datetime
+import time
+# edit these variables
domain = 'oscarbenedito.com'
-path = 'tv2feed' # leave empty for content under https://domain/
+path = 'projects/tv2feed' # leave empty for content under https://domain/
entries_per_show = 10
shows = sys.argv[1:] # alternatively, hardcode them in the script
+# until here!
-version = '0.1' # TV2Feed version
+version = '0.2' # TV2Feed version
url_base = 'https://{}/{}'.format(domain, path + '/' if path != '' else '')
id_base = 'tag:{},2021-05-19:/{}'.format(domain, path + '/' if path != '' else '')
info_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}'
episodes_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}/episodes?specials=1'
-# basic sanitizing (just escaping XML) and convert to string if needed
+# basic sanitizing: convert to string and escape XML
def san(s):
    """Return ``str(s)`` with the five XML special characters escaped.

    ``&`` is replaced first so that the ampersands introduced by the other
    entity replacements are not escaped a second time.
    """
    return (
        str(s)
        .replace('&', '&amp;')
        .replace('>', '&gt;')
        .replace('<', '&lt;')
        .replace('\'', '&apos;')
        .replace('"', '&quot;')
    )
+def api_call(url):
+    """Fetch ``url`` and return its response body parsed as JSON.
+
+    If the request fails with HTTP 429, a message is printed to stderr and
+    the request is retried once after sleeping for 10 seconds. Any other
+    ``HTTPError``, or a failure of the retry itself, propagates to the caller.
+    """
+    try:
+        response = urllib.request.urlopen(url)
+    except urllib.error.HTTPError as e:
+        if e.code != 429:
+            raise
+        print('Error 429. Sleeping for 10 seconds and retrying...', file=sys.stderr)
+        time.sleep(10)
+        response = urllib.request.urlopen(url)
+
+    # Close the HTTP response as soon as the JSON body has been read.
+    with response:
+        return json.load(response)
+
+
if len(shows) < 1:
sys.exit('Usage: tv2feed id1 [id2 [id3 ...]]')
now = datetime.datetime.now(datetime.timezone.utc).isoformat()
feed_data = []
for show in shows:
- response = urllib.request.urlopen(info_endpoint_tmpl.format(show))
- info = json.load(response)
- response = urllib.request.urlopen(episodes_endpoint_tmpl.format(show))
- episodes = json.load(response)
+ show_info = api_call(info_endpoint_tmpl.format(show))
+ episodes = api_call(episodes_endpoint_tmpl.format(show))
- episodes.sort(reverse=True, key=lambda x : x['airstamp'])
+ episodes.sort(reverse=True, key=lambda x: x['airstamp'])
countdown = entries_per_show
- for episode in filter(lambda x : x['airstamp'] < now, episodes):
+ for episode in filter(lambda x: x['airstamp'] < now, episodes):
feed_data.append({
'airstamp': episode['airstamp'],
'id': episode['id'],
'name': episode['name'],
'number': episode['number'],
'season': episode['season'],
- 'show_id': info['id'],
- 'show_name': info['name'],
+ 'show_id': show_info['id'],
+ 'show_name': show_info['name'],
'summary': episode['summary'],
'url': episode['url']
})
@@ -95,17 +119,17 @@ for show in shows:
if countdown == 0:
break
- if info['status'] != 'Running':
+ if show_info['status'] != 'Running':
feed_data.append({
'airstamp': now,
'id': 'status',
- 'name': 'Show status: {}.'.format(info['status']),
+ 'name': 'Show status: {}'.format(show_info['status']),
'number': None,
'season': None,
- 'show_id': info['id'],
- 'show_name': info['name'],
- 'summary': '<p>Show status: {}.</p>'.format(info['status']),
- 'url': info['url']
+ 'show_id': show_info['id'],
+ 'show_name': show_info['name'],
+ 'summary': '<p>Show status: {}.</p>'.format(show_info['status']),
+ 'url': show_info['url']
})
if len(shows) > 1:
@@ -117,7 +141,7 @@ else:
feed_id = id_base + 'show/' + san(feed_data[0]['show_id'])
feed_url = url_base + 'show/' + san(feed_data[0]['show_id'])
-ret = '<?xml version="1.0" encoding="utf-8"?>\n'
+ret = '<?xml version="1.0" encoding="utf-8"?>\n'
ret += '<feed xmlns="http://www.w3.org/2005/Atom">'
ret += '<link href="{}" rel="self" />'.format(feed_url)
ret += '<title>{}</title>'.format(feed_title)
@@ -126,7 +150,7 @@ ret += '<updated>{}</updated>'.format(now)
ret += '<id>' + feed_id + '.atom</id>'
ret += '<generator uri="https://oscarbenedito.com/projects/tv2feed/" version="{}">TV2Feed</generator>'.format(version)
-for episode in sorted(feed_data, reverse=True, key=lambda x : x['airstamp']):
+for episode in sorted(feed_data, reverse=True, key=lambda x: x['airstamp']):
season = 'S' + san(episode['season']) if episode['season'] is not None else ''
number = 'E' + san(episode['number']) if episode['number'] is not None else ''
sn = season + number + ' ' if season + number != '' else ''