TV2Feed to v0.2 - osf - Oscar's Standalone Files.

commit 946691bf550e14df673577f455621d485b891ed9
parent c9ed84a8c38be5ba063638121ca23d0377fd887e
Author: Oscar Benedito <oscar@oscarbenedito.com>
Date:   Thu, 20 May 2021 17:18:09 +0200

TV2Feed to v0.2

Gracefully handle 429 HTTP responses and some code style changes.

Diffstat:
M tv2feed.py  | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------

1 file changed, 55 insertions(+), 31 deletions(-)
diff --git a/tv2feed.py b/tv2feed.py
@@ -15,7 +15,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-# Follow TV shows using web feeds!
+# Follow TV shows using Atom feeds!
 #
 # How to use: go to https://www.tvmaze.com and search the shows you want to
 # follow. Write down their IDs (the show number in the URL) and then run the
@@ -23,71 +23,95 @@
 #
 #     tv2feed id1 id2 ...
 #
-# Or run it multiple times to get one feed per TV show.
+# Or run it multiple times to get one feed per TV show. The feeds are expected
+# to go under:
 #
-# Note 1: Keep in mind that each show will make two API requests, and there is a
-# limit of 20 requests every 10 seconds (for contents that are not cached). If
-# you are following many shows, you might want to spread out the API calls so
-# you don't hit the rate limit.
-#
-# Note 2: The feeds are expected to go under:
-#
-#   - https://<domain>/<path>/feed: if multiple feeds specified
+#   - https://<domain>/<path>/feed: if multiple shows specified
 #   - https://<domain>/<path>/show/<show_id>: if only one show specified
 #
 # Also note that if only one feed is specified, TV2Feed will generate the feed
 # assuming there is one feed per show (personalizing the title as well).
 #
-# All the data generated is gathered from https://www.tvmaze.com and its API.
+# Keep in mind that each show will make two API requests, and there is a limit
+# of 20 requests every 10 seconds (for contents that are not cached). If you are
+# following many shows, this script will sleep for 10 seconds and try again if
+# an API call returns a 429 error code, if it fails again (or the error code is
+# not 429), it will raise an error and exit.
+#
+# The API where the data is gathered from caches results for one hour, so you
+# can add cron jobs to run every hour:
+#
+#     0 * * * * /usr/local/bin/tv2feed 210 431 > /srv/www/tv2feed/feed
+#
+# or, alternatively (the following could also be scripted with just one cronjob):
+#
+#     0 * * * * /usr/local/bin/tv2feed 210 > /srv/www/tv2feed/show/1
+#     0 * * * * /usr/local/bin/tv2feed 431 > /srv/www/tv2feed/show/2
+#
+# All data generated is gathered from https://www.tvmaze.com and its API.
 
-# TODO allow empty path
 
 import sys
 import urllib.request
 import json
 import datetime
+import time
 
 
+# edit these variables
 domain = 'oscarbenedito.com'
-path = 'tv2feed'            # leave empty for content under https://domain/
+path = 'projects/tv2feed'   # leave empty for content under https://domain/
 entries_per_show = 10
 shows = sys.argv[1:]        # alternatively, hardcode them in the script
+# until here!
 
-version = '0.1'             # TV2Feed version
+version = '0.2'             # TV2Feed version
 url_base = 'https://{}/{}'.format(domain, path + '/' if path != '' else '')
 id_base = 'tag:{},2021-05-19:/{}'.format(domain, path + '/' if path != '' else '')
 info_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}'
 episodes_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}/episodes?specials=1'
 
 
-# basic sanitizing (just escaping XML) and convert to string if needed
+# basic sanitizing: convert to string and escape XML
 def san(s):
     return str(s).replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;').replace('\'', '&apos;').replace('"', '&quot;')
 
 
+def api_call(url):
+    try:
+        response = urllib.request.urlopen(url)
+    except urllib.error.HTTPError as e:
+        if e.code == 429:
+            print('Error 429. Sleeping for 10 seconds and retrying...', file=sys.stderr)
+            time.sleep(10)
+            response = urllib.request.urlopen(url)
+        else:
+            raise
+
+    return json.load(response)
+
+
 if len(shows) < 1:
     sys.exit('Usage: tv2feed id1 [id2 [id3 ...]]')
 
 now = datetime.datetime.now(datetime.timezone.utc).isoformat()
 feed_data = []
 for show in shows:
-    response = urllib.request.urlopen(info_endpoint_tmpl.format(show))
-    info = json.load(response)
-    response = urllib.request.urlopen(episodes_endpoint_tmpl.format(show))
-    episodes = json.load(response)
+    show_info = api_call(info_endpoint_tmpl.format(show))
+    episodes = api_call(episodes_endpoint_tmpl.format(show))
 
-    episodes.sort(reverse=True, key=lambda x : x['airstamp'])
+    episodes.sort(reverse=True, key=lambda x: x['airstamp'])
 
     countdown = entries_per_show
-    for episode in filter(lambda x : x['airstamp'] < now, episodes):
+    for episode in filter(lambda x: x['airstamp'] < now, episodes):
         feed_data.append({
             'airstamp': episode['airstamp'],
             'id': episode['id'],
             'name': episode['name'],
             'number': episode['number'],
             'season': episode['season'],
-            'show_id': info['id'],
-            'show_name': info['name'],
+            'show_id': show_info['id'],
+            'show_name': show_info['name'],
             'summary': episode['summary'],
             'url': episode['url']
         })
@@ -95,17 +119,17 @@ for show in shows:
         if countdown == 0:
             break
 
-    if info['status'] != 'Running':
+    if show_info['status'] != 'Running':
         feed_data.append({
             'airstamp': now,
             'id': 'status',
-            'name': 'Show status: {}.'.format(info['status']),
+            'name': 'Show status: {}'.format(show_info['status']),
             'number': None,
             'season': None,
-            'show_id': info['id'],
-            'show_name': info['name'],
-            'summary': '<p>Show status: {}.</p>'.format(info['status']),
-            'url': info['url']
+            'show_id': show_info['id'],
+            'show_name': show_info['name'],
+            'summary': '<p>Show status: {}.</p>'.format(show_info['status']),
+            'url': show_info['url']
         })
 
 if len(shows) > 1:
@@ -117,7 +141,7 @@ else:
     feed_id = id_base + 'show/' + san(feed_data[0]['show_id'])
     feed_url = url_base + 'show/' + san(feed_data[0]['show_id'])
 
-ret  = '<?xml version="1.0" encoding="utf-8"?>\n'
+ret = '<?xml version="1.0" encoding="utf-8"?>\n'
 ret += '<feed xmlns="http://www.w3.org/2005/Atom">'
 ret += '<link href="{}" rel="self" />'.format(feed_url)
 ret += '<title>{}</title>'.format(feed_title)
@@ -126,7 +150,7 @@ ret += '<updated>{}</updated>'.format(now)
 ret += '<id>' + feed_id + '.atom</id>'
 ret += '<generator uri="https://oscarbenedito.com/projects/tv2feed/" version="{}">TV2Feed</generator>'.format(version)
 
-for episode in sorted(feed_data, reverse=True, key=lambda x : x['airstamp']):
+for episode in sorted(feed_data, reverse=True, key=lambda x: x['airstamp']):
     season = 'S' + san(episode['season']) if episode['season'] is not None else ''
     number = 'E' + san(episode['number']) if episode['number'] is not None else ''
     sn = season + number + ' ' if season + number != '' else ''