tv2feed.py (5930B) - raw


      1 #!/usr/bin/env python3
      2 # Copyright (C) 2021 Oscar Benedito <oscar@oscarbenedito.com>
      3 # License: Affero General Public License version 3 or later
      4 
      5 # TV2Feed: Follow TV shows using Atom feeds!
      6 #
      7 # How to use
      8 # ----------
      9 #
     10 # Go to <https://www.tvmaze.com> and search the TV shows you want to follow.
     11 # Write down their IDs (the number in the URL) and then run the following:
     12 #
     13 #     tv2feed id1 id2 ...
     14 #
     15 # Or run it multiple times to get one feed per TV show. The feeds are expected
     16 # to go under:
     17 #
     18 #   - `https://<domain>/<path>/feed`: if multiple shows specified
     19 #   - `https://<domain>/<path>/show/<show_id>`: if only one show specified
     20 #
     21 # That is because the feed URIs will point there. Note that if only one show is
     22 # specified, TV2Feed will generate it assuming there is one feed per show (which
     23 # will make the feed title the same as the show's).
     24 #
     25 # The API where the data is gathered from caches results for one hour, so you
     26 # can add cron jobs to run every hour:
     27 #
     28 #     0 * * * * /usr/local/bin/tv2feed 210 431 > /srv/www/tv2feed/feed
     29 #
     30 # or, alternatively (could also be scripted with just one cronjob):
     31 #
     32 #     0 * * * * /usr/local/bin/tv2feed 210 > /srv/www/tv2feed/show/210
     33 #     0 * * * * /usr/local/bin/tv2feed 431 > /srv/www/tv2feed/show/431
     34 #
     35 # Other notes
     36 # -----------
     37 #
# Each show will make two API requests, and there is a limit of 20 requests
# every 10 seconds (for contents that are not cached). If an API call returns a
# 429 error code (likely when following many shows), this script will sleep for
# 10 seconds and try again. If the call fails again (or the error code is not
# 429), it will raise an error and exit.
     43 #
     44 # All data generated is gathered from TVmaze's API.
     45 
     46 
     47 import sys
     48 import urllib.request
     49 import json
     50 import datetime
     51 import time
     52 
     53 
     54 # edit these variables
     55 domain = 'oscarbenedito.com'
     56 path = 'projects/tv2feed'   # leave empty for content under https://domain/
     57 entries_per_show = 10
     58 shows = sys.argv[1:]        # alternatively, hardcode them in the script
     59 # until here!
     60 
     61 version = '1.0'             # TV2Feed version
     62 url_base = 'https://{}/{}'.format(domain, path + '/' if path != '' else '')
     63 id_base = 'tag:{},2021-05-19:/{}'.format(domain, path + '/' if path != '' else '')
     64 info_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}'
     65 episodes_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}/episodes?specials=1'
     66 
     67 
     68 # basic sanitizing: convert to string and escape XML
     69 def san(s):
     70     return str(s).replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;').replace('\'', '&apos;').replace('"', '&quot;')
     71 
     72 
     73 def api_call(url):
     74     try:
     75         response = urllib.request.urlopen(url)
     76     except urllib.error.HTTPError as e:
     77         if e.code == 429:
     78             print('Error 429. Sleeping for 10 seconds and retrying...', file=sys.stderr)
     79             time.sleep(10)
     80             response = urllib.request.urlopen(url)
     81         else:
     82             raise
     83 
     84     return json.load(response)
     85 
     86 
     87 if len(shows) < 1:
     88     sys.exit('Usage: tv2feed id1 [id2 [id3 ...]]')
     89 
     90 now = datetime.datetime.now(datetime.timezone.utc).isoformat()
     91 feed_data = []
     92 for show in shows:
     93     show_info = api_call(info_endpoint_tmpl.format(show))
     94     episodes = api_call(episodes_endpoint_tmpl.format(show))
     95 
     96     episodes = list(filter(lambda x: x['airstamp'] is not None and x['airstamp'] < now, episodes))
     97     episodes.sort(reverse=True, key=lambda x: x['airstamp'])
     98 
     99     countdown = entries_per_show
    100     for episode in episodes:
    101         feed_data.append({
    102             'airstamp': episode['airstamp'],
    103             'id': episode['id'],
    104             'name': episode['name'],
    105             'number': episode['number'],
    106             'season': episode['season'],
    107             'show_id': show_info['id'],
    108             'show_name': show_info['name'],
    109             'summary': episode['summary'],
    110             'url': episode['url']
    111         })
    112         countdown -= 1
    113         if countdown == 0:
    114             break
    115 
    116     if show_info['status'] != 'Running':
    117         feed_data.append({
    118             'airstamp': now,
    119             'id': 'status/' + show_info['status'],
    120             'name': 'Show status: {}'.format(show_info['status']),
    121             'number': None,
    122             'season': None,
    123             'show_id': show_info['id'],
    124             'show_name': show_info['name'],
    125             'summary': '<p>Show status: {}.</p>'.format(show_info['status']),
    126             'url': show_info['url']
    127         })
    128 
    129 if len(shows) > 1:
    130     feed_title = 'TV2Feed'
    131     feed_id = id_base + 'feed'
    132     feed_url = url_base + 'feed'
    133 else:
    134     feed_title = san(feed_data[0]['show_name'])
    135     feed_id = id_base + 'show/' + san(feed_data[0]['show_id'])
    136     feed_url = url_base + 'show/' + san(feed_data[0]['show_id'])
    137 
    138 ret = '<?xml version="1.0" encoding="utf-8"?>\n'
    139 ret += '<feed xmlns="http://www.w3.org/2005/Atom">'
    140 ret += '<link href="{}" rel="self" />'.format(feed_url)
    141 ret += '<title>{}</title>'.format(feed_title)
    142 ret += '<author><name>TV2Feed</name></author>'
    143 ret += '<updated>{}</updated>'.format(now)
    144 ret += '<id>' + feed_id + '.atom</id>'
    145 ret += '<generator uri="https://oscarbenedito.com/projects/tv2feed/" version="{}">TV2Feed</generator>'.format(version)
    146 
    147 for episode in sorted(feed_data, reverse=True, key=lambda x: x['airstamp']):
    148     season = 'S' + san(episode['season']) if episode['season'] is not None else ''
    149     number = 'E' + san(episode['number']) if episode['number'] is not None else ''
    150     sn = season + number + ' ' if season + number != '' else ''
    151     ret += '<entry>'
    152     ret += '<title>{} - {}{}</title>'.format(san(episode['show_name']), sn, san(episode['name']))
    153     ret += '<link rel="alternate" href="{}" />'.format(san(episode['url']))
    154     ret += '<id>' + id_base + 'show/' + san(episode['show_id']) + '/episode/' + san(episode['id']) + '</id>'
    155     ret += '<updated>{}</updated>'.format(san(episode['airstamp']))
    156     ret += '<summary type="html">{}</summary>'.format(san(episode['summary']))
    157     ret += '</entry>'
    158 
    159 ret += '</feed>'
    160 print(ret)