tv2feed.py (6460B) - raw


      1 #!/usr/bin/env python3
      2 # Copyright (C) 2021 Oscar Benedito <oscar@oscarbenedito.com>
      3 # License: Affero General Public License version 3 or later
      4 
      5 # TV2Feed: Follow TV shows using Atom feeds!
      6 #
      7 # How to use
      8 # ----------
      9 #
     10 # Go to <https://www.tvmaze.com> and search for the TV shows you want to follow.
     11 # Write down their IDs (the number in the URL) and then run the following:
     12 #
     13 #     tv2feed /etc/tv2feed.json
     14 #
     15 # where the JSON config file looks like this (without the "#" remarks):
     16 #
     17 #     {
     18 #       "domain": "example.com",
     19 #       "path": "tv2feed",  # leave empty for content under https://domain/
     20 #       "entries_per_show": 10,
     21 #       "shows": [
     22 #         {"id": 210, "name": "Show name for your own reference"},
     23 #         {"id": 431, "name": "Another show"}   # only id is used by TV2Feed
     24 #       ]
     25 #     }
     26 #
     27 # Or run it multiple times to get one feed per TV show. The feeds are expected
     28 # to go under:
     29 #
     30 #   - `https://<domain>/<path>/feed`: if multiple shows specified
     31 #   - `https://<domain>/<path>/show/<show_id>`: if only one show specified
     32 #
     33 # That is because the feed URIs will point there. Note that if only one show is
     34 # specified, TV2Feed will generate it assuming there is one feed per show (which
     35 # will make the feed title the same as the show's).
     36 #
     37 # The API where the data is gathered from caches results for one hour, so you
     38 # can add cron jobs to run every hour:
     39 #
     40 #     0 * * * * /usr/local/bin/tv2feed /etc/tv2feed.json > /srv/www/tv2feed/feed
     41 #
     42 # or, alternatively, use one config file per show:
     43 #
     44 #     0 * * * * /usr/local/bin/tv2feed /etc/tv2feed-210.json > /srv/www/tv2feed/show/210
     45 #     0 * * * * /usr/local/bin/tv2feed /etc/tv2feed-431.json > /srv/www/tv2feed/show/431
     46 #
     47 # Other notes
     48 # -----------
     49 #
     50 # Each show will make two API requests, and there is a limit of 20 requests
     51 # every 10 seconds (for contents that are not cached). If you are following many
     52 # shows, this script will sleep for 10 seconds and try again if an API call
     53 # returns a 429 error code. If it fails again (or the error code is not 429), it
     54 # will raise an error and exit.
     55 #
     56 # All data generated is gathered from TVmaze's API.
     57 
     58 
     59 import sys
     60 import urllib.request
     61 import json
     62 import datetime
     63 import time
     64 
     65 version = '1.1'             # TV2Feed version
     66 url_base = 'https://{}/{}'.format(domain, path + '/' if path != '' else '')
     67 id_base = 'tag:{},2021-05-19:/{}'.format(domain, path + '/' if path != '' else '')
     68 info_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}'
     69 episodes_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}/episodes?specials=1'
     70 
     71 
     72 # basic sanitizing: convert to string and escape XML
     73 def san(s):
     74     return str(s).replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;').replace('\'', '&apos;').replace('"', '&quot;')
     75 
     76 
     77 def api_call(url):
     78     try:
     79         response = urllib.request.urlopen(url)
     80     except urllib.error.HTTPError as e:
     81         if e.code == 429:
     82             print('Error 429. Sleeping for 10 seconds and retrying...', file=sys.stderr)
     83             time.sleep(10)
     84             response = urllib.request.urlopen(url)
     85         else:
     86             raise
     87 
     88     return json.load(response)
     89 
     90 
     91 if len(sys.argv) != 2:
     92     sys.exit('Usage: tv2feed config.json')
     93 
     94 with open(sys.argv[1], encoding='utf-8') as config_file:
     95     config = json.load(config_file)
     96 
     97 domain = config['domain']
     98 path = config['path']
     99 entries_per_show = config.get('entries_per_show', 10)
    100 shows = [show['id'] for show in config['shows']]
    101 
    102 
    103 if len(shows) < 1:
    104     sys.exit('Config must contain at least one show')
    105 
    106 now = datetime.datetime.now(datetime.timezone.utc).isoformat()
    107 feed_data = []
    108 for show in shows:
    109     show_info = api_call(info_endpoint_tmpl.format(show))
    110     episodes = api_call(episodes_endpoint_tmpl.format(show))
    111 
    112     episodes = list(filter(lambda x: x['airstamp'] is not None and x['airstamp'] < now, episodes))
    113     episodes.sort(reverse=True, key=lambda x: x['airstamp'])
    114 
    115     countdown = entries_per_show
    116     for episode in episodes:
    117         feed_data.append({
    118             'airstamp': episode['airstamp'],
    119             'id': episode['id'],
    120             'name': episode['name'],
    121             'number': episode['number'],
    122             'season': episode['season'],
    123             'show_id': show_info['id'],
    124             'show_name': show_info['name'],
    125             'summary': str(episode['summary']) + '<br>IMDB: ' + str(show_info['externals']['imdb']),
    126             'url': episode['url']
    127         })
    128         countdown -= 1
    129         if countdown == 0:
    130             break
    131 
    132     if show_info['status'] != 'Running':
    133         feed_data.append({
    134             'airstamp': now,
    135             'id': 'status/' + show_info['status'],
    136             'name': 'Show status: {}'.format(show_info['status']),
    137             'number': None,
    138             'season': None,
    139             'show_id': show_info['id'],
    140             'show_name': show_info['name'],
    141             'summary': '<p>Show status: {}.</p>'.format(show_info['status']),
    142             'url': show_info['url']
    143         })
    144 
    145 if len(shows) > 1:
    146     feed_title = 'TV2Feed'
    147     feed_id = id_base + 'feed'
    148     feed_url = url_base + 'feed'
    149 else:
    150     feed_title = san(feed_data[0]['show_name'])
    151     feed_id = id_base + 'show/' + san(feed_data[0]['show_id'])
    152     feed_url = url_base + 'show/' + san(feed_data[0]['show_id'])
    153 
    154 ret = '<?xml version="1.0" encoding="utf-8"?>\n'
    155 ret += '<feed xmlns="http://www.w3.org/2005/Atom">'
    156 ret += '<link href="{}" rel="self" />'.format(feed_url)
    157 ret += '<title>{}</title>'.format(feed_title)
    158 ret += '<author><name>TV2Feed</name></author>'
    159 ret += '<updated>{}</updated>'.format(now)
    160 ret += '<id>' + feed_id + '.atom</id>'
    161 ret += '<generator uri="https://oscarbenedito.com/projects/tv2feed/" version="{}">TV2Feed</generator>'.format(version)
    162 
    163 for episode in sorted(feed_data, reverse=True, key=lambda x: x['airstamp']):
    164     season = 'S' + san(episode['season']) if episode['season'] is not None else ''
    165     number = 'E' + san(episode['number']) if episode['number'] is not None else ''
    166     sn = season + number + ' ' if season + number != '' else ''
    167     ret += '<entry>'
    168     ret += '<title>{} - {}{}</title>'.format(san(episode['show_name']), sn, san(episode['name']))
    169     ret += '<link rel="alternate" href="{}" />'.format(san(episode['url']))
    170     ret += '<id>' + id_base + 'show/' + san(episode['show_id']) + '/episode/' + san(episode['id']) + '</id>'
    171     ret += '<updated>{}</updated>'.format(san(episode['airstamp']))
    172     ret += '<summary type="html">{}</summary>'.format(san(episode['summary']))
    173     ret += '</entry>'
    174 
    175 ret += '</feed>'
    176 print(ret)