tv2feed.py (6473B) - raw


      1 #!/usr/bin/env python3
      2 # TV2Feed
      3 # Copyright (C) 2021 Oscar Benedito <oscar@oscarbenedito.com>
      4 #
      5 # This program is free software: you can redistribute it and/or modify
      6 # it under the terms of the GNU Affero General Public License as
      7 # published by the Free Software Foundation, either version 3 of the
      8 # License, or (at your option) any later version.
      9 #
     10 # This program is distributed in the hope that it will be useful,
     11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 # GNU Affero General Public License for more details.
     14 #
     15 # You should have received a copy of the GNU Affero General Public License
     16 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
     17 
     18 # Follow TV shows using Atom feeds!
     19 #
     20 # How to use
     21 # ----------
     22 #
     23 # Go to <https://www.tvmaze.com> and search the TV shows you want to follow.
     24 # Write down their IDs (the number in the URL) and then run the following:
     25 #
     26 #     tv2feed id1 id2 ...
     27 #
     28 # Or run it multiple times to get one feed per TV show. The feeds are expected
     29 # to go under:
     30 #
     31 #   - `https://<domain>/<path>/feed`: if multiple shows specified
     32 #   - `https://<domain>/<path>/show/<show_id>`: if only one show specified
     33 #
     34 # That is because the feed URIs will point there. Note that if only one show is
     35 # specified, TV2Feed will generate it assuming there is one feed per show (which
     36 # will make the feed title the same as the show's).
     37 #
     38 # The API where the data is gathered from caches results for one hour, so you
     39 # can add cron jobs to run every hour:
     40 #
     41 #     0 * * * * /usr/local/bin/tv2feed 210 431 > /srv/www/tv2feed/feed
     42 #
     43 # or, alternatively (could also be scripted with just one cronjob):
     44 #
     45 #     0 * * * * /usr/local/bin/tv2feed 210 > /srv/www/tv2feed/show/210
     46 #     0 * * * * /usr/local/bin/tv2feed 431 > /srv/www/tv2feed/show/431
     47 #
     48 # Other notes
     49 # -----------
     50 #
     51 # Each show will make two API requests, and there is a limit of 20 requests
     52 # every 10 seconds (for contents that are not cached). If you are following many
# shows and an API call returns a 429 error code, this script will sleep for 10
# seconds and try again; if it fails again (or the error code is not 429), it
# will raise an error and exit.
     56 #
# All data generated is gathered from [TVmaze](https://www.tvmaze.com)'s API.
     58 
     59 
     60 import sys
     61 import urllib.request
     62 import json
     63 import datetime
     64 import time
     65 
     66 
     67 # edit these variables
     68 domain = 'oscarbenedito.com'
     69 path = 'projects/tv2feed'   # leave empty for content under https://domain/
     70 entries_per_show = 10
     71 shows = sys.argv[1:]        # alternatively, hardcode them in the script
     72 # until here!
     73 
     74 version = '0.3.1'           # TV2Feed version
     75 url_base = 'https://{}/{}'.format(domain, path + '/' if path != '' else '')
     76 id_base = 'tag:{},2021-05-19:/{}'.format(domain, path + '/' if path != '' else '')
     77 info_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}'
     78 episodes_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}/episodes?specials=1'
     79 
     80 
     81 # basic sanitizing: convert to string and escape XML
     82 def san(s):
     83     return str(s).replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;').replace('\'', '&apos;').replace('"', '&quot;')
     84 
     85 
     86 def api_call(url):
     87     try:
     88         response = urllib.request.urlopen(url)
     89     except urllib.error.HTTPError as e:
     90         if e.code == 429:
     91             print('Error 429. Sleeping for 10 seconds and retrying...', file=sys.stderr)
     92             time.sleep(10)
     93             response = urllib.request.urlopen(url)
     94         else:
     95             raise
     96 
     97     return json.load(response)
     98 
     99 
    100 if len(shows) < 1:
    101     sys.exit('Usage: tv2feed id1 [id2 [id3 ...]]')
    102 
    103 now = datetime.datetime.now(datetime.timezone.utc).isoformat()
    104 feed_data = []
    105 for show in shows:
    106     show_info = api_call(info_endpoint_tmpl.format(show))
    107     episodes = api_call(episodes_endpoint_tmpl.format(show))
    108 
    109     episodes.sort(reverse=True, key=lambda x: x['airstamp'])
    110 
    111     countdown = entries_per_show
    112     for episode in filter(lambda x: x['airstamp'] < now, episodes):
    113         feed_data.append({
    114             'airstamp': episode['airstamp'],
    115             'id': episode['id'],
    116             'name': episode['name'],
    117             'number': episode['number'],
    118             'season': episode['season'],
    119             'show_id': show_info['id'],
    120             'show_name': show_info['name'],
    121             'summary': episode['summary'],
    122             'url': episode['url']
    123         })
    124         countdown -= 1
    125         if countdown == 0:
    126             break
    127 
    128     if show_info['status'] != 'Running':
    129         feed_data.append({
    130             'airstamp': now,
    131             'id': 'status/' + show_info['status'],
    132             'name': 'Show status: {}'.format(show_info['status']),
    133             'number': None,
    134             'season': None,
    135             'show_id': show_info['id'],
    136             'show_name': show_info['name'],
    137             'summary': '<p>Show status: {}.</p>'.format(show_info['status']),
    138             'url': show_info['url']
    139         })
    140 
    141 if len(shows) > 1:
    142     feed_title = 'TV2Feed'
    143     feed_id = id_base + 'feed'
    144     feed_url = url_base + 'feed'
    145 else:
    146     feed_title = san(feed_data[0]['show_name'])
    147     feed_id = id_base + 'show/' + san(feed_data[0]['show_id'])
    148     feed_url = url_base + 'show/' + san(feed_data[0]['show_id'])
    149 
    150 ret = '<?xml version="1.0" encoding="utf-8"?>\n'
    151 ret += '<feed xmlns="http://www.w3.org/2005/Atom">'
    152 ret += '<link href="{}" rel="self" />'.format(feed_url)
    153 ret += '<title>{}</title>'.format(feed_title)
    154 ret += '<author><name>TV2Feed</name></author>'
    155 ret += '<updated>{}</updated>'.format(now)
    156 ret += '<id>' + feed_id + '.atom</id>'
    157 ret += '<generator uri="https://oscarbenedito.com/projects/tv2feed/" version="{}">TV2Feed</generator>'.format(version)
    158 
    159 for episode in sorted(feed_data, reverse=True, key=lambda x: x['airstamp']):
    160     season = 'S' + san(episode['season']) if episode['season'] is not None else ''
    161     number = 'E' + san(episode['number']) if episode['number'] is not None else ''
    162     sn = season + number + ' ' if season + number != '' else ''
    163     ret += '<entry>'
    164     ret += '<title>{} - {}{}</title>'.format(san(episode['show_name']), sn, san(episode['name']))
    165     ret += '<link rel="alternate" href="{}" />'.format(san(episode['url']))
    166     ret += '<id>' + id_base + 'show/' + san(episode['show_id']) + '/episode/' + san(episode['id']) + '</id>'
    167     ret += '<updated>{}</updated>'.format(san(episode['airstamp']))
    168     ret += '<summary type="html">{}</summary>'.format(san(episode['summary']))
    169     ret += '</entry>'
    170 
    171 ret += '</feed>'
    172 print(ret)