tv2feed.py (6460B) - raw
#!/usr/bin/env python3
# Copyright (C) 2021 Oscar Benedito <oscar@oscarbenedito.com>
# License: Affero General Public License version 3 or later

# TV2Feed: Follow TV shows using Atom feeds!
#
# How to use
# ----------
#
# Go to <https://www.tvmaze.com> and search the TV shows you want to follow.
# Write down their IDs (the number in the URL) and then run the following:
#
#     tv2feed /etc/tv2feed.json
#
# with contents such as:
#
#     {
#       "domain": "example.com",
#       "path": "tv2feed",    # leave empty for content under https://domain/
#       "entries_per_show": 10,
#       "shows": [
#         {"id": 210, "name": "Show name for your own reference"},
#         {"id": 431, "name": "Another show"}    # only id is used by TV2Feed
#       ]
#     }
#
# (The `#` annotations above are explanatory only: the file is parsed as strict
# JSON, so they must not appear in the real config file.)
#
# Or run it multiple times to get one feed per TV show. The feeds are expected
# to go under:
#
#   - `https://<domain>/<path>/feed`: if multiple shows specified
#   - `https://<domain>/<path>/show/<show_id>`: if only one show specified
#
# That is because the feed URIs will point there. Note that if only one show is
# specified, TV2Feed will generate it assuming there is one feed per show (which
# will make the feed title the same as the show's).
#
# The API where the data is gathered from caches results for one hour, so you
# can add cron jobs to run every hour:
#
#     0 * * * * /usr/local/bin/tv2feed /etc/tv2feed.json > /srv/www/tv2feed/feed
#
# or, alternatively, use one config file per show:
#
#     0 * * * * /usr/local/bin/tv2feed /etc/tv2feed-210.json > /srv/www/tv2feed/show/210
#     0 * * * * /usr/local/bin/tv2feed /etc/tv2feed-431.json > /srv/www/tv2feed/show/431
#
# Other notes
# -----------
#
# Each show will make two API requests, and there is a limit of 20 requests
# every 10 seconds (for contents that are not cached). If you are following many
# shows, this script will sleep for 10 seconds and try again if an API call
# returns a 429 error code; if it fails again (or the error code is not 429), it
# will raise an error and exit.
#
# All data generated is gathered from TVmaze's API.


import sys
import urllib.error
import urllib.request
import html
import json
import datetime
import time

version = '1.1'     # TV2Feed version
info_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}'
episodes_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}/episodes?specials=1'


def san(s):
    """Basic sanitizing: convert *s* to a string and escape it for XML.

    Escapes `&`, `<`, `>`, `"` and `'` so the result can be placed in element
    content or in a quoted attribute value.
    """
    return html.escape(str(s), quote=True)


def api_call(url):
    """Fetch *url* and return its body decoded as JSON.

    On an HTTP 429 response, a message is printed to stderr, the call sleeps
    for 10 seconds and the request is retried once. Any other HTTP error, or
    an error on the retry, propagates to the caller.
    """
    try:
        response = urllib.request.urlopen(url)
    except urllib.error.HTTPError as e:
        if e.code != 429:
            raise
        print('Error 429. Sleeping for 10 seconds and retrying...', file=sys.stderr)
        time.sleep(10)
        response = urllib.request.urlopen(url)

    # Close the connection as soon as the body has been parsed.
    with response:
        return json.load(response)


def feed_bases(domain, path):
    """Return `(url_base, id_base)` for the configured *domain* and *path*.

    Both end with a slash; an empty *path* places the feeds directly under the
    domain root.
    """
    prefix = path + '/' if path != '' else ''
    url_base = 'https://{}/{}'.format(domain, prefix)
    id_base = 'tag:{},2021-05-19:/{}'.format(domain, prefix)
    return url_base, id_base


def collect_entries(show_info, episodes, now, entries_per_show):
    """Build the feed entries (a list of dicts) for one show.

    Episodes without an airstamp, or whose airstamp is not earlier than *now*,
    are dropped; the rest are ordered newest first and cut to
    *entries_per_show* items. A value of 0 or less means "no limit". If the
    show's status is not 'Running', an extra entry announcing the status is
    appended, stamped with *now*.
    """
    # NOTE(review): airstamps are compared as strings, which presumably relies
    # on the API returning ISO 8601 timestamps in UTC -- confirm.
    aired = [ep for ep in episodes
             if ep['airstamp'] is not None and ep['airstamp'] < now]
    aired.sort(reverse=True, key=lambda ep: ep['airstamp'])

    limit = entries_per_show if entries_per_show > 0 else None
    imdb = str(show_info['externals']['imdb'])
    entries = [{
        'airstamp': ep['airstamp'],
        'id': ep['id'],
        'name': ep['name'],
        'number': ep['number'],
        'season': ep['season'],
        'show_id': show_info['id'],
        'show_name': show_info['name'],
        'summary': str(ep['summary']) + '<br>IMDB: ' + imdb,
        'url': ep['url'],
    } for ep in aired[:limit]]

    if show_info['status'] != 'Running':
        entries.append({
            'airstamp': now,
            'id': 'status/' + show_info['status'],
            'name': 'Show status: {}'.format(show_info['status']),
            'number': None,
            'season': None,
            'show_id': show_info['id'],
            'show_name': show_info['name'],
            'summary': '<p>Show status: {}.</p>'.format(show_info['status']),
            'url': show_info['url'],
        })

    return entries


def render_feed(feed_data, feed_title, feed_id, feed_url, id_base, now):
    """Return the Atom document for *feed_data* as a single string.

    *feed_title*, *feed_id* and *feed_url* are inserted as given, so the caller
    must already have escaped them. Entries are emitted newest first by
    airstamp, and every per-entry value is escaped with `san`.
    """
    parts = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<feed xmlns="http://www.w3.org/2005/Atom">',
        '<link href="{}" rel="self" />'.format(feed_url),
        '<title>{}</title>'.format(feed_title),
        '<author><name>TV2Feed</name></author>',
        '<updated>{}</updated>'.format(now),
        '<id>' + feed_id + '.atom</id>',
        '<generator uri="https://oscarbenedito.com/projects/tv2feed/" version="{}">TV2Feed</generator>'.format(version),
    ]

    for episode in sorted(feed_data, reverse=True, key=lambda x: x['airstamp']):
        season = 'S' + san(episode['season']) if episode['season'] is not None else ''
        number = 'E' + san(episode['number']) if episode['number'] is not None else ''
        # "S1E2 " prefix; empty for entries with neither season nor number.
        sn = season + number + ' ' if season + number != '' else ''
        parts.append('<entry>')
        parts.append('<title>{} - {}{}</title>'.format(san(episode['show_name']), sn, san(episode['name'])))
        parts.append('<link rel="alternate" href="{}" />'.format(san(episode['url'])))
        parts.append('<id>' + id_base + 'show/' + san(episode['show_id']) + '/episode/' + san(episode['id']) + '</id>')
        parts.append('<updated>{}</updated>'.format(san(episode['airstamp'])))
        parts.append('<summary type="html">{}</summary>'.format(san(episode['summary'])))
        parts.append('</entry>')

    parts.append('</feed>')
    return ''.join(parts)


def main():
    """Read the config file named on the command line and print the feed."""
    if len(sys.argv) != 2:
        sys.exit('Usage: tv2feed config.json')

    with open(sys.argv[1], encoding='utf-8') as config_file:
        config = json.load(config_file)

    domain = config['domain']
    path = config['path']
    entries_per_show = config.get('entries_per_show', 10)
    shows = [show['id'] for show in config['shows']]

    if not shows:
        sys.exit('Config must contain at least one show')

    # These depend on the config, so they can only be computed once it is read.
    url_base, id_base = feed_bases(domain, path)

    now = datetime.datetime.now(datetime.timezone.utc).isoformat()
    feed_data = []
    show_info = None
    for show in shows:
        show_info = api_call(info_endpoint_tmpl.format(show))
        episodes = api_call(episodes_endpoint_tmpl.format(show))
        feed_data.extend(collect_entries(show_info, episodes, now, entries_per_show))

    if len(shows) > 1:
        feed_title = 'TV2Feed'
        feed_id = id_base + 'feed'
        feed_url = url_base + 'feed'
    else:
        # Single show: name the feed after it. The show info is used instead of
        # the first feed entry so that a show with no entries still works.
        feed_title = san(show_info['name'])
        feed_id = id_base + 'show/' + san(show_info['id'])
        feed_url = url_base + 'show/' + san(show_info['id'])

    print(render_feed(feed_data, feed_title, feed_id, feed_url, id_base, now))


if __name__ == '__main__':
    main()