tv2feed.py (5930B) - raw
#!/usr/bin/env python3
# Copyright (C) 2021 Oscar Benedito <oscar@oscarbenedito.com>
# License: Affero General Public License version 3 or later

# TV2Feed: Follow TV shows using Atom feeds!
#
# How to use
# ----------
#
# Go to <https://www.tvmaze.com> and search for the TV shows you want to
# follow. Write down their IDs (the number in the URL) and then run the
# following:
#
#     tv2feed id1 id2 ...
#
# Or run it multiple times to get one feed per TV show. The feeds are expected
# to go under:
#
# - `https://<domain>/<path>/feed`: if multiple shows specified
# - `https://<domain>/<path>/show/<show_id>`: if only one show specified
#
# That is because the feed URIs will point there. Note that if only one show is
# specified, TV2Feed will generate it assuming there is one feed per show (which
# will make the feed title the same as the show's).
#
# The API where the data is gathered from caches results for one hour, so you
# can add cron jobs to run every hour:
#
#     0 * * * * /usr/local/bin/tv2feed 210 431 > /srv/www/tv2feed/feed
#
# or, alternatively (could also be scripted with just one cronjob):
#
#     0 * * * * /usr/local/bin/tv2feed 210 > /srv/www/tv2feed/show/210
#     0 * * * * /usr/local/bin/tv2feed 431 > /srv/www/tv2feed/show/431
#
# Other notes
# -----------
#
# Each show will make two API requests, and there is a limit of 20 requests
# every 10 seconds (for contents that are not cached). If you are following many
# shows, this script will sleep for 10 seconds and try again if an API call
# returns a 429 error code. If it fails again (or the error code is not 429), it
# will raise an error and exit.
#
# All data generated is gathered from TVmaze's API.
import datetime
import json
import sys
import time
import urllib.error
import urllib.request


# edit these variables
domain = 'oscarbenedito.com'
path = 'projects/tv2feed'  # leave empty for content under https://domain/
entries_per_show = 10
shows = sys.argv[1:]  # alternatively, hardcode them in the script
# until here!

version = '1.0'  # TV2Feed version
url_base = 'https://{}/{}'.format(domain, path + '/' if path != '' else '')
id_base = 'tag:{},2021-05-19:/{}'.format(domain, path + '/' if path != '' else '')
info_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}'
episodes_endpoint_tmpl = 'https://api.tvmaze.com/shows/{}/episodes?specials=1'

# Single-pass translation table, so the '&' of an inserted entity can never be
# escaped a second time.
_XML_ESCAPES = str.maketrans({
    '&': '&amp;',
    '>': '&gt;',
    '<': '&lt;',
    '\'': '&apos;',
    '"': '&quot;',
})


# basic sanitizing: convert to string and escape XML
def san(s):
    """Return str(s) with the five XML special characters escaped."""
    return str(s).translate(_XML_ESCAPES)


def api_call(url):
    """Fetch `url` and return the decoded JSON body.

    On an HTTP 429 response the call sleeps for 10 seconds and retries once.
    Any other HTTP error, or a failure of the retry, propagates to the caller.
    """
    try:
        response = urllib.request.urlopen(url)
    except urllib.error.HTTPError as e:
        if e.code != 429:
            raise
        print('Error 429. Sleeping for 10 seconds and retrying...', file=sys.stderr)
        time.sleep(10)
        response = urllib.request.urlopen(url)

    # Close the connection as soon as the body has been parsed.
    with response:
        return json.load(response)


def show_entries(show_info, episodes, now, limit):
    """Return the feed entries (a list of dicts) for one show.

    Keeps the `limit` most recently aired episodes, newest first, and appends
    a "Show status" entry stamped `now` when the show's status is not
    'Running'. A non-positive `limit` keeps every aired episode.
    """
    # Plain string comparison of timestamps.
    # NOTE(review): assumes the API's airstamps are ISO 8601 in the same UTC
    # offset notation as `now` - confirm against the TVmaze API docs.
    aired = [
        episode for episode in episodes
        if episode['airstamp'] is not None and episode['airstamp'] < now
    ]
    aired.sort(reverse=True, key=lambda x: x['airstamp'])
    if limit > 0:
        aired = aired[:limit]

    entries = [{
        'airstamp': episode['airstamp'],
        'id': episode['id'],
        'name': episode['name'],
        'number': episode['number'],
        'season': episode['season'],
        'show_id': show_info['id'],
        'show_name': show_info['name'],
        'summary': episode['summary'],
        'url': episode['url']
    } for episode in aired]

    if show_info['status'] != 'Running':
        entries.append({
            'airstamp': now,
            'id': 'status/' + show_info['status'],
            'name': 'Show status: {}'.format(show_info['status']),
            'number': None,
            'season': None,
            'show_id': show_info['id'],
            'show_name': show_info['name'],
            'summary': '<p>Show status: {}.</p>'.format(show_info['status']),
            'url': show_info['url']
        })

    return entries


def render_feed(show_infos, feed_data, now):
    """Return the Atom document for `feed_data` as a string.

    `show_infos` holds one show-info dict per requested show. With more than
    one show the feed is the combined 'TV2Feed' feed; with exactly one it is
    titled and identified after that show. The single-show title and id come
    from the show info, not from the first entry, so a show without any
    entries still produces a valid (empty) feed.
    """
    if len(show_infos) > 1:
        feed_title = 'TV2Feed'
        feed_id = id_base + 'feed'
        feed_url = url_base + 'feed'
    else:
        show_id = san(show_infos[0]['id'])
        feed_title = san(show_infos[0]['name'])
        feed_id = id_base + 'show/' + show_id
        feed_url = url_base + 'show/' + show_id

    parts = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<feed xmlns="http://www.w3.org/2005/Atom">',
        '<link href="{}" rel="self" />'.format(feed_url),
        '<title>{}</title>'.format(feed_title),
        '<author><name>TV2Feed</name></author>',
        '<updated>{}</updated>'.format(now),
        '<id>' + feed_id + '.atom</id>',
        '<generator uri="https://oscarbenedito.com/projects/tv2feed/" version="{}">TV2Feed</generator>'.format(version),
    ]

    for episode in sorted(feed_data, reverse=True, key=lambda x: x['airstamp']):
        season = 'S' + san(episode['season']) if episode['season'] is not None else ''
        number = 'E' + san(episode['number']) if episode['number'] is not None else ''
        sn = season + number + ' ' if season + number != '' else ''
        # A missing summary becomes an empty element instead of the text "None".
        summary = san(episode['summary']) if episode['summary'] is not None else ''
        parts.append('<entry>')
        parts.append('<title>{} - {}{}</title>'.format(san(episode['show_name']), sn, san(episode['name'])))
        parts.append('<link rel="alternate" href="{}" />'.format(san(episode['url'])))
        parts.append('<id>' + id_base + 'show/' + san(episode['show_id']) + '/episode/' + san(episode['id']) + '</id>')
        parts.append('<updated>{}</updated>'.format(san(episode['airstamp'])))
        parts.append('<summary type="html">{}</summary>'.format(summary))
        parts.append('</entry>')

    parts.append('</feed>')
    return ''.join(parts)


def main():
    """Fetch every configured show and print the resulting feed to stdout."""
    if len(shows) < 1:
        sys.exit('Usage: tv2feed id1 [id2 [id3 ...]]')

    now = datetime.datetime.now(datetime.timezone.utc).isoformat()
    show_infos = []
    feed_data = []
    for show in shows:
        show_info = api_call(info_endpoint_tmpl.format(show))
        episodes = api_call(episodes_endpoint_tmpl.format(show))
        show_infos.append(show_info)
        feed_data.extend(show_entries(show_info, episodes, now, entries_per_show))

    print(render_feed(show_infos, feed_data, now))


if __name__ == '__main__':
    main()