#!/usr/bin/env python3
# gensite.py: Static site generator based on makesite.py.
# Copyright (C) 2020-2021 Oscar Benedito <oscar@oscarbenedito.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# This file incorporates work covered by the following copyright and
# permission notice:
#
#   Copyright (c) 2018 Sunaina Pai
#
#   Permission is hereby granted, free of charge, to any person obtaining
#   a copy of this software and associated documentation files (the
#   "Software"), to deal in the Software without restriction, including
#   without limitation the rights to use, copy, modify, merge, publish,
#   distribute, sublicense, and/or sell copies of the Software, and to
#   permit persons to whom the Software is furnished to do so, subject to
#   the following conditions:
#
#   The above copyright notice and this permission notice shall be
#   included in all copies or substantial portions of the Software.
#
#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
#   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
#   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
41 42 43 """Static site generator based on makesite.py.""" 44 45 46 import os 47 import shutil 48 import re 49 import glob 50 import sys 51 import datetime 52 import hashlib 53 import markdown 54 55 56 def fread(filename): 57 """Read file and close the file.""" 58 with open(filename, 'r') as f: 59 return f.read() 60 61 62 def fwrite(filename, text): 63 """Write content to file and close the file.""" 64 filename = filename + 'index.html' if filename.endswith('/') or filename == '' else filename 65 filename = os.path.join('_site', filename) 66 if os.path.exists(filename): 67 log('W', 'Overwritting file: {}', filename) 68 69 basedir = os.path.dirname(filename) 70 if not os.path.isdir(basedir): 71 os.makedirs(basedir) 72 73 with open(filename, 'w') as f: 74 f.write(text) 75 76 77 def log(type, msg, *args): 78 """Log message with specified arguments.""" 79 if type == 'E': 80 sys.stderr.write('Error: ' + msg.format(*args) + '\n') 81 sys.exit(1) 82 if type == 'W': 83 sys.stderr.write('Warning: ' + msg.format(*args) + '\n') 84 # if type == 'I': 85 # sys.stderr.write('Info: ' + msg.format(*args) + '\n') 86 87 88 def urlize(name): 89 """Convert string tu URL.""" 90 return name.lower().replace(' ', '-') 91 92 93 def add_to_sitemap(path, lastmod=None, freq=None, priority=None): 94 """Add URL to sitemap.""" 95 global sitemap 96 path = '<loc>https://oscarbenedito.com/' + path + '</loc>' 97 if lastmod == '1970-01-01T00:00:00Z': 98 lastmod = None 99 lastmod = '<lastmod>' + lastmod + '</lastmod>' if lastmod else '' 100 freq = '<changefreq>' + freq + '</changefreq>' if freq else '' 101 priority = '<priority>' + priority + '</priority>' if priority else '' 102 sitemap += '<url>' + path + lastmod + freq + priority + '</url>' 103 104 105 def set_redirect(src, dst): 106 """Create HTML redirect.""" 107 fwrite(src, '<!DOCTYPE html><html><head><meta charset="utf-8">' 108 '<meta http-equiv="refresh" content="0; url=/{}"/>' 109 '<link rel="canonical" href="/{}"/><meta name="robots" content="noindex">' 110 '</head><body><p>This page has been moved to ' 111 '<a href="/{}">https://oscarbenedito.com/{}</a>.</p>' 112 '</body></html>'.format(dst, dst, dst, dst)) 113 log('I', 'redirect /{} => /{}', src, dst) 114 115 116 def read_headers(text): 117 """Parse headers in text and yield (key, value, end-index) tuples.""" 118 for match in re.finditer(r'\s*<!--\s*(.+?)\s*:\s*(.+?)\s*-->\s*|.+', text): 119 if not match.group(1): 120 break 121 yield match.group(1), match.group(2), match.end() 122 123 124 def prettify_date(date_str): 125 """Convert ISO 8601 date string to human friendly date string.""" 126 d = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%SZ') 127 return d.strftime('%B %-d, %Y') 128 129 130 def render(template, pre=False, **params): 131 """Replace placeholders in template with values from params.""" 132 if not pre: 133 template = re.sub(r'{{\s*_if\s+([^}\s]+)\s*}}(.*?){{\s*_fi\s*}}', 134 lambda m: m.group(2) if m.group(1) in params else '', 135 template, flags=re.DOTALL) 136 return re.sub(r'{{\s*([^}\s]+)\s*}}', 137 lambda m: str(params.get(m.group(1), m.group(0))), 138 template) 139 140 141 def read_content(filename): 142 """Read content and metadata from file into a dictionary.""" 143 # read file content 144 text = fread(filename) 145 146 # read metadata and save it in a dictionary 147 date_slug = os.path.basename(filename).split('.')[0] 148 match = re.search(r'^(?:(\d\d\d\d-\d\d-\d\d)-)?(.+)$', date_slug) 149 content = { 150 'date': (match.group(1) or '1970-01-01') + 'T00:00:00Z', 151 'slug': 

def read_content(filename):
    """Read content and metadata from file into a dictionary."""
    # read file content
    text = fread(filename)

    # read metadata and save it in a dictionary
    date_slug = os.path.basename(filename).split('.')[0]
    match = re.search(r'^(?:(\d\d\d\d-\d\d-\d\d)-)?(.+)$', date_slug)
    content = {
        'date': (match.group(1) or '1970-01-01') + 'T00:00:00Z',
        'slug': match.group(2)
    }

    # read headers
    end = 0
    for key, val, end in read_headers(text):
        content[key] = val

    if 'lastmod' in content:
        content['modified'] = '1'
    else:
        content['lastmod'] = content['date']

    # separate content from headers
    text = text[end:]

    # convert Markdown content to HTML
    if filename.endswith('.md'):
        text = markdown.markdown(text, extensions=['footnotes', 'fenced_code'])

    content.update({
        'content': text,
        'year': content['date'][:4],
        'month': content['date'][5:7],
        'day': content['date'][8:10],
        'date_nice': prettify_date(content['date']),
        'lastmod_nice': prettify_date(content['lastmod'])
    })

    if 'categories' in content:
        # convert the categories string to array of categories
        categories = [c.strip() for c in content['categories'].split(',')]
        categories_html = ', '.join(['<a class="p-category" href="/blog/categories/' + urlize(c) + '/">' + c + '</a>' for c in categories])
        content.update({
            'categories': categories,
            'categories_html': categories_html
        })

    return content


def make_pages(src, dst, layout, blog=False, **params):
    """Generate pages from page content."""
    items = []
    categories = {}

    for src_path in glob.glob(src):
        content = read_content(src_path)

        page_params = dict(params, **content)

        # populate placeholders in content if content-rendering is enabled
        if page_params.get('render') == 'yes':
            rendered_content = render(page_params['content'], **page_params)
            page_params['content'] = rendered_content

        if 'url' not in page_params:
            page_params['url'] = render(dst, **page_params)
        else:  # can be deleted, just to warn since I have never used it
            log('W', 'parameter \'url\' set in {}', src_path)

        if blog:
            page_params['src_path'] = src_path
            items.append(page_params)
        else:
            fwrite(page_params['url'], render(layout, **page_params))
            pri = page_params['priority'] if 'priority' in page_params else None
            add_to_sitemap(page_params['url'], lastmod=page_params['lastmod'], priority=pri)
            log('I', 'page {} => /{}', src_path, page_params['url'])

    # the following is only executed if blog == True, otherwise items is empty
    items.sort(key=lambda x: x['date'], reverse=True)
    for i, item in enumerate(items):
        if i != 0:
            item['next_url'] = items[i-1]['url']
            item['next_title'] = items[i-1]['title']
            item['multiple_pages'] = '1'
        if i < len(items)-1:
            item['prev_url'] = items[i+1]['url']
            item['prev_title'] = items[i+1]['title']
            item['multiple_pages'] = '1'

        for category in item['categories']:
            if category not in categories:
                categories[category] = [item]
            else:
                categories[category].append(item)

        fwrite(item['url'], render(layout, **item))
        pri = item['priority'] if 'priority' in item else None
        add_to_sitemap(item['url'], lastmod=item['lastmod'], priority=pri)
        log('I', 'post {} => /{}', item['src_path'], item['url'])

    return items, categories
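
# Illustrative example (not part of the original script): a blog post is a
# Markdown file whose name encodes the date and slug, with metadata written as
# HTML comments before the body. The header keys shown here (title, lastmod,
# categories) are only examples of keys read_content() picks up.
#
#     content/blog/2021-01-15-example-post.md:
#
#         <!-- title: Example post -->
#         <!-- lastmod: 2021-02-01T10:00:00Z -->
#         <!-- categories: notes, software -->
#         Body of the post in **Markdown**.
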

def make_lists(posts, dst, l_html, l_html_item, l_feed, l_feed_item, **params):
    """Generate HTML lists and Atom feed for a set of posts."""
    if os.path.isfile('content/' + dst + '_index.md'):
        text = fread('content/' + dst + '_index.md')
    else:
        text = fread('content/' + dst[:-1] + '.md')
    end = 0

    for key, val, end in read_headers(text):
        params[key] = val

    params['intro'] = markdown.markdown(text[end:], extensions=['footnotes', 'fenced_code'])

    # make HTML lists
    ipp = 5  # items per page
    params['content'] = ''
    title = params['title']
    if dst != 'blog/':  # blog feed appears on all pages already
        params['extraheader'] = '<link rel="alternate" type="application/atom+xml" ' \
            'title="{}" href="/{}index.xml"/>'.format(params['feed_title'], dst)

    for i, post in enumerate(posts):
        item_params = dict(params, **post)

        # remove tags and truncate at 50 words
        item_params['summary'] = ' '.join(re.sub('(?s)<.*?>', '', post['content']).split()[:50]) + '...'

        params['content'] += render(l_html_item, **item_params)

        if i % ipp == ipp-1 or i == len(posts)-1:
            page = i//ipp + 1
            curr_dst = dst + ('page/{}/'.format(page) if i >= ipp else '')

            if i != len(posts)-1:
                params['multiple_pages'] = '1'
                params['next_url'] = '{}page/{}/'.format(dst, page + 1)
            elif page > 1:
                params.pop('next_url')

            if page != 1:
                params['title'] = '{} (page {} of {})'.format(title, page, ((len(posts)-1)//ipp) + 1)

            fwrite(curr_dst, render(l_html, **params))
            log('I', 'list => /{}', curr_dst)

            params['prev_url'] = curr_dst
            params['content'] = ''

    set_redirect(dst + 'page/1/', dst)

    # make Atom feed
    ipp = 15  # items per feed
    params['url'] = dst
    page_dst = dst + 'index.xml'
    params['content'] = ''
    for i, post in enumerate(posts):
        if (i == ipp):
            break
        item_params = dict(params, **post)

        # escape HTML content
        item_params['c_escaped'] = post['content'].replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')

        params['content'] += render(l_feed_item, **item_params)

    params['updated'] = posts[0]['lastmod']
    fwrite(page_dst, render(l_feed, **params))
    log('I', 'feed => /{}', page_dst)
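
# Illustrative example (not part of the original script): for 12 posts,
# make_lists(posts, 'blog/', ...) would write paginated lists and one Atom
# feed roughly like this (5 items per HTML page, 15 per feed, newest first):
#
#     _site/blog/index.html          newest 5 posts
#     _site/blog/page/2/index.html   next 5 posts
#     _site/blog/page/3/index.html   remaining 2 posts
#     _site/blog/page/1/index.html   redirect back to /blog/
#     _site/blog/index.xml           Atom feed with the newest posts
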

def make_archive(posts, categories, dst, layout, **params):
    """Generate the blog archive page: posts grouped by year plus category counts."""
    year = 0
    params['content'] = '<h2>Posts ({})</h2>\n'.format(len(posts))
    for post in posts:
        if post['year'] != year:
            params['content'] += '</ul>\n' if year != 0 else ''
            params['content'] += '<h3>{}</h3>\n<ul>\n'.format(post['year'])
            year = post['year']
        params['content'] += '<li><a href="/{}">{}</a> ({})</li>\n' \
            ''.format(post['url'], post['title'], post['date_nice'][:-6])
    params['content'] += '</ul>\n'

    params['content'] += '<h2>Categories ({})</h2>\n<ul>\n'.format(len(categories))
    for key in sorted(categories):
        val = categories[key]
        params['content'] += '<li><a href="/{}categories/{}/">{}</a> ({} {})</li>\n' \
            ''.format(dst, urlize(key), key, len(val), 'entry' if len(val) == 1 else 'entries')
    params['content'] += '</ul>\n'

    page_dst = dst + 'archive/'
    fwrite(page_dst, render(layout, **params))
    add_to_sitemap(page_dst, lastmod=posts[0]['lastmod'], priority='0.4')
    log('I', 'page => /{}', page_dst)


def main():
    # create a new _site directory from scratch
    if os.path.isdir('_site'):
        shutil.rmtree('_site')
    shutil.copytree('static', '_site')

    # initialize parameters
    params = {}

    # initialize sitemap
    global sitemap
    sitemap = '<?xml version="1.0" encoding="UTF-8"?>\n' \
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'

    # copy assets adding part of their sha256 value to the filename
    for path, _, files in os.walk('assets'):
        for name in files:
            file = os.path.join(path, name)
            rfile = os.path.relpath(file, 'assets')
            with open(file, 'r') as c:
                content = c.read()

            # minify css
            if os.path.splitext(file)[1] == '.css':
                content = re.sub(r'\s*/\*(?:.|\n)*?\*/\s*', '', content)
                content = re.sub(r'\s+', ' ', content)
                content = re.sub(r'\s*({|}|;|,)\s*', r'\1', content)
                content = re.sub(r':\s*', ':', content)
                rfile = '{0}.min{1}'.format(*os.path.splitext(rfile))

            h = hashlib.sha256()
            h.update(content.encode('utf-8'))
            name, ext = os.path.splitext(rfile)
            dst = '{n}.{h}{e}'.format(n=name, h=h.hexdigest()[:8], e=ext)

            params['_asset_' + rfile] = dst
            basedir = os.path.dirname(os.path.join('_site', dst))
            if not os.path.isdir(basedir):
                os.makedirs(basedir)
            with open(os.path.join('_site', dst), 'w') as c:
                c.write(content)

    # load layouts
    l_base = fread('layouts/base.html')
    l_page = render(l_base, pre=True, content=fread('layouts/page.html'))
    l_post = render(l_base, pre=True, content=fread('layouts/post.html'))
    l_list = render(l_base, pre=True, content=fread('layouts/list.html'))
    l_feed = fread('layouts/feed.xml')
    item_html = fread('layouts/item.html')
    item_xml = fread('layouts/item.xml')

    # create site pages
    make_pages('content/_index.md', '', l_page, **params)
    make_pages('content/[!_]*.*', '{{ slug }}/', l_page, **params)
    fwrite('404.html', render(fread('layouts/404.html'), **params))

    # create blog post pages
    all_posts, categories = make_pages('content/blog/[!_]*.*',
                                       'blog/{{ year }}/{{ month }}/{{ slug }}/',
                                       l_post, blog=True, **params)

    # create HTML list pages and Atom feed
    make_lists(all_posts, 'blog/', l_list, item_html, l_feed, item_xml, **params)

    add_to_sitemap('blog/', lastmod=all_posts[0]['lastmod'], priority='1.0')

    # create blog archive
    make_archive(all_posts, categories, 'blog/', l_page, title='Blog archive', **params)

    # create blog categories
    for name, c_posts in categories.items():
        dst = 'blog/categories/' + urlize(name) + '/'
        make_lists(c_posts, dst, l_list, item_html, l_feed, item_xml, **params)

    # set redirections
    set_redirect('licenses/agpl-v3/', 'licenses/agpl-3.0.txt')
    set_redirect('licenses/gpl-v3/', 'licenses/gpl-3.0.txt')
    set_redirect('licenses/cc-by-4.0/', 'licenses/cc-by-4.0.txt')
    set_redirect('composer/', 'projects/composer/composer.html')

    fwrite('sitemap.xml', sitemap + '</urlset>')


if __name__ == '__main__':
    main()
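
# Illustrative note (not part of the original script): the script is meant to
# be run from the site root, with roughly this layout, and writes everything
# to _site/:
#
#     static/        copied verbatim to _site/
#     assets/        CSS and other text assets, minified and fingerprinted
#     content/       _index.md, top-level pages, and content/blog/ posts
#     layouts/       base.html, page.html, post.html, list.html, item.html,
#                    feed.xml, item.xml, 404.html
#
# Run it with:  python3 gensite.py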