#!/usr/bin/env python3
# gensite.py: Static site generator based on makesite.py.
# Copyright (C) 2020-2021 Oscar Benedito <oscar@oscarbenedito.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# This file incorporates work covered by the following copyright and
# permission notice:
#
# Copyright (c) 2018 Sunaina Pai
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


"""Static site generator based on makesite.py."""


import os
import shutil
import re
import glob
import sys
import datetime
import hashlib


def fread(filename):
    """Read file and close the file."""
    with open(filename, "r") as f:
        return f.read()


def fwrite(filename, text):
    """Write content to file and close the file."""
    filename = filename + "index.html" if filename.endswith("/") or filename == "" else filename  # fmt: skip
    filename = os.path.join("_site", filename)
    if os.path.exists(filename):
        log("W", "Overwriting file: {}", filename)

    basedir = os.path.dirname(filename)
    if not os.path.isdir(basedir):
        os.makedirs(basedir)

    with open(filename, "w") as f:
        f.write(text)


def log(type, msg, *args):
    """Log message with specified arguments."""
    if type == "E":
        sys.stderr.write("Error: " + msg.format(*args) + "\n")
        sys.exit(1)
    if type == "W":
        sys.stderr.write("Warning: " + msg.format(*args) + "\n")
    # if type == 'I':
    #     sys.stderr.write('Info: ' + msg.format(*args) + '\n')


def urlize(name):
    """Convert string to URL."""
    return name.lower().replace(" ", "-")


def add_to_sitemap(path, lastmod=None, freq=None, priority=None):
    """Add URL to sitemap."""
    global sitemap
    path = "<loc>https://oscarbenedito.com/" + path + "</loc>"
    if lastmod == "1970-01-01T00:00:00Z":
        lastmod = None
    lastmod = "<lastmod>" + lastmod + "</lastmod>" if lastmod else ""
    freq = "<changefreq>" + freq + "</changefreq>" if freq else ""
    priority = "<priority>" + priority + "</priority>" if priority else ""
    sitemap += "<url>" + path + lastmod + freq + priority + "</url>"


def set_redirect(src, dst):
    """Create HTML redirect."""
    fwrite(
        src,
        '<!DOCTYPE html><html><head><meta charset="utf-8">'
        '<meta http-equiv="refresh" content="0; url=/{}"/>'
        '<link rel="canonical" href="/{}"/><meta name="robots" content="noindex">'
        "</head><body><p>This page has been moved to "
        '<a href="/{}">https://oscarbenedito.com/{}</a>.</p>'
        "</body></html>".format(dst, dst, dst, dst),
    )
    log("I", "redirect /{} => /{}", src, dst)
    # uncomment next line to print apache redirects
    # sys.stdout.write('Redirect permanent "/{}" "/{}"\n'.format(src, dst))


def read_headers(text):
    """Parse headers in text and yield (key, value, end-index) tuples."""
    for match in re.finditer(r"\s*<!--\s*(.+?)\s*:\s*(.+?)\s*-->\s*|.+", text):
        if not match.group(1):
            break
        yield match.group(1), match.group(2), match.end()


def prettify_date(date_str):
    """Convert ISO 8601 date string to human friendly date string."""
    d = datetime.datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ")
    return d.strftime("%B %-d, %Y")


def render(template, pre=False, **params):
    """Replace placeholders in template with values from params."""
    if not pre:
        template = re.sub(
            r"{{\s*_if\s+([^}\s]+)\s*}}(.*?){{\s*_fi\s*}}",
            lambda m: m.group(2) if m.group(1) in params else "",
            template,
            flags=re.DOTALL,
        )
        template = re.sub(
            r"{{\s*_ife\s+([^}\s]+)\s*}}(.*?){{\s*_else\s*}}(.*?){{\s*_fi\s*}}",
            lambda m: m.group(2) if m.group(1) in params else m.group(3),
            template,
            flags=re.DOTALL,
        )
    return re.sub(
        r"{{\s*([^}\s]+)\s*}}",
        lambda m: str(params.get(m.group(1), m.group(0))),
        template,
    )


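# Illustrative note (not part of the original file): a sketch of the tiny
# template language handled by render(), based only on the regexes above.
# The literal strings and the "title"/"draft" keys are made-up examples.
#
#   render("{{ title }}", title="Hello")                          -> "Hello"
#   render("{{ _if title }}T: {{ title }}{{ _fi }}", title="Hello") -> "T: Hello"
#   render("{{ _ife draft }}draft{{ _else }}final{{ _fi }}")       -> "final"
#
# Placeholders with no matching key are left untouched, and pre=True skips
# the conditional passes so layouts can be pre-composed before final rendering.

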
def read_content(filename):
    """Read content and metadata from file into a dictionary."""
    # read file content
    text = fread(filename)

    # read metadata and save it in a dictionary
    date_slug = os.path.basename(filename).split(".")[0]
    match = re.search(r"^(?:(\d\d\d\d-\d\d-\d\d)-)?(.+)$", date_slug)
    content = {
        "date": (match.group(1) or "1970-01-01") + "T00:00:00Z",
        "slug": match.group(2),
    }

    # read headers
    end = 0
    for key, val, end in read_headers(text):
        content[key] = val

    if "lastmod" in content:
        content["modified"] = "1"
    else:
        content["lastmod"] = content["date"]

    # separate content from headers
    text = text[end:]

    # convert Markdown content to HTML
    # if filename.endswith('.md'):
    #     import markdown
    #     text = markdown.markdown(text, extensions=['footnotes', 'fenced_code'])

    content.update(
        {
            "content": text,
            "year": content["date"][:4],
            "month": content["date"][5:7],
            "day": content["date"][8:10],
            "date_nice": prettify_date(content["date"]),
            "lastmod_nice": prettify_date(content["lastmod"]),
        }
    )

    if "categories" in content:
        # convert the categories string to array of categories
        categories = [c.strip() for c in content["categories"].split(",")]
        categories_html = ", ".join(
            [
                '<a class="p-category" href="/blog/categories/{}/">{}</a>'.format(
                    urlize(c), c
                )
                for c in categories
            ]
        )
        content.update({"categories": categories, "categories_html": categories_html})

    return content


def make_pages(src, dst, layout, blog=False, **params):
    """Generate pages from page content."""
    items = []
    categories = {}

    for src_path in glob.glob(src):
        content = read_content(src_path)

        page_params = dict(params, **content)

        # populate placeholders in content if content-rendering is enabled
        if page_params.get("render") == "yes":
            rendered_content = render(page_params["content"], **page_params)
            page_params["content"] = rendered_content

        if "url" not in page_params:
            page_params["url"] = render(dst, **page_params)
        else:  # can be deleted, just to warn since I have never used it
            log("W", "parameter 'url' set in {}", src_path)

        if blog:
            page_params["src_path"] = src_path
            items.append(page_params)
        else:
            fwrite(page_params["url"], render(layout, **page_params))
            pri = page_params["priority"] if "priority" in page_params else None
            add_to_sitemap(
                page_params["url"], lastmod=page_params["lastmod"], priority=pri
            )
            log("I", "page {} => /{}", src_path, page_params["url"])

    # the following is only executed if blog == True, otherwise items is empty
    items.sort(key=lambda x: x["date"], reverse=True)
    for i, item in enumerate(items):
        if i != 0:
            item["next_url"] = items[i - 1]["url"]
            item["next_title"] = items[i - 1]["title"]
            item["multiple_pages"] = "1"
        if i < len(items) - 1:
            item["prev_url"] = items[i + 1]["url"]
            item["prev_title"] = items[i + 1]["title"]
            item["multiple_pages"] = "1"

        for category in item["categories"]:
            if category not in categories:
                categories[category] = [item]
            else:
                categories[category].append(item)

        fwrite(item["url"], render(layout, **item))
        pri = item["priority"] if "priority" in item else None
        add_to_sitemap(item["url"], lastmod=item["lastmod"], priority=pri)
        log("I", "post {} => /{}", item["src_path"], item["url"])

    return items, categories


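# Illustrative note (not part of the original file): a sketch of the content
# file format that read_content() expects. The filename and header values
# below are made up for the example.
#
#   content/blog/2021-03-05-example-post.html
#     <!-- title: Example post -->
#     <!-- categories: Software, Web -->
#     <p>Body of the post...</p>
#
# read_content() would return, among other keys:
#   slug="example-post", date="2021-03-05T00:00:00Z",
#   year="2021", month="03", day="05", title="Example post",
#   categories=["Software", "Web"] (plus the rendered categories_html links).

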
def make_lists(posts, dst, l_html, l_html_item, l_feed, l_feed_item, **params):
    """Generate HTML lists and Atom feed for a set of posts."""
    if os.path.isfile("content/" + dst + "_index.html"):
        text = fread("content/" + dst + "_index.html")
    else:
        text = fread("content/" + dst[:-1] + ".html")
    end = 0

    for key, val, end in read_headers(text):
        params[key] = val

    params["intro"] = text[end:]

    # make HTML lists
    ipp = 5  # items per page
    params["content"] = ""
    title = params["title"]
    if dst != "blog/":  # blog feed appears on all pages already
        params["extraheader"] = (
            '<link rel="alternate" type="application/atom+xml" '
            'title="{}" href="/{}index.xml"/>'.format(params["feed_title"], dst)
        )

    for i, post in enumerate(posts):
        item_params = dict(params, **post)

        # remove tags and truncate at 50 words
        item_params["summary"] = (
            " ".join(re.sub("(?s)<.*?>", "", post["content"]).split()[:50]) + "..."
        )

        params["content"] += render(l_html_item, **item_params)

        if i % ipp == ipp - 1 or i == len(posts) - 1:
            page = i // ipp + 1
            curr_dst = dst + ("page/{}/".format(page) if i >= ipp else "")

            if i != len(posts) - 1:
                params["multiple_pages"] = "1"
                params["next_url"] = "{}page/{}/".format(dst, page + 1)
            elif page > 1:
                params.pop("next_url")

            if page != 1:
                params["title"] = "{} (page {} of {})".format(
                    title, page, ((len(posts) - 1) // ipp) + 1
                )

            fwrite(curr_dst, render(l_html, **params))
            log("I", "list => /{}", curr_dst)

            params["prev_url"] = curr_dst
            params["content"] = ""

    set_redirect(dst + "page/1/", dst)

    # make Atom feed
    ipp = 15  # items per feed
    params["url"] = dst
    page_dst = dst + "index.xml"
    params["content"] = ""
    for i, post in enumerate(posts):
        if i == ipp:
            break
        item_params = dict(params, **post)

        # escape HTML content
        item_params["c_escaped"] = (
            post["content"]
            .replace("&", "&amp;")
            .replace(">", "&gt;")
            .replace("<", "&lt;")
        )

        params["content"] += render(l_feed_item, **item_params)

    params["updated"] = posts[0]["lastmod"]
    fwrite(page_dst, render(l_feed, **params))
    log("I", "feed => /{}", page_dst)


def make_archive(posts, categories, dst, layout, **params):
    """Generate the blog archive page: posts grouped by year plus a category index."""
    year = 0
    params["content"] = "<h2>Posts ({})</h2>\n".format(len(posts))
    for post in posts:
        if post["year"] != year:
            params["content"] += "</ul>\n" if year != 0 else ""
            params["content"] += "<h3>{}</h3>\n<ul>\n".format(post["year"])
            year = post["year"]
        params["content"] += '<li><a href="/{}">{}</a> ({})</li>\n'.format(
            post["url"], post["title"], post["date_nice"][:-6]
        )
    params["content"] += "</ul>\n"

    params["content"] += "<h2>Categories ({})</h2>\n<ul>\n".format(len(categories))
    for key in sorted(categories):
        val = categories[key]
        params[
            "content"
        ] += '<li><a href="/{}categories/{}/">{}</a> ({} {})</li>\n'.format(
            dst, urlize(key), key, len(val), "entry" if len(val) == 1 else "entries"
        )
    params["content"] += "</ul>\n"

    page_dst = dst + "archive/"
    fwrite(page_dst, render(layout, **params))
    add_to_sitemap(page_dst, lastmod=posts[0]["lastmod"], priority="0.4")
    log("I", "page => /{}", page_dst)


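# Illustrative note (not part of the original file): for the "blog/" list the
# functions above write roughly this layout under _site/ (paths assume at
# least six posts so a second list page exists):
#
#   _site/blog/index.html            first list page (5 posts per page)
#   _site/blog/page/1/index.html     HTML redirect back to /blog/
#   _site/blog/page/2/index.html     second list page
#   _site/blog/index.xml             Atom feed (newest 15 posts)
#   _site/blog/archive/index.html    archive grouped by year, plus categories

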
def main():
    # create a new _site directory from scratch
    if os.path.isdir("_site"):
        shutil.rmtree("_site")
    shutil.copytree("static", "_site")

    # initialize parameters
    params = {}

    # initialize sitemap
    global sitemap
    sitemap = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
    )

    # copy assets adding part of their sha256 value to the filename
    for path, _, files in os.walk("assets"):
        for name in files:
            file = os.path.join(path, name)
            rfile = os.path.relpath(file, "assets")
            with open(file, "r") as c:
                content = c.read()

            # minify css
            if os.path.splitext(file)[1] == ".css":
                content = re.sub(r"\s*/\*(?:.|\n)*?\*/\s*", "", content)
                content = re.sub(r"\s+", " ", content)
                content = re.sub(r"\s*({|}|;|,)\s*", r"\1", content)
                content = re.sub(r":\s*", ":", content)
                rfile = "{0}.min{1}".format(*os.path.splitext(rfile))

            h = hashlib.sha256()
            h.update(content.encode("utf-8"))
            name, ext = os.path.splitext(rfile)
            dst = "{n}.{h}{e}".format(n=name, h=h.hexdigest()[:8], e=ext)

            params["_asset_" + rfile] = dst
            basedir = os.path.dirname(os.path.join("_site", dst))
            if not os.path.isdir(basedir):
                os.makedirs(basedir)
            with open(os.path.join("_site", dst), "w") as c:
                c.write(content)

    # load layouts
    l_base = fread("layouts/base.html")
    l_page = render(l_base, pre=True, content=fread("layouts/page.html"))
    l_post = render(l_base, pre=True, content=fread("layouts/post.html"))
    l_list = render(l_base, pre=True, content=fread("layouts/list.html"))
    l_feed = fread("layouts/feed.xml")
    item_html = fread("layouts/item.html")
    item_xml = fread("layouts/item.xml")

    # create site pages
    make_pages("content/_index.html", "", l_page, **params)
    make_pages("content/[!_]*.*", "{{ slug }}/", l_page, **params)
    make_pages("content/projects/[!_]*.*", "projects/{{ slug }}/", l_page, **params)
    make_pages("content/en/_index.html", "en/", l_page, **params)
    make_pages("content/en/[!_]*.*", "en/{{ slug }}/", l_page, **params)
    fwrite("404.html", render(fread("layouts/404.html"), **params))

    # create blog post pages
    all_posts, categories = make_pages(
        "content/blog/[!_]*.*",
        "blog/{{ year }}/{{ month }}/{{ slug }}/",
        l_post,
        blog=True,
        **params
    )

    # create HTML list pages and Atom feed
    make_lists(all_posts, "blog/", l_list, item_html, l_feed, item_xml, **params)

    add_to_sitemap("blog/", lastmod=all_posts[0]["lastmod"], priority="1.0")

    # create blog archive
    make_archive(all_posts, categories, "blog/", l_page, title="Blog archive", **params)

    # create blog categories
    for name, c_posts in categories.items():
        dst = "blog/categories/" + urlize(name) + "/"
        make_lists(c_posts, dst, l_list, item_html, l_feed, item_xml, **params)

    # set redirections
    set_redirect("licenses/agpl-v3/", "licenses/agpl-3.0.txt")
    set_redirect("licenses/gpl-v3/", "licenses/gpl-3.0.txt")
    set_redirect("licenses/cc-by-4.0/", "licenses/cc-by-4.0.txt")
    set_redirect("composer/", "projects/composer/composer.html")
    set_redirect("contact/", "en/#contact-me")
    set_redirect("about/", "en/about/")

    fwrite("sitemap.xml", sitemap + "</urlset>")


if __name__ == "__main__":
    main()
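

# Illustrative note (not part of the original file): a sketch of the directory
# layout main() assumes, inferred from the paths it reads and writes:
#
#   static/    copied verbatim as the initial _site/ tree
#   assets/    assets fingerprinted with a sha256 prefix, CSS also minified
#              (e.g. assets/style.css -> _site/style.min.<8-hex-chars>.css)
#   layouts/   base.html, page.html, post.html, list.html, 404.html,
#              feed.xml, item.html, item.xml
#   content/   pages and blog posts with <!-- key: value --> headers
#
# Running `python3 gensite.py` from the project root rebuilds _site/ from
# scratch.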