#!/usr/bin/env python3
# Vimwiki to HTML converter
# Copyright (C) 2021 Oscar Benedito
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import os
import re
import sys
import datetime


def get_file_metadata_and_text(file, wiki_path, categories):
    """Read a wiki page and split it into metadata and remaining text.

    Leading ``<!-- key: value -->`` comments become metadata entries and are
    removed from the text. A first-line ``# Title`` heading becomes
    ``metadata['title']`` and is removed as well; without one, the title
    falls back to the page path. A following ``*description*`` line is
    recorded as ``metadata['description']`` but left in the text. Pages
    without a ``category`` header get the last entry of ``categories``.

    Returns a ``(metadata, text)`` tuple; ``metadata['path']`` is the page
    path relative to ``wiki_path`` without its extension.
    """
    metadata = {}
    metadata['path'] = os.path.splitext(os.path.relpath(file, wiki_path))[0]

    with open(file, 'r') as f:
        text = f.read()

    # read headers and remove from text; the `|.+` alternative matches the
    # first non-header line, which has no group 1 and ends the scan
    end = 0
    for match in re.finditer(r'\s*<!--\s*(.+?)\s*:\s*(.+?)\s*-->\s*|.+', text):
        if not match.group(1):
            break
        metadata[match.group(1)] = match.group(2)
        end = match.end()
    text = text[end:]

    # read title and remove from text
    match = re.match(r'# (.+?)\s*\n', text)
    if match is not None:
        metadata['title'] = match.group(1)
        text = text[match.end():]
    else:
        metadata['title'] = metadata['path']

    # read description
    match = re.match(r'\*([^* ].+?)\*\s*\n', text)
    if match is not None:
        metadata['description'] = match.group(1)

    if 'category' not in metadata:
        metadata['category'] = categories[-1]

    return metadata, text


def make_toc_file(path, wiki_path, categories):
    """Write a markdown table of contents for the wiki to ``path``.

    Every ``.md`` file under ``wiki_path`` except ``path`` itself is listed,
    grouped by category in the order given by ``categories`` and sorted by
    title within each group. Pages of the first category are listed without
    a heading. Categories that are not in ``categories`` are appended after
    the known ones instead of raising ``KeyError``.
    """
    # generate categories' data
    cat_info = {c: [] for c in categories}
    for root, dirs, files in os.walk(wiki_path):
        for f in files:
            # os.walk roots already include wiki_path, so join only root and f
            page = os.path.join(root, f)
            if f.endswith('.md') and page != path:
                metadata = get_file_metadata_and_text(page, wiki_path, categories)[0]
                cat_info.setdefault(metadata['category'], []).append(metadata)

    # generate markdown
    parts = ['<!-- category: {} -->\n# Table of Contents\n'.format(categories[0])]
    for cat, pages in cat_info.items():
        if not pages:
            continue
        parts.append('\n## ' + cat + '\n\n' if cat != categories[0] else '\n')
        for page in sorted(pages, key=lambda x: x['title']):
            date = ' (' + page['date'] + ')' if 'date' in page else ''
            des = ': ' + page['description'] if 'description' in page else ''
            parts.append('- [' + page['title'] + '](' + page['path'] + ')' + date + des + '\n')

    # print to file
    with open(path, 'w') as f:
        f.write(''.join(parts))


def href_to_html(match, wiki_path, root_path):
    """Rewrite one markdown link match so it points at the HTML output.

    ``match`` must expose the link text, target and optional anchor as
    groups 1, 2 and 3. Targets containing ``://`` are left untouched,
    targets under ``root_path + 'static/'`` are re-rooted at ``wiki_path``,
    and every other target is treated as a wiki page and gets ``.html``
    appended. Anchors are lowercased with spaces turned into dashes.
    """
    href = match.group(2)
    if '://' not in href:
        # literal prefix test: root_path is made of '../' segments whose dots
        # must not be interpreted as regex wildcards
        if href.startswith(root_path + 'static/'):  # it's a static file, link to it
            href = wiki_path + href[len(root_path):]
        else:  # it's a vimwiki page, link to the output html
            href += '.html'
            if match.group(3):
                href += '#' + match.group(3).replace(' ', '-').lower()
    return "[{}]({})".format(match.group(1), href)


def wiki_to_html(input_file, output_file, template_file, root_path, wiki_path, categories):
    """Convert the wiki page ``input_file`` to HTML at ``output_file``.

    The page text is converted with the ``markdown`` package and inserted
    into the template read from ``template_file``: each ``{{ name }}``
    placeholder is replaced by the matching page parameter (``content``,
    ``metadata``, ``root_path``, ``title``, header values, ...), and unknown
    placeholders are left as they are.
    """
    # imported here so that the up-to-date check and the TOC generation do
    # not depend on the third-party package
    import markdown

    params, text = get_file_metadata_and_text(input_file, wiki_path, categories)

    # format links for HTML
    text = re.sub(r'\[([^]]+)\]\(([^)#]*)(?:#([^)]*))?\)',
                  lambda m: href_to_html(m, wiki_path, root_path), text)
    # add support for TODOs: the position of the mark in ' .oOX' is the
    # checkbox level (0 = empty, 4 = done)
    text = re.sub(r'^([ \t]*- )\[([ .oOX])\] ',
                  lambda m: '{}<img class="checkbox-{}"> '.format(
                      m.group(1), ' .oOX'.index(m.group(2))),
                  text, flags=re.MULTILINE)

    params['metadata'] = ''
    if params['category'] != categories[0]:
        params['metadata'] += '<span class="category">' + params['category'] + '</span>'
    if 'date' in params:
        args = [int(i) for i in params['date'].split('-')]
        date = datetime.date(args[0], args[1], args[2])
        # built by hand because '%-d' is a glibc-only strftime extension
        date_str = '{} {}, {}'.format(date.strftime('%B'), date.day, date.year)
        params['metadata'] += '<span class="date">' + date_str + '</span>'
    if params['metadata'] != '':
        params['metadata'] = '<div class="metadata">' + params['metadata'] + '</div>'

    params['root_path'] = root_path
    params['content'] = markdown.markdown(text, extensions=['footnotes', 'fenced_code'], tab_length=2)

    with open(template_file, 'r') as f:
        html = f.read()

    # render template variables
    html = re.sub(r'{{\s*([^}\s]+)\s*}}', lambda m: str(params.get(m.group(1), m.group(0))), html)

    with open(output_file, 'w') as f:
        f.write(html)


def _is_up_to_date(output_file, input_file):
    """Return True if output_file exists and is newer than input_file."""
    try:
        output_mtime = os.path.getmtime(output_file)
    except FileNotFoundError:
        # never generated before, so it cannot be up to date
        return False
    return output_mtime > os.path.getmtime(input_file)


def _toc_is_stale(toc_path, wiki_path):
    """Return True if the TOC is missing or older than anything in the wiki."""
    try:
        toc_mtime = os.path.getmtime(toc_path)
    except FileNotFoundError:
        return True
    for root, dirs, files in os.walk(wiki_path):
        if toc_mtime < os.path.getmtime(root):
            return True
        for f in files:
            if toc_mtime < os.path.getmtime(os.path.join(root, f)):
                return True
    return False


# Arguments received:
#  0. executable path
#  1. force : [0/1] overwrite an existing file
#  2. syntax : the syntax chosen for this wiki
#  3. extension : the file extension for this wiki
#  4. output_dir : the full path of the output directory
#  5. input_file : the full path of the wiki page
#  6. css_file : the full path of the css file for this wiki (ignored here)
#  7. template_path : the full path to the wiki's templates
#  8. template_default : the default template name
#  9. template_ext : the extension of template files
# 10. root_path : a count of ../ for pages buried in subdirs. '-' if in root
# 11. toc_path : path from root to TOC (without extension)
# 12-. categories : the rest of the arguments are the categories in order.
#      The first category is used for meta files that should appear directly
#      in the root directory. The last category is used for files that have
#      not set any category.
def main(argv=None):
    """Convert one wiki page, regenerating the TOC first if it is stale.

    ``argv`` follows the argument layout described above and defaults to
    ``sys.argv``. Exits with status 0 when force is disabled and the HTML is
    already up to date, and with status 1 for any syntax but ``markdown``.
    """
    if argv is None:
        argv = sys.argv

    output_dir = argv[4]
    input_file = argv[5]
    output_file = os.path.join(output_dir, os.path.splitext(os.path.basename(input_file))[0]) + '.html'
    template_path = os.path.join(argv[7], argv[8]) + argv[9]
    root_path = argv[10] if argv[10] != '-' else ''

    # if force is not enabled and HTML is already up to date, exit
    if argv[1] == '0' and _is_up_to_date(output_file, input_file):
        sys.exit(0)

    if argv[2] != 'markdown':
        print('Error: Unsupported syntax', file=sys.stderr)
        sys.exit(1)

    wiki_path = os.path.join(os.path.dirname(input_file), root_path)
    categories = list(argv[12:])

    # make TOC if any files have been updated after TOC
    toc_path = os.path.join(wiki_path, argv[11])
    if _toc_is_stale(toc_path, wiki_path):
        make_toc_file(toc_path, wiki_path, categories)

    wiki_to_html(input_file, output_file, template_path, root_path, wiki_path, categories)


if __name__ == '__main__':
    main()