wiki2html.py (7582B) - raw


      1 #!/usr/bin/env python3
      2 # Vimwiki to HTML converter
      3 # Copyright (C) 2021 Oscar Benedito
      4 #
      5 # This program is free software: you can redistribute it and/or modify
      6 # it under the terms of the GNU Affero General Public License as
      7 # published by the Free Software Foundation, either version 3 of the
      8 # License, or (at your option) any later version.
      9 #
     10 # This program is distributed in the hope that it will be useful,
     11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 # GNU Affero General Public License for more details.
     14 #
     15 # You should have received a copy of the GNU Affero General Public License
     16 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
     17 
     18 import os
     19 import re
     20 import sys
     21 import datetime
     22 import markdown
     23 
     24 
     25 def get_file_metadata_and_text(file, wiki_path, categories):
     26     metadata = {}
     27     metadata['path'] = os.path.splitext(os.path.relpath(file, wiki_path))[0]
     28 
     29     with open(file, 'r') as f:
     30         text = f.read()
     31 
     32     # read headers and remove from text
     33     end = 0
     34     for match in re.finditer(r'\s*<!--\s*(.+?)\s*:\s*(.+?)\s*-->\s*|.+', text):
     35         if not match.group(1):
     36             break
     37         metadata[match.group(1)] = match.group(2)
     38         end = match.end()
     39     text = text[end:]
     40 
     41     # read title and remove from text
     42     match = re.match('# (.+?)\s*\n', text)
     43     if match is not None:
     44         metadata['title'] = match.group(1)
     45         text = text[match.end():]
     46     else:
     47         metadata['title'] = metadata['path']
     48 
     49     # read description
     50     match = re.match('\*([^* ].+?)\*\s*\n', text)
     51     if match is not None:
     52         metadata['description'] = match.group(1)
     53 
     54     if 'category' not in metadata:
     55         metadata['category'] = categories[-1]
     56 
     57     return metadata, text
     58 
     59 
     60 def make_toc_file(path, wiki_path, categories):
     61     # generate categories' data
     62     cat_info = { c : [] for c in categories }
     63     for root, dirs, files in os.walk(wiki_path):
     64         for f in files:
     65             if f[-3:] == '.md' and os.path.join(wiki_path, root, f) != path:
     66                 metadata = get_file_metadata_and_text(os.path.join(wiki_path, root, f), wiki_path, categories)[0]
     67                 cat_info[metadata['category']].append(metadata)
     68 
     69     # generate markdown
     70     text = '<!-- category: {} -->\n# Table of Contents\n'.format(categories[0])
     71     for cat, files in cat_info.items():
     72         if len(files) != 0:
     73             text += '\n## ' + cat + '\n\n' if cat != categories[0] else '\n'
     74         for f in sorted(files, key=lambda x : x['title']):
     75             date = ' (' + f['date'] + ')' if 'date' in f else ''
     76             des = ': ' + f['description'] if 'description' in f else ''
     77             text += '- [' + f['title'] + '](' + f['path'] + ')' + date + des + '\n'
     78 
     79     # print to file
     80     with open(path, 'w') as f:
     81         f.write(text)
     82 
     83 
     84 def href_to_html(match, wiki_path, root_path):
     85     href = match.group(2)
     86     if not re.search('://', href):
     87         if re.search('^' + root_path + 'static/', href): # it's a static file, link to it
     88             href = wiki_path + href[len(root_path):]
     89         else: # it's a vimwiki page, link to the output html
     90             href += '.html'
     91             if match.group(3):
     92                 href += '#' + match.group(3).replace(' ', '-').lower()
     93     return "[{}]({})".format(match.group(1), href)
     94 
     95 
     96 def wiki_to_html(input_file, output_file, template_file, root_path, wiki_path, categories):
     97     params, text = get_file_metadata_and_text(input_file, wiki_path, categories)
     98 
     99     # format links for HTML
    100     text = re.sub('\[([^]]+)\]\(([^)#]*)(?:#([^)]*))?\)', lambda m: href_to_html(m, wiki_path, root_path), text)
    101     # add support for TODOs
    102     text = re.sub('^([ \t]*- )\[ \] ', lambda m : m.group(1) + '<img class="checkbox-0"> ', text, flags=re.MULTILINE)
    103     text = re.sub('^([ \t]*- )\[\.\] ', lambda m : m.group(1) + '<img class="checkbox-1"> ', text, flags=re.MULTILINE)
    104     text = re.sub('^([ \t]*- )\[o\] ', lambda m : m.group(1) + '<img class="checkbox-2"> ', text, flags=re.MULTILINE)
    105     text = re.sub('^([ \t]*- )\[O\] ', lambda m : m.group(1) + '<img class="checkbox-3"> ', text, flags=re.MULTILINE)
    106     text = re.sub('^([ \t]*- )\[X\] ', lambda m : m.group(1) + '<img class="checkbox-4"> ', text, flags=re.MULTILINE)
    107 
    108     params['metadata'] = ''
    109     if params['category'] != categories[0]:
    110         params['metadata'] += '<span class="category">' + params['category'] + '</span>'
    111     if 'date' in params:
    112         args = [int(i) for i in params['date'].split('-')]
    113         date_str = datetime.date(args[0], args[1], args[2]).strftime('%B %-d, %Y')
    114         params['metadata'] += '<span class="date">' + date_str + '</span>'
    115     if params['metadata'] != '':
    116         params['metadata'] = '<div class="metadata">' + params['metadata'] + '</div>'
    117 
    118     params['root_path'] = root_path
    119     params['content'] = markdown.markdown(text, extensions=['footnotes', 'fenced_code'], tab_length=2)
    120 
    121     with open(template_file, 'r') as f:
    122         html = f.read()
    123 
    124     # render template variables
    125     html = re.sub(r'{{\s*([^}\s]+)\s*}}', lambda m: str(params.get(m.group(1), m.group(0))), html)
    126 
    127     with open(output_file, 'w') as f:
    128         f.write(html)
    129 
    130 
    131 # Arguments received:
    132 #     0.  executable path
    133 #     1.  force : [0/1] overwrite an existing file
    134 #     2.  syntax : the syntax chosen for this wiki
    135 #     3.  extension : the file extension for this wiki
    136 #     4.  output_dir : the full path of the output directory
    137 #     5.  input_file : the full path of the wiki page
    138 #     6.  css_file : the full path of the css file for this wiki (ignored here)
    139 #     7.  template_path : the full path to the wiki's templates
    140 #     8.  template_default : the default template name
    141 #     9.  template_ext : the extension of template files
    142 #     10. root_path : a count of ../ for pages buried in subdirs. '-' if in root
    143 #     11. toc_path : path from root to TOC (without extension)
    144 #     12-.categories : the rest of the arguments are the categories in order.
    145 #         The first category is used for meta files that should appear directly
    146 #         in the root directory. The last category is used for files that have
    147 #         not set any category.
    148 if __name__ == '__main__':
    149     output_dir = sys.argv[4]
    150     input_file = sys.argv[5]
    151     output_file = os.path.join(output_dir, os.path.splitext(os.path.basename(input_file))[0]) + '.html'
    152     template_path = os.path.join(sys.argv[7], sys.argv[8]) + sys.argv[9]
    153     root_path = sys.argv[10] if sys.argv[10] != '-' else ''
    154 
    155     # if force is not enabled and HTML is already up to date, exit
    156     if sys.argv[1] == '0' and os.path.getmtime(output_file) > os.path.getmtime(input_file):
    157         sys.exit(0)
    158 
    159     if sys.argv[2] != 'markdown':
    160         print('Error: Unsupported syntax', file=sys.stderr)
    161         sys.exit(1)
    162 
    163     # global variables
    164     wiki_path = os.path.join(os.path.dirname(input_file), root_path)
    165     categories = [ c for c in sys.argv[12:] ]
    166 
    167     # make TOC if any files have been updated after TOC
    168     toc_path = os.path.join(wiki_path, sys.argv[11])
    169     for root, dirs, files in os.walk(wiki_path):
    170         if os.path.getmtime(toc_path) < os.path.getmtime(os.path.join(wiki_path, root)):
    171             make_toc_file(toc_path, wiki_path, categories)
    172             break
    173         for f in files:
    174             if os.path.getmtime(toc_path) < os.path.getmtime(os.path.join(wiki_path, root, f)):
    175                 make_toc_file(toc_path, wiki_path, categories)
    176                 break
    177 
    178     wiki_to_html(input_file, output_file, template_path, root_path, wiki_path, categories)