ClickHouse/docs/tools/concatenate.py

64 lines
2.4 KiB
Python
Raw Normal View History

2017-12-29 12:43:05 +00:00
# -*- coding: utf-8 -*-
# - Single-page document.
# - Requirements for the md-sources:
# - Don't use links without anchors. This means that you cannot link to a file as a whole: specify an anchor at the top of the file and then link to that anchor.
# - Anchors should be unique throughout the whole document.
# - Implementation:
# - The script gets the list of files from the `pages` section of `mkdocs.yml`. It picks up commented-out files too, and that is intended.
# - Files are concatenated in order, with the header level incremented in all files except the first one.
# - The script converts links to other files into in-page links.
# - Links starting with 'http' are skipped.
# - Non-http links with an anchor are cut down to the anchor sign (#).
# - For non-http links without an anchor, the script logs an error and removes them from the resulting single-page document.
2017-12-29 12:43:05 +00:00
import logging
2017-12-29 12:43:05 +00:00
import re
import os
def concatenate(lang, docs_path, single_page_file):
    """Concatenate the markdown pages of one language into a single document.

    The page list is read from ``<docs_path>/toc_<lang>.yml``. Every line of
    that file which contains ``.md`` and does not contain ``single_page``
    contributes one page: the text after the first ``:`` on the line, with
    surrounding spaces, single quotes and newlines stripped, taken as a path
    relative to ``<docs_path>/<lang>``.

    For each page, in TOC order, the following is written to
    ``single_page_file``:

    1. one ``<a name="..."></a>`` line per anchor derived from the page path
       (see ``_page_anchors``), in sorted order so that the output is
       reproducible between runs;
    2. two newlines;
    3. the page content, where every line starting with ``#`` gets one more
       ``#`` prepended (this applies to every page and to any such line,
       not only to markdown headings outside code blocks).

    Args:
        lang: language code; selects both the TOC file and the pages folder.
        docs_path: directory containing ``toc_<lang>.yml`` and ``<lang>/``.
        single_page_file: writable text file-like object. It is flushed at
            the end but not closed.

    Raises:
        ValueError: if a TOC line containing ``.md`` has no ``:``.
        IOError / OSError: if the TOC file or a listed page cannot be opened.
    """
    toc_path = os.path.join(docs_path, 'toc_%s.yml' % lang)
    lang_path = os.path.join(docs_path, lang)

    files_to_concatenate = []
    with open(toc_path) as cfg_file:
        for line in cfg_file:
            if '.md' in line and 'single_page' not in line:
                path = line[line.index(':') + 1:].strip(" '\n")
                files_to_concatenate.append(path)

    logging.info(
        '%d files will be concatenated into single md-file.',
        len(files_to_concatenate))
    logging.debug('Concatenating: %s', ', '.join(files_to_concatenate))

    for path in files_to_concatenate:
        # The page is opened before anything is written, so a missing page
        # fails without leaving a dangling group of anchors in the output.
        with open(os.path.join(lang_path, path)) as page:
            for anchor in _page_anchors(path):
                single_page_file.write('<a name="%s"></a>\n' % anchor)

            single_page_file.write('\n\n')

            for line in page:
                if line.startswith('#'):
                    line = '#' + line
                single_page_file.write(line)

    single_page_file.flush()


def _page_anchors(path):
    """Return the sorted anchor names under which the page *path* is reachable.

    ``path`` is the page path as listed in the TOC (e.g. ``a/b/c.md``). It is
    first turned into a directory-style path (``a/b/c/``; ``x/index.md``
    becomes ``x/``). The anchors are:

    * the last component (``c/``);
    * the path without its first component (``b/c/``);
    * for every leading directory, the path with the directories before it
      replaced by ``..`` (``a/b/c/``, ``../b/c/``), each additionally
      prefixed with zero to three ``../``.
    """
    dir_path = path.replace('/index.md', '/').replace('.md', '/')
    parts = dir_path.split('/')
    prefixes = ('', '../', '../../', '../../../')

    anchors = set()
    anchors.add(parts[-2] + '/')
    anchors.add('/'.join(parts[1:]))

    # Build each relative form from the path components instead of using
    # str.replace(part, '..') on the whole path: replace() substitutes every
    # substring occurrence, which corrupts later components that merely
    # contain the directory name (e.g. 'a/b/abc/' -> '../b/..bc/').
    for depth in range(len(parts) - 2):
        relative = '../' * depth + '/'.join(parts[depth:])
        for prefix in prefixes:
            anchors.add(prefix + relative)

    # Set iteration order of strings varies between interpreter runs; sorting
    # keeps the generated document byte-for-byte reproducible.
    return sorted(anchors)