ClickHouse/docs/concatenate.py

89 lines
2.8 KiB
Python
Raw Normal View History

2017-12-29 12:43:05 +00:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# - Single-page document.
# - Requirements for the md-sources:
# - Don't use links without anchors. This means that you cannot just link to a file: specify an anchor at the top of the file and then link to that anchor.
# - Anchors should be unique throughout the whole document.
# - Implementation:
# - Script gets the list of files from the `pages` section of `mkdocs.yml`. It picks up commented-out files too, and that is intended.
# - Files are concatenated in order, with the header level incremented in all files except the first one.
# - Script converts links to other files into in-page links.
# - Links starting with 'http' are skipped.
# - Non-http links with an anchor are cut down to the anchor part (from the # sign on).
# - For non-http links without an anchor, the script logs an error and cuts them from the resulting single-page document.
2017-12-29 12:43:05 +00:00
import codecs
import sys
import re
import os
# Validate the command line before doing any work. The first argument is the
# language directory (e.g. 'ru').
# Exit status 1: the argument is missing; 2: the given path does not exist.
if len(sys.argv) < 2:
    for usage_line in ("Usage: concatenate.py language_dir",
                       "Example: concatenate.py ru"):
        print(usage_line)
    sys.exit(1)

if not os.path.exists(sys.argv[1]):
    print("Pass language_dir correctly. For example, 'ru'.")
    sys.exit(2)
# Configuration: every path below is derived from the language directory
# passed as the first command-line argument.
_lang_dir = sys.argv[1]

# mkdocs config file that the list of pages is read from.
PROJ_CONFIG = 'mkdocs_' + _lang_dir + '.yml'
# Output file: the concatenated single-page document.
SINGLE_PAGE = _lang_dir + '_single_page/index.md'
# Prefix prepended to every page path found in the config.
DOCS_DIR = _lang_dir + '/'
# 1. Open the mkdocs config file and build an ordered list of markdown files.
# Every line that mentions '.md' is taken (commented-out entries included),
# except lines that mention 'single_page'.
files_to_concatenate = []

# `with` makes sure the config file is closed, even if a line fails to parse.
with open(PROJ_CONFIG) as cfg_file:
    for line in cfg_file:
        if '.md' in line and 'single_page' not in line:
            # The path is whatever follows the first ':' on the line, with
            # surrounding spaces, single quotes and the newline stripped.
            # A matching line without ':' raises ValueError.
            path = line[line.index(':') + 1:].strip(" '\n")
            files_to_concatenate.append(path)

print(str(len(files_to_concatenate)) + " files will be concatenated into single md-file.\nFiles:")
print(files_to_concatenate)
# 2. Concatenate all of the files in the list

# Inline markdown link: group 1 is the link text, group 2 is the link target.
# Capture groups replace the former strip('[)') / split('](') parsing, which
# raised ValueError when the target contained '](' and ate brackets from
# nested text such as '[[x]](y)'.
_LINK_RE = re.compile(r'\[(.+?)\]\((.+?)\)')


def link_proc(matchObj):
    """Return the replacement text for one markdown link match.

    Passed to re.sub() as the replacement callback.

    - Links whose target starts with 'http' are returned unchanged.
    - Other links that contain '#' are rewritten to point at the in-page
      anchor only (everything before '#' is dropped).
    - Other links without '#' are reported on stdout and removed from the
      output (an empty string is returned).

    The error message reads the module-level `path`, i.e. the file that is
    being processed when the callback runs.
    """
    text = matchObj.group(1)
    link = matchObj.group(2)

    if link.startswith('http'):
        return '[' + text + '](' + link + ')'

    sharp_pos = link.find('#')
    if sharp_pos > -1:
        return '[' + text + '](' + link[sharp_pos:] + ')'

    print('ERROR: Link [' + text + '](' + link + ') in file ' + path + ' has no anchor. Please provide it.')
    # Explicit empty replacement instead of relying on an implicit None.
    return ''


first_file = True

# `with` closes the output (and each input) even when an error interrupts
# the run; previously the input files were never closed at all.
with open(SINGLE_PAGE, 'w') as single_page_file:
    for path in files_to_concatenate:
        # Separate consecutive files with a blank line.
        single_page_file.write('\n\n')

        with open(DOCS_DIR + path) as md_file:
            for line in md_file:
                # Processing links in a string
                line = _LINK_RE.sub(link_proc, line)

                # Correcting header levels: every file except the first one
                # gets its headers pushed one level down.
                if not first_file and line.startswith('#'):
                    line = '#' + line

                single_page_file.write(line)

        # Clear the flag only once the whole first file has been written, so
        # that all of its headers keep their level (not just its first line).
        first_file = False