Refactoring of documentation infrastructure to get rid of a lot of copy-paste (#2616)

* Clean up docs folder by moving all build-related tools to subdirectory

* Remove unused script

* Remove unused script #2

* Some refactoring in concatenate.py

* Rewrite build.sh in Python

- Get rid of half of copypaste in yml files
- Draft of redirects support

* Actually include redirects.conf

* copy conf too

* Keep H1 the same in single page docs

* fix some paths

* Keep only pages index in yaml

* Workaround for missing jQuery

* Delay docs init
This commit is contained in:
Ivan Blinkov 2018-07-09 22:59:07 +03:00 committed by alexey-milovidov
parent 68a2caed9a
commit ba1393fbbd
77 changed files with 266 additions and 392 deletions

6
.gitignore vendored
View File

@ -10,10 +10,8 @@
*.logrt
/build
/docs/en_single_page/
/docs/ru_single_page/
/docs/venv/
/docs/build/
/docs/build
/docs/tools/venv/
/docs/en/development/build/
/docs/ru/development/build/

View File

@ -1,19 +0,0 @@
#!/bin/bash
# Build the multi-page and single-page documentation for each language.
#
# Usage: ./build.sh [LANG...]
#   With no arguments both "ru" and "en" are built. Languages may be given
#   either as one quoted list ("ru en") or as separate arguments (ru en).
set -e

if [ $# -lt 1 ]; then
    LANGS="ru en"
else
    # "$*" covers both the single-argument form and several arguments;
    # previously more than one argument left LANGS unset.
    LANGS="$*"
fi

for lang in $LANGS; do
    echo -e "\n\nLANG=$lang. Creating single page source"
    # -p makes an already existing directory a no-op without hiding real errors.
    mkdir -p "${lang}_single_page"
    cp -r "$lang/images" "${lang}_single_page"
    ./concatenate.py "$lang"

    echo -e "\n\nLANG=$lang. Building multipage..."
    mkdocs build -f "mkdocs_$lang.yml"

    echo -e "\n\nLANG=$lang. Building single page..."
    mkdocs build -f "mkdocs_${lang}_single_page.yml"
done

View File

@ -1,87 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# - Single-page document.
# - Requirements to the md-sources:
# - Don't use links without anchors. It means that you cannot just link to a file: specify an anchor at the top of the target file and link to that anchor.
# - Anchors must be unique throughout the whole document.
# - Implementation:
# - The script gets the list of files from the `pages` section of `mkdocs_<lang>.yml`. It picks up commented-out entries too, and that is intended.
# - Files are concatenated in order, incrementing the level of headers in all files except the first one.
# - The script converts links to other files into links inside the page.
# - Links starting with 'http' are left as they are.
# - Non-http links with an anchor are cut down to the anchor part (from the # sign onwards).
# - For non-http links without an anchor the script raises a RuntimeError naming the link and the file.
import codecs
import sys
import re
import os
# Validate the command line: one existing language directory is required.
if len(sys.argv) < 2:
    print("Usage: concatenate.py language_dir")
    print("Example: concatenate.py ru")
    sys.exit(1)

if not os.path.exists(sys.argv[1]):
    print("Pass language_dir correctly. For example, 'ru'.")
    sys.exit(2)

# Configuration
PROJ_CONFIG = 'mkdocs_' + sys.argv[1] + '.yml'
SINGLE_PAGE = sys.argv[1] + '_single_page/index.md'
DOCS_DIR = sys.argv[1] + '/'

# 1. Open mkdocs.yml file and read `pages` configuration to get an ordered list of files
files_to_concatenate = []
with open(PROJ_CONFIG) as cfg_file:
    for l in cfg_file:
        # Any line mentioning a .md file counts, whatever its YAML nesting.
        if ('.md' in l) and ('single_page' not in l):
            path = (l[l.index(':') + 1:]).strip(" '\n")
            files_to_concatenate.append(path)

print(str(len(files_to_concatenate)) + " files will be concatenated into single md-file.\nFiles:")
print(files_to_concatenate)

# 2. Concatenate all of the files in the list
first_file = True
with open(SINGLE_PAGE, 'w') as single_page_file:
    for path in files_to_concatenate:
        single_page_file.write('\n\n')

        # function is passed into re.sub() to process links
        def link_proc(matchObj):
            # Capture groups keep brackets that belong to the link text,
            # e.g. "[[1]](page.md#ref)".
            text, link = matchObj.group(1), matchObj.group(2)
            if link.startswith('http'):
                return '[' + text + '](' + link + ')'
            sharp_pos = link.find('#')
            if sharp_pos > -1:
                return '[' + text + '](' + link[sharp_pos:] + ')'
            raise RuntimeError('ERROR: Link [' + text + '](' + link + ') in file ' + path + ' has no anchor. Please provide it.')

        with open(DOCS_DIR + path) as source_file:
            for l in source_file:
                # Processing links in a string
                l = re.sub(r'\[(.+?)\]\((.+?)\)', link_proc, l)

                # Correcting headers levels. The flag is cleared after the
                # very first line, so only that line keeps its header level.
                if not first_file:
                    if l.startswith('#'):
                        l = '#' + l
                else:
                    first_file = False

                single_page_file.write(l)

View File

@ -1,41 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
SOURCES_TREE = 'ru'
from os import walk
def get_header(filepath):
    """Return the text of the first line in *filepath* that starts with '#'.

    Only the first '#' is removed, followed by surrounding spaces and the
    newline, so a second-level header such as ``## Sub`` yields ``# Sub``.
    An empty string is returned when no line starts with '#'.
    """
    # The context manager closes the file even if reading raises.
    with open(filepath) as f:
        for line in f:
            if line.startswith('#'):
                return line[1:].strip(' \n')
    return ''
# Generate two helper listings for every Markdown file under SOURCES_TREE:
# "strings_for_pages.txt" receives `'<header>': '<path>'` entries and
# "links_for_md.txt" receives `[<header>](<path>)` Markdown links.
pages_file = open("strings_for_pages.txt", "w")
md_links_file = open("links_for_md.txt", "w")
for (dirpath, dirnames, filenames) in walk(SOURCES_TREE):
for filename in filenames:
# Substring test: any name containing '.md' is processed, others are skipped.
if '.md' not in filename:
continue
# The entry title is the first '#' header found in the file.
header = get_header(dirpath + '/' + filename)
# Remove every 'docs/' occurrence from the directory part of the path.
path = dirpath.replace('docs/', '') + '/' + filename
# index.md is written without a leading space, any other file with one.
if filename == 'index.md':
pages_file.write("- '" + header + "': " + "'" + path + "'\n")
else:
pages_file.write(" - '" + header + "': " + "'" + path + "'\n")
# NOTE(review): indentation is lost in this view, so it is unclear whether
# the link below is written for every file or only in the `else` branch - confirm.
md_links_file.write("[" + header + "](" + path + ")\n")
pages_file.close()
md_links_file.close()

View File

@ -1,44 +1,3 @@
site_name: ClickHouse Documentation
copyright: ©20162018 Yandex LLC
docs_dir: en
site_dir: build/docs/en
use_directory_urls: true
strict: true
repo_name: 'yandex/ClickHouse'
repo_url: 'https://github.com/yandex/ClickHouse/'
edit_uri: 'edit/master/docs/en'
extra_css:
- assets/stylesheets/custom.css
markdown_extensions:
- codehilite
theme:
name: null
custom_dir: 'mkdocs-material-theme'
language: 'en'
feature:
tabs: false
palette:
primary: 'white'
accent: 'white'
font: false
logo: 'images/logo.svg'
favicon: 'assets/images/favicon.ico'
include_search_page: false
search_index_only: true
static_templates:
- 404.html
extra:
single_page: false
search:
language: 'en'
pages:
- 'ClickHouse': 'index.md'

View File

@ -1,41 +0,0 @@
site_name: ClickHouse Documentation
copyright: ©20162018 Yandex LLC
docs_dir: en_single_page
site_dir: build/docs/en/single
use_directory_urls: false
strict: true
repo_name: 'yandex/ClickHouse'
repo_url: 'https://github.com/yandex/ClickHouse/'
extra_css:
- assets/stylesheets/custom.css
markdown_extensions:
- codehilite
theme:
name: null
custom_dir: 'mkdocs-material-theme'
language: 'en'
feature:
tabs: false
palette:
primary: 'white'
accent: 'white'
font: false
logo: 'images/logo.svg'
favicon: 'assets/images/favicon.ico'
include_search_page: false
search_index_only: true
static_templates:
- 404.html
extra:
single_page: true
search:
language: 'en'
pages:
- 'Documentation': 'index.md'

View File

@ -1,43 +1,3 @@
site_name: Документация ClickHouse
copyright: ©20162018 Yandex LLC
docs_dir: ru
site_dir: build/docs/ru
use_directory_urls: true
strict: true
repo_name: 'yandex/ClickHouse'
repo_url: 'https://github.com/yandex/ClickHouse/'
edit_uri: 'edit/master/docs/ru'
extra_css:
- assets/stylesheets/custom.css
markdown_extensions:
- codehilite
theme:
name: null
custom_dir: 'mkdocs-material-theme'
language: 'ru'
feature:
tabs: false
palette:
primary: 'white'
accent: 'white'
font: false
logo: 'images/logo.svg'
favicon: 'assets/images/favicon.ico'
include_search_page: false
search_index_only: true
static_templates:
- 404.html
extra:
single_page: false
search:
language: 'en, ru'
pages:
- 'ClickHouse': 'index.md'

View File

@ -1,45 +0,0 @@
site_name: Документация ClickHouse
copyright: ©20162018 Yandex LLC
docs_dir: ru_single_page
site_dir: build/docs/ru/single
use_directory_urls: false
strict: true
repo_name: 'yandex/ClickHouse'
repo_url: 'https://github.com/yandex/ClickHouse/'
extra_css:
- assets/stylesheets/custom.css
markdown_extensions:
- codehilite
theme:
name: null
custom_dir: 'mkdocs-material-theme'
language: 'ru'
feature:
tabs: false
palette:
primary: 'white'
accent: 'white'
font: false
# text: Roboto
# code: Roboto Mono
logo: 'images/logo.svg'
favicon: 'assets/images/favicon.ico'
include_search_page: false
search_index_only: true
static_templates:
- 404.html
extra:
single_page: true
search:
language: 'en, ru'
pages:
- 'Документация': 'index.md'

1
docs/redirects.txt Normal file
View File

@ -0,0 +1 @@
test.md roadmap.md

162
docs/tools/build.py Executable file
View File

@ -0,0 +1,162 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import argparse
import contextlib
import logging
import os
import shutil
import sys
import tempfile
from mkdocs import config
from mkdocs import exceptions
from mkdocs.commands import build as mkdocs_build
from concatenate import concatenate
@contextlib.contextmanager
def temp_dir():
    """Yield the path of a new temporary directory and delete it afterwards.

    The directory is created under ``$TEMP`` when that environment variable
    is set; otherwise ``tempfile.mkdtemp`` picks its default location. The
    whole tree is removed when the ``with`` block ends, whether it finishes
    normally or raises.
    """
    parent = os.environ.get('TEMP')
    scratch = tempfile.mkdtemp(dir=parent)
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch)
@contextlib.contextmanager
def autoremoved_file(path):
    """Open *path* for writing, yield the handle, then delete the file.

    The file is closed before it is removed. It is removed whether the
    ``with`` body finishes normally or raises.

    If the file cannot be opened, the error from ``open`` propagates
    unchanged. No removal is attempted in that case, so the real cause is
    not masked by a second "no such file" error from ``os.unlink``.
    """
    # Opened outside the try block on purpose: nothing exists to clean up
    # if this call fails.
    handle = open(path, 'w')
    try:
        with handle:
            yield handle
    finally:
        os.unlink(path)
def build_for_lang(lang, args):
    """Build the multi-page site for *lang*, then its single-page version.

    Settings shared by all languages are passed to ``config.load_config``
    as overrides. The per-language ``mkdocs_<lang>.yml`` under
    ``args.docs_dir`` is still given as the config file.

    Args:
        lang: language code, also the name of the sources sub-directory.
        args: parsed command-line options. Reads ``docs_dir``,
            ``output_dir``, ``skip_single_page`` and, when present,
            ``theme_dir``.

    Raises:
        SystemExit: when mkdocs reports a ``ConfigurationError``.
    """
    logging.info('Building %s docs', lang)

    config_path = os.path.join(args.docs_dir, 'mkdocs_%s.yml' % lang)
    # Honour --theme-dir. Its default equals the previously hard-coded
    # value, and callers whose args lack the attribute get the same value.
    theme_dir = getattr(args, 'theme_dir', 'mkdocs-material-theme')

    try:
        theme_cfg = {
            'name': None,
            'custom_dir': theme_dir,
            'language': lang,
            'feature': {
                'tabs': False
            },
            'palette': {
                'primary': 'white',
                'accent': 'white'
            },
            'font': False,
            'logo': 'images/logo.svg',
            'favicon': 'assets/images/favicon.ico',
            'include_search_page': False,
            'search_index_only': True,
            'static_templates': ['404.html'],
            'extra': {
                'single_page': False,
                'search': {
                    # English is always searchable; other languages add their own.
                    'language': 'en' if lang == 'en' else 'en, %s' % lang
                }
            }
        }
        cfg = config.load_config(
            config_file=config_path,
            # Every non-English language currently gets the Russian site name.
            site_name='ClickHouse Documentation' if lang == 'en' else 'Документация ClickHouse',
            docs_dir=os.path.join(args.docs_dir, lang),
            site_dir=os.path.join(args.output_dir, lang),
            strict=True,
            theme=theme_cfg,
            # NOTE(review): the year range looks like it lost a dash between
            # 2016 and 2018 - confirm against the repository.
            copyright='©20162018 Yandex LLC',
            use_directory_urls=True,
            repo_name='yandex/ClickHouse',
            repo_url='https://github.com/yandex/ClickHouse/',
            edit_uri='edit/master/docs/%s' % lang,
            extra_css=['assets/stylesheets/custom.css'],
            markdown_extensions=['codehilite']
        )

        mkdocs_build.build(cfg)

        if not args.skip_single_page:
            build_single_page_version(lang, args, cfg)

    except exceptions.ConfigurationError as e:
        raise SystemExit('\n' + str(e))
def build_single_page_version(lang, args, cfg):
    """Build the single-page variant of the *lang* docs into ``<output>/<lang>/single``.

    All pages are concatenated into a temporary ``single.md`` inside the
    language sources. The site is rebuilt into a scratch directory with that
    file as its only page, and the resulting ``single`` directory is copied
    into the real output tree. The temporary file and the scratch directory
    are removed afterwards.

    Args:
        lang: language code, also the name of the sources sub-directory.
        args: parsed command-line options; reads ``docs_dir`` and ``output_dir``.
        cfg: the mkdocs config already used for the multi-page build. It is
            updated in place through ``cfg.load_dict``.
    """
    logging.info('Building single page version for %s', lang)

    single_md_path = os.path.join(args.docs_dir, lang, 'single.md')
    with autoremoved_file(single_md_path) as single_md:
        concatenate(lang, args.docs_dir, single_md)

        with temp_dir() as temp:
            cfg.load_dict({
                'docs_dir': os.path.join(args.docs_dir, lang),
                'site_dir': temp,
                'extra': {
                    'single_page': True,
                    'search': {
                        # Same rule as the multi-page build; this used to be
                        # 'en, ru' for every language.
                        'language': 'en' if lang == 'en' else 'en, %s' % lang
                    }
                },
                'pages': [
                    {cfg.data.get('site_name'): 'single.md'}
                ]
            })

            mkdocs_build.build(cfg)

            destination = os.path.join(args.output_dir, lang, 'single')
            # copytree fails if the target exists, so drop a stale copy first.
            if os.path.exists(destination):
                shutil.rmtree(destination)
            shutil.copytree(os.path.join(temp, 'single'), destination)
def build_redirects(args):
    """Translate ``redirects.txt`` into nginx rewrite rules in ``redirects.conf``.

    Each non-blank line of ``<docs_dir>/redirects.txt`` has the form
    ``<old>.md <new>.md``. It becomes one permanent ``rewrite`` rule that
    matches the old page under any configured language and points at the new
    page in the same language. Rules are written one per line to
    ``<output_dir>/redirects.conf``.

    Args:
        args: parsed command-line options; reads ``lang`` (comma separated),
            ``docs_dir`` and ``output_dir``.

    Raises:
        ValueError: when a non-blank line does not contain two paths.
    """
    lang_re_fragment = args.lang.replace(',', '|')
    redirects_path = os.path.join(args.docs_dir, 'redirects.txt')
    rewrites = []

    with open(redirects_path, 'r') as f:
        for line_number, line in enumerate(f, 1):
            # Strip the newline so it does not end up inside the rule, and
            # skip blank lines instead of failing on them.
            line = line.strip()
            if not line:
                continue
            parts = line.split(None, 1)
            if len(parts) != 2:
                raise ValueError(
                    '%s:%d: expected "<from> <to>", got %r' % (redirects_path, line_number, line))
            from_path, to_path = parts
            from_path = '^/docs/(' + lang_re_fragment + ')/' + from_path.replace('.md', '/?') + '$'
            to_path = '/docs/$1/' + to_path.replace('.md', '/')
            rewrites.append(' '.join(['rewrite', from_path, to_path, 'permanent;']))

    with open(os.path.join(args.output_dir, 'redirects.conf'), 'w') as f:
        f.writelines(rule + '\n' for rule in rewrites)
def build(args):
    """Build the docs for every language in ``args.lang``, then the redirect rules.

    ``args.lang`` is a comma separated list of language codes. Each one is
    built in turn, and the nginx redirects are generated once at the end.
    """
    languages = args.lang.split(',')
    for language in languages:
        build_for_lang(language, args)

    build_redirects(args)
if __name__ == '__main__':
    # Command-line entry point: parse options, set up logging, run the build.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--lang', default='en,ru')
    arg_parser.add_argument('--docs-dir', default='..')
    arg_parser.add_argument('--theme-dir', default='mkdocs-material-theme')
    arg_parser.add_argument('--output-dir', default='../build')
    arg_parser.add_argument('--skip-single-page', action='store_true')
    arg_parser.add_argument('--verbose', action='store_true')

    args = arg_parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        stream=sys.stderr
    )

    # build() is defined in this module. Re-importing the script under the
    # name "build" only loaded the same file a second time.
    build(args)

71
docs/tools/concatenate.py Executable file
View File

@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
# - Single-page document.
# - Requirements to the md-sources:
# - Don't use links without anchors. It means that you cannot just link to a file: specify an anchor at the top of the target file and link to that anchor.
# - Anchors must be unique throughout the whole document.
# - Implementation:
# - The script gets the list of files from the `pages` section of `mkdocs_<lang>.yml`. It picks up commented-out entries too, and that is intended.
# - Files are concatenated in order, incrementing the level of headers in all files except the first one.
# - The script converts links to other files into links inside the page.
# - Links starting with 'http' are left as they are.
# - Non-http links with an anchor are cut down to the anchor part (from the # sign onwards).
# - For non-http links without an anchor the script raises a RuntimeError naming the link and the file.
import logging
import re
import os
# Markdown inline link: group 1 is the link text, group 2 the target.
_LINK_RE = re.compile(r'\[(.+?)\]\((.+?)\)')


def _rewrite_link(match, path):
    """Return the replacement for one Markdown link found in file *path*."""
    text, link = match.group(1), match.group(2)
    if link.startswith('http'):
        return '[' + text + '](' + link + ')'
    sharp_pos = link.find('#')
    if sharp_pos > -1:
        # Keep only the anchor: the target is now part of the same page.
        return '[' + text + '](' + link[sharp_pos:] + ')'
    raise RuntimeError(
        'ERROR: Link [' + text + '](' + link + ') in file ' + path + ' has no anchor. Please provide it.')


def concatenate(lang, docs_path, single_page_file):
    """Write all pages of the *lang* docs into one Markdown stream.

    The page list is every line of ``<docs_path>/mkdocs_<lang>.yml`` that
    contains ``.md`` and does not contain ``single_page``, in file order.
    Each page is read from ``<docs_path>/<lang>/`` and preceded by a blank
    separator. Links are rewritten: ``http`` links stay as they are, links
    with an anchor are reduced to the anchor.

    Header levels: the very first line written is left unchanged. After
    that, every line starting with ``#`` gets one more ``#``.

    Args:
        lang: language code, also the name of the sources sub-directory.
        docs_path: directory holding the yml config and language directories.
        single_page_file: writable text file object receiving the result.
            It is flushed but not closed.

    Raises:
        RuntimeError: when a non-http link has no anchor.
    """
    proj_config = os.path.join(docs_path, 'mkdocs_%s.yml' % lang)
    lang_path = os.path.join(docs_path, lang)

    files_to_concatenate = []
    with open(proj_config) as cfg_file:
        for l in cfg_file:
            if '.md' in l and 'single_page' not in l:
                # Everything after the first colon, without quotes or spaces.
                path = (l[l.index(':') + 1:]).strip(" '\n")
                files_to_concatenate.append(path)

    logging.info(
        '%d files will be concatenated into single md-file.\nFiles:',
        len(files_to_concatenate))
    logging.info('%s', files_to_concatenate)

    first_file = True

    for path in files_to_concatenate:
        single_page_file.write('\n\n')

        with open(os.path.join(lang_path, path)) as f:
            for l in f:
                # Processing links in a string. Capture groups keep brackets
                # that belong to the link text, e.g. "[[1]](page.md#ref)".
                l = _LINK_RE.sub(lambda m: _rewrite_link(m, path), l)

                # Correcting headers levels. The flag is cleared after the
                # first line, so only that line keeps its level.
                if not first_file:
                    if l.startswith('#'):
                        l = '#' + l
                else:
                    first_file = False

                single_page_file.write(l)

    single_page_file.flush()

View File

Before

Width:  |  Height:  |  Size: 171 B

After

Width:  |  Height:  |  Size: 171 B

View File

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

View File

@ -180,12 +180,6 @@
{% endblock %}
</div>
{% block scripts %}
{% if config.extra.single_page and page.content %}
<script type="text/javascript">
document.getElementById("content").innerHTML = {{ page.content|tojson|safe }};
document.getElementsByClassName('md-footer')[0].style.display = 'block';
</script>
{% endif %}
{% block libs %}
<script src="{{ base_url }}/assets/javascripts/modernizr.1aa3b519.js"></script>
{% endblock %}
@ -208,7 +202,6 @@
{% endif %}
{% endif %}
{% endif %}
<script>app.initialize({version:"{{ mkdocs_version }}",url:{base:"{{ base_url }}"}})</script>
{% for path in extra_javascript %}
<script src="{{ path }}"></script>
{% endfor %}
@ -218,5 +211,30 @@
{% include "partials/integrations/analytics.html" %}
{% endif %}
{% endblock %}
{% if config.extra.single_page and page.content %}
<script type="text/javascript" async="async">
// Call `callback` once the document has been parsed.
function ready(callback){
// Not in the 'loading' state any more: run immediately.
if (document.readyState != 'loading') {
callback();
} else if (document.addEventListener) {
// Still loading: wait for the DOMContentLoaded event.
document.addEventListener('DOMContentLoaded', callback);
// No addEventListener available: fall back to attachEvent and wait
// until readyState reaches 'complete'.
} else document.attachEvent('onreadystatechange', function() {
if (document.readyState == 'complete') {
callback();
}
});
}
ready(function () {
document.getElementById("content").innerHTML = {{ page.content|tojson|safe }};
document.getElementsByClassName('md-footer')[0].style.display = 'block';
app.initialize({
version: "{{ mkdocs_version }}",
url: {
base: "{{ base_url }}"
}
});
});
</script>
{% endif %}
</body>
</html>

View File

@ -1,64 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Gets all the files in SOURCES_TREE directory, shows all level headers
# for each file and skip or process files by user's selection.
import os
# Directory tree to scan.
SOURCES_TREE = 'ru'
# When True the loops are left after the first handled file (see the breaks at the end).
STOP_AT_THE_FIRST_FILE = False
# Interactive pass over every file below SOURCES_TREE (Python 2 syntax:
# print statements and raw_input).
for (dirpath, dirnames, filenames) in os.walk(SOURCES_TREE):
for filename in filenames:
if filename == 'conf.py':
continue
print '=== ' + dirpath + '/' + filename
f = open(dirpath + '/' + filename)
content = f.readlines()
f.close()
# Show the header structure of the file: '#' headers as they are, and for
# '===' / '---' underline rows the preceding line together with the underline.
count_lines = 0
for l in content:
if l.startswith('#'):
print l
if l.startswith('==='):
print content[count_lines - 1] + l
if l.startswith('---'):
print content[count_lines - 1] + l
count_lines += 1
# At this stage the user checks the header structure and chooses what to do:
# skip the file ('s') or convert its header markup (any other answer).
choise = raw_input('What to do with a file (pass(s) or process(p)): ')
if choise == 's':
continue
else:
print 'processing...'
# Convert underlined headers: the line before a '===' row gets a '# '
# prefix, the line before a '---' row gets '## ', and the underline row
# itself is removed.
# NOTE(review): `content` is modified by pop() while it is being iterated
# and indexed by count_lines - verify that the indices stay aligned.
count_lines = 0
for l in content:
if l.startswith('==='):
print count_lines, content[count_lines - 1], content[count_lines]
content[count_lines - 1] = '# ' + content[count_lines - 1]
content.pop(count_lines)
if l.startswith('---'):
print count_lines, content[count_lines - 1], content[count_lines]
content[count_lines - 1] = '## ' + content[count_lines - 1]
content.pop(count_lines)
count_lines += 1
# Write the converted lines back over the original file.
f = open(dirpath + '/' + filename, 'w')
for l in content:
f.write(l)
f.close()
# NOTE(review): indentation is lost in this view; the two checks below
# presumably leave the inner and the outer loop respectively - confirm.
if STOP_AT_THE_FIRST_FILE:
break
if STOP_AT_THE_FIRST_FILE:
break

View File

@ -22,8 +22,8 @@ var paths = {
'!presentations/**/*.html',
'!public/**/*.html'],
reference: ['deprecated/reference_ru.html', 'deprecated/reference_en.html'],
docs: [docsDir + '/build/docs/**/*'],
docstxt: ['docs/**/*.txt'],
docs: [docsDir + '/build/**/*'],
docstxt: ['docs/**/*.txt', 'docs/redirects.conf'],
docsjson: ['docs/**/*.json'],
docsxml: ['docs/**/*.xml'],
docssitemap: ['sitemap.xml'],
@ -58,7 +58,7 @@ gulp.task('reference', [], function () {
});
gulp.task('docs', [], function () {
run('cd ' + docsDir + '; ./build.sh');
run('cd ' + docsDir + '/tools; ./build.py');
return gulp.src(paths.docs)
.pipe(gulp.dest(outputDir + '/../docs'))
});

View File

@ -18,6 +18,8 @@ server {
rewrite ^/reference_ru.html$ https://clickhouse.yandex/docs/ru/single/ permanent;
rewrite ^/presentations/(.*)$ https://yandex.github.io/clickhouse-presentations/$1 permanent;
include /usr/share/nginx/html/docs/redirects.conf;
if ( $uri !~ .*/index.html ){
rewrite ^/docs/(.*)/(.+)\.html$ /docs/$1/$2/ permanent;
}