import logging
import os
import re
import shutil
import subprocess
import yaml

import bs4
import mkdocs.commands.build

import test
import util
import website

TEMPORARY_FILE_NAME = "single.md"


def recursive_values(item):
    if isinstance(item, dict):
        for _, value in list(item.items()):
            yield from recursive_values(value)
    elif isinstance(item, list):
        for value in item:
            yield from recursive_values(value)
    elif isinstance(item, str):
        yield item


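# Illustrative only (hypothetical nav, not taken from any real toc file): for an
# mkdocs-style nav such as
#   nav = [
#       {"Introduction": "index.md"},
#       {"Operations": [{"Settings": "operations/settings.md"}, "operations/backup.md"]},
#   ]
# recursive_values(nav) yields the leaf path strings in document order:
#   "index.md", "operations/settings.md", "operations/backup.md"
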
anchor_not_allowed_chars = re.compile(r"[^\w\-]")


def generate_anchor_from_path(path):
    return re.sub(anchor_not_allowed_chars, "-", path)


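# Illustrative only (hypothetical path): every character that is neither a word
# character nor a hyphen is replaced with "-", so
#   generate_anchor_from_path("operations/settings/settings.md")
# returns "operations-settings-settings-md" (underscores are kept, since they match \w).
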
absolute_link = re.compile(r"^https?://")


def replace_link(match, path):
    title = match.group(1)
    link = match.group(2)

    # Not a relative link
    if re.search(absolute_link, link):
        return match.group(0)

    if link.endswith("/"):
        link = link[0:-1] + ".md"

    return "{}(#{})".format(
        title,
        generate_anchor_from_path(
            os.path.normpath(os.path.join(os.path.dirname(path), link))
        ),
    )


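# Illustrative only (hypothetical file and link): when processing a match for
#   [MergeTree](../../engines/table-engines/mergetree.md)
# found in "operations/settings/settings.md", the link is resolved relative to that
# file, normalized to "engines/table-engines/mergetree.md", and rewritten as an
# in-page anchor:
#   [MergeTree](#engines-table-engines-mergetree-md)
# Absolute http(s) links are returned unchanged, and a trailing "/" is first mapped
# to the corresponding ".md" file.
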
# Concatenates Markdown files to a single file.
def concatenate(lang, docs_path, single_page_file, nav):
    lang_path = os.path.join(docs_path, lang)

    proj_config = f"{docs_path}/toc_{lang}.yml"
    if os.path.exists(proj_config):
        with open(proj_config) as cfg_file:
            nav = yaml.full_load(cfg_file.read())["nav"]

    files_to_concatenate = list(recursive_values(nav))
    files_count = len(files_to_concatenate)
    logging.info(
        f"{files_count} files will be concatenated into single md-file for {lang}."
    )
    logging.debug("Concatenating: " + ", ".join(files_to_concatenate))
    assert files_count > 0, f"Empty single-page for {lang}"

    link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)")

    for path in files_to_concatenate:
        try:
            with open(os.path.join(lang_path, path)) as f:
                # Insert a horizontal ruler. Then insert an anchor that we will link to.
                # Its name will be the path to the .md file.
                single_page_file.write(
                    '\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)
                )

                in_metadata = False
                for line in f:
                    # Skip YAML metadata.
                    if line == "---\n":
                        in_metadata = not in_metadata
                        continue

                    if not in_metadata:
                        # Increase the level of headers.
                        if line.startswith("#"):
                            line = "#" + line

                        # Replace links within the docs.
                        if re.search(link_regexp, line):
                            line = re.sub(
                                link_regexp,
                                lambda match: replace_link(match, path),
                                line,
                            )

                        # If we failed to replace a relative link, log it,
                        # with some exceptions:
                        # - "../src/" -- for cmake-in-clickhouse.md (link to sources)
                        # - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
                        if (
                            "../" in line
                            and ("../usr/share" not in line)
                            and ("../src/" not in line)
                        ):
                            logging.info("Failed to resolve relative link:")
                            logging.info(path)
                            logging.info(line)

                        single_page_file.write(line)

        except IOError as e:
            logging.warning(str(e))

    single_page_file.flush()


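# Illustrative only (hypothetical input): if "operations/settings/settings.md" is
# one of the concatenated files, its content is preceded by
#   ______
#   <a name="operations-settings-settings-md"></a>
# its headers are demoted by one level ("## Settings" becomes "### Settings"), and
# relative links such as
#   [query_log](../server/settings.md#query-log)
# become links to the matching in-page anchor:
#   [query_log](#operations-server-settings-md)
# (note that the original "#query-log" fragment is dropped by link_regexp).
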
def get_temporary_file_name(lang, args):
    return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME)


def remove_temporary_files(lang, args):
    single_md_path = get_temporary_file_name(lang, args)
    if os.path.exists(single_md_path):
        os.unlink(single_md_path)


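# Illustrative only (hypothetical args): with args.docs_dir == "docs",
# get_temporary_file_name("en", args) returns "docs/en/single.md"; this is the file
# that concatenate() fills and that remove_temporary_files() deletes at the end of
# build_single_page_version().
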
def build_single_page_version(lang, args, nav, cfg):
    logging.info(f"Building single page version for {lang}")
    os.environ["SINGLE_PAGE"] = "1"
    extra = cfg.data["extra"]
    extra["single_page"] = True
    extra["is_amp"] = False

    single_md_path = get_temporary_file_name(lang, args)
    with open(single_md_path, "w") as single_md:
        concatenate(lang, args.docs_dir, single_md, nav)

    with util.temp_dir() as site_temp:
        with util.temp_dir() as docs_temp:
            docs_src_lang = os.path.join(args.docs_dir, lang)
            docs_temp_lang = os.path.join(docs_temp, lang)
            shutil.copytree(docs_src_lang, docs_temp_lang)
            for root, _, filenames in os.walk(docs_temp_lang):
                for filename in filenames:
                    if filename != "single.md" and filename.endswith(".md"):
                        os.unlink(os.path.join(root, filename))

            cfg.load_dict(
                {
                    "docs_dir": docs_temp_lang,
                    "site_dir": site_temp,
                    "extra": extra,
                    "nav": [{cfg.data.get("site_name"): "single.md"}],
                }
            )

            if not args.test_only:
                mkdocs.commands.build.build(cfg)

                single_page_output_path = os.path.join(
                    args.docs_dir, args.docs_output_dir, lang, "single"
                )

                if os.path.exists(single_page_output_path):
                    shutil.rmtree(single_page_output_path)

                shutil.copytree(
                    os.path.join(site_temp, "single"), single_page_output_path
                )

                single_page_index_html = os.path.join(
                    single_page_output_path, "index.html"
                )
                single_page_content_js = os.path.join(
                    single_page_output_path, "content.js"
                )

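                # The generated index.html is assumed to contain two "<!-- BREAK -->"
                # markers around the inlined JS payload (sp_js). The block below splits
                # it into prefix / payload / suffix, rewrites index.html without the
                # payload, and stores the payload in content.js (minified when
                # args.minify is set).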
                with open(single_page_index_html, "r") as f:
                    sp_prefix, sp_js, sp_suffix = f.read().split("<!-- BREAK -->")

                with open(single_page_index_html, "w") as f:
                    f.write(sp_prefix)
                    f.write(sp_suffix)

                with open(single_page_content_js, "w") as f:
                    if args.minify:
                        import jsmin

                        sp_js = jsmin.jsmin(sp_js)
                    f.write(sp_js)

            logging.info(f"Re-building single page for {lang} pdf/test")
            with util.temp_dir() as test_dir:
                extra["single_page"] = False
                cfg.load_dict(
                    {
                        "docs_dir": docs_temp_lang,
                        "site_dir": test_dir,
                        "extra": extra,
                        "nav": [{cfg.data.get("site_name"): "single.md"}],
                    }
                )
                mkdocs.commands.build.build(cfg)

                css_in = " ".join(website.get_css_in(args))
                js_in = " ".join(website.get_js_in(args))
                subprocess.check_call(
                    f"cat {css_in} > {test_dir}/css/base.css", shell=True
                )
                subprocess.check_call(
                    f"cat {js_in} > {test_dir}/js/base.js", shell=True
                )

                if args.save_raw_single_page:
                    shutil.copytree(test_dir, args.save_raw_single_page)

                logging.info(f"Running tests for {lang}")
                test.test_single_page(
                    os.path.join(test_dir, "single", "index.html"), lang
                )

    logging.info(f"Finished building single page version for {lang}")

    remove_temporary_files(lang, args)
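
# Illustrative only: this module is assumed to be driven by the docs build script,
# which supplies argparse-style args. A hypothetical call could look like:
#
#   args = argparse.Namespace(
#       docs_dir="docs",
#       docs_output_dir="../build/docs",
#       test_only=False,
#       minify=True,
#       save_raw_single_page=None,
#   )
#   build_single_page_version("en", args, nav, cfg)
#
# where nav is the mkdocs navigation list and cfg is the loaded mkdocs config
# (args may need additional fields for website.get_css_in / website.get_js_in).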