ClickHouse/docs/tools/build.py

268 lines
8.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
2020-03-30 08:25:29 +00:00
import argparse
import datetime
import logging
import os
import shutil
2018-10-12 15:43:16 +00:00
import subprocess
import sys
import time
2020-03-30 08:25:29 +00:00
import jinja2
import livereload
import markdown.util
import nav # monkey patches mkdocs
from mkdocs import config
from mkdocs import exceptions
import mkdocs.commands.build
import amp
import blog
WIP on docs (#3813) * CLICKHOUSE-4063: less manual html @ index.md * CLICKHOUSE-4063: recommend markdown="1" in README.md * CLICKHOUSE-4003: manually purge custom.css for now * CLICKHOUSE-4064: expand <details> before any print (including to pdf) * CLICKHOUSE-3927: rearrange interfaces/formats.md a bit * CLICKHOUSE-3306: add few http headers * Remove copy-paste introduced in #3392 * Hopefully better chinese fonts #3392 * get rid of tabs @ custom.css * Apply comments and patch from #3384 * Add jdbc.md to ToC and some translation, though it still looks badly incomplete * minor punctuation * Add some backlinks to official website from mirrors that just blindly take markdown sources * Do not make fonts extra light * find . -name '*.md' -type f | xargs -I{} perl -pi -e 's//g' {} * find . -name '*.md' -type f | xargs -I{} perl -pi -e 's/ sql/g' {} * Remove outdated stuff from roadmap.md * Not so light font on front page too * Refactor Chinese formats.md to match recent changes in other languages * Update some links on front page * Remove some outdated comment * Add twitter link to front page * More front page links tuning * Add Amsterdam meetup link * Smaller font to avoid second line * Add Amsterdam link to README.md * Proper docs nav translation * Back to 300 font-weight except Chinese * fix docs build * Update Amsterdam link * remove symlinks * more zh punctuation * apply lost comment by @zhang2014 * Apply comments by @zhang2014 from #3417 * Remove Beijing link * rm incorrect symlink * restore content of docs/zh/operations/table_engines/index.md * CLICKHOUSE-3751: stem terms while searching docs * CLICKHOUSE-3751: use English stemmer in non-English docs too * CLICKHOUSE-4135 fix * Remove past meetup link * Add blog link to top nav * Add ContentSquare article link * Add form link to front page + refactor some texts * couple markup fixes * minor * Introduce basic ODBC driver page in docs * More verbose 3rd party libs disclaimer * Put third-party stuff into a separate 
folder * Separate third-party stuff in ToC too * Update links * Move stuff that is not really (only) a client library into a separate page * Add clickhouse-hdfs-loader link * Some introduction for "interfaces" section * Rewrite tcp.md * http_interface.md -> http.md * fix link * Remove unconvenient error for now * try to guess anchor instead of failing * remove symlink * Remove outdated info from introduction * remove ru roadmap.md * replace ru roadmap.md with symlink * Update roadmap.md * lost file * Title case in toc_en.yml * Sync "Functions" ToC section with en * Remove reference to pretty old ClickHouse release from docs * couple lost symlinks in fa * Close quote in proper place * Rewrite en/getting_started/index.md * Sync en<>ru getting_started/index.md * minor changes * Some gui.md refactoring * Translate DataGrip section to ru * Translate DataGrip section to zh * Translate DataGrip section to fa * Translate DBeaver section to fa * Translate DBeaver section to zh * Split third-party GUI to open-source and commercial * Mention some RDBMS integrations + ad-hoc translation fixes * Add rel="external nofollow" to outgoing links from docs * Lost blank lines * Fix class name * More rel="external nofollow" * Apply suggestions by @sundy-li * Mobile version of front page improvements * test * test 2 * test 3 * Update LICENSE * minor docs fix * Highlight current article as suggested by @sundy-li * fix link destination * Introduce backup.md (only "en" for now) * Mention INSERT+SELECT in backup.md * Some improvements for replication.md * Add backup.md to toc * Mention clickhouse-backup tool * Mention LightHouse in third-party GUI list * Introduce interfaces/third-party/proxy.md * Add clickhouse-bulk to proxy.md * Major extension of integrations.md contents * fix link target * remove unneeded file * better toc item name * fix markdown * better ru punctuation * Add yet another possible backup approach * Simplify copying permalinks to headers * Support non-eng link anchors in 
docs + update some deps * Generate anchors for single-page mode automatically * Remove anchors to top of pages * Remove anchors that nobody links to * build fixes * fix few links * restore css * fix some links * restore gifs * fix lost words * more docs fixes * docs fixes * NULL anchor * update urllib3 dependency * more fixes
2018-12-12 17:28:00 +00:00
import mdx_clickhouse
import redirects
import single_page
WIP on docs (#3860) * CLICKHOUSE-4063: less manual html @ index.md * CLICKHOUSE-4063: recommend markdown="1" in README.md * CLICKHOUSE-4003: manually purge custom.css for now * CLICKHOUSE-4064: expand <details> before any print (including to pdf) * CLICKHOUSE-3927: rearrange interfaces/formats.md a bit * CLICKHOUSE-3306: add few http headers * Remove copy-paste introduced in #3392 * Hopefully better chinese fonts #3392 * get rid of tabs @ custom.css * Apply comments and patch from #3384 * Add jdbc.md to ToC and some translation, though it still looks badly incomplete * minor punctuation * Add some backlinks to official website from mirrors that just blindly take markdown sources * Do not make fonts extra light * find . -name '*.md' -type f | xargs -I{} perl -pi -e 's//g' {} * find . -name '*.md' -type f | xargs -I{} perl -pi -e 's/ sql/g' {} * Remove outdated stuff from roadmap.md * Not so light font on front page too * Refactor Chinese formats.md to match recent changes in other languages * Update some links on front page * Remove some outdated comment * Add twitter link to front page * More front page links tuning * Add Amsterdam meetup link * Smaller font to avoid second line * Add Amsterdam link to README.md * Proper docs nav translation * Back to 300 font-weight except Chinese * fix docs build * Update Amsterdam link * remove symlinks * more zh punctuation * apply lost comment by @zhang2014 * Apply comments by @zhang2014 from #3417 * Remove Beijing link * rm incorrect symlink * restore content of docs/zh/operations/table_engines/index.md * CLICKHOUSE-3751: stem terms while searching docs * CLICKHOUSE-3751: use English stemmer in non-English docs too * CLICKHOUSE-4135 fix * Remove past meetup link * Add blog link to top nav * Add ContentSquare article link * Add form link to front page + refactor some texts * couple markup fixes * minor * Introduce basic ODBC driver page in docs * More verbose 3rd party libs disclaimer * Put third-party stuff into a separate 
folder * Separate third-party stuff in ToC too * Update links * Move stuff that is not really (only) a client library into a separate page * Add clickhouse-hdfs-loader link * Some introduction for "interfaces" section * Rewrite tcp.md * http_interface.md -> http.md * fix link * Remove unconvenient error for now * try to guess anchor instead of failing * remove symlink * Remove outdated info from introduction * remove ru roadmap.md * replace ru roadmap.md with symlink * Update roadmap.md * lost file * Title case in toc_en.yml * Sync "Functions" ToC section with en * Remove reference to pretty old ClickHouse release from docs * couple lost symlinks in fa * Close quote in proper place * Rewrite en/getting_started/index.md * Sync en<>ru getting_started/index.md * minor changes * Some gui.md refactoring * Translate DataGrip section to ru * Translate DataGrip section to zh * Translate DataGrip section to fa * Translate DBeaver section to fa * Translate DBeaver section to zh * Split third-party GUI to open-source and commercial * Mention some RDBMS integrations + ad-hoc translation fixes * Add rel="external nofollow" to outgoing links from docs * Lost blank lines * Fix class name * More rel="external nofollow" * Apply suggestions by @sundy-li * Mobile version of front page improvements * test * test 2 * test 3 * Update LICENSE * minor docs fix * Highlight current article as suggested by @sundy-li * fix link destination * Introduce backup.md (only "en" for now) * Mention INSERT+SELECT in backup.md * Some improvements for replication.md * Add backup.md to toc * Mention clickhouse-backup tool * Mention LightHouse in third-party GUI list * Introduce interfaces/third-party/proxy.md * Add clickhouse-bulk to proxy.md * Major extension of integrations.md contents * fix link target * remove unneeded file * better toc item name * fix markdown * better ru punctuation * Add yet another possible backup approach * Simplify copying permalinks to headers * Support non-eng link anchors in 
docs + update some deps * Generate anchors for single-page mode automatically * Remove anchors to top of pages * Remove anchors that nobody links to * build fixes * fix few links * restore css * fix some links * restore gifs * fix lost words * more docs fixes * docs fixes * NULL anchor * update urllib3 dependency * more fixes * Remove excessive content from print version * Try short license again * Back to long license for now * Introduce anchor integrity checks for single-page docs * Add --save-raw-single-page option to build.py (helps to debug incorrect anchors) * fix kafka engine links * fix one class of broken anchors * fix some broken links * Add https://github.com/hatarist/clickhouse-cli to third-party section (in gui.md for now, maybe will add cli.md later) * fix one more class of links to nowhere * less duplicate anchors * get rid of weird anchors * fix anchor * fix link * fix couple links
2018-12-18 11:32:08 +00:00
import test
import util
2020-03-30 11:39:26 +00:00
import website
2020-09-19 16:42:36 +00:00
from cmake_in_clickhouse_generator import generate_cmake_flags_files
class ClickHouseMarkdown(markdown.extensions.Extension):
    """Markdown extension that removes lines marked with ``<!--hide-->``."""

    class ClickHousePreprocessor(markdown.util.Processor):
        """Preprocessor that filters hidden lines out of the source text."""

        def run(self, lines):
            """Lazily yield each line of *lines* that lacks the hide marker."""
            yield from (text for text in lines if "<!--hide-->" not in text)

    def extendMarkdown(self, md):
        """Register the preprocessor on *md* with priority 31."""
        preprocessor = self.ClickHousePreprocessor()
        md.preprocessors.register(preprocessor, "clickhouse_preprocessor", 31)


# Expose the extension as an attribute of the ``markdown.extensions`` package.
# NOTE(review): presumably so it can be referenced from the extension list in
# mdx_clickhouse -- confirm against that module.
markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown
def build_for_lang(lang, args):
    """Build the documentation site for a single language with MkDocs.

    Assembles the theme and MkDocs configuration for *lang*, then runs the
    multi-page, AMP and single-page builds unless the matching
    ``args.skip_*`` flag is set.

    Args:
        lang: Language code; also used as the sub-directory name under
            ``args.docs_dir`` and ``args.docs_output_dir``.
        args: Parsed command-line namespace. Reads ``theme_dir``,
            ``docs_dir``, ``docs_output_dir``, ``htmlproofer``, ``rev``,
            ``rev_short``, ``rev_url``, ``events``, ``skip_multi_page``,
            ``skip_amp`` and ``skip_single_page``.

    Raises:
        SystemExit: If MkDocs raises a ``ConfigurationError``; the error
            text is used as the exit message.
    """
    logging.info(f"Building {lang} docs")
    # Reset at the start of every per-language build.
    # NOTE(review): presumably read by the single-page tooling -- confirm.
    os.environ["SINGLE_PAGE"] = "0"
    try:
        theme_cfg = {
            "name": None,
            "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
            "language": lang,
            "direction": "rtl" if lang == "fa" else "ltr",
            "static_templates": ["404.html"],
            "extra": {
                "now": int(
                    time.mktime(datetime.datetime.now().timetuple())
                )  # TODO better way to avoid caching
            },
        }

        # the following list of languages is sorted according to
        # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
        languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"}

        site_names = {
            "en": "ClickHouse %s Documentation",
            "zh": "ClickHouse文档 %s",
            "ru": "Документация ClickHouse %s",
            "ja": "ClickHouseドキュメント %s",
        }

        # Every language offered in the switcher needs a site name template.
        assert len(site_names) == len(languages)

        site_dir = os.path.join(args.docs_output_dir, lang)

        plugins = ["macros"]
        if args.htmlproofer:
            plugins.append("htmlproofer")

        website_url = "https://clickhouse.com"
        # The version placeholder is filled with an empty string, which leaves
        # a doubled (or trailing) space behind; normalise whitespace runs to a
        # single space and trim the ends. Unknown languages fall back to "en".
        site_name = site_names.get(lang, site_names["en"]) % ""
        site_name = " ".join(site_name.split())

        raw_config = dict(
            site_name=site_name,
            site_url=f"{website_url}/docs/{lang}/",
            docs_dir=os.path.join(args.docs_dir, lang),
            site_dir=site_dir,
            strict=True,
            theme=theme_cfg,
            copyright="©2016–2022 ClickHouse, Inc.",
            use_directory_urls=True,
            repo_name="ClickHouse/ClickHouse",
            repo_url="https://github.com/ClickHouse/ClickHouse/",
            edit_uri=f"edit/master/docs/{lang}",
            markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
            plugins=plugins,
            extra=dict(
                now=datetime.datetime.now().isoformat(),
                single_page=False,
                rev=args.rev,
                rev_short=args.rev_short,
                rev_url=args.rev_url,
                website_url=website_url,
                events=args.events,
                languages=languages,
                includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
                is_amp=False,
                is_blog=False,
            ),
        )

        # Clean to be safe if last build finished abnormally
        single_page.remove_temporary_files(lang, args)

        raw_config["nav"] = nav.build_docs_nav(lang, args)

        cfg = config.load_config(**raw_config)

        if not args.skip_multi_page:
            mkdocs.commands.build.build(cfg)

        if not args.skip_amp:
            amp.build_amp(lang, args, cfg)

        if not args.skip_single_page:
            single_page.build_single_page_version(
                lang, args, raw_config.get("nav"), cfg
            )

        mdx_clickhouse.PatchedMacrosPlugin.disabled = False

        logging.info(f"Finished building {lang} docs")

    except exceptions.ConfigurationError as e:
        # Exit with the MkDocs message instead of a traceback, keeping the
        # original exception attached as the cause.
        raise SystemExit("\n" + str(e)) from e
def build_docs(args):
    """Build the docs for every requested language, then the docs redirects.

    ``args.lang`` is a comma-separated list of language codes; empty entries
    (for example from a trailing comma) are skipped. Each remaining language
    becomes one ``(lang, args)`` job for ``build_for_lang``.
    """
    jobs = [(code, args) for code in args.lang.split(",") if code]
    util.run_function_in_parallel(build_for_lang, jobs, threads=False)
    redirects.build_docs_redirects(args)
def build(args):
    """Run the whole build pipeline into a fresh ``args.output_dir``.

    Any existing output directory is deleted first. The website, docs and
    blog stages each run unless their ``args.skip_*`` flag is set; website
    post-processing runs after the other stages, and static redirects are
    generated last.
    """
    output_dir = args.output_dir
    if os.path.exists(output_dir):
        # Start from a clean tree so stale artifacts never leak into a build.
        shutil.rmtree(output_dir)

    if not args.skip_website:
        website.build_website(args)

    if not args.skip_docs:
        generate_cmake_flags_files()
        build_docs(args)

    if not args.skip_blog:
        blog.build_blog(args)

    if not args.skip_website:
        website.process_benchmark_results(args)
        website.minify_website(args)

    # NOTE(review): treated as unconditional; the original indentation was
    # lost, so confirm this call is not meant to sit under skip_website.
    redirects.build_static_redirects(args)
2020-01-29 20:27:36 +00:00
if __name__ == "__main__":
    # Work from the parent of this script's directory so that the relative
    # defaults below resolve regardless of where the script was started.
    os.chdir(os.path.join(os.path.dirname(__file__), ".."))

    # A root path to ClickHouse source code.
    src_dir = ".."

    website_dir = os.path.join(src_dir, "website")

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--lang", default="en,ru,zh,ja")
    arg_parser.add_argument("--blog-lang", default="en")
    arg_parser.add_argument("--docs-dir", default=".")
    arg_parser.add_argument("--theme-dir", default=website_dir)
    arg_parser.add_argument("--website-dir", default=website_dir)
    arg_parser.add_argument("--src-dir", default=src_dir)
    arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog"))
    arg_parser.add_argument("--output-dir", default="build")
    # Integer options take integer defaults (previously the string "0").
    arg_parser.add_argument("--nav-limit", type=int, default=0)
    arg_parser.add_argument("--skip-multi-page", action="store_true")
    arg_parser.add_argument("--skip-single-page", action="store_true")
    arg_parser.add_argument("--skip-amp", action="store_true")
    arg_parser.add_argument("--skip-website", action="store_true")
    arg_parser.add_argument("--skip-blog", action="store_true")
    arg_parser.add_argument("--skip-git-log", action="store_true")
    arg_parser.add_argument("--skip-docs", action="store_true")
    arg_parser.add_argument("--test-only", action="store_true")
    arg_parser.add_argument("--minify", action="store_true")
    arg_parser.add_argument("--htmlproofer", action="store_true")
    arg_parser.add_argument("--no-docs-macros", action="store_true")
    arg_parser.add_argument("--save-raw-single-page", type=str)
    # Doubles as the port number for the livereload server; 0 disables it.
    arg_parser.add_argument("--livereload", type=int, default=0)
    arg_parser.add_argument("--verbose", action="store_true")

    args = arg_parser.parse_args()
    args.minify = False  # TODO remove

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
    )

    # Keep the markdown library's logger at INFO even in verbose mode.
    logging.getLogger("MARKDOWN").setLevel(logging.INFO)

    args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs")
    args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog")

    from github import get_events

    # Invoke git directly with an argument list; no shell is needed.
    args.rev = (
        subprocess.check_output(["git", "rev-parse", "HEAD"])
        .decode("utf-8")
        .strip()
    )
    args.rev_short = (
        subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
        .decode("utf-8")
        .strip()
    )
    args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
    args.events = get_events(args)

    if args.test_only:
        # Test mode keeps only the docs and single-page stages enabled.
        args.skip_multi_page = True
        args.skip_blog = True
        args.skip_website = True
        args.skip_amp = True

    if args.skip_git_log or args.skip_amp:
        mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True

    # NOTE(review): this imports the file again under the module name "build"
    # and calls that copy -- presumably so the build functions are not bound
    # to __main__; confirm before simplifying.
    from build import build

    build(args)

    if args.livereload:
        # Re-run this script with the same arguments, minus --livereload,
        # whenever a watched file changes.
        new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
        new_args = sys.executable + " " + " ".join(new_args)

        server = livereload.Server()
        # Join the glob onto the directory with a path separator; plain string
        # concatenation produced patterns such as ".**/*".
        server.watch(
            os.path.join(args.docs_dir, "**", "*"),
            livereload.shell(new_args, cwd="tools", shell=True),
        )
        server.watch(
            os.path.join(args.website_dir, "**", "*"),
            livereload.shell(new_args, cwd="tools", shell=True),
        )
        server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
        sys.exit(0)