Merge branch 'master' into concurrency-control-docs

This commit is contained in:
Sergei Trifonov 2023-01-11 20:34:59 +01:00 committed by GitHub
commit 6adb7d47f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 95 additions and 496 deletions

View File

@ -1,45 +1,96 @@
#!/usr/bin/env python3
from pathlib import Path
import argparse
import logging
import os
import shutil
import subprocess
import sys
import livereload
import redirects
import website
def write_redirect_html(output_path: Path, to_url: str) -> None:
output_dir = output_path.parent
output_dir.mkdir(parents=True, exist_ok=True)
output_path.write_text(
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
<!DOCTYPE HTML>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<meta http-equiv="refresh" content="0; url={to_url}">
<script type="text/javascript">
window.location.href = "{to_url}";
</script>
<title>Page Redirection</title>
</head>
<body>
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
</body>
</html>"""
)
def build(args):
if os.path.exists(args.output_dir):
def build_static_redirects(output_dir: Path):
for static_redirect in [
("benchmark.html", "/benchmark/dbms/"),
("benchmark_hardware.html", "/benchmark/hardware/"),
(
"tutorial.html",
"/docs/en/getting_started/tutorial/",
),
(
"reference_en.html",
"/docs/en/single/",
),
(
"reference_ru.html",
"/docs/ru/single/",
),
(
"docs/index.html",
"/docs/en/",
),
]:
write_redirect_html(output_dir / static_redirect[0], static_redirect[1])
def build(root_dir: Path, output_dir: Path):
if output_dir.exists():
shutil.rmtree(args.output_dir)
if not args.skip_website:
website.build_website(args)
redirects.build_static_redirects(args)
(output_dir / "data").mkdir(parents=True)
logging.info("Building website")
# This file can be requested to check for available ClickHouse releases.
shutil.copy2(
root_dir / "utils" / "list-versions" / "version_date.tsv",
output_dir / "data" / "version_date.tsv",
)
# This file can be requested to install ClickHouse.
shutil.copy2(
root_dir / "docs" / "_includes" / "install" / "universal.sh",
output_dir / "data" / "install.sh",
)
build_static_redirects(output_dir)
if __name__ == "__main__":
os.chdir(os.path.join(os.path.dirname(__file__), ".."))
root_dir = Path(__file__).parent.parent.parent
docs_dir = root_dir / "docs"
# A root path to ClickHouse source code.
src_dir = ".."
website_dir = os.path.join(src_dir, "website")
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("--lang", default="en,ru,zh,ja")
arg_parser.add_argument("--theme-dir", default=website_dir)
arg_parser.add_argument("--website-dir", default=website_dir)
arg_parser.add_argument("--src-dir", default=src_dir)
arg_parser.add_argument("--output-dir", default="build")
arg_parser.add_argument("--nav-limit", type=int, default="0")
arg_parser.add_argument("--skip-multi-page", action="store_true")
arg_parser.add_argument("--skip-website", action="store_true")
arg_parser.add_argument("--htmlproofer", action="store_true")
arg_parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
arg_parser.add_argument(
"--output-dir",
type=Path,
default=docs_dir / "build",
help="path to the output dir",
)
arg_parser.add_argument("--livereload", type=int, default="0")
arg_parser.add_argument("--verbose", action="store_true")
@ -49,26 +100,9 @@ if __name__ == "__main__":
level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
)
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
args.rev = (
subprocess.check_output("git rev-parse HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_short = (
subprocess.check_output("git rev-parse --short HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
build(args)
build(root_dir, args.output_dir)
if args.livereload:
new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
new_args = sys.executable + " " + " ".join(new_args)
server = livereload.Server()
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
sys.exit(0)

View File

@ -1,22 +0,0 @@
#!/bin/bash
# Fixes missing documentation in other languages
# by putting relative symbolic links to the original doc file.
BASE_DIR=$(dirname $(readlink -f $0))
function do_make_links()
{
set -x
langs=(en zh ru ja)
src_file="$1"
for lang in "${langs[@]}"
do
dst_file="${src_file/\/en\///${lang}/}"
mkdir -p $(dirname "${dst_file}")
ln -sr "${src_file}" "${dst_file}" 2>/dev/null
done
}
export -f do_make_links
find "${BASE_DIR}/../en" -iname '*.md' -exec /bin/bash -c 'do_make_links "{}"' \;

View File

@ -1,142 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import datetime
import os
import subprocess
import jinja2
import markdown.inlinepatterns
import markdown.extensions
import markdown.util
import macros.plugin
import slugify as slugify_impl
def slugify(value, separator):
return slugify_impl.slugify(
value, separator=separator, word_boundary=True, save_order=True
)
MARKDOWN_EXTENSIONS = [
"mdx_clickhouse",
"admonition",
"attr_list",
"def_list",
"codehilite",
"nl2br",
"sane_lists",
"pymdownx.details",
"pymdownx.magiclink",
"pymdownx.superfences",
"extra",
{"toc": {"permalink": True, "slugify": slugify}},
]
class ClickHouseLinkMixin(object):
def handleMatch(self, m, data):
try:
el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
except IndexError:
return
if el is not None:
href = el.get("href") or ""
is_external = href.startswith("http:") or href.startswith("https:")
if is_external:
if not href.startswith("https://clickhouse.com"):
el.set("rel", "external nofollow noreferrer")
return el, start, end
class ClickHouseAutolinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
):
pass
class ClickHouseLinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
):
pass
class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines):
for line in lines:
if "<!--hide-->" not in line:
yield line
class ClickHouseMarkdown(markdown.extensions.Extension):
def extendMarkdown(self, md, md_globals):
md.preprocessors["clickhouse"] = ClickHousePreprocessor()
md.inlinePatterns["link"] = ClickHouseLinkPattern(
markdown.inlinepatterns.LINK_RE, md
)
md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
markdown.inlinepatterns.AUTOLINK_RE, md
)
def makeExtension(**kwargs):
return ClickHouseMarkdown(**kwargs)
def get_translations(dirname, lang):
import babel.support
return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
disabled = False
def on_config(self, config):
super(PatchedMacrosPlugin, self).on_config(config)
self.env.comment_start_string = "{##"
self.env.comment_end_string = "##}"
self.env.loader = jinja2.FileSystemLoader(
[
os.path.join(config.data["site_dir"]),
os.path.join(config.data["extra"]["includes_dir"]),
]
)
def on_env(self, env, config, files):
import util
env.add_extension("jinja2.ext.i18n")
dirname = os.path.join(config.data["theme"].dirs[0], "locale")
lang = config.data["theme"]["language"]
env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
util.init_jinja2_filters(env)
return env
def render(self, markdown):
if not self.disabled:
return self.render_impl(markdown)
else:
return markdown
def on_page_markdown(self, markdown, page, config, files):
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
markdown, page, config, files
)
if os.path.islink(page.file.abs_src_path):
lang = config.data["theme"]["language"]
page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
return markdown
def render_impl(self, markdown):
md_template = self.env.from_string(markdown)
return md_template.render(**self.variables)
macros.plugin.MacrosPlugin = PatchedMacrosPlugin

View File

@ -1,53 +0,0 @@
import os
def write_redirect_html(out_path, to_url):
out_dir = os.path.dirname(out_path)
try:
os.makedirs(out_dir)
except OSError:
pass
with open(out_path, "w") as f:
f.write(
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
<!DOCTYPE HTML>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<meta http-equiv="refresh" content="0; url={to_url}">
<script type="text/javascript">
window.location.href = "{to_url}";
</script>
<title>Page Redirection</title>
</head>
<body>
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
</body>
</html>"""
)
def build_static_redirects(args):
for static_redirect in [
("benchmark.html", "/benchmark/dbms/"),
("benchmark_hardware.html", "/benchmark/hardware/"),
(
"tutorial.html",
"/docs/en/getting_started/tutorial/",
),
(
"reference_en.html",
"/docs/en/single/",
),
(
"reference_ru.html",
"/docs/ru/single/",
),
(
"docs/index.html",
"/docs/en/",
),
]:
write_redirect_html(
os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
)

View File

@ -25,7 +25,10 @@ then
# Add files.
cp -R "${BUILD_DIR}"/* .
echo -n "${BASE_DOMAIN}" > CNAME
echo -n "" > README.md
cat > README.md << 'EOF'
## This repo is the source for https://content.clickhouse.com
It's built in [the action](https://github.com/ClickHouse/ClickHouse/blob/master/.github/workflows/docs_release.yml) in the DocsRelease job.
EOF
echo -n "" > ".nojekyll"
cp "${BASE_DIR}/../../LICENSE" .
git add ./*

View File

@ -1,30 +1 @@
Babel==2.9.1
Jinja2==3.0.3
Markdown==3.3.2
MarkupSafe==2.1.1
PyYAML==6.0
Pygments>=2.12.0
beautifulsoup4==4.9.1
click==7.1.2
ghp_import==2.1.0
importlib_metadata==4.11.4
jinja2-highlight==0.6.1
livereload==2.6.3
mergedeep==1.3.4
mkdocs-macros-plugin==0.4.20
mkdocs-macros-test==0.1.0
mkdocs-material==8.2.15
mkdocs==1.3.0
mkdocs_material_extensions==1.0.3
packaging==21.3
pymdown_extensions==9.4
pyparsing==3.0.9
python-slugify==4.0.1
python_dateutil==2.8.2
pytz==2022.1
six==1.15.0
soupsieve==2.3.2
termcolor==1.1.0
text_unidecode==1.3
tornado==6.1
zipp==3.8.0

View File

@ -1,136 +0,0 @@
import collections
import contextlib
import datetime
import multiprocessing
import os
import shutil
import sys
import socket
import tempfile
import threading
import jinja2
import yaml
@contextlib.contextmanager
def temp_dir():
path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
try:
yield path
finally:
shutil.rmtree(path)
@contextlib.contextmanager
def cd(new_cwd):
old_cwd = os.getcwd()
os.chdir(new_cwd)
try:
yield
finally:
os.chdir(old_cwd)
def get_free_port():
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(("", 0))
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
return s.getsockname()[1]
def run_function_in_parallel(func, args_list, threads=False):
processes = []
exit_code = 0
for task in args_list:
cls = threading.Thread if threads else multiprocessing.Process
processes.append(cls(target=func, args=task))
processes[-1].start()
for process in processes:
process.join()
if not threads:
if process.exitcode and not exit_code:
exit_code = process.exitcode
if exit_code:
sys.exit(exit_code)
def read_md_file(path):
in_meta = False
meta = {}
meta_text = []
content = []
if os.path.exists(path):
with open(path, "r") as f:
for line in f:
if line.startswith("---"):
if in_meta:
in_meta = False
meta = yaml.full_load("".join(meta_text))
else:
in_meta = True
else:
if in_meta:
meta_text.append(line)
else:
content.append(line)
return meta, "".join(content)
def write_md_file(path, meta, content):
dirname = os.path.dirname(path)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(path, "w") as f:
if meta:
print("---", file=f)
yaml.dump(meta, f)
print("---", file=f)
if not content.startswith("\n"):
print("", file=f)
f.write(content)
def represent_ordereddict(dumper, data):
value = []
for item_key, item_value in data.items():
node_key = dumper.represent_data(item_key)
node_value = dumper.represent_data(item_value)
value.append((node_key, node_value))
return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
yaml.add_representer(collections.OrderedDict, represent_ordereddict)
def init_jinja2_filters(env):
import website
chunk_size = 10240
env.filters["chunks"] = lambda line: [
line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
]
env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
d, "%Y-%m-%d"
).strftime("%a, %d %b %Y %H:%M:%S GMT")
def init_jinja2_env(args):
import mdx_clickhouse
env = jinja2.Environment(
loader=jinja2.FileSystemLoader(
[args.website_dir, os.path.join(args.src_dir, "docs", "_includes")]
),
extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"],
)
env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")
translations_dir = os.path.join(args.website_dir, "locale")
env.install_gettext_translations(
mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True
)
init_jinja2_filters(env)
return env

View File

@ -1,63 +0,0 @@
import hashlib
import json
import logging
import os
import shutil
import subprocess
import util
def build_website(args):
logging.info("Building website")
env = util.init_jinja2_env(args)
shutil.copytree(
args.website_dir,
args.output_dir,
ignore=shutil.ignore_patterns(
"*.md",
"*.sh",
"*.css",
"*.json",
"js/*.js",
"build",
"docs",
"public",
"node_modules",
"src",
"templates",
"locale",
".gitkeep",
),
)
# This file can be requested to check for available ClickHouse releases.
shutil.copy2(
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
os.path.join(args.output_dir, "data", "version_date.tsv"),
)
# This file can be requested to install ClickHouse.
shutil.copy2(
os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
os.path.join(args.output_dir, "data", "install.sh"),
)
for root, _, filenames in os.walk(args.output_dir):
for filename in filenames:
if filename == "main.html":
continue
path = os.path.join(root, filename)
if not filename.endswith(".html"):
continue
logging.info("Processing %s", path)
with open(path, "rb") as f:
content = f.read().decode("utf-8")
template = env.from_string(content)
content = template.render(args.__dict__)
with open(path, "wb") as f:
f.write(content.encode("utf-8"))

View File

@ -22,6 +22,9 @@ def started_single_node_cluster():
def test_move_and_s3_memory_usage(started_single_node_cluster):
pytest.skip("Test is too flaky. Disable it for now.")
if small_node.is_built_with_sanitizer() or small_node.is_debug_build():
pytest.skip("Disabled for debug and sanitizers. Too slow.")

View File

@ -29,6 +29,12 @@ node_without_interserver_listen_host = cluster.add_instance(
def start_cluster():
try:
cluster.start()
cluster.wait_for_url(
f"http://{INTERSERVER_LISTEN_HOST}:{INTERSERVER_HTTP_PORT}"
)
cluster.wait_for_url(
f"http://{node_without_interserver_listen_host.ip_address}:8123"
)
yield cluster
finally:

View File

@ -252,12 +252,12 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' |
while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done
# Check for executable bit on non-executable files
find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable."
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable."
# Check for BOM
find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM"
# Too many exclamation marks
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
@ -336,7 +336,7 @@ for test_case in "${expect_tests[@]}"; do
done
# Conflict markers
find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' |
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' |
xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files"
# Forbid subprocess.check_call(...) in integration tests because it does not provide enough information on errors

View File

@ -1 +0,0 @@
# This is not a website

View File

@ -1 +0,0 @@
# This directory will contain miscellaneous data files on ClickHouse website