mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-17 03:42:48 +00:00
a980123a43
Add Greenhouse careers page to website
320 lines
11 KiB
Python
320 lines
11 KiB
Python
import concurrent.futures
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
|
|
import bs4
|
|
import closure
|
|
import cssmin
|
|
import htmlmin
|
|
import jsmin
|
|
|
|
import util
|
|
|
|
|
|
def handle_iframe(iframe, soup):
    """Validate an iframe's source and wrap it in a responsive 16:9 container.

    The iframe is moved into a newly created ``div`` placed where the iframe
    used to be, its fixed ``width``/``height`` attributes are dropped and the
    embed-related attributes are set.

    Args:
        iframe: the tag to process; its ``attrs`` must contain ``src``.
        soup: the document object used to create the wrapping ``div``.

    Raises:
        RuntimeError: if ``src`` does not start with an allowed prefix.
    """
    trusted_prefixes = ('https://www.youtube.com/', 'https://datalens.yandex/')
    source = iframe.attrs['src']
    if not source.startswith(trusted_prefixes):
        raise RuntimeError(f'iframe from illegal domain: {source}')

    # Put the container where the iframe is, then move the iframe inside it.
    container = soup.new_tag('div')
    container.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9']
    iframe.insert_before(container)
    iframe.extract()
    container.insert(0, iframe)

    # Fixed dimensions are removed; sizing is left to the container classes.
    for fixed_dimension in ('width', 'height'):
        iframe.attrs.pop(fixed_dimension, None)

    iframe.attrs.update({
        'allow': 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture',
        'class': 'embed-responsive-item',
        'frameborder': '0',
        'allowfullscreen': '1',
    })
|
|
|
|
|
|
def adjust_markdown_html(content):
    """Post-process rendered HTML and return the adjusted markup as a string.

    The document is parsed with ``html.parser`` and modified in this order:
    links, ``code`` tags, iframes, images, ``details``/``summary`` pairs,
    ``dd`` tags and finally admonition ``div`` blocks.
    """
    def add_class(tag, extra):
        # Extend an existing class list, or set the class when there is none.
        existing = tag.attrs.get('class')
        tag.attrs['class'] = (existing + [extra]) if existing else extra

    # First entry with a matching admonition class wins.
    alert_modes = (
        (('info', 'note'), 'alert-primary'),
        (('attention', 'warning'), 'alert-warning'),
        (('important',), 'alert-danger'),
        (('tip',), 'alert-info'),
    )

    soup = bs4.BeautifulSoup(content, features='html.parser')

    for link in soup.find_all('a'):
        link_classes = link.attrs.get('class')
        link_target = link.attrs.get('href')
        if link_classes and 'headerlink' in link_classes:
            link.string = '\xa0'
        if link_target and link_target.startswith('http'):
            link.attrs['target'] = '_blank'

    for code in soup.find_all('code'):
        add_class(code, 'syntax')

    for iframe in soup.find_all('iframe'):
        handle_iframe(iframe, soup)

    for img in soup.find_all('img'):
        if img.attrs.get('alt') == 'iframe':
            # An image whose alt text is "iframe" is turned into an iframe.
            img.name = 'iframe'
            img.string = ''
            handle_iframe(img, soup)
        else:
            add_class(img, 'img-fluid')

    for details in soup.find_all('details'):
        for summary in details.find_all('summary'):
            # Move a nested summary up to be the first child of its details.
            if summary.parent != details:
                summary.extract()
                details.insert(0, summary)

    for dd in soup.find_all('dd'):
        add_class(dd, 'pl-3')

    for div in soup.find_all('div'):
        div_classes = div.attrs.get('class')
        if not (div_classes and 'admonition' in div_classes):
            continue

        for link in div.find_all('a'):
            add_class(link, 'alert-link')

        for paragraph in div.find_all('p'):
            paragraph_classes = paragraph.attrs.get('class')
            if paragraph_classes and ('admonition-title' in paragraph_classes):
                paragraph.attrs['class'] = paragraph_classes + [
                    'alert-heading', 'display-4', 'text-reset', 'mb-2'
                ]

        div.attrs['role'] = 'alert'
        mode = next(
            (
                alert_class
                for markers, alert_class in alert_modes
                if any(marker in div_classes for marker in markers)
            ),
            'alert-secondary',
        )
        div.attrs['class'] = div_classes + ['alert', 'pb-0', 'mb-4', mode]

    return str(soup)
|
|
|
|
|
|
def minify_html(content):
    """Return *content* after passing it through ``htmlmin.minify``."""
    minified = htmlmin.minify(content)
    return minified
|
|
|
|
|
|
def build_website(args):
    """Copy the website sources to the output directory and render its pages.

    Reads ``args.website_dir``, ``args.src_dir`` and ``args.output_dir``.
    Every ``.html`` file under the output directory except ``main.html`` is
    rendered in place as a Jinja2 template, with the attributes of *args* as
    the template context.
    """
    logging.info('Building website')
    env = util.init_jinja2_env(args)

    skipped = shutil.ignore_patterns(
        '*.md',
        '*.sh',
        '*.css',
        '*.json',
        'js/*.js',
        'build',
        'docs',
        'public',
        'node_modules',
        'src',
        'templates',
        'locale',
        '.gitkeep'
    )
    shutil.copytree(args.website_dir, args.output_dir, ignore=skipped)

    data_dir = os.path.join(args.output_dir, 'data')

    # This file can be requested to check for available ClickHouse releases.
    versions_src = os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv')
    shutil.copy2(versions_src, os.path.join(data_dir, 'version_date.tsv'))

    # This file can be requested to install ClickHouse.
    installer_src = os.path.join(args.src_dir, 'docs', '_includes', 'install', 'universal.sh')
    shutil.copy2(installer_src, os.path.join(data_dir, 'install.sh'))

    for root, _, filenames in os.walk(args.output_dir):
        for filename in filenames:
            if filename == 'main.html' or not filename.endswith('.html'):
                continue

            path = os.path.join(root, filename)
            logging.info('Processing %s', path)
            with open(path, 'rb') as f:
                source = f.read().decode('utf-8')

            rendered = env.from_string(source).render(args.__dict__)

            with open(path, 'wb') as f:
                f.write(rendered.encode('utf-8'))
|
|
|
|
|
|
def get_css_in(args):
    """Return the single-quoted stylesheet paths in concatenation order.

    Each entry is ``'<args.website_dir>/css/<name>.css'`` including the
    surrounding single quotes.
    """
    stylesheets = (
        'bootstrap',
        'docsearch',
        'base',
        'blog',
        'docs',
        'highlight',
        'main',
    )
    return [f"'{args.website_dir}/css/{name}.css'" for name in stylesheets]
|
|
|
|
|
|
def get_js_in(args):
    """Return the single-quoted script paths in concatenation order.

    Each entry is ``'<args.website_dir>/js/<name>.js'`` including the
    surrounding single quotes.
    """
    scripts = (
        'jquery',
        'popper',
        'bootstrap',
        'sentry',
        'base',
        'index',
        'docsearch',
        'docs',
        'main',
    )
    return [f"'{args.website_dir}/js/{name}.js'" for name in scripts]
|
|
|
|
|
|
def minify_file(path, css_digest, js_digest):
    """Minify one output file in place; only ``.html`` and ``.css`` are touched.

    HTML files are minified and the ``base.css?css_digest`` and
    ``base.js?js_digest`` placeholders are replaced with the given digests.
    CSS files are currently read and written back unchanged. Any other file
    is left alone.
    """
    is_html = path.endswith('.html')
    if not is_html and not path.endswith('.css'):
        return

    logging.info('Minifying %s', path)
    with open(path, 'rb') as f:
        text = f.read().decode('utf-8')

    if is_html:
        text = minify_html(text)
        text = text.replace(
            'base.css?css_digest', f'base.css?{css_digest}'
        ).replace(
            'base.js?js_digest', f'base.js?{js_digest}'
        )
    # TODO: restore cssmin for '.css' files
    # TODO: restore jsmin for '.js' files

    with open(path, 'wb') as f:
        f.write(text.encode('utf-8'))
|
|
|
|
|
|
def _run_shell(command):
    """Log *command*, run it through the shell and log its captured output.

    Raises:
        subprocess.CalledProcessError: if the command exits with non-zero status.
    """
    logging.info(command)
    output = subprocess.check_output(command, shell=True)
    logging.debug(output)


def _short_digest(path):
    """Return the first 8 hex characters of the SHA3-224 digest of a file."""
    with open(path, 'rb') as f:
        return hashlib.sha3_224(f.read()).hexdigest()[0:8]


def minify_website(args):
    """Bundle the CSS and JS assets and, if requested, minify the output tree.

    Steps:
      1. Copy ``greenhouse.css`` on its own into the output directory.
      2. Build ``css/base.css`` from the paths returned by ``get_css_in``,
         through ``purifycss`` when ``args.minify`` is set and plain ``cat``
         otherwise.
      3. Build ``js/base.js`` by concatenating the paths from ``get_js_in``.
      4. When ``args.minify`` is set, run ``minify_file`` on every file under
         ``args.output_dir`` in a thread pool, passing the bundle digests.

    Exits the process with status 1 if any ``minify_file`` call raised.

    Raises:
        subprocess.CalledProcessError: if one of the shell commands fails.
    """
    # Output greenhouse css separately from main bundle to be included via the greenhouse iframe
    _run_shell(
        f"cat '{args.website_dir}/css/greenhouse.css' > '{args.output_dir}/css/greenhouse.css'"
    )

    css_in = ' '.join(get_css_in(args))
    css_out = f'{args.output_dir}/css/base.css'
    # The redirect target is single-quoted like every other path in these
    # commands, so an output directory containing spaces does not split it.
    if args.minify:
        command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \
            f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > '{css_out}'"
    else:
        command = f"cat {css_in} > '{css_out}'"
    _run_shell(command)
    css_digest = _short_digest(css_out)

    js_in = get_js_in(args)
    js_out = f'{args.output_dir}/js/base.js'
    if args.minify and False:  # TODO: return closure
        # Deliberately disabled branch, kept for when closure is restored.
        # The paths come single-quoted for shell use; strip the quotes here.
        js_in = [js[1:-1] for js in js_in]
        closure_args = [
            '--js', *js_in, '--js_output_file', js_out,
            '--compilation_level', 'SIMPLE',
            '--dependency_mode', 'NONE',
            '--third_party', '--use_types_for_optimization',
            '--isolation_mode', 'IIFE'
        ]
        logging.info(closure_args)
        if closure.run(*closure_args):
            raise RuntimeError('failed to run closure compiler')
        with open(js_out, 'r') as f:
            js_content = jsmin.jsmin(f.read())
        with open(js_out, 'w') as f:
            f.write(js_content)
    else:
        js_in = ' '.join(js_in)
        _run_shell(f"cat {js_in} > '{js_out}'")

    js_digest = _short_digest(js_out)
    logging.info(js_digest)

    if args.minify:
        logging.info('Minifying website')
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = []
            for root, _, filenames in os.walk(args.output_dir):
                for filename in filenames:
                    path = os.path.join(root, filename)
                    futures.append(executor.submit(minify_file, path, css_digest, js_digest))
            for future in futures:
                exc = future.exception()
                if exc:
                    logging.error(exc)
                    sys.exit(1)
|
|
|
|
|
|
def process_benchmark_results(args):
    """Merge benchmark result files into one ``results.js`` per benchmark kind.

    For each kind (``dbms``, ``hardware``) every file in
    ``<args.website_dir>/benchmark/<kind>/results`` is read in sorted name
    order and parsed as JSON, and its items are appended to one list. The
    list is written to ``<args.output_dir>/benchmark/<kind>/results.js`` as
    ``var results = <json>;``.

    Raises:
        AssertionError: if an item lacks a key required for its kind.
    """
    benchmark_root = os.path.join(args.website_dir, 'benchmark')
    required_keys = {
        'dbms': ['result'],
        'hardware': ['result', 'system', 'system_full', 'kind']
    }
    for benchmark_kind in ['dbms', 'hardware']:
        results = []
        results_root = os.path.join(benchmark_root, benchmark_kind, 'results')
        for result_name in sorted(os.listdir(results_root)):
            result_file = os.path.join(results_root, result_name)
            logging.debug('Reading benchmark result from %s', result_file)
            # Decode explicitly as UTF-8 rather than with the locale default.
            with open(result_file, 'r', encoding='utf-8') as f:
                items = json.load(f)
            for item in items:
                for required_key in required_keys[benchmark_kind]:
                    # Raised explicitly so the check also runs under
                    # ``python -O``, which strips ``assert`` statements; the
                    # exception type is the one ``assert`` would raise.
                    if required_key not in item:
                        raise AssertionError(f'No "{required_key}" in {result_file}')
            results += items
        results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js')
        with open(results_js, 'w', encoding='utf-8') as f:
            data = json.dumps(results)
            f.write(f'var results = {data};')
|