ClickHouse/docs/tools/single_page.py


import logging
import os
import re
import shutil
import subprocess

import yaml

import bs4
import mkdocs.commands.build

import test
import util
import website
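

# Recursively yield every string leaf (page path) from the nested
# dict/list nav structure that MkDocs uses.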
def recursive_values(item):
    if isinstance(item, dict):
        for _, value in list(item.items()):
            yield from recursive_values(value)
    elif isinstance(item, list):
        for value in item:
            yield from recursive_values(value)
    elif isinstance(item, str):
        yield item
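

# Concatenate all pages for `lang` into one markdown stream, prepending
# <a name=...> anchors so that links between the original pages still resolve.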
def concatenate(lang, docs_path, single_page_file, nav):
    lang_path = os.path.join(docs_path, lang)
    az_re = re.compile(r'[a-z]')

    proj_config = f'{docs_path}/toc_{lang}.yml'
    if os.path.exists(proj_config):
        with open(proj_config) as cfg_file:
            nav = yaml.full_load(cfg_file.read())['nav']

    files_to_concatenate = list(recursive_values(nav))
    files_count = len(files_to_concatenate)
    logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.')
    logging.debug('Concatenating: ' + ', '.join(files_to_concatenate))
    assert files_count > 0, f'Empty single-page for {lang}'
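
    # The introduction/info.md stub is skipped; every other page is appended
    # in nav order.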
    for path in files_to_concatenate:
        if path.endswith('introduction/info.md'):
            continue
        try:
            with open(os.path.join(lang_path, path)) as f:
                # Generate anchors for all plausible relative-link spellings of
                # this page, so intra-doc links keep working after concatenation.
                anchors = set()
                tmp_path = path.replace('/index.md', '/').replace('.md', '/')
                prefixes = ['', '../', '../../', '../../../']
                parts = tmp_path.split('/')
                anchors.add(parts[-2] + '/')
                anchors.add('/'.join(parts[1:]))
                for part in parts[0:-2] if len(parts) > 2 else parts:
                    for prefix in prefixes:
                        anchor = prefix + tmp_path
                        if anchor:
                            anchors.add(anchor)
                            anchors.add('../' + anchor)
                            anchors.add('../../' + anchor)
                    tmp_path = tmp_path.replace(part, '..')
                # Emit only anchors that contain at least one lowercase letter.
                for anchor in anchors:
                    if re.search(az_re, anchor):
                        single_page_file.write('<a name="%s"></a>' % anchor)

                single_page_file.write('\n')
                # Skip YAML front matter and demote headings by one level, so
                # each page's H1 becomes an H2 in the combined document.
                in_metadata = False
                for l in f:
                    if l.startswith('---'):
                        in_metadata = not in_metadata
                    if l.startswith('#'):
                        l = '#' + l
                    if not in_metadata:
                        single_page_file.write(l)
        except IOError as e:
            logging.warning(str(e))

    single_page_file.flush()
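

# Build the single-page version of the docs for one language: concatenate the
# pages, run MkDocs over the result, then optionally test it and render a PDF.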
def build_single_page_version(lang, args, nav, cfg):
    logging.info(f'Building single page version for {lang}')
    os.environ['SINGLE_PAGE'] = '1'
    extra = cfg.data['extra']
    extra['single_page'] = True
    extra['is_amp'] = False

    with util.autoremoved_file(os.path.join(args.docs_dir, lang, 'single.md')) as single_md:
        concatenate(lang, args.docs_dir, single_md, nav)

        with util.temp_dir() as site_temp:
            with util.temp_dir() as docs_temp:
                docs_src_lang = os.path.join(args.docs_dir, lang)
                docs_temp_lang = os.path.join(docs_temp, lang)
                shutil.copytree(docs_src_lang, docs_temp_lang)
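
                # Leave only single.md in the temporary copy, so MkDocs builds
                # nothing else.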
                for root, _, filenames in os.walk(docs_temp_lang):
                    for filename in filenames:
                        if filename != 'single.md' and filename.endswith('.md'):
                            os.unlink(os.path.join(root, filename))
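
                # Point MkDocs at the pruned copy, with single.md as the only
                # nav entry.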
                cfg.load_dict({
                    'docs_dir': docs_temp_lang,
                    'site_dir': site_temp,
                    'extra': extra,
                    'nav': [
                        {cfg.data.get('site_name'): 'single.md'}
                    ]
                })
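
                # Build and publish the production single-page output unless
                # this is a test-only run.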
                if not args.test_only:
                    mkdocs.commands.build.build(cfg)

                    single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')

                    if os.path.exists(single_page_output_path):
                        shutil.rmtree(single_page_output_path)

                    shutil.copytree(
                        os.path.join(site_temp, 'single'),
                        single_page_output_path
                    )

                    single_page_index_html = os.path.join(single_page_output_path, 'index.html')
                    single_page_content_js = os.path.join(single_page_output_path, 'content.js')
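
                    # index.html is generated with two '<!-- BREAK -->' markers;
                    # move the part between them out of the page and into
                    # content.js (minified when requested).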
                    with open(single_page_index_html, 'r') as f:
                        sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->')
                    with open(single_page_index_html, 'w') as f:
                        f.write(sp_prefix)
                        f.write(sp_suffix)
                    with open(single_page_content_js, 'w') as f:
                        if args.minify:
                            import jsmin
                            sp_js = jsmin.jsmin(sp_js)
                        f.write(sp_js)
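
                # Rebuild with single_page disabled: this variant keeps the
                # whole document inline in index.html, which is what the tests
                # and the PDF renderer consume.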
                logging.info(f'Re-building single page for {lang} pdf/test')
                with util.temp_dir() as test_dir:
                    extra['single_page'] = False
                    cfg.load_dict({
                        'docs_dir': docs_temp_lang,
                        'site_dir': test_dir,
                        'extra': extra,
                        'nav': [
                            {cfg.data.get('site_name'): 'single.md'}
                        ]
                    })
                    mkdocs.commands.build.build(cfg)
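
                    # Assemble base.css/base.js for the test build directly
                    # from the raw css/js inputs.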
                    css_in = ' '.join(website.get_css_in(args))
                    js_in = ' '.join(website.get_js_in(args))
                    subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True)
                    subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True)

                    if args.save_raw_single_page:
                        shutil.copytree(test_dir, args.save_raw_single_page)

                    logging.info(f'Running tests for {lang}')
                    test.test_single_page(
                        os.path.join(test_dir, 'single', 'index.html'), lang)
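
                    # Optionally render the single page to PDF with wkhtmltopdf.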
                    if not args.skip_pdf:
                        single_page_index_html = os.path.join(test_dir, 'single', 'index.html')
                        single_page_pdf = os.path.abspath(
                            os.path.join(single_page_output_path, f'clickhouse_{lang}.pdf')
                        )

                        with open(single_page_index_html, 'r') as f:
                            soup = bs4.BeautifulSoup(
                                f.read(),
                                features='html.parser'
                            )
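
                        # Rewrite root-relative resource URLs (dropping query
                        # strings) to file:// paths so wkhtmltopdf can resolve
                        # them locally.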
                        soup_prefix = f'file://{test_dir}'
                        for img in soup.findAll('img'):
                            if img['src'].startswith('/'):
                                img['src'] = soup_prefix + img['src']
                        for script in soup.findAll('script'):
                            script_src = script.get('src')
                            if script_src:
                                script['src'] = soup_prefix + script_src.split('?', 1)[0]
                        for link in soup.findAll('link'):
                            link['href'] = soup_prefix + link['href'].split('?', 1)[0]

                        with open(single_page_index_html, 'w') as f:
                            f.write(str(soup))
                        create_pdf_command = [
                            'wkhtmltopdf',
                            '--print-media-type',
                            '--log-level', 'warn',
                            single_page_index_html, single_page_pdf
                        ]

                        logging.info(' '.join(create_pdf_command))
                        subprocess.check_call(' '.join(create_pdf_command), shell=True)

    logging.info(f'Finished building single page version for {lang}')