ClickHouse/docs/tools/test.py
2022-03-22 17:39:58 +01:00

47 lines
1.3 KiB
Python
Executable File

#!/usr/bin/env python3
import logging
import os
import sys
import bs4
import subprocess
def test_single_page(input_path, lang):
if not (lang == "en"):
return
with open(input_path) as f:
soup = bs4.BeautifulSoup(f, features="html.parser")
anchor_points = set()
duplicate_anchor_points = 0
links_to_nowhere = 0
for tag in soup.find_all():
for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]:
if anchor_point:
anchor_points.add(anchor_point)
for tag in soup.find_all():
href = tag.attrs.get("href")
if href and href.startswith("#") and href != "#":
if href[1:] not in anchor_points:
links_to_nowhere += 1
logging.info("Tag %s", tag)
logging.info("Link to nowhere: %s" % href)
if links_to_nowhere:
logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}")
sys.exit(1)
if len(anchor_points) <= 10:
logging.error("Html parsing is probably broken")
sys.exit(1)
if __name__ == "__main__":
    # Command-line entry point: test.py <input_path> <lang>
    logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
    # Report a missing argument with a readable message instead of a bare
    # IndexError traceback; extra arguments are still ignored.
    if len(sys.argv) < 3:
        logging.error("Usage: %s <input_path> <lang>", sys.argv[0])
        sys.exit(1)
    test_single_page(sys.argv[1], sys.argv[2])