2020-03-19 11:51:22 +00:00
|
|
|
#!/usr/bin/env python3
|
2020-10-02 16:54:07 +00:00
|
|
|
|
2018-12-18 11:32:08 +00:00
|
|
|
import logging
|
2020-05-06 13:28:02 +00:00
|
|
|
import os
|
2018-12-18 11:32:08 +00:00
|
|
|
import sys
|
|
|
|
import bs4
|
2020-05-08 08:04:09 +00:00
|
|
|
import subprocess
|
|
|
|
|
|
|
|
|
2020-05-06 13:28:02 +00:00
|
|
|
def test_template(template_path):
    """Check one HTML template for inline CSS and inline JavaScript.

    Templates whose path ends with ``amp.html`` are skipped entirely. For any
    other template every tag is inspected: a non-empty ``style`` attribute is
    rejected, and so is any non-empty content inside a ``<script>`` tag,
    except for scripts of type ``application/ld+json``.

    Args:
        template_path: Path of the HTML template file to check.

    Raises:
        AssertionError: If inline CSS or inline JavaScript is found.
    """
    if template_path.endswith('amp.html'):
        # Inline CSS/JS is ok for AMP pages
        return

    logging.debug(f'Running tests for {template_path} template')
    # The parser consumes the file in the constructor, so the handle can be
    # released before the tree is walked.
    with open(template_path, 'r') as f:
        soup = bs4.BeautifulSoup(
            f,
            features='html.parser'
        )

    for tag in soup.find_all():
        style_attr = tag.attrs.get('style')
        # Raised explicitly instead of via `assert` so the check is not
        # stripped when the interpreter runs with -O.
        if style_attr:
            raise AssertionError(
                f'Inline CSS is prohibited, found {style_attr} in {template_path}'
            )

        if tag.name != 'script':
            continue
        if tag.attrs.get('type') == 'application/ld+json':
            continue
        for content in tag.contents:
            if content:
                raise AssertionError(
                    f'Inline JavaScript is prohibited, found "{content}" in {template_path}'
                )
|
|
|
|
|
|
|
|
|
|
|
|
def test_templates(base_dir):
    """Run ``test_template`` on every ``.html`` file found under *base_dir*.

    The directory tree is traversed with ``os.walk``; files whose name does
    not end with ``.html`` are ignored.

    Args:
        base_dir: Root directory to search for templates.
    """
    logging.info('Running tests for templates')
    for dirpath, _subdirs, names in os.walk(base_dir):
        html_names = (name for name in names if name.endswith('.html'))
        for html_name in html_names:
            test_template(os.path.join(dirpath, html_name))
|
|
|
|
|
|
|
|
|
2018-12-18 11:32:08 +00:00
|
|
|
def test_single_page(input_path, lang):
    """Check that in-page links of an HTML file point at existing anchors.

    Every non-empty ``name`` and ``id`` attribute value in the document is
    collected as an anchor point. Each ``href`` of the form ``#fragment``
    (a bare ``#`` is ignored) is then looked up in that set; misses are
    logged and counted.

    Args:
        input_path: Path of the HTML file to check.
        lang: Language code of the page. Broken links are fatal for ``'en'``
            and ``'ru'`` and only produce a warning for any other value.

    Raises:
        SystemExit: With status 1 when broken links are found for ``'en'`` or
            ``'ru'``, or when 10 or fewer anchor points were collected (the
            code treats that as a sign that HTML parsing went wrong).
    """
    # The parser consumes the file in the constructor, so the handle can be
    # released before the tree is walked.
    with open(input_path) as f:
        soup = bs4.BeautifulSoup(
            f,
            features='html.parser'
        )

    # Both passes below visit the same tags; collect them once.
    tags = soup.find_all()

    anchor_points = set()
    for tag in tags:
        for anchor_point in (tag.attrs.get('name'), tag.attrs.get('id')):
            if anchor_point:
                anchor_points.add(anchor_point)

    links_to_nowhere = 0
    for tag in tags:
        href = tag.attrs.get('href')
        # Only in-page fragment links are checked; a bare '#' is skipped.
        if not href or not href.startswith('#') or href == '#':
            continue
        if href[1:] not in anchor_points:
            links_to_nowhere += 1
            logging.info("Tag %s", tag)
            logging.info('Link to nowhere: %s', href)

    if links_to_nowhere:
        if lang in ('en', 'ru'):
            logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}')
            sys.exit(1)
        else:
            logging.warning(f'Found {links_to_nowhere} links to nowhere in {lang}')

    if len(anchor_points) <= 10:
        logging.error('Html parsing is probably broken')
        sys.exit(1)
|
2019-06-07 11:06:23 +00:00
|
|
|
|
2018-12-18 11:32:08 +00:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: the first command-line argument is passed as the
    # HTML file path and the second as the language code.
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    test_single_page(
        sys.argv[1],
        sys.argv[2]
    )
|