ClickHouse/docs/tools/purge_cache_for_changed_files.py

#!/usr/bin/env python3
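"""Purge Cloudflare's cache for documentation pages changed by the last commit.

Collects changed files from `git diff HEAD~1`, keeps HTML pages whose diff is
larger than the per-page revision/commit-SHA noise, and POSTs the resulting
URLs to Cloudflare's purge_cache API in small batches.
"""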
import subprocess
import requests
import os
import time

FNAME_START = "+++"
2020-07-30 13:12:23 +00:00
2020-07-30 14:38:09 +00:00
CLOUDFLARE_URL = "https://api.cloudflare.com/client/v4/zones/4fc6fb1d46e87851605aa7fa69ca6fe0/purge_cache"
2020-07-30 13:12:23 +00:00

# Every generated page embeds the current revision and commit SHA, so every
# page shows a tiny diff on each build; a file is purged only if more than
# this many words actually changed.
MIN_CHANGED_WORDS = 4


def collect_changed_files():
    # Word-level diff against the previous commit, filtered down to file
    # headers ("+++ ...") and added/removed words ("+word" / "-word").
    proc = subprocess.Popen(
        r"git diff HEAD~1 --word-diff=porcelain | grep -e '^+[^+]\|^\-[^\-]\|^\+\+\+'",
        stdout=subprocess.PIPE,
        shell=True,
    )
    changed_files = []
    current_file_name = ""
    changed_words = []
    while True:
        line = proc.stdout.readline().decode("utf-8").strip()
        if not line:
            break
        if FNAME_START in line:
            if changed_words:
                if len(changed_words) > MIN_CHANGED_WORDS:
                    changed_files.append(current_file_name)
                changed_words = []
            current_file_name = line[6:]  # strip the "+++ b/" prefix
        else:
            changed_words.append(line)
    # Flush the last file: the loop only records a file when the next "+++"
    # header arrives, so the final file in the diff would otherwise be lost.
    if len(changed_words) > MIN_CHANGED_WORDS:
        changed_files.append(current_file_name)
    return changed_files
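
# For reference, the `--word-diff=porcelain` stream parsed above looks roughly
# like this (hypothetical path; format per git's documentation):
#
#     +++ b/en/index.html    <- file header; everything after "+++ b/" is kept
#     -old                   <- a removed word
#     +new                   <- an added word
#
# Context lines ("~", " word") and "--- a/..." headers are already filtered
# out by the grep in the pipeline.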


def filter_and_transform_changed_files(changed_files, base_domain):
    result = []
    for f in changed_files:
        # Only HTML pages are purged; "index.html" collapses to its directory URL.
        if f.endswith(".html"):
            result.append(base_domain + f.replace("index.html", ""))
    return result
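
# Example (hypothetical path): with the default base_domain,
# "en/getting-started/index.html" becomes
# "https://content.clickhouse.tech/en/getting-started/".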


def convert_to_dicts(changed_files, batch_size):
    # Split the URL list into purge payloads of at most batch_size files each.
    result = []
    current_batch = {"files": []}
    for f in changed_files:
        if len(current_batch["files"]) >= batch_size:
            result.append(current_batch)
            current_batch = {"files": []}
        current_batch["files"].append(f)

    if current_batch["files"]:
        result.append(current_batch)
    return result
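
# Each batch is the JSON body Cloudflare's purge_cache endpoint expects,
# e.g. {"files": ["https://content.clickhouse.tech/en/"]}. The endpoint caps
# the number of URLs per request (30 at the time of writing), hence the
# batch size of 25 used in __main__ below to stay safely under the limit.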


def post_data(prepared_batches, token):
    headers = {"Authorization": "Bearer {}".format(token)}
    for batch in prepared_batches:
        print("Purging cache for", ", ".join(batch["files"]))
        response = requests.post(CLOUDFLARE_URL, json=batch, headers=headers)
        response.raise_for_status()
        time.sleep(3)  # pause between batches to avoid API rate limits


if __name__ == "__main__":
    token = os.getenv("CLOUDFLARE_TOKEN")
    if not token:
        raise Exception("Env variable CLOUDFLARE_TOKEN is empty")
    base_domain = os.getenv("BASE_DOMAIN", "https://content.clickhouse.tech/")

    changed_files = collect_changed_files()
    print("Found", len(changed_files), "changed files")
    filtered_files = filter_and_transform_changed_files(changed_files, base_domain)
    print("Files left after filtering:", len(filtered_files))
    prepared_batches = convert_to_dicts(filtered_files, 25)
    post_data(prepared_batches, token)