ClickHouse/docs/tools/purge_cache_for_changed_files.py

79 lines
2.5 KiB
Python
Raw Normal View History

2020-07-30 13:12:23 +00:00
#!/usr/bin/env python3
import subprocess
import requests
import os
import time
2020-07-30 14:38:09 +00:00
# Prefix of a changed-file header line in `git diff --word-diff=porcelain`
# output ("+++ b/<path>").
FNAME_START = "+++"

# Cloudflare purge-cache endpoint for the docs site zone
# (zone id is embedded in the URL).
CLOUDFLARE_URL = "https://api.cloudflare.com/client/v4/zones/4fc6fb1d46e87851605aa7fa69ca6fe0/purge_cache"

# we have changes in revision and commit sha on all pages
# so such changes have to be ignored
MIN_CHANGED_WORDS = 4
def collect_changed_files():
    """Return the list of files meaningfully changed by the last commit.

    Runs `git diff HEAD~1 --word-diff=porcelain` and keeps only files whose
    number of changed words exceeds MIN_CHANGED_WORDS (every page carries a
    revision/commit-sha change, which must be ignored).

    Returns:
        list[str]: repository-relative paths of changed files.
    """
    proc = subprocess.Popen("git diff HEAD~1 --word-diff=porcelain | grep -e '^+[^+]\|^\-[^\-]\|^\+\+\+'", stdout=subprocess.PIPE, shell=True)
    changed_files = []
    current_file_name = ""
    changed_words = []
    while True:
        line = proc.stdout.readline().decode("utf-8").strip()
        if not line:
            break
        # Use startswith, not `in`: a changed content line could itself
        # contain "+++" and must not be mistaken for a file header.
        if line.startswith(FNAME_START):
            if len(changed_words) > MIN_CHANGED_WORDS:
                changed_files.append(current_file_name)
            changed_words = []
            current_file_name = line[6:]  # strip the '+++ b/' prefix
        else:
            changed_words.append(line)
    # Flush the final file: the original only flushed on seeing the *next*
    # header, so the last changed file in the diff was silently dropped.
    if len(changed_words) > MIN_CHANGED_WORDS:
        changed_files.append(current_file_name)
    # Reap the child process instead of leaking a zombie.
    proc.wait()
    return changed_files
2020-07-30 14:38:09 +00:00
def filter_and_transform_changed_files(changed_files, base_domain):
    """Keep only HTML files and turn each path into its public URL.

    An ``index.html`` suffix is dropped so the URL points at the directory
    (e.g. ``docs/en/index.html`` -> ``<base_domain>docs/en/``).

    Args:
        changed_files: iterable of repository-relative file paths.
        base_domain: URL prefix to prepend (must end with '/').

    Returns:
        list[str]: purge-ready URLs.
    """
    return [
        base_domain + path.replace("index.html", "")
        for path in changed_files
        if path.endswith(".html")
    ]
def convert_to_dicts(changed_files, batch_size):
    """Split URLs into Cloudflare purge request payloads.

    Args:
        changed_files: iterable of URLs to purge.
        batch_size: maximum number of URLs per request body.

    Returns:
        list[dict]: payloads of the form ``{"files": [...]}``, each holding
        at most ``batch_size`` entries; empty input yields an empty list.
    """
    batches = []
    pending = []
    for url in changed_files:
        pending.append(url)
        if len(pending) == batch_size:
            batches.append({"files": pending})
            pending = []
    # Emit the final partial batch, if any.
    if pending:
        batches.append({"files": pending})
    return batches
def post_data(prepared_batches, token):
    """POST each purge batch to the Cloudflare API.

    Args:
        prepared_batches: list of ``{"files": [...]}`` payloads
            (as produced by convert_to_dicts).
        token: Cloudflare API bearer token.

    Raises:
        requests.HTTPError: if Cloudflare returns a non-2xx response.
    """
    headers = {"Authorization": "Bearer {}".format(token)}
    for batch in prepared_batches:
        # Fixed typo ("Pugring") and the py2->3 artifact that printed a
        # tuple instead of a readable message.
        print("Purging cache for {}".format(", ".join(batch["files"])))
        # requests has no default timeout; without one a stuck connection
        # would hang the CI job forever.
        response = requests.post(CLOUDFLARE_URL, json=batch, headers=headers, timeout=30)
        response.raise_for_status()
        # Pause between requests to stay under Cloudflare rate limits.
        time.sleep(3)
if __name__ == "__main__":
    # Fail fast if the Cloudflare credential is missing.
    token = os.getenv("CLOUDFLARE_TOKEN")
    if not token:
        raise Exception("Env variable CLOUDFLARE_TOKEN is empty")
    base_domain = os.getenv("BASE_DOMAIN", "https://content.clickhouse.tech/")

    changed_files = collect_changed_files()
    # py2->3 artifact fixed: print a formatted string, not a tuple like
    # ('Found', 3, 'changed files').
    print("Found {} changed files".format(len(changed_files)))

    filtered_files = filter_and_transform_changed_files(changed_files, base_domain)
    print("Files left after filtering: {}".format(len(filtered_files)))

    # Cloudflare limits how many URLs one purge request may carry;
    # 25 per request keeps us comfortably inside the limit.
    prepared_batches = convert_to_dicts(filtered_files, 25)
    post_data(prepared_batches, token)