Merge branch 'master' into _shard_num

This commit is contained in:
mergify[bot] 2022-01-27 13:11:42 +00:00 committed by GitHub
commit ad146a67ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 242 additions and 115 deletions

View File

@ -43,7 +43,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
- `HOST ANY` — Пользователь может подключиться с любого хоста. Используется по умолчанию.
- `HOST LOCAL` — Пользователь может подключиться только локально.
- `HOST NAME 'fqdn'` — Хост задается через FQDN. Например, `HOST NAME 'mysite.com'`.
- `HOST NAME REGEXP 'regexp'` — Позволяет использовать регулярные выражения [pcre](http://www.pcre.org/), чтобы задать хосты. Например, `HOST NAME REGEXP '.*\.mysite\.com'`.
- `HOST REGEXP 'regexp'` — Позволяет использовать регулярные выражения [pcre](http://www.pcre.org/), чтобы задать хосты. Например, `HOST REGEXP '.*\.mysite\.com'`.
- `HOST LIKE 'template'` — Позволяет использовать оператор [LIKE](../../functions/string-search-functions.md#function-like) для фильтрации хостов. Например, `HOST LIKE '%'` эквивалентен `HOST ANY`; `HOST LIKE '%.mysite.com'` разрешает подключение со всех хостов в домене `mysite.com`.
Также, чтобы задать хост, вы можете использовать `@` вместе с именем пользователя. Примеры:

View File

@ -19,6 +19,7 @@ toc_priority: 76
- [什么是 OLAP?](../faq/general/olap.md)
- [什么是列存储数据库?](../faq/general/columnar-database.md)
- [为何不使用 MapReduce等技术?](../faq/general/mapreduce.md)
- [我如何为 ClickHouse贡献代码?](../faq/general/how-do-i-contribute-code-to-clickhouse.md)
- **[应用案例](../faq/use-cases/index.md)**
- [我能把 ClickHouse 作为时序数据库来使用吗?](../faq/use-cases/time-series.md)
- [我能把 ClickHouse 作为 key-value 键值存储吗?](../faq/use-cases/key-value.md)

View File

@ -364,7 +364,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"clickhouse-git-import",
"clickhouse-compressor",
"clickhouse-format",
"clickhouse-extract-from-config"
"clickhouse-extract-from-config",
"clickhouse-keeper",
"clickhouse-keeper-converter",
};
for (const auto & tool : tools)

View File

@ -1123,7 +1123,7 @@ bool ReplicatedMergeTreeQueue::addFuturePartIfNotCoveredByThem(const String & pa
if (isNotCoveredByFuturePartsImpl(entry, part_name, reject_reason, lock))
{
CurrentlyExecuting::setActualPartName(entry, part_name, *this);
CurrentlyExecuting::setActualPartName(entry, part_name, *this, lock);
return true;
}
@ -1375,7 +1375,8 @@ Int64 ReplicatedMergeTreeQueue::getCurrentMutationVersion(const String & partiti
}
ReplicatedMergeTreeQueue::CurrentlyExecuting::CurrentlyExecuting(const ReplicatedMergeTreeQueue::LogEntryPtr & entry_, ReplicatedMergeTreeQueue & queue_)
ReplicatedMergeTreeQueue::CurrentlyExecuting::CurrentlyExecuting(
const ReplicatedMergeTreeQueue::LogEntryPtr & entry_, ReplicatedMergeTreeQueue & queue_, std::lock_guard<std::mutex> & /* state_lock */)
: entry(entry_), queue(queue_)
{
if (entry->type == ReplicatedMergeTreeLogEntry::DROP_RANGE || entry->type == ReplicatedMergeTreeLogEntry::REPLACE_RANGE)
@ -1397,8 +1398,11 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::CurrentlyExecuting(const Replicate
}
void ReplicatedMergeTreeQueue::CurrentlyExecuting::setActualPartName(ReplicatedMergeTreeQueue::LogEntry & entry,
const String & actual_part_name, ReplicatedMergeTreeQueue & queue)
void ReplicatedMergeTreeQueue::CurrentlyExecuting::setActualPartName(
ReplicatedMergeTreeQueue::LogEntry & entry,
const String & actual_part_name,
ReplicatedMergeTreeQueue & queue,
std::lock_guard<std::mutex> & /* state_lock */)
{
if (!entry.actual_new_part_name.empty())
throw Exception("Entry actual part isn't empty yet. This is a bug.", ErrorCodes::LOGICAL_ERROR);
@ -1477,7 +1481,7 @@ ReplicatedMergeTreeQueue::SelectedEntryPtr ReplicatedMergeTreeQueue::selectEntry
}
if (entry)
return std::make_shared<SelectedEntry>(entry, std::unique_ptr<CurrentlyExecuting>{ new CurrentlyExecuting(entry, *this) });
return std::make_shared<SelectedEntry>(entry, std::unique_ptr<CurrentlyExecuting>{new CurrentlyExecuting(entry, *this, lock)});
else
return {};
}

View File

@ -251,11 +251,18 @@ private:
friend class ReplicatedMergeTreeQueue;
/// Created only in the selectEntryToProcess function. It is called under mutex.
CurrentlyExecuting(const ReplicatedMergeTreeQueue::LogEntryPtr & entry_, ReplicatedMergeTreeQueue & queue_);
CurrentlyExecuting(
const ReplicatedMergeTreeQueue::LogEntryPtr & entry_,
ReplicatedMergeTreeQueue & queue_,
std::lock_guard<std::mutex> & state_lock);
/// In case of fetch, we determine actual part during the execution, so we need to update entry. It is called under state_mutex.
static void setActualPartName(ReplicatedMergeTreeQueue::LogEntry & entry, const String & actual_part_name,
ReplicatedMergeTreeQueue & queue);
static void setActualPartName(
ReplicatedMergeTreeQueue::LogEntry & entry,
const String & actual_part_name,
ReplicatedMergeTreeQueue & queue,
std::lock_guard<std::mutex> & state_lock);
public:
~CurrentlyExecuting();
};

View File

@ -5,36 +5,66 @@ import json
import logging
import sys
import time
from typing import Optional
import requests
import requests # type: ignore
from ci_config import CI_CONFIG
DOWNLOAD_RETRIES_COUNT = 5
def get_with_retries(
url: str,
retries: int = DOWNLOAD_RETRIES_COUNT,
sleep: int = 3,
**kwargs,
) -> requests.Response:
logging.info("Getting URL with %i and sleep %i in between: %s", retries, sleep, url)
exc = None # type: Optional[Exception]
for i in range(DOWNLOAD_RETRIES_COUNT):
try:
response = requests.get(url, **kwargs)
response.raise_for_status()
break
except Exception as e:
if i + 1 < DOWNLOAD_RETRIES_COUNT:
logging.info("Exception '%s' while getting, retry %i", e, i + 1)
time.sleep(sleep)
exc = e
else:
raise Exception(exc)
return response
def get_build_name_for_check(check_name):
return CI_CONFIG['tests_config'][check_name]['required_build']
return CI_CONFIG["tests_config"][check_name]["required_build"]
def get_build_urls(build_name, reports_path):
for root, _, files in os.walk(reports_path):
for f in files:
if build_name in f :
if build_name in f:
logging.info("Found build report json %s", f)
with open(os.path.join(root, f), 'r', encoding='utf-8') as file_handler:
with open(os.path.join(root, f), "r", encoding="utf-8") as file_handler:
build_report = json.load(file_handler)
return build_report['build_urls']
return build_report["build_urls"]
return []
def dowload_build_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(DOWNLOAD_RETRIES_COUNT):
try:
with open(path, 'wb') as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
with open(path, "wb") as f:
response = get_with_retries(url, retries=1, stream=True)
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
@ -46,32 +76,38 @@ def dowload_build_with_progress(url, path):
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
eq_str = '=' * done
space_str = ' ' * (50 - done)
eq_str = "=" * done
space_str = " " * (50 - done)
sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%")
sys.stdout.flush()
break
except Exception as ex:
sys.stdout.write("\n")
time.sleep(3)
logging.info("Exception while downloading %s, retry %s", ex, i + 1)
except Exception:
if sys.stdout.isatty():
sys.stdout.write("\n")
if i + 1 < DOWNLOAD_RETRIES_COUNT:
time.sleep(3)
if os.path.exists(path):
os.remove(path)
else:
raise Exception(f"Cannot download dataset from {url}, all retries exceeded")
sys.stdout.write("\n")
if sys.stdout.isatty():
sys.stdout.write("\n")
logging.info("Downloading finished")
def download_builds(result_path, build_urls, filter_fn):
for url in build_urls:
if filter_fn(url):
fname = os.path.basename(url.replace('%2B', '+').replace('%20', ' '))
fname = os.path.basename(url.replace("%2B", "+").replace("%20", " "))
logging.info("Will download %s to %s", fname, result_path)
dowload_build_with_progress(url, os.path.join(result_path, fname))
def download_builds_filter(check_name, reports_path, result_path, filter_fn=lambda _: True):
def download_builds_filter(
check_name, reports_path, result_path, filter_fn=lambda _: True
):
build_name = get_build_name_for_check(check_name)
urls = get_build_urls(build_name, reports_path)
print(urls)
@ -81,17 +117,32 @@ def download_builds_filter(check_name, reports_path, result_path, filter_fn=lamb
download_builds(result_path, urls, filter_fn)
def download_all_deb_packages(check_name, reports_path, result_path):
download_builds_filter(check_name, reports_path, result_path, lambda x: x.endswith('deb'))
download_builds_filter(
check_name, reports_path, result_path, lambda x: x.endswith("deb")
)
def download_shared_build(check_name, reports_path, result_path):
download_builds_filter(check_name, reports_path, result_path, lambda x: x.endswith('shared_build.tgz'))
download_builds_filter(
check_name, reports_path, result_path, lambda x: x.endswith("shared_build.tgz")
)
def download_unit_tests(check_name, reports_path, result_path):
download_builds_filter(check_name, reports_path, result_path, lambda x: x.endswith('unit_tests_dbms'))
download_builds_filter(
check_name, reports_path, result_path, lambda x: x.endswith("unit_tests_dbms")
)
def download_clickhouse_binary(check_name, reports_path, result_path):
download_builds_filter(check_name, reports_path, result_path, lambda x: x.endswith('clickhouse'))
download_builds_filter(
check_name, reports_path, result_path, lambda x: x.endswith("clickhouse")
)
def download_performance_build(check_name, reports_path, result_path):
download_builds_filter(check_name, reports_path, result_path, lambda x: x.endswith('performance.tgz'))
download_builds_filter(
check_name, reports_path, result_path, lambda x: x.endswith("performance.tgz")
)

View File

@ -2,28 +2,51 @@
import json
import os
import requests # type: ignore
from unidiff import PatchSet # type: ignore
from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID, GITHUB_EVENT_PATH
from build_download_helper import get_with_retries
from env_helper import (
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
GITHUB_EVENT_PATH,
)
DIFF_IN_DOCUMENTATION_EXT = [
".html",
".md",
".yml",
".txt",
".css",
".js",
".xml",
".ico",
".conf",
".svg",
".png",
".jpg",
".py",
".sh",
".json",
]
RETRY_SLEEP = 0
DIFF_IN_DOCUMENTATION_EXT = [".html", ".md", ".yml", ".txt", ".css", ".js", ".xml", ".ico", ".conf", ".svg", ".png",
".jpg", ".py", ".sh", ".json"]
def get_pr_for_commit(sha, ref):
if not ref:
return None
try_get_pr_url = f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls"
try_get_pr_url = (
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls"
)
try:
response = requests.get(try_get_pr_url)
response.raise_for_status()
response = get_with_retries(try_get_pr_url, sleep=RETRY_SLEEP)
data = response.json()
if len(data) > 1:
print("Got more than one pr for commit", sha)
for pr in data:
# refs for pushes looks like refs/head/XX
# refs for RPs looks like XX
if pr['head']['ref'] in ref:
if pr["head"]["ref"] in ref:
return pr
print("Cannot find PR with required ref", ref, "returning first one")
first_pr = data[0]
@ -35,15 +58,22 @@ def get_pr_for_commit(sha, ref):
class PRInfo:
default_event = {
'commits': 1,
'before': 'HEAD~',
'after': 'HEAD',
'ref': None,
}
def __init__(self, github_event=None, need_orgs=False, need_changed_files=False, labels_from_api=False):
"commits": 1,
"before": "HEAD~",
"after": "HEAD",
"ref": None,
}
def __init__(
self,
github_event=None,
need_orgs=False,
need_changed_files=False,
pr_event_from_api=False,
):
if not github_event:
if GITHUB_EVENT_PATH:
with open(GITHUB_EVENT_PATH, 'r', encoding='utf-8') as event_file:
with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as event_file:
github_event = json.load(event_file)
else:
github_event = PRInfo.default_event.copy()
@ -51,22 +81,34 @@ class PRInfo:
self.changed_files = set([])
self.body = ""
ref = github_event.get("ref", "refs/head/master")
if ref and ref.startswith('refs/heads/'):
if ref and ref.startswith("refs/heads/"):
ref = ref[11:]
# workflow completed event, used for PRs only
if 'action' in github_event and github_event['action'] == 'completed':
self.sha = github_event['workflow_run']['head_sha']
prs_for_sha = requests.get(f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{self.sha}/pulls").json()
if "action" in github_event and github_event["action"] == "completed":
self.sha = github_event["workflow_run"]["head_sha"]
prs_for_sha = get_with_retries(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{self.sha}"
"/pulls",
sleep=RETRY_SLEEP,
).json()
if len(prs_for_sha) != 0:
github_event['pull_request'] = prs_for_sha[0]
github_event["pull_request"] = prs_for_sha[0]
if 'pull_request' in github_event: # pull request and other similar events
self.number = github_event['pull_request']['number']
if 'after' in github_event:
self.sha = github_event['after']
if "pull_request" in github_event: # pull request and other similar events
self.number = github_event["pull_request"]["number"]
if pr_event_from_api:
response = get_with_retries(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}"
f"/pulls/{self.number}",
sleep=RETRY_SLEEP,
)
github_event["pull_request"] = response.json()
if "after" in github_event:
self.sha = github_event["after"]
else:
self.sha = github_event['pull_request']['head']['sha']
self.sha = github_event["pull_request"]["head"]["sha"]
repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}"
self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}"
@ -75,35 +117,35 @@ class PRInfo:
self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
self.pr_html_url = f"{repo_prefix}/pull/{self.number}"
self.base_ref = github_event['pull_request']['base']['ref']
self.base_name = github_event['pull_request']['base']['repo']['full_name']
self.head_ref = github_event['pull_request']['head']['ref']
self.head_name = github_event['pull_request']['head']['repo']['full_name']
self.body = github_event['pull_request']['body']
self.base_ref = github_event["pull_request"]["base"]["ref"]
self.base_name = github_event["pull_request"]["base"]["repo"]["full_name"]
self.head_ref = github_event["pull_request"]["head"]["ref"]
self.head_name = github_event["pull_request"]["head"]["repo"]["full_name"]
self.body = github_event["pull_request"]["body"]
self.labels = {
label["name"] for label in github_event["pull_request"]["labels"]
}
if labels_from_api:
response = requests.get(f"https://api.github.com/repos/{GITHUB_REPOSITORY}/issues/{self.number}/labels")
self.labels = {l['name'] for l in response.json()}
else:
self.labels = {l['name'] for l in github_event['pull_request']['labels']}
self.user_login = github_event['pull_request']['user']['login']
self.user_login = github_event["pull_request"]["user"]["login"]
self.user_orgs = set([])
if need_orgs:
user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url'])
user_orgs_response = get_with_retries(
github_event["pull_request"]["user"]["organizations_url"],
sleep=RETRY_SLEEP,
)
if user_orgs_response.ok:
response_json = user_orgs_response.json()
self.user_orgs = set(org['id'] for org in response_json)
self.user_orgs = set(org["id"] for org in response_json)
self.diff_url = github_event['pull_request']['diff_url']
elif 'commits' in github_event:
self.sha = github_event['after']
pull_request = get_pr_for_commit(self.sha, github_event['ref'])
self.diff_url = github_event["pull_request"]["diff_url"]
elif "commits" in github_event:
self.sha = github_event["after"]
pull_request = get_pr_for_commit(self.sha, github_event["ref"])
repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}"
self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}"
self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
self.repo_full_name = GITHUB_REPOSITORY
if pull_request is None or pull_request['state'] == 'closed':
if pull_request is None or pull_request["state"] == "closed":
# it's merged PR to master
self.number = 0
self.labels = {}
@ -112,25 +154,25 @@ class PRInfo:
self.base_name = self.repo_full_name
self.head_ref = ref
self.head_name = self.repo_full_name
self.diff_url = \
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/compare/{github_event['before']}...{self.sha}"
self.diff_url = (
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"compare/{github_event['before']}...{self.sha}"
)
else:
self.number = pull_request['number']
if labels_from_api:
response = requests.get(f"https://api.github.com/repos/{GITHUB_REPOSITORY}/issues/{self.number}/labels")
self.labels = {l['name'] for l in response.json()}
else:
self.labels = {l['name'] for l in pull_request['labels']}
self.labels = {label["name"] for label in pull_request["labels"]}
self.base_ref = pull_request['base']['ref']
self.base_name = pull_request['base']['repo']['full_name']
self.head_ref = pull_request['head']['ref']
self.head_name = pull_request['head']['repo']['full_name']
self.pr_html_url = pull_request['html_url']
if 'pr-backport' in self.labels:
self.diff_url = f"https://github.com/{GITHUB_REPOSITORY}/compare/master...{self.head_ref}.diff"
self.base_ref = pull_request["base"]["ref"]
self.base_name = pull_request["base"]["repo"]["full_name"]
self.head_ref = pull_request["head"]["ref"]
self.head_name = pull_request["head"]["repo"]["full_name"]
self.pr_html_url = pull_request["html_url"]
if "pr-backport" in self.labels:
self.diff_url = (
f"https://github.com/{GITHUB_REPOSITORY}/"
f"compare/master...{self.head_ref}.diff"
)
else:
self.diff_url = pull_request['diff_url']
self.diff_url = pull_request["diff_url"]
else:
print(json.dumps(github_event, sort_keys=True, indent=4))
self.sha = os.getenv("GITHUB_SHA")
@ -153,24 +195,27 @@ class PRInfo:
if not self.diff_url:
raise Exception("Diff URL cannot be find for event")
response = requests.get(self.diff_url)
response = get_with_retries(
self.diff_url,
sleep=RETRY_SLEEP,
)
response.raise_for_status()
if 'commits' in self.event and self.number == 0:
if "commits" in self.event and self.number == 0:
diff = response.json()
if 'files' in diff:
self.changed_files = [f['filename'] for f in diff['files']]
if "files" in diff:
self.changed_files = [f["filename"] for f in diff["files"]]
else:
diff_object = PatchSet(response.text)
self.changed_files = {f.path for f in diff_object}
def get_dict(self):
return {
'sha': self.sha,
'number': self.number,
'labels': self.labels,
'user_login': self.user_login,
'user_orgs': self.user_orgs,
"sha": self.sha,
"number": self.number,
"labels": self.labels,
"user_login": self.user_login,
"user_orgs": self.user_orgs,
}
def has_changes_in_documentation(self):
@ -181,49 +226,63 @@ class PRInfo:
for f in self.changed_files:
_, ext = os.path.splitext(f)
path_in_docs = 'docs' in f
path_in_website = 'website' in f
if (ext in DIFF_IN_DOCUMENTATION_EXT and (path_in_docs or path_in_website)) or 'docker/docs' in f:
path_in_docs = "docs" in f
path_in_website = "website" in f
if (
ext in DIFF_IN_DOCUMENTATION_EXT and (path_in_docs or path_in_website)
) or "docker/docs" in f:
return True
return False
def can_skip_builds_and_use_version_from_master(self):
if 'force tests' in self.labels:
# TODO: See a broken loop
if "force tests" in self.labels:
return False
if self.changed_files is None or not self.changed_files:
return False
for f in self.changed_files:
if (not f.startswith('tests/queries')
or not f.startswith('tests/integration')
or not f.startswith('tests/performance')):
# TODO: this logic is broken, should be fixed before using
if (
not f.startswith("tests/queries")
or not f.startswith("tests/integration")
or not f.startswith("tests/performance")
):
return False
return True
def can_skip_integration_tests(self):
if 'force tests' in self.labels:
# TODO: See a broken loop
if "force tests" in self.labels:
return False
if self.changed_files is None or not self.changed_files:
return False
for f in self.changed_files:
if not f.startswith('tests/queries') or not f.startswith('tests/performance'):
# TODO: this logic is broken, should be fixed before using
if not f.startswith("tests/queries") or not f.startswith(
"tests/performance"
):
return False
return True
def can_skip_functional_tests(self):
if 'force tests' in self.labels:
# TODO: See a broken loop
if "force tests" in self.labels:
return False
if self.changed_files is None or not self.changed_files:
return False
for f in self.changed_files:
if not f.startswith('tests/integration') or not f.startswith('tests/performance'):
# TODO: this logic is broken, should be fixed before using
if not f.startswith("tests/integration") or not f.startswith(
"tests/performance"
):
return False
return True

View File

@ -204,7 +204,7 @@ def check_pr_description(pr_info):
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
pr_info = PRInfo(need_orgs=True, labels_from_api=True)
pr_info = PRInfo(need_orgs=True, pr_event_from_api=True)
can_run, description = should_run_checks_for_pr(pr_info)
gh = Github(get_best_robot_token())
commit = get_commit(gh, pr_info.sha)
@ -212,6 +212,9 @@ if __name__ == "__main__":
description_report = check_pr_description(pr_info)[:139]
if description_report:
print("::notice ::Cannot run, description does not match the template")
logging.info(
"PR body doesn't match the template: (start)\n%s\n(end)", pr_info.body
)
url = (
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/"
"blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1"