Merge branch 'master' into doc-aspell

commit 62f60c1462 by Mikhail f. Shiryaev, 2022-06-09 11:01:25 +02:00, committed by GitHub
16 changed files with 243 additions and 52 deletions

View File

@@ -13,9 +13,7 @@ max-statements=200
 ignore-long-lines = (# )?<?https?://\S+>?$

 [MESSAGES CONTROL]
-disable = bad-continuation,
-    missing-docstring,
-    bad-whitespace,
+disable = missing-docstring,
     too-few-public-methods,
     invalid-name,
     too-many-arguments,

View File

@@ -1,15 +1,16 @@
 # rebuild in #36968
 # docker build -t clickhouse/docs-builder .
 # nodejs 17 prefers ipv6 and is broken in our environment
-FROM node:16.14.2-alpine3.15
+FROM node:16-alpine

 RUN apk add --no-cache git openssh bash

-# TODO: clean before merge!
-ARG DOCS_BRANCH=main
+# At this point we want to really update /opt/clickhouse-docs
+# despite the cached images
+ARG CACHE_INVALIDATOR=0
 RUN git clone https://github.com/ClickHouse/clickhouse-docs.git \
-    --depth=1 --branch=${DOCS_BRANCH} /opt/clickhouse-docs
+    --depth=1 --branch=main /opt/clickhouse-docs

 WORKDIR /opt/clickhouse-docs

View File

@@ -8,8 +8,6 @@ if [ "$GIT_DOCS_BRANCH" ] && ! [ "$GIT_DOCS_BRANCH" == "$GIT_BRANCH" ]; then
   git fetch origin --depth=1 -- "$GIT_DOCS_BRANCH:$GIT_DOCS_BRANCH"
   git checkout "$GIT_DOCS_BRANCH"
 else
-  # Untracked yarn.lock could cause pull to fail
-  git clean -fdx
   # Update docs repo
   git pull
 fi

View File

@@ -42,6 +42,7 @@ DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"
 LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}"
 LOG_PATH="${LOG_DIR}/clickhouse-keeper.log"
 ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log"
+COORDINATION_DIR="${DATA_DIR}/coordination"
 COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log"
 COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots"
 CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0}
@@ -49,6 +50,7 @@ CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0}
 for dir in "$DATA_DIR" \
   "$LOG_DIR" \
   "$TMP_DIR" \
+  "$COORDINATION_DIR" \
   "$COORDINATION_LOG_DIR" \
   "$COORDINATION_SNAPSHOT_DIR"
 do

View File

@@ -8,17 +8,16 @@ ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

 RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
+    aspell \
     curl \
     git \
     libxml2-utils \
     moreutils \
-    pylint \
     python3-fuzzywuzzy \
     python3-pip \
     shellcheck \
     yamllint \
-    aspell \
-    && pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff
+    && pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2

 # Architecture of the image when BuildKit/buildx is used
 ARG TARGETARCH

View File

@@ -174,22 +174,24 @@ Result:
 Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration.

 File test_function.xml.
 ```xml
-<function>
-    <type>executable</type>
-    <name>test_function_sum_json</name>
-    <return_type>UInt64</return_type>
-    <return_name>result_name</return_name>
-    <argument>
-        <type>UInt64</type>
-        <name>argument_1</name>
-    </argument>
-    <argument>
-        <type>UInt64</type>
-        <name>argument_2</name>
-    </argument>
-    <format>JSONEachRow</format>
-    <command>test_function_sum_json.py</command>
-</function>
+<functions>
+    <function>
+        <type>executable</type>
+        <name>test_function_sum_json</name>
+        <return_type>UInt64</return_type>
+        <return_name>result_name</return_name>
+        <argument>
+            <type>UInt64</type>
+            <name>argument_1</name>
+        </argument>
+        <argument>
+            <type>UInt64</type>
+            <name>argument_2</name>
+        </argument>
+        <format>JSONEachRow</format>
+        <command>test_function_sum_json.py</command>
+    </function>
+</functions>
 ```

 Script file `test_function_sum_json.py` inside the `user_scripts` folder.
@@ -224,6 +226,50 @@ Result:
 └──────────────────────────────┘
 ```
+
+Executable user defined functions can take constant parameters configured in the `command` setting (this works only for user defined functions of the `executable` type).
+
+File test_function_parameter_python.xml.
+```xml
+<functions>
+    <function>
+        <type>executable</type>
+        <name>test_function_parameter_python</name>
+        <return_type>String</return_type>
+        <argument>
+            <type>UInt64</type>
+        </argument>
+        <format>TabSeparated</format>
+        <command>test_function_parameter_python.py {test_parameter:UInt64}</command>
+    </function>
+</functions>
+```
+
+Script file `test_function_parameter_python.py` inside the `user_scripts` folder.
+```python
+#!/usr/bin/python3
+import sys
+
+if __name__ == "__main__":
+    for line in sys.stdin:
+        print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
+        sys.stdout.flush()
+```
+
+Query:
+``` sql
+SELECT test_function_parameter_python(1)(2);
+```
+
+Result:
+``` text
+┌─test_function_parameter_python(1)(2)─┐
+│ Parameter 1 value 2                  │
+└──────────────────────────────────────┘
+```

 ## Error Handling

 Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query.
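
A minimal illustration of this behavior (not part of the patch; `toUInt8` and its variants are standard functions): an unparsable value aborts the whole query, while the `*OrZero`/`*OrNull` variants return a default instead.

``` sql
SELECT toUInt8('abc');        -- throws an exception and cancels the query
SELECT toUInt8OrZero('abc');  -- returns 0 instead of throwing
SELECT toUInt8OrNull('abc');  -- returns NULL instead of throwing
```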

View File

@@ -174,22 +174,24 @@ SELECT test_function_sum(2, 2);
 Creating `test_function_sum_json` with named arguments and the [JSONEachRow](../../interfaces/formats.md#jsoneachrow) format using XML configuration.

 File test_function.xml.
 ```xml
-<function>
-    <type>executable</type>
-    <name>test_function_sum_json</name>
-    <return_type>UInt64</return_type>
-    <return_name>result_name</return_name>
-    <argument>
-        <type>UInt64</type>
-        <name>argument_1</name>
-    </argument>
-    <argument>
-        <type>UInt64</type>
-        <name>argument_2</name>
-    </argument>
-    <format>JSONEachRow</format>
-    <command>test_function_sum_json.py</command>
-</function>
+<functions>
+    <function>
+        <type>executable</type>
+        <name>test_function_sum_json</name>
+        <return_type>UInt64</return_type>
+        <return_name>result_name</return_name>
+        <argument>
+            <type>UInt64</type>
+            <name>argument_1</name>
+        </argument>
+        <argument>
+            <type>UInt64</type>
+            <name>argument_2</name>
+        </argument>
+        <format>JSONEachRow</format>
+        <command>test_function_sum_json.py</command>
+    </function>
+</functions>
 ```

 Script file `test_function_sum_json.py` inside the `user_scripts` folder.
@@ -224,6 +226,50 @@ SELECT test_function_sum_json(2, 2);
 └──────────────────────────────┘
 ```
+
+Executable user defined functions can take constant parameters; these are configured as part of the `command` setting (this works only for user defined functions of the `executable` type).
+
+File test_function_parameter_python.xml.
+```xml
+<functions>
+    <function>
+        <type>executable</type>
+        <name>test_function_parameter_python</name>
+        <return_type>String</return_type>
+        <argument>
+            <type>UInt64</type>
+        </argument>
+        <format>TabSeparated</format>
+        <command>test_function_parameter_python.py {test_parameter:UInt64}</command>
+    </function>
+</functions>
+```
+
+Script file `test_function_parameter_python.py` inside the `user_scripts` folder.
+```python
+#!/usr/bin/python3
+import sys
+
+if __name__ == "__main__":
+    for line in sys.stdin:
+        print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
+        sys.stdout.flush()
+```
+
+Query:
+``` sql
+SELECT test_function_parameter_python(1)(2);
+```
+
+Result:
+``` text
+┌─test_function_parameter_python(1)(2)─┐
+│ Parameter 1 value 2                  │
+└──────────────────────────────────────┘
+```

 ## Error Handling {#obrabotka-oshibok}

 Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. In distributed query processing, when an exception occurs on one of the servers, the other servers are also asked to abort the query.

View File

@@ -19,7 +19,7 @@ public:
     /// Updates only a part of taskstats struct's fields:
     ///  - cpu_run_virtual_total, cpu_delay_total (when /proc/thread-self/schedstat is available)
    ///  - blkio_delay_total                      (when /proc/thread-self/stat is available)
-    ///  - rchar, wchar, read_bytes, write_bytes (when /prod/thread-self/io is available)
+    ///  - rchar, wchar, read_bytes, write_bytes (when /proc/thread-self/io is available)
     /// See: man procfs

     void getTaskStats(::taskstats & out_stats) const;
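
For orientation (not in the patch): these per-thread counters surface in the query log as ProfileEvents such as `OSReadChars`, `OSWriteChars`, `OSReadBytes`, and `OSWriteBytes`. A hedged sketch for inspecting them, assuming a server version where `ProfileEvents` in `system.query_log` is a `Map` column:

``` sql
SELECT
    ProfileEvents['OSReadChars']  AS rchar,
    ProfileEvents['OSWriteChars'] AS wchar,
    ProfileEvents['OSReadBytes']  AS read_bytes,
    ProfileEvents['OSWriteBytes'] AS write_bytes
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 1;
```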

View File

@@ -45,7 +45,7 @@ public:
         const auto & settings_ref = context->getSettingsRef();

         if constexpr (internal)
             return createImpl({}, false /*keep_nullable*/, false /*cast_ipv4_ipv6_default_on_conversion_error*/);

-        return createImpl({}, false /*keep_nullable*/, settings_ref.cast_ipv4_ipv6_default_on_conversion_error);
+        return createImpl({}, settings_ref.cast_keep_nullable, settings_ref.cast_ipv4_ipv6_default_on_conversion_error);
     }
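
The user-visible effect of this hunk is that non-internal `CAST` now honors the `cast_keep_nullable` setting instead of always dropping nullability. A small sketch of the documented setting (example values mine, not taken from the patch):

``` sql
SET cast_keep_nullable = 0;
SELECT toTypeName(CAST(toNullable(1) AS UInt8));  -- UInt8: source nullability is dropped
SET cast_keep_nullable = 1;
SELECT toTypeName(CAST(toNullable(1) AS UInt8));  -- Nullable(UInt8): source nullability is kept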

View File

@@ -458,6 +458,8 @@ public:
             {
                 default_cols.emplace_back(result);
             }
+
+            ++current_arguments_index;
         }
         else
         {
@@ -465,6 +467,13 @@ public:
             default_cols.emplace_back(nullptr);
         }

+        if (current_arguments_index < arguments.size())
+            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: passed {} should be {}",
+                getName(),
+                arguments.size(),
+                current_arguments_index);
+
        auto key_col_with_type = arguments[2];
        bool key_is_only_null = key_col_with_type.type->onlyNull();
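
A sketch of what this guard rejects, mirroring the new tests at the end of this diff (`test_dictionary` is defined there): passing more arguments than `dictGet` consumes now raises NUMBER_OF_ARGUMENTS_DOESNT_MATCH instead of the extras going unchecked.

``` sql
-- dictGet expects (dict_name, attribute_name, key); the extra default is rejected:
SELECT dictGet('test_dictionary', 'value', 0, 'DefaultValue');  -- serverError 42
-- dictGetOrDefault is the variant that takes a default value:
SELECT dictGetOrDefault('test_dictionary', 'value', 0, 'DefaultValue');
```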

View File

@@ -12,14 +12,14 @@ from typing import Dict, List, Optional, Set, Tuple, Union
 from github import Github

-from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL
-from s3_helper import S3Helper
-from pr_info import PRInfo
-from get_robot_token import get_best_robot_token, get_parameter_from_ssm
-from upload_result_helper import upload_results
-from commit_status_helper import post_commit_status
 from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
+from commit_status_helper import post_commit_status
+from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL
+from get_robot_token import get_best_robot_token, get_parameter_from_ssm
+from pr_info import PRInfo
+from s3_helper import S3Helper
 from stopwatch import Stopwatch
+from upload_result_helper import upload_results

 NAME = "Push to Dockerhub (actions)"
@@ -237,6 +237,8 @@ def build_and_push_one_image(
         "docker buildx build --builder default "
         f"--label build-url={GITHUB_RUN_URL} "
         f"{from_tag_arg}"
+        # A hack to invalidate cache, grep for it in docker/ dir
+        f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
         f"--tag {image.repo}:{version_string} "
         f"--cache-from type=registry,ref={image.repo}:{version_string} "
         f"--cache-from type=registry,ref={image.repo}:latest "
View File

@@ -124,6 +124,7 @@ class TestDockerImageCheck(unittest.TestCase):
         self.assertIn(
             f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
             "--build-arg FROM_TAG=version "
+            f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
             "--tag name:version --cache-from type=registry,ref=name:version "
             "--cache-from type=registry,ref=name:latest "
             "--cache-to type=inline,mode=max --push --progress plain path",
@@ -142,6 +143,7 @@ class TestDockerImageCheck(unittest.TestCase):
         self.assertIn(
             f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
             "--build-arg FROM_TAG=version2 "
+            f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
             "--tag name:version2 --cache-from type=registry,ref=name:version2 "
             "--cache-from type=registry,ref=name:latest "
             "--cache-to type=inline,mode=max --progress plain path",
@@ -159,6 +161,7 @@ class TestDockerImageCheck(unittest.TestCase):
         mock_machine.assert_not_called()
         self.assertIn(
             f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} "
+            f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} "
             "--tag name:version2 --cache-from type=registry,ref=name:version2 "
             "--cache-from type=registry,ref=name:latest "
             "--cache-to type=inline,mode=max --progress plain path",

View File

@@ -0,0 +1,24 @@
+SET cast_ipv4_ipv6_default_on_conversion_error = 1;
+
+DROP TABLE IF EXISTS ipv4_test;
+CREATE TABLE ipv4_test
+(
+    id UInt64,
+    value String
+) ENGINE=MergeTree ORDER BY id;
+
+ALTER TABLE ipv4_test MODIFY COLUMN value IPv4 DEFAULT '';
+
+DROP TABLE ipv4_test;
+
+DROP TABLE IF EXISTS ipv6_test;
+CREATE TABLE ipv6_test
+(
+    id UInt64,
+    value String
+) ENGINE=MergeTree ORDER BY id;
+
+ALTER TABLE ipv6_test MODIFY COLUMN value IPv6 DEFAULT '';
+SELECT * FROM ipv6_test;
+
+DROP TABLE ipv6_test;
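
For context, what the setting changes (a sketch consistent with the test above, not part of the patch): with `cast_ipv4_ipv6_default_on_conversion_error = 1`, casting an unparsable string to an IP type yields the type's default value instead of throwing, which is what lets the `ALTER ... MODIFY COLUMN ... DEFAULT ''` statements above succeed.

``` sql
SET cast_ipv4_ipv6_default_on_conversion_error = 1;
SELECT CAST('invalid' AS IPv4);  -- 0.0.0.0 instead of an exception
SELECT CAST('invalid' AS IPv6);  -- :: instead of an exception
```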

View File

@@ -0,0 +1,4 @@
+Value
+DefaultValue
+Value
+DefaultValue

View File

@@ -0,0 +1,59 @@
+DROP TABLE IF EXISTS dictionary_source_table;
+CREATE TABLE dictionary_source_table
+(
+    id UInt64,
+    value String
+) ENGINE=TinyLog;
+
+INSERT INTO dictionary_source_table VALUES (0, 'Value');
+
+DROP DICTIONARY IF EXISTS test_dictionary;
+CREATE DICTIONARY test_dictionary
+(
+    id UInt64,
+    value String
+)
+PRIMARY KEY id
+LAYOUT(FLAT())
+SOURCE(CLICKHOUSE(TABLE 'dictionary_source_table'))
+LIFETIME(0);
+
+SELECT dictGet('test_dictionary', 'value', 0);
+SELECT dictGet('test_dictionary', 'value', 0, 'DefaultValue'); --{serverError 42}
+SELECT dictGetOrDefault('test_dictionary', 'value', 1, 'DefaultValue');
+SELECT dictGetOrDefault('test_dictionary', 'value', 1, 'DefaultValue', 1); --{serverError 42}
+
+DROP DICTIONARY test_dictionary;
+DROP TABLE dictionary_source_table;
+
+CREATE TABLE dictionary_source_table
+(
+    key UInt64,
+    start UInt64,
+    end UInt64,
+    value String
+) ENGINE=TinyLog;
+
+INSERT INTO dictionary_source_table VALUES (0, 0, 5, 'Value');
+
+DROP DICTIONARY IF EXISTS range_hashed_dictionary;
+CREATE DICTIONARY range_hashed_dictionary
+(
+    key UInt64,
+    start UInt64,
+    end UInt64,
+    value String
+)
+PRIMARY KEY key
+SOURCE(CLICKHOUSE(TABLE 'dictionary_source_table'))
+LAYOUT(RANGE_HASHED())
+RANGE(MIN start MAX end)
+LIFETIME(0);
+
+SELECT dictGet('range_hashed_dictionary', 'value', 0, toUInt64(4));
+SELECT dictGet('range_hashed_dictionary', 'value', 4, toUInt64(6), 'DefaultValue'); --{serverError 42}
+SELECT dictGetOrDefault('range_hashed_dictionary', 'value', 1, toUInt64(6), 'DefaultValue');
+SELECT dictGetOrDefault('range_hashed_dictionary', 'value', 1, toUInt64(6), 'DefaultValue', 1); --{serverError 42}
+
+DROP DICTIONARY range_hashed_dictionary;
+DROP TABLE dictionary_source_table;