diff --git a/docker/server/README.md b/docker/server/README.md index d8e9204dffa..6f799d68185 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -56,7 +56,7 @@ $ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @- 20.12.3.3 ``` -### Volumes +### Volumes Typically you may want to mount the following folders inside your container to archieve persistency: @@ -76,7 +76,7 @@ You may also want to mount: * `/etc/clickhouse-server/usert.d/*.xml` - files with use settings adjustmenets * `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). -### Linux capabilities +### Linux capabilities ClickHouse has some advanced functionality which requite enabling several [linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). @@ -113,10 +113,10 @@ $ docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-serv ### How to create default database and user on starting -Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD`: +Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`: ``` -$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server +$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server ``` ## How to extend this image diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 549ff601c59..0138a165505 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -54,6 +54,7 @@ FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_ CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" +CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}" for dir in "$DATA_DIR" \ "$ERROR_LOG_DIR" \ @@ -97,6 +98,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL ${CLICKHOUSE_PASSWORD} default + ${CLICKHOUSE_ACCESS_MANAGEMENT} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 963f9fa18bd..43519bfc8dc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -312,7 +312,7 @@ Enables or disables parsing enum values as enum ids for TSV input format. Possible values: - 0 — Enum values are parsed as values. -- 1 — Enum values are parsed as enum IDs +- 1 — Enum values are parsed as enum IDs. Default value: 0. diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 006542f494a..975695f40b3 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -133,10 +133,9 @@ For example: ### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} -Same as `cutToFirstSignificantSubdomain` but accept custom TLD list name, useful if: +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name. -- you need fresh TLD list, -- or you have custom. +Can be useful if you need fresh TLD list or you have custom. Configuration example: @@ -149,21 +148,150 @@ Configuration example: ``` -Example: +**Syntax** -- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/', 'public_suffix_list') = 'yandex.com.tr'`. +``` sql +cutToFirstSignificantSubdomain(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Accepts custom TLD list name. + +Can be useful if you need fresh TLD list or you have custom. + +Configuration example: + +```xml + + + + public_suffix_list.dat + + +``` + +**Syntax** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} -Same as `firstSignificantSubdomain` but accept custom TLD list name. +Returns the first significant subdomain. Accepts customs TLD list name. -### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} +Can be useful if you need fresh TLD list or you have custom. -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Configuration example: + +```xml + + + + public_suffix_list.dat + + +``` + +**Syntax** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- First significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### port(URL\[, default_port = 0\]) {#port} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index bf7efe70437..7322b6c9184 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -283,12 +283,10 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( ## input_format_tsv_empty_as_default {#settings-input-format-tsv-empty-as-default} -Если эта настройка включена, замените пустые поля ввода в TSV значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`. +Если эта настройка включена, все пустые поля во входящем TSV заменяются значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`. По умолчанию отключена. -Disabled by default. - ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number} Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата TSV. @@ -708,7 +706,7 @@ ClickHouse использует этот параметр при чтении д Установка логирования запроса. -Запросы, переданные в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log). +Запросы, переданные в ClickHouse с этой настройкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log). Пример: @@ -1521,7 +1519,7 @@ ClickHouse генерирует исключение - Тип: секунды - Значение по умолчанию: 60 секунд -Управляет скоростью обнуления ошибок в распределенных таблицах. Если реплика недоступна в течение некоторого времени, накапливает 5 ошибок, а distributed_replica_error_half_life установлена на 1 секунду, то реплика считается нормальной через 3 секунды после последней ошибки. +Управляет скоростью обнуления счетчика ошибок в распределенных таблицах. Предположим, реплика остается недоступна в течение какого-то времени, и за этот период накопилось 5 ошибок. Если настройка `distributed_replica_error_half_life` установлена в значение 1 секунда, то реплика снова будет считаться доступной через 3 секунды после последней ошибки. См. также: @@ -1673,7 +1671,7 @@ ClickHouse генерирует исключение - Тип: bool - Значение по умолчанию: True -Обеспечивает параллельный анализ форматов данных с сохранением порядка. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow. +Включает режим, при котором входящие данные парсятся параллельно, но с сохранением исходного порядка следования. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow. ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing} @@ -1987,7 +1985,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; ## output_format_pretty_grid_charset {#output-format-pretty-grid-charset} -Позволяет изменить кодировку, которая используется для печати грид-границ. Доступны следующие кодировки: UTF-8, ASCII. +Позволяет изменить кодировку, которая используется для отрисовки таблицы при выводе результатов запросов. Доступны следующие кодировки: UTF-8, ASCII. **Пример** diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 1008e2a359c..7541e16bed4 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -115,6 +115,168 @@ SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk') Например, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена. Принимает имя пользовательского [списка доменов верхнего уровня](https://ru.wikipedia.org/wiki/Список_доменов_верхнего_уровня). + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +``` sql +cutToFirstSignificantSubdomain(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена, не опуская "www". Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена, без удаления `www`. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} + +Возвращает первый существенный поддомен. Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Первый существенный поддомен. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + ### port(URL[, default_port = 0]) {#port} Возвращает порт или значение `default_port`, если в URL-адресе нет порта (или передан невалидный URL) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index e38a6b240a6..164b9565633 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -756,7 +756,11 @@ std::optional Connection::checkPacket(size_t timeout_microseconds) Packet Connection::receivePacket(std::function async_callback) { in->setAsyncCallback(std::move(async_callback)); - SCOPE_EXIT(in->setAsyncCallback({})); + SCOPE_EXIT({ + /// disconnect() will reset "in". + if (in) + in->setAsyncCallback({}); + }); try { diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index bf159e27731..892bd0b2ba9 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -539,7 +539,8 @@ protected: * after transferring all the elements from the old halves you need to [ o x ] * process tail from the collision resolution chain immediately after it [ o x ] */ - for (; !buf[i].isZero(*this); ++i) + size_t new_size = grower.bufSize(); + for (; i < new_size && !buf[i].isZero(*this); ++i) { size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this)); diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp index 41255dcbba1..1c673166ca9 100644 --- a/src/Common/tests/gtest_hash_table.cpp +++ b/src/Common/tests/gtest_hash_table.cpp @@ -317,3 +317,51 @@ TEST(HashTable, SerializationDeserialization) ASSERT_EQ(convertToSet(cont), convertToSet(deserialized)); } } + +template +struct IdentityHash +{ + size_t operator()(T x) const { return x; } +}; + +struct OneElementResizeGrower +{ + /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements. + static constexpr auto performs_linear_probing_with_single_step = true; + + static constexpr size_t initial_count = 1; + + size_t bufSize() const { return buf_size; } + + size_t place(size_t x) const { return x % buf_size; } + + size_t next(size_t pos) const { return (pos + 1) % buf_size; } + + bool overflow(size_t elems) const { return elems >= buf_size; } + + void increaseSize() { ++buf_size; } + + void set(size_t) { } + + void setBufSize(size_t buf_size_) { buf_size = buf_size_; } + + size_t buf_size = initial_count; +}; + +TEST(HashTable, Resize) +{ + { + /// Test edge case if after resize all cells are resized in end of buf and will take half of + /// hash table place. + using HashSet = HashSet, OneElementResizeGrower>; + HashSet cont; + + cont.insert(3); + cont.insert(1); + + std::set expected = {1, 3}; + std::set actual = convertToSet(cont); + + ASSERT_EQ(actual, expected); + } +} diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp index 56bef9e09ea..28ba625d9fb 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONEachRowUtils.cpp @@ -6,6 +6,7 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; } std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) @@ -28,7 +29,9 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D if (quotes) { pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; if (*pos == '\\') { @@ -45,9 +48,11 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D else { pos = find_first_symbols<'{', '}', '\\', '"'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; - if (*pos == '{') + else if (*pos == '{') { ++balance; ++pos; diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 782eea84ed7..f8cc8b7febb 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -35,10 +35,10 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; - Memory() {} + Memory() = default; /// If alignment != 0, then allocate memory aligned to specified value. - Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_) + explicit Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_) { alloc(); } @@ -140,7 +140,7 @@ protected: Memory<> memory; public: /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership. - BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) + explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment) { Base::set(existing_memory ? existing_memory : memory.data(), size); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 8b59b225480..24d0eb4b0de 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -746,7 +746,7 @@ public: IHostContextPtr & getHostContext(); const IHostContextPtr & getHostContext() const; - void initMetadataTransaction(MetadataTransactionPtr txn, bool attach_to_context = false); + void initMetadataTransaction(MetadataTransactionPtr txn, bool attach_existing = false); MetadataTransactionPtr getMetadataTransaction() const; struct MySQLWireContext diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index c342a994395..12fd03b3b70 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -505,7 +505,6 @@ void DDLWorker::processTask(DDLTaskBase & task) } zookeeper->create(active_node_path, {}, zkutil::CreateMode::Ephemeral); - } if (!task.was_executed) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index e7cd85798b9..3d868812304 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -266,7 +266,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserIdentifier id_parser; ParserKeyword distinct("DISTINCT"); ParserKeyword all("ALL"); - ParserExpressionList contents(false); + ParserExpressionList contents(false, is_table_function); ParserSelectWithUnionQuery select; ParserKeyword over("OVER"); @@ -278,6 +278,12 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr expr_list_args; ASTPtr expr_list_params; + if (is_table_function) + { + if (ParserTableFunctionView().parse(pos, node, expected)) + return true; + } + if (!id_parser.parse(pos, identifier, expected)) return false; @@ -312,36 +318,6 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - if (!has_distinct && !has_all) - { - auto old_pos = pos; - auto maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; - - if (select.parse(pos, query, expected)) - { - auto & select_ast = query->as(); - if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) - { - // It's an subquery. Bail out. - pos = old_pos; - } - else - { - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - auto function_node = std::make_shared(); - tryGetIdentifierNameInto(identifier, function_node->name); - auto expr_list_with_single_query = std::make_shared(); - expr_list_with_single_query->children.push_back(query); - function_node->arguments = expr_list_with_single_query; - function_node->children.push_back(function_node->arguments); - node = function_node; - return true; - } - } - } - const char * contents_begin = pos->begin; if (!contents.parse(pos, expr_list_args, expected)) return false; @@ -477,6 +453,49 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +bool ParserTableFunctionView::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserIdentifier id_parser; + ParserKeyword view("VIEW"); + ParserSelectWithUnionQuery select; + + ASTPtr identifier; + ASTPtr query; + + if (!view.ignore(pos, expected)) + return false; + + if (pos->type != TokenType::OpeningRoundBracket) + return false; + + ++pos; + + bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; + + if (!select.parse(pos, query, expected)) + return false; + + auto & select_ast = query->as(); + if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) + { + // It's an subquery. Bail out. + return false; + } + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + auto function_node = std::make_shared(); + tryGetIdentifierNameInto(identifier, function_node->name); + auto expr_list_with_single_query = std::make_shared(); + expr_list_with_single_query->children.push_back(query); + function_node->name = "view"; + function_node->arguments = expr_list_with_single_query; + function_node->children.push_back(function_node->arguments); + node = function_node; + return true; +} + bool ParserWindowReference::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTFunction * function = dynamic_cast(node.get()); diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index ba18fc2cddd..b6194f981fe 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -149,11 +149,25 @@ protected: class ParserFunction : public IParserBase { public: - ParserFunction(bool allow_function_parameters_ = true) : allow_function_parameters(allow_function_parameters_) {} + ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false) + : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_) + { + } + protected: const char * getName() const override { return "function"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool allow_function_parameters; + bool is_table_function; +}; + +// A special function parser for view table function. +// It parses an SELECT query as its argument and doesn't support getColumnName(). +class ParserTableFunctionView : public IParserBase +{ +protected: + const char * getName() const override { return "function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; // Window reference (the thing that goes after OVER) for window function. diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index afe85f069c7..e9ad65af471 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -468,6 +468,14 @@ bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expe } +bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (ParserTableFunctionView().parse(pos, node, expected)) + return true; + return elem_parser.parse(pos, node, expected); +} + + bool ParserPrefixUnaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// try to find any of the valid operators @@ -570,9 +578,10 @@ bool ParserTupleElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected } -ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword) - : impl(std::make_unique(std::make_unique(), - allow_alias_without_as_keyword)) +ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function) + : impl(std::make_unique( + is_table_function ? ParserPtr(std::make_unique()) : ParserPtr(std::make_unique()), + allow_alias_without_as_keyword)) { } @@ -580,7 +589,7 @@ ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_ bool ParserExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserList( - std::make_unique(allow_alias_without_as_keyword), + std::make_unique(allow_alias_without_as_keyword, is_table_function), std::make_unique(TokenType::Comma)) .parse(pos, node, expected); } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 90b27950873..2371e006c09 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -436,13 +436,26 @@ protected: }; +// It's used to parse expressions in table function. +class ParserTableFunctionExpression : public IParserBase +{ +private: + ParserLambdaExpression elem_parser; + +protected: + const char * getName() const override { return "table function expression"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + using ParserExpression = ParserLambdaExpression; class ParserExpressionWithOptionalAlias : public IParserBase { public: - ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword); + explicit ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function = false); protected: ParserPtr impl; @@ -459,11 +472,12 @@ protected: class ParserExpressionList : public IParserBase { public: - ParserExpressionList(bool allow_alias_without_as_keyword_) - : allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {} + explicit ParserExpressionList(bool allow_alias_without_as_keyword_, bool is_table_function_ = false) + : allow_alias_without_as_keyword(allow_alias_without_as_keyword_), is_table_function(is_table_function_) {} protected: bool allow_alias_without_as_keyword; + bool is_table_function; // This expression list is used by a table function const char * getName() const override { return "list of expressions"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; @@ -473,7 +487,7 @@ protected: class ParserNotEmptyExpressionList : public IParserBase { public: - ParserNotEmptyExpressionList(bool allow_alias_without_as_keyword) + explicit ParserNotEmptyExpressionList(bool allow_alias_without_as_keyword) : nested_parser(allow_alias_without_as_keyword) {} private: ParserExpressionList nested_parser; diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index 1264acefe64..2e20279dbe1 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -22,7 +22,7 @@ bool ParserTableExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec auto res = std::make_shared(); if (!ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->subquery, expected) - && !ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->table_function, expected) + && !ParserWithOptionalAlias(std::make_unique(true, true), true).parse(pos, res->table_function, expected) && !ParserWithOptionalAlias(std::make_unique(false, true), true).parse(pos, res->database_and_table_name, expected)) return false; diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 8422f09e364..f7f08411dfa 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; } @@ -436,9 +437,11 @@ static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB if (quotes) { pos = find_first_symbols<'"'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; - if (*pos == '"') + else if (*pos == '"') { ++pos; if (loadAtPosition(in, memory, pos) && *pos == '"') @@ -450,9 +453,11 @@ static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB else { pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; - if (*pos == '"') + else if (*pos == '"') { quotes = true; ++pos; diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 6e14a1dc3c8..108f4d9d321 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -11,6 +11,7 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } RegexpRowInputFormat::RegexpRowInputFormat( @@ -182,7 +183,9 @@ static std::pair fileSegmentationEngineRegexpImpl(ReadBuffer & in, while (loadAtPosition(in, memory, pos) && need_more_data) { pos = find_first_symbols<'\n', '\r'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; // Support DOS-style newline ("\r\n") diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 69a5e61caf2..96b01a5bd9b 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -15,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; } @@ -433,10 +434,11 @@ static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer { pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; - - if (*pos == '\\') + else if (*pos == '\\') { ++pos; if (loadAtPosition(in, memory, pos)) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 038dfaa1fbb..ff39bf91fbb 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -753,7 +753,7 @@ void StorageReplicatedMergeTree::drop() auto zookeeper = global_context.getZooKeeper(); /// If probably there is metadata in ZooKeeper, we don't allow to drop the table. - if (is_readonly || !zookeeper) + if (!zookeeper) throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY); shutdown(); diff --git a/tests/queries/0_stateless/01669_columns_declaration_serde.sql b/tests/queries/0_stateless/01669_columns_declaration_serde.sql index 8e3354d63cd..a6bf1184e9f 100644 --- a/tests/queries/0_stateless/01669_columns_declaration_serde.sql +++ b/tests/queries/0_stateless/01669_columns_declaration_serde.sql @@ -22,12 +22,12 @@ DROP TABLE IF EXISTS test_r1; DROP TABLE IF EXISTS test_r2; CREATE TABLE test_r1 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || ' -') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r1') ORDER BY "\\"; +') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r1') ORDER BY "\\"; INSERT INTO test_r1 ("\\") VALUES ('\\'); CREATE TABLE test_r2 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || ' -') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r2') ORDER BY "\\"; +') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r2') ORDER BY "\\"; SYSTEM SYNC REPLICA test_r2; diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.reference b/tests/queries/0_stateless/01715_table_function_view_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.sql b/tests/queries/0_stateless/01715_table_function_view_fix.sql new file mode 100644 index 00000000000..de5150b7b70 --- /dev/null +++ b/tests/queries/0_stateless/01715_table_function_view_fix.sql @@ -0,0 +1 @@ +SELECT view(SELECT 1); -- { clientError 62 } diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.reference b/tests/queries/1_stateful/00158_cache_dictionary_has.reference index f8d5cd4f53d..ad4bce6bec5 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.reference +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.reference @@ -1,6 +1,6 @@ +100 6410 -6410 -25323 +100 25323 -1774655 +100 1774655 diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.sql b/tests/queries/1_stateful/00158_cache_dictionary_has.sql index 063e7843fd4..8461728c58e 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.sql +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.sql @@ -6,15 +6,15 @@ CREATE DICTIONARY db_dict.cache_hits PRIMARY KEY WatchID SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PASSWORD '' DB 'test')) LIFETIME(MIN 300 MAX 600) -LAYOUT(CACHE(SIZE_IN_CELLS 100000 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); +LAYOUT(CACHE(SIZE_IN_CELLS 100 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 1400 == 0; -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 350 == 0; -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 5 == 0; DROP DICTIONARY IF EXISTS db_dict.cache_hits; diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 52cef210748..67ec88ebd55 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -726,6 +726,7 @@ "01676_dictget_in_default_expression", "01715_background_checker_blather_zookeeper", "01700_system_zookeeper_path_in", + "01669_columns_declaration_serde", "attach", "ddl_dictionaries", "dictionary",