Merge branch 'master' into estimates_for_select_query

Alexey Milovidov 2021-07-11 02:11:16 +03:00
commit 78cde85ea8
81 changed files with 651 additions and 191 deletions


@ -2,11 +2,11 @@
# NOTE: has nothing in common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54453)
SET(VERSION_REVISION 54454)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 8)
SET(VERSION_MINOR 9)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH fb895056568e26200629c7d19626e92d2dedc70d)
SET(VERSION_DESCRIBE v21.8.1.1-prestable)
SET(VERSION_STRING 21.8.1.1)
SET(VERSION_GITHASH f48c5af90c2ad51955d1ee3b6b05d006b03e4238)
SET(VERSION_DESCRIBE v21.9.1.1-prestable)
SET(VERSION_STRING 21.9.1.1)
# end of autochange

contrib/h3 vendored

@ -1 +1 @@
Subproject commit e209086ae1b5477307f545a0f6111780edc59940
Subproject commit c7f46cfd71fb60e2fefc90e28abe81657deff735


@ -3,21 +3,22 @@ set(H3_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/h3/src/h3lib")
set(SRCS
"${H3_SOURCE_DIR}/lib/algos.c"
"${H3_SOURCE_DIR}/lib/baseCells.c"
"${H3_SOURCE_DIR}/lib/bbox.c"
"${H3_SOURCE_DIR}/lib/coordijk.c"
"${H3_SOURCE_DIR}/lib/faceijk.c"
"${H3_SOURCE_DIR}/lib/geoCoord.c"
"${H3_SOURCE_DIR}/lib/h3Index.c"
"${H3_SOURCE_DIR}/lib/h3UniEdge.c"
"${H3_SOURCE_DIR}/lib/linkedGeo.c"
"${H3_SOURCE_DIR}/lib/localij.c"
"${H3_SOURCE_DIR}/lib/mathExtensions.c"
"${H3_SOURCE_DIR}/lib/bbox.c"
"${H3_SOURCE_DIR}/lib/polygon.c"
"${H3_SOURCE_DIR}/lib/h3Index.c"
"${H3_SOURCE_DIR}/lib/vec2d.c"
"${H3_SOURCE_DIR}/lib/vec3d.c"
"${H3_SOURCE_DIR}/lib/vertex.c"
"${H3_SOURCE_DIR}/lib/linkedGeo.c"
"${H3_SOURCE_DIR}/lib/localij.c"
"${H3_SOURCE_DIR}/lib/latLng.c"
"${H3_SOURCE_DIR}/lib/directedEdge.c"
"${H3_SOURCE_DIR}/lib/mathExtensions.c"
"${H3_SOURCE_DIR}/lib/iterators.c"
"${H3_SOURCE_DIR}/lib/vertexGraph.c"
"${H3_SOURCE_DIR}/lib/faceijk.c"
"${H3_SOURCE_DIR}/lib/baseCells.c"
)
configure_file("${H3_SOURCE_DIR}/include/h3api.h.in" "${H3_BINARY_DIR}/include/h3api.h")

debian/changelog vendored

@ -1,5 +1,5 @@
clickhouse (21.8.1.1) unstable; urgency=low
clickhouse (21.9.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 28 Jun 2021 00:50:15 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Sat, 10 Jul 2021 08:22:49 +0300


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.8.1.*
ARG version=21.9.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \


@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.8.1.*
ARG version=21.9.1.*
ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.8.1.*
ARG version=21.9.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \


@ -0,0 +1,114 @@
---
toc_priority: 66
toc_title: ClickHouse Keeper
---
# [pre-production] clickhouse-keeper
ClickHouse server uses the [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) query execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper.
!!! warning "Warning"
    This feature is currently in the pre-production stage. We test it in our CI and on small internal installations.
## Implementation details
ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, ZAB (ZooKeeper Atomic Broadcast), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper, `clickhouse-keeper` is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows linearizable reads and writes, and has several open-source implementations in different languages.
By default, `clickhouse-keeper` provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with `clickhouse-keeper`. Snapshots and logs have a format incompatible with ZooKeeper, but the `clickhouse-keeper-converter` tool allows converting ZooKeeper data to a `clickhouse-keeper` snapshot. The interserver protocol of `clickhouse-keeper` is also incompatible with ZooKeeper, so a mixed ZooKeeper/clickhouse-keeper cluster is impossible.
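Because the client-server protocol is compatible, a stock ZooKeeper CLI client can be pointed at `clickhouse-keeper` directly. A minimal sketch, assuming keeper listens on `localhost:2181` (host and port here are illustrative):
```bash
# zkCli.sh ships with the ZooKeeper distribution; adjust host and port to your setup.
bin/zkCli.sh -server localhost:2181
```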
## Configuration
`clickhouse-keeper` can be used as a standalone replacement for ZooKeeper or as an internal part of `clickhouse-server`, but in both cases the configuration is almost the same `.xml` file. The main `clickhouse-keeper` configuration tag is `<keeper_server>`. Keeper configuration has the following parameters:
- `tcp_port` — the port for a client to connect (default for ZooKeeper is `2181`)
- `tcp_port_secure` — the secure port for a client to connect
- `server_id` — unique server id, each participant of the clickhouse-keeper cluster must have a unique number (1, 2, 3, and so on)
- `log_storage_path` — path to coordination logs; as with ZooKeeper, it is best to store the logs on a non-busy device
- `snapshot_storage_path` — path to coordination snapshots
Other common parameters are inherited from clickhouse-server config (`listen_host`, `logger` and so on).
Internal coordination settings are located in `<keeper_server>.<coordination_settings>` section:
- `operation_timeout_ms` — timeout for a single client operation
- `session_timeout_ms` — timeout for client session
- `dead_session_check_period_ms` — how often clickhouse-keeper checks for dead sessions and removes them
- `heart_beat_interval_ms` — how often a clickhouse-keeper leader will send heartbeats to followers
- `election_timeout_lower_bound_ms` — if a follower doesn't receive a heartbeat from the leader in this interval, it can initiate a leader election
- `election_timeout_upper_bound_ms` — if a follower doesn't receive a heartbeat from the leader in this interval, it must initiate a leader election
- `rotate_log_storage_interval` — how many logs to store in a single file
- `reserved_log_items` — how many coordination logs to store before compaction
- `snapshot_distance` — how often clickhouse-keeper will create new snapshots (in the number of logs)
- `snapshots_to_keep` — how many snapshots to keep
- `stale_log_gap` — the threshold at which the leader considers a follower stale and sends it a snapshot instead of logs
- `force_sync` — call `fsync` on each write to coordination log
- `raft_logs_level` — text logging level about coordination (trace, debug, and so on)
- `shutdown_timeout` — time to wait for internal connections to finish during shutdown
- `startup_timeout` — if the server doesn't connect to other quorum participants within this timeout, it will terminate
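Each of these settings maps to a child tag of `<coordination_settings>`, as in the full example below; a minimal sketch with illustrative values:
```xml
<coordination_settings>
    <force_sync>true</force_sync>
    <snapshots_to_keep>3</snapshots_to_keep>
    <snapshot_distance>100000</snapshot_distance>
</coordination_settings>
```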
Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contains a description of the servers. The only parameter for the whole quorum is `secure`, which enables an encrypted connection for communication between quorum participants. The main parameters for each `<server>` are:
- `id` — server_id in quorum
- `hostname` — hostname of the machine where this server is placed
- `port` — port where this server listens for connections
Examples of configuration for a quorum with three nodes can be found in [integration tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/integration) with the `test_keeper_` prefix. Example configuration for server #1:
```xml
<keeper_server>
<tcp_port>2181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>
<session_timeout_ms>30000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>zoo1</hostname>
<port>9444</port>
</server>
<server>
<id>2</id>
<hostname>zoo2</hostname>
<port>9444</port>
</server>
<server>
<id>3</id>
<hostname>zoo3</hostname>
<port>9444</port>
</server>
</raft_configuration>
</keeper_server>
```
## How to run
`clickhouse-keeper` is bundled into the `clickhouse-server` package: just add the `<keeper_server>` configuration and start clickhouse-server as usual. If you want to run a standalone `clickhouse-keeper`, you can start it in a similar way with:
```bash
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
```
## [experimental] Migration from ZooKeeper
Seamless migration from ZooKeeper to `clickhouse-keeper` is impossible: you have to stop your ZooKeeper cluster, convert the data, and start `clickhouse-keeper`. The `clickhouse-keeper-converter` tool converts ZooKeeper logs and snapshots to a `clickhouse-keeper` snapshot. It works only with ZooKeeper > 3.4. Steps for migration:
1. Stop all ZooKeeper nodes.
2. [optional, but recommended] Find the ZooKeeper leader node, then start and stop it again. This forces ZooKeeper to create a consistent snapshot.
3. Run `clickhouse-keeper-converter` on the leader, for example:
```bash
clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/version-2 --output-dir /path/to/clickhouse/keeper/snapshots
```
4. Copy the snapshot to `clickhouse-server` nodes with a configured `keeper`, or start `clickhouse-keeper` instead of ZooKeeper. The snapshot must be present only on the leader node; the leader will sync it to the other nodes automatically.
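A minimal sketch of step 4, assuming the converter output directory from step 3 and the `snapshot_storage_path` from the example configuration above (host name and paths are illustrative):
```bash
# Hypothetical host and paths: adjust to your converter output and snapshot_storage_path.
scp /path/to/clickhouse/keeper/snapshots/* keeper-leader:/var/lib/clickhouse/coordination/snapshots/
```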


@ -22,6 +22,23 @@ Some settings specified in the main configuration file can be overridden in othe
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).
If you want to replace an entire element with a substitution, use `include` as the element name.
XML substitution example:
```xml
<yandex>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
<users>
<!-- Replaces `include` element with the subtree found at `/users-in-zookeeper` ZK path. -->
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
```
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
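For instance, the node referenced above could be created with the stock ZooKeeper CLI (the path and XML payload are illustrative, mirroring the integration test changed later in this commit):
```
create /users-in-zookeeper "<user_1><password></password><profile>default</profile></user_1>"
```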
## User Settings {#user-settings}
@ -32,6 +49,8 @@ Users configuration can be split into separate files, similarly to `config.xml` and `config.d/`.
The directory name is defined as the `users_config` setting value with the `.xml` postfix removed and `.d` appended.
Directory `users.d` is used by default, as `users_config` defaults to `users.xml`.
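For instance, with the default `users_config` of `users.xml`, the resulting layout might look like this (paths are illustrative):
```
/etc/clickhouse-server/users.xml
/etc/clickhouse-server/users.d/extra_user.xml
```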
Note that configuration files are first merged taking into account [Override](#override) settings and includes are processed after that.
## XML example {#example}
For example, you can have a separate config file for each user like this:


@ -24,7 +24,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)).
- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute.
**Returned value**
@ -237,7 +237,7 @@ dictHas('dict_name', id_expr)
**Arguments**
- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
**Returned value**


@ -87,6 +87,8 @@ Result:
└───────┴───────┘
```
Note: the names are implementation specific and are subject to change. You should not assume specific names of the columns after applying `untuple`.
Example of using an `EXCEPT` expression:
Query:


@ -81,7 +81,7 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,
**Example**
``` sql
SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
```
┌─res───────────────────────┐
@ -174,7 +174,7 @@ SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS re
│ [3] │
└─────┘
## bitmapOr {#bitmapor}
Performs an OR operation on two bitmap objects and returns a new bitmap object.


@ -430,6 +430,7 @@ private:
{TokenType::ClosingRoundBracket, Replxx::Color::BROWN},
{TokenType::OpeningSquareBracket, Replxx::Color::BROWN},
{TokenType::ClosingSquareBracket, Replxx::Color::BROWN},
{TokenType::DoubleColon, Replxx::Color::BROWN},
{TokenType::OpeningCurlyBrace, Replxx::Color::INTENSE},
{TokenType::ClosingCurlyBrace, Replxx::Color::INTENSE},


@ -388,24 +388,32 @@ void LocalServer::processQueries()
/// Use the same query_id (and thread group) for all queries
CurrentThread::QueryScope query_scope_holder(context);
///Set progress show
/// Set progress show
need_render_progress = config().getBool("progress", false);
std::function<void()> finalize_progress;
if (need_render_progress)
{
/// Set progress callback, which can be run from multiple threads.
context->setProgressCallback([&](const Progress & value)
{
/// Write progress only if progress was updated
if (progress_indication.updateProgress(value))
progress_indication.writeProgress();
});
/// Set finalizing callback for progress, which is called right before finalizing query output.
finalize_progress = [&]()
{
progress_indication.clearProgressOutput();
};
/// Set callback for file processing progress.
progress_indication.setFileProgressCallback(context);
}
bool echo_queries = config().hasOption("echo") || config().hasOption("verbose");
if (need_render_progress)
progress_indication.setFileProgressCallback(context);
std::exception_ptr exception;
for (const auto & query : queries)
@ -425,7 +433,7 @@ void LocalServer::processQueries()
try
{
executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {});
executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {}, finalize_progress);
}
catch (...)
{


@ -298,11 +298,19 @@ void ConfigProcessor::doIncludesRecursive(
{
const auto * subst = attributes->getNamedItem(attr_name);
attr_nodes[attr_name] = subst;
substs_count += static_cast<size_t>(subst == nullptr);
substs_count += static_cast<size_t>(subst != nullptr);
}
if (substs_count < SUBSTITUTION_ATTRS.size() - 1) /// only one substitution is allowed
throw Poco::Exception("several substitutions attributes set for element <" + node->nodeName() + ">");
if (substs_count > 1) /// only one substitution is allowed
throw Poco::Exception("More than one substitution attribute is set for element <" + node->nodeName() + ">");
if (node->nodeName() == "include")
{
if (node->hasChildNodes())
throw Poco::Exception("<include> element must have no children");
if (substs_count == 0)
throw Poco::Exception("No substitution attributes set for element <include>, must have exactly one");
}
/// Replace the original contents, not add to it.
bool replace = attributes->getNamedItem("replace");
@ -320,37 +328,57 @@ void ConfigProcessor::doIncludesRecursive(
else if (throw_on_bad_incl)
throw Poco::Exception(error_msg + name);
else
{
if (node->nodeName() == "include")
node->parentNode()->removeChild(node);
LOG_WARNING(log, "{}{}", error_msg, name);
}
}
else
{
Element & element = dynamic_cast<Element &>(*node);
for (const auto & attr_name : SUBSTITUTION_ATTRS)
element.removeAttribute(attr_name);
if (replace)
/// Replace the whole node not just contents.
if (node->nodeName() == "include")
{
while (Node * child = node->firstChild())
node->removeChild(child);
const NodeListPtr children = node_to_include->childNodes();
for (size_t i = 0, size = children->length(); i < size; ++i)
{
NodePtr new_node = config->importNode(children->item(i), true);
node->parentNode()->insertBefore(new_node, node);
}
element.removeAttribute("replace");
node->parentNode()->removeChild(node);
}
const NodeListPtr children = node_to_include->childNodes();
for (size_t i = 0, size = children->length(); i < size; ++i)
else
{
NodePtr new_node = config->importNode(children->item(i), true);
node->appendChild(new_node);
}
Element & element = dynamic_cast<Element &>(*node);
const NamedNodeMapPtr from_attrs = node_to_include->attributes();
for (size_t i = 0, size = from_attrs->length(); i < size; ++i)
{
element.setAttributeNode(dynamic_cast<Attr *>(config->importNode(from_attrs->item(i), true)));
}
for (const auto & attr_name : SUBSTITUTION_ATTRS)
element.removeAttribute(attr_name);
included_something = true;
if (replace)
{
while (Node * child = node->firstChild())
node->removeChild(child);
element.removeAttribute("replace");
}
const NodeListPtr children = node_to_include->childNodes();
for (size_t i = 0, size = children->length(); i < size; ++i)
{
NodePtr new_node = config->importNode(children->item(i), true);
node->appendChild(new_node);
}
const NamedNodeMapPtr from_attrs = node_to_include->attributes();
for (size_t i = 0, size = from_attrs->length(); i < size; ++i)
{
element.setAttributeNode(dynamic_cast<Attr *>(config->importNode(from_attrs->item(i), true)));
}
included_something = true;
}
}
};


@ -10,16 +10,10 @@ namespace fs = std::filesystem;
namespace DB
{
/// Checks if file exists without throwing an exception but with message in console.
bool safeFsExists(const auto & path)
bool safeFsExists(const String & path)
{
std::error_code ec;
bool res = fs::exists(path, ec);
if (ec)
{
std::cerr << "Can't check '" << path << "': [" << ec.value() << "] " << ec.message() << std::endl;
}
return res;
return fs::exists(path, ec);
};
bool configReadClient(Poco::Util::LayeredConfiguration & config, const std::string & home_path)


@ -237,7 +237,12 @@ public:
// 1. Always memcpy 8 times bytes
// 2. Use switch case extension to generate fast dispatching table
// 3. Funcs are named callables that can be force_inlined
//
// NOTE: It relies on Little Endianness
//
// NOTE: It requires keys padded to 8 bytes (IOW you cannot pass
// std::string here, but you can pass e.g. ColumnString::getDataAt()),
// since it copies 8 bytes at a time.
template <typename Self, typename KeyHolder, typename Func>
static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
{


@ -4,9 +4,6 @@
#include <Common/UnicodeBar.h>
#include <Databases/DatabaseMemory.h>
/// FIXME: progress bar in clickhouse-local needs to be cleared after query execution
/// - same as it is now in clickhouse-client. Also there is no writeFinalProgress call
/// in clickhouse-local.
namespace DB
{


@ -1,11 +1,13 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeMap.h>
#include <Common/assert_cast.h>
@ -39,6 +41,11 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type)
return std::make_shared<DataTypeTuple>(elements);
}
if (const auto * map_type = typeid_cast<const DataTypeMap *>(type.get()))
{
return std::make_shared<DataTypeMap>(recursiveRemoveLowCardinality(map_type->getKeyType()), recursiveRemoveLowCardinality(map_type->getValueType()));
}
if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
return low_cardinality_type->getDictionaryType();
@ -78,6 +85,16 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
return ColumnTuple::create(columns);
}
if (const auto * column_map = typeid_cast<const ColumnMap *>(column.get()))
{
const auto & nested = column_map->getNestedColumnPtr();
auto nested_no_lc = recursiveRemoveLowCardinality(nested);
if (nested.get() == nested_no_lc.get())
return column;
return ColumnMap::create(nested_no_lc);
}
if (const auto * column_low_cardinality = typeid_cast<const ColumnLowCardinality *>(column.get()))
return column_low_cardinality->convertToFullColumn();


@ -7,6 +7,7 @@
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationMap.h>
#include <Parsers/IAST.h>
@ -53,12 +54,24 @@ DataTypeMap::DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & valu
void DataTypeMap::assertKeyType() const
{
if (!key_type->isValueRepresentedByInteger()
bool type_error = false;
if (key_type->getTypeId() == TypeIndex::LowCardinality)
{
const auto & low_cardinality_data_type = assert_cast<const DataTypeLowCardinality &>(*key_type);
if (!isStringOrFixedString(*(low_cardinality_data_type.getDictionaryType())))
type_error = true;
}
else if (!key_type->isValueRepresentedByInteger()
&& !isStringOrFixedString(*key_type)
&& !WhichDataType(key_type).isNothing()
&& !WhichDataType(key_type).isUUID())
{
type_error = true;
}
if (type_error)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Type of Map key must be a type, that can be represented by integer or string or UUID,"
"Type of Map key must be a type, that can be represented by integer or String or FixedString (possibly LowCardinality) or UUID,"
" but {} given", key_type->getName());
}


@ -28,7 +28,7 @@ public:
static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet";
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return true; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override;


@ -19,6 +19,7 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/castColumn.h>
#include <IO/WriteHelpers.h>
#include <Common/IPv6ToBinary.h>
#include <Common/formatIPv6.h>
@ -978,7 +979,8 @@ public:
!which.isDateTime64() &&
!which.isUInt() &&
!which.isFloat() &&
!which.isDecimal())
!which.isDecimal() &&
!which.isAggregateFunction())
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -990,6 +992,15 @@ public:
const IColumn * column = arguments[0].column.get();
ColumnPtr res_column;
WhichDataType which(column->getDataType());
if (which.isAggregateFunction())
{
const ColumnPtr to_string = castColumn(arguments[0], std::make_shared<DataTypeString>());
const auto * str_column = checkAndGetColumn<ColumnString>(to_string.get());
tryExecuteString(str_column, res_column);
return res_column;
}
if (tryExecuteUInt<UInt8>(column, res_column) ||
tryExecuteUInt<UInt16>(column, res_column) ||
tryExecuteUInt<UInt32>(column, res_column) ||


@ -163,13 +163,6 @@ public:
arguments[0]->getName(),
getName());
if (!WhichDataType(arguments[1]).isUInt64() &&
!isTuple(arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of second argument of function {} must be UInt64 or tuple(...)",
arguments[1]->getName(),
getName());
return std::make_shared<DataTypeUInt8>();
}
@ -189,8 +182,8 @@ public:
auto dictionary_key_type = dictionary->getKeyType();
const ColumnWithTypeAndName & key_column_with_type = arguments[1];
const auto key_column = key_column_with_type.column;
const auto key_column_type = WhichDataType(key_column_with_type.type);
auto key_column = key_column_with_type.column;
auto key_column_type = key_column_with_type.type;
ColumnPtr range_col = nullptr;
DataTypePtr range_col_type = nullptr;
@ -214,7 +207,7 @@ public:
if (dictionary_key_type == DictionaryKeyType::simple)
{
if (!key_column_type.isUInt64())
if (!WhichDataType(key_column_type).isUInt64())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument of function {} must be UInt64 when dictionary is simple. Actual type {}.",
@ -225,24 +218,39 @@ public:
}
else if (dictionary_key_type == DictionaryKeyType::complex)
{
if (!key_column_type.isTuple())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument of function {} must be tuple when dictionary is complex. Actual type {}.",
getName(),
key_column_with_type.type->getName());
/// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
ColumnPtr key_column_full = key_column->convertToFullColumnIfConst();
key_column = key_column->convertToFullColumnIfConst();
size_t keys_size = dictionary->getStructure().getKeysSize();
const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_column_full).getColumnsCopy();
const auto & key_types = static_cast<const DataTypeTuple &>(*key_column_with_type.type).getElements();
if (!isTuple(key_column_type))
{
if (keys_size > 1)
{
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument of function {} must be tuple when dictionary is complex and key contains more than 1 attribute."
"Actual type {}.",
getName(),
key_column_type->getName());
}
else
{
Columns tuple_columns = {std::move(key_column)};
key_column = ColumnTuple::create(tuple_columns);
DataTypes tuple_types = {key_column_type};
key_column_type = std::make_shared<DataTypeTuple>(tuple_types);
}
}
const auto & key_columns = assert_cast<const ColumnTuple &>(*key_column).getColumnsCopy();
const auto & key_types = assert_cast<const DataTypeTuple &>(*key_column_type).getElements();
return dictionary->hasKeys(key_columns, key_types);
}
else
{
if (!key_column_type.isUInt64())
if (!WhichDataType(key_column_type).isUInt64())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument of function {} must be UInt64 when dictionary is range. Actual type {}.",
@ -346,13 +354,6 @@ public:
Strings attribute_names = getAttributeNamesFromColumn(arguments[1].column, arguments[1].type);
auto dictionary = helper.getDictionary(dictionary_name);
if (!WhichDataType(arguments[2].type).isUInt64() && !isTuple(arguments[2].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of third argument of function {}, must be UInt64 or tuple(...).",
arguments[2].type->getName(),
getName());
auto dictionary_key_type = dictionary->getKeyType();
size_t current_arguments_index = 3;
@ -446,18 +447,35 @@ public:
}
else if (dictionary_key_type == DictionaryKeyType::complex)
{
if (!isTuple(key_col_with_type.type))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument of function {} must be tuple when dictionary is complex. Actual type {}.",
getName(),
key_col_with_type.type->getName());
/// Functions in external dictionaries_loader only support full-value (not constant) columns with keys.
ColumnPtr key_column_full = key_col_with_type.column->convertToFullColumnIfConst();
ColumnPtr key_column = key_col_with_type.column->convertToFullColumnIfConst();
DataTypePtr key_column_type = key_col_with_type.type;
const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_column_full).getColumnsCopy();
const auto & key_types = static_cast<const DataTypeTuple &>(*key_col_with_type.type).getElements();
size_t keys_size = dictionary->getStructure().getKeysSize();
if (!isTuple(key_column_type))
{
if (keys_size > 1)
{
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument of function {} must be tuple when dictionary is complex and key contains more than 1 attribute."
"Actual type {}.",
getName(),
key_col_with_type.type->getName());
}
else
{
Columns tuple_columns = {std::move(key_column)};
key_column = ColumnTuple::create(tuple_columns);
DataTypes tuple_types = {key_column_type};
key_column_type = std::make_shared<DataTypeTuple>(tuple_types);
}
}
const auto & key_columns = assert_cast<const ColumnTuple &>(*key_column).getColumnsCopy();
const auto & key_types = assert_cast<const DataTypeTuple &>(*key_column_type).getElements();
result = executeDictionaryRequest(
dictionary,


@ -358,6 +358,10 @@ public:
*/
virtual bool useDefaultImplementationForConstants() const { return false; }
/** Some arguments could remain constant during this implementation.
*/
virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; }
/** If function arguments has single low cardinality column and all other arguments are constants, call function on nested column.
* Otherwise, convert all low cardinality columns to ordinary columns.
* Returns ColumnLowCardinality if at least one argument is ColumnLowCardinality.
@ -367,10 +371,6 @@ public:
/// If it isn't, will convert all ColumnLowCardinality arguments to full columns.
virtual bool canBeExecutedOnLowCardinalityDictionary() const { return true; }
/** Some arguments could remain constant during this implementation.
*/
virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; }
/** True if function can be called on default arguments (include Nullable's) and won't throw.
* Counterexample: modulo(0, 0)
*/


@ -6,6 +6,7 @@
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
#include <Common/typeid_cast.h>
#include <Common/NaNUtils.h>
#include <Common/SipHash.h>
#include <common/range.h>
@ -40,6 +41,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
@ -304,6 +306,13 @@ void PointInPolygonWithGrid<CoordinateType>::calcGridAttributes(
y_scale = 1 / cell_height;
x_shift = -min_corner.x();
y_shift = -min_corner.y();
if (!(isFinite(x_scale)
&& isFinite(y_scale)
&& isFinite(x_shift)
&& isFinite(y_shift)
&& isFinite(grid_size)))
throw Exception("Polygon is not valid: bounding box is unbounded", ErrorCodes::BAD_ARGUMENTS);
}
template <typename CoordinateType>
@ -358,7 +367,7 @@ bool PointInPolygonWithGrid<CoordinateType>::contains(CoordinateType x, Coordina
if (has_empty_bound)
return false;
if (std::isnan(x) || std::isnan(y))
if (!isFinite(x) || !isFinite(y))
return false;
CoordinateType float_row = (y + y_shift) * y_scale;


@ -41,6 +41,9 @@ public:
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!isString(arguments[0].type))
@ -65,9 +68,7 @@ public:
const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue<String>());
/// FIXME: convertToFullColumnIfConst() is suboptimal
auto column = arguments[0].column->convertToFullColumnIfConst();
if (const ColumnString * col = checkAndGetColumn<ColumnString>(*column))
if (const ColumnString * col = checkAndGetColumn<ColumnString>(*arguments[0].column))
{
auto col_res = ColumnString::create();
vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());


@ -12,6 +12,7 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnMap.h>
#include <Common/typeid_cast.h>
@ -110,6 +111,9 @@ private:
static bool matchKeyToIndexString(const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs);
static bool matchKeyToIndexFixedString(const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs);
static bool matchKeyToIndexStringConst(const IColumn & data, const Offsets & offsets,
const Field & index, PaddedPODArray<UInt64> & matched_idxs);
@ -767,6 +771,19 @@ struct MatcherString
}
};
struct MatcherFixedString
{
const ColumnFixedString & data;
const ColumnFixedString & index;
bool match(size_t row_data, size_t row_index) const
{
auto data_ref = data.getDataAt(row_data);
auto index_ref = index.getDataAt(row_index);
return memequalSmallAllowOverflow15(index_ref.data, index_ref.size, data_ref.data, data_ref.size);
}
};
struct MatcherStringConst
{
const ColumnString & data;
@ -863,6 +880,23 @@ bool FunctionArrayElement::matchKeyToIndexString(
return true;
}
bool FunctionArrayElement::matchKeyToIndexFixedString(
const IColumn & data, const Offsets & offsets,
const ColumnsWithTypeAndName & arguments, PaddedPODArray<UInt64> & matched_idxs)
{
const auto * index_string = checkAndGetColumn<ColumnFixedString>(arguments[1].column.get());
if (!index_string)
return false;
const auto * data_string = checkAndGetColumn<ColumnFixedString>(&data);
if (!data_string)
return false;
MatcherFixedString matcher{*data_string, *index_string};
executeMatchKeyToIndex(offsets, matched_idxs, matcher);
return true;
}
template <typename DataType>
bool FunctionArrayElement::matchKeyToIndexNumberConst(
const IColumn & data, const Offsets & offsets,
@ -922,8 +956,10 @@ bool FunctionArrayElement::matchKeyToIndex(
|| matchKeyToIndexNumber<Int64>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<Int128>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<Int256>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<UInt256>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexNumber<UUID>(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexString(data, offsets, arguments, matched_idxs);
|| matchKeyToIndexString(data, offsets, arguments, matched_idxs)
|| matchKeyToIndexFixedString(data, offsets, arguments, matched_idxs);
}
bool FunctionArrayElement::matchKeyToIndexConst(


@ -21,6 +21,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int INCORRECT_DATA;
}
namespace
@ -79,11 +80,14 @@ public:
const double lat = col_lat->getFloat64(row);
const UInt8 res = col_res->getUInt(row);
GeoCoord coord;
coord.lon = degsToRads(lon);
LatLng coord;
coord.lng = degsToRads(lon);
coord.lat = degsToRads(lat);
H3Index hindex = geoToH3(&coord, res);
H3Index hindex;
H3Error err = latLngToCell(&coord, res, &hindex);
if (err)
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect coordinates latitude: {}, longitude: {}, error: {}", coord.lat, coord.lng, err);
dst_data[row] = hindex;
}


@ -66,7 +66,7 @@ public:
+ " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
// Numerical constant is 180 degrees / pi / Earth radius, Earth radius is from h3 sources
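// Sanity check of the constant (assuming h3's Earth radius of 6371.007180918475 km):
// 180 / (pi * 6371007.180918475 m) ≈ 8.99320592e-6, matching the value below.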
Float64 res = 8.99320592271288084e-6 * edgeLengthM(resolution);
Float64 res = 8.99320592271288084e-6 * getHexagonEdgeLengthAvgM(resolution);
dst_data[row] = res;
}


@ -70,7 +70,7 @@ public:
throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName()
+ " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
Float64 res = edgeLengthM(resolution);
Float64 res = getHexagonEdgeLengthAvgM(resolution);
dst_data[row] = res;
}


@ -59,7 +59,7 @@ public:
{
const UInt64 hindex = col_hindex->getUInt(row);
UInt8 res = h3GetBaseCell(hindex);
UInt8 res = getBaseCellNumber(hindex);
dst_data[row] = res;
}


@ -59,7 +59,7 @@ public:
{
const UInt64 hindex = col_hindex->getUInt(row);
UInt8 res = h3GetResolution(hindex);
UInt8 res = getResolution(hindex);
dst_data[row] = res;
}


@ -65,7 +65,7 @@ public:
throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName()
+ " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
Float64 res = hexAreaM2(resolution);
Float64 res = getHexagonAreaAvgM2(resolution);
dst_data[row] = res;
}


@ -67,7 +67,7 @@ public:
const UInt64 hindex_origin = col_hindex_origin->getUInt(row);
const UInt64 hindex_dest = col_hindex_dest->getUInt(row);
UInt8 res = h3IndexesAreNeighbors(hindex_origin, hindex_dest);
UInt8 res = areNeighborCells(hindex_origin, hindex_dest);
dst_data[row] = res;
}


@ -59,7 +59,7 @@ public:
{
const UInt64 hindex = col_hindex->getUInt(row);
UInt8 is_valid = h3IsValid(hindex) == 0 ? 0 : 1;
UInt8 is_valid = isValidCell(hindex) == 0 ? 0 : 1;
dst_data[row] = is_valid;
}


@ -84,14 +84,14 @@ public:
throw Exception("The argument 'resolution' (" + toString(child_resolution) + ") of function " + getName()
+ " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
const size_t vec_size = maxH3ToChildrenSize(parent_hindex, child_resolution);
const size_t vec_size = cellToChildrenSize(parent_hindex, child_resolution);
if (vec_size > MAX_ARRAY_SIZE)
throw Exception("The result of function" + getName()
+ " (array of " + toString(vec_size) + " elements) will be too large with resolution argument = "
+ toString(child_resolution), ErrorCodes::TOO_LARGE_ARRAY_SIZE);
hindex_vec.resize(vec_size);
h3ToChildren(parent_hindex, child_resolution, hindex_vec.data());
cellToChildren(parent_hindex, child_resolution, hindex_vec.data());
dst_data.reserve(dst_data.size() + vec_size);
for (auto hindex : hindex_vec)


@ -74,7 +74,7 @@ public:
throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName()
+ " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
UInt64 res = h3ToParent(hindex, resolution);
UInt64 res = cellToParent(hindex, resolution);
dst_data[row] = res;
}


@ -66,7 +66,7 @@ public:
{
const UInt64 hindex = col_hindex->getUInt(i);
if (!h3IsValid(hindex))
if (!isValidCell(hindex))
{
throw Exception("Invalid H3 index: " + std::to_string(hindex), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}


@ -77,7 +77,7 @@ public:
const H3Index origin_hindex = col_hindex->getUInt(row);
const int k = col_k->getInt(row);
/// Overflow is possible. The function maxKringSize does not check for overflow.
/// Overflow is possible. The function maxGridDiskSize does not check for overflow.
/// The calculation is similar to square of k but several times more.
/// Let's use a huge underestimation as the safe bound. We should not allow generating too large arrays nevertheless.
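/// For reference, the exact count for a grid disk of radius k is 3*k*(k+1) + 1 cells
/// (1 center cell plus rings of 6*i cells for i = 1..k), so it grows quadratically in k.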
constexpr auto max_k = 10000;
@ -86,9 +86,9 @@ public:
if (k < 0)
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Argument 'k' for {} function must be non negative", getName());
const auto vec_size = maxKringSize(k);
const auto vec_size = maxGridDiskSize(k);
hindex_vec.resize(vec_size);
kRing(origin_hindex, k, hindex_vec.data());
gridDisk(origin_hindex, k, hindex_vec.data());
dst_data.reserve(dst_data.size() + vec_size);
for (auto hindex : hindex_vec)


@ -89,6 +89,9 @@ namespace
}
/// Not necessary, but good for performance.
/// We repeat `pad_string` multiple times until its length becomes 16 or more.
/// It speeds up the function appendTo() because it allows to copy padding characters by portions of at least
/// 16 bytes instead of single bytes.
while (numCharsInPadString() < 16)
{
pad_string += pad_string;
@ -104,6 +107,12 @@ namespace
}
String pad_string;
/// Offsets of code points in `pad_string`:
/// utf8_offsets[0] is the offset of the first code point in `pad_string`, it's always 0;
/// utf8_offsets[1] is the offset of the second code point in `pad_string`;
/// utf8_offsets[2] is the offset of the third code point in `pad_string`;
/// ...
std::vector<size_t> utf8_offsets;
};
@ -243,30 +252,32 @@ namespace
const PaddingChars<is_utf8> & padding_chars,
StringSink & res_sink) const
{
bool is_const_length = lengths.isConst();
bool need_check_length = true;
bool is_const_new_length = lengths.isConst();
size_t new_length = 0;
/// Insert padding characters to each string from `strings`, write the result strings into `res_sink`.
/// If for some input string its current length is greater than the specified new length then that string
/// will be trimmed to the specified new length instead of padding.
for (; !res_sink.isEnd(); res_sink.next(), strings.next(), lengths.next())
{
auto str = strings.getWhole();
size_t current_length = getLengthOfSlice<is_utf8>(str);
auto new_length_slice = lengths.getWhole();
size_t new_length = new_length_slice.elements->getUInt(new_length_slice.position);
if (need_check_length)
if (!res_sink.rowNum() || !is_const_new_length)
{
/// If `is_const_new_length` is true we can get and check the new length only once.
auto new_length_slice = lengths.getWhole();
new_length = new_length_slice.elements->getUInt(new_length_slice.position);
if (new_length > MAX_NEW_LENGTH)
{
throw Exception(
"New padded length (" + std::to_string(new_length) + ") is too big, maximum is: " + std::to_string(MAX_NEW_LENGTH),
ErrorCodes::TOO_LARGE_STRING_SIZE);
}
if (is_const_length)
if (is_const_new_length)
{
size_t rows_count = res_sink.offsets.size();
res_sink.reserve((new_length + 1 /* zero terminator */) * rows_count);
need_check_length = false;
}
}


@ -4,7 +4,7 @@ OWNER(g:clickhouse)
LIBRARY()
CFLAGS(
-DUSE_H3 -DUSE_SSL -DUSE_XXHASH
-DUSE_SSL -DUSE_XXHASH
)
ADDINCL(


@ -3,7 +3,7 @@ OWNER(g:clickhouse)
LIBRARY()
CFLAGS(
-DUSE_H3 -DUSE_SSL -DUSE_XXHASH
-DUSE_SSL -DUSE_XXHASH
)
ADDINCL(


@ -686,7 +686,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
ASTs columns;
size_t tid = 0;
for (const auto & name : tuple_type->getElementNames())
for (const auto & name [[maybe_unused]] : tuple_type->getElementNames())
{
auto tuple_ast = function->arguments->children[0];
if (tid != 0)
@ -697,11 +697,6 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
auto func = makeASTFunction("tupleElement", tuple_ast, literal);
if (tuple_type->haveExplicitNames())
func->setAlias(name);
else
func->setAlias(data.getUniqueName("_ut_" + name));
auto function_builder = FunctionFactory::instance().get(func->name, data.getContext());
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName(data.getContext()->getSettingsRef())}, func->getColumnName(data.getContext()->getSettingsRef()));


@ -948,7 +948,8 @@ void executeQuery(
WriteBuffer & ostr,
bool allow_into_outfile,
ContextMutablePtr context,
std::function<void(const String &, const String &, const String &, const String &)> set_result_details)
std::function<void(const String &, const String &, const String &, const String &)> set_result_details,
std::function<void()> before_finalize_callback)
{
PODArray<char> parse_buf;
const char * begin;
@ -1079,6 +1080,8 @@ void executeQuery(
out->onProgress(progress);
});
out->setBeforeFinalizeCallback(before_finalize_callback);
if (set_result_details)
set_result_details(
context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone());


@ -17,7 +17,8 @@ void executeQuery(
WriteBuffer & ostr, /// Where to write query output to.
bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file.
ContextMutablePtr context, /// DB, tables, data types, storage engines, functions, aggregate functions...
std::function<void(const String &, const String &, const String &, const String &)> set_result_details /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone.
std::function<void(const String &, const String &, const String &, const String &)> set_result_details, /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone.
std::function<void()> before_finalize_callback = {} /// Will be set in output format to be called before finalize.
);


@ -76,6 +76,9 @@ void IOutputFormat::work()
if (rows_before_limit_counter && rows_before_limit_counter->hasAppliedLimit())
setRowsBeforeLimit(rows_before_limit_counter->get());
if (before_finalize_callback)
before_finalize_callback();
finalize();
finalized = true;
return;
@ -117,4 +120,3 @@ void IOutputFormat::write(const Block & block)
}
}


@ -67,6 +67,9 @@ public:
/// Passed value are delta, that must be summarized.
virtual void onProgress(const Progress & /*progress*/) {}
/// Set callback, which will be called before call to finalize().
void setBeforeFinalizeCallback(std::function<void()> callback) { before_finalize_callback = callback; }
/// Content-Type to set when sending HTTP response.
virtual std::string getContentType() const { return "text/plain; charset=UTF-8"; }
@ -91,6 +94,7 @@ private:
size_t result_bytes = 0;
bool prefix_written = false;
std::function<void()> before_finalize_callback;
};
}


@ -103,7 +103,7 @@ void printPipelineCompact(const Processors & processors, WriteBuffer & out, bool
out << "digraph\n{\n";
out << " rankdir=\"LR\";\n";
out << " { node [shape = box]\n";
out << " { node [shape = rect]\n";
/// Nodes // TODO quoting and escaping
size_t next_step = 0;


@ -16,7 +16,7 @@ void printPipeline(const Processors & processors, const Statuses & statuses, Wri
{
out << "digraph\n{\n";
out << " rankdir=\"LR\";\n";
out << " { node [shape = box]\n";
out << " { node [shape = rect]\n";
auto get_proc_id = [](const IProcessor & proc) -> UInt64
{


@ -4,7 +4,7 @@
#include <IO/WriteHelpers.h>
#include <Common/StatusInfo.h>
#include <boost/algorithm/string/replace.hpp>
#include <regex>
namespace
{
@ -24,9 +24,13 @@ void writeOutLine(DB::WriteBuffer & wb, T && val, TArgs &&... args)
writeOutLine(wb, std::forward<TArgs>(args)...);
}
void replaceInvalidChars(std::string & metric_name)
/// Returns false if name is not valid
bool replaceInvalidChars(std::string & metric_name)
{
std::replace(metric_name.begin(), metric_name.end(), '.', '_');
/// dirty solution
metric_name = std::regex_replace(metric_name, std::regex("[^a-zA-Z0-9_:]"), "_");
metric_name = std::regex_replace(metric_name, std::regex("^[^a-zA-Z]*"), "");
return !metric_name.empty();
}
}
@ -57,7 +61,8 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const
std::string metric_name{ProfileEvents::getName(static_cast<ProfileEvents::Event>(i))};
std::string metric_doc{ProfileEvents::getDocumentation(static_cast<ProfileEvents::Event>(i))};
replaceInvalidChars(metric_name);
if (!replaceInvalidChars(metric_name))
continue;
std::string key{profile_events_prefix + metric_name};
writeOutLine(wb, "# HELP", key, metric_doc);
@ -75,7 +80,8 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const
std::string metric_name{CurrentMetrics::getName(static_cast<CurrentMetrics::Metric>(i))};
std::string metric_doc{CurrentMetrics::getDocumentation(static_cast<CurrentMetrics::Metric>(i))};
replaceInvalidChars(metric_name);
if (!replaceInvalidChars(metric_name))
continue;
std::string key{current_metrics_prefix + metric_name};
writeOutLine(wb, "# HELP", key, metric_doc);
@ -91,7 +97,8 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const
{
std::string key{asynchronous_metrics_prefix + name_value.first};
replaceInvalidChars(key);
if (!replaceInvalidChars(key))
continue;
auto value = name_value.second;
// TODO: add HELP section? asynchronous_metrics contains only key and value
@ -108,7 +115,8 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const
std::string metric_name{CurrentStatusInfo::getName(static_cast<CurrentStatusInfo::Status>(i))};
std::string metric_doc{CurrentStatusInfo::getDocumentation(static_cast<CurrentStatusInfo::Status>(i))};
replaceInvalidChars(metric_name);
if (!replaceInvalidChars(metric_name))
continue;
std::string key{current_status_prefix + metric_name};
writeOutLine(wb, "# HELP", key, metric_doc);


@ -95,6 +95,7 @@ const char * auto_contributors[] {
"Anatoly Pugachev",
"ana-uvarova",
"AnaUvarova",
"Andreas Hunkeler",
"AndreevDm",
"Andrei Bodrov",
"Andrei Chulkov",
@ -280,6 +281,7 @@ const char * auto_contributors[] {
"Dongdong Yang",
"DoomzD",
"Dr. Strange Looker",
"d.v.semenov",
"eaxdev",
"eejoin",
"egatov",
@ -290,6 +292,7 @@ const char * auto_contributors[] {
"Eldar Zaitov",
"Elena Baskakova",
"elenaspb2019",
"elevankoff",
"Elghazal Ahmed",
"Elizaveta Mironyuk",
"emakarov",
@ -434,6 +437,7 @@ const char * auto_contributors[] {
"Ivan Starkov",
"ivanzhukov",
"Ivan Zhukov",
"Jack Song",
"JackyWoo",
"Jacob Hayes",
"jakalletti",
@ -476,6 +480,7 @@ const char * auto_contributors[] {
"Konstantin Lebedev",
"Konstantin Malanchev",
"Konstantin Podshumok",
"Konstantin Rudenskii",
"Korenevskiy Denis",
"Korviakov Andrey",
"koshachy",
@ -488,6 +493,7 @@ const char * auto_contributors[] {
"kshvakov",
"kssenii",
"l",
"l1tsolaiki",
"lalex",
"Latysheva Alexandra",
"lehasm",
@ -515,6 +521,7 @@ const char * auto_contributors[] {
"long2ice",
"Lopatin Konstantin",
"Loud_Scream",
"ltybc-coder",
"luc1ph3r",
"Lucid Dreams",
"Luis Bosque",
@ -633,6 +640,7 @@ const char * auto_contributors[] {
"nicelulu",
"Nickita",
"Nickolay Yastrebov",
"nickzhwang",
"Nicolae Vartolomei",
"Nico Mandery",
"Nico Piderman",
@ -871,6 +879,7 @@ const char * auto_contributors[] {
"Veselkov Konstantin",
"vic",
"vicdashkov",
"Victor",
"Victor Tarnavsky",
"Viktor Taranenko",
"vinity",
@ -947,6 +956,7 @@ const char * auto_contributors[] {
"Yuriy Korzhenevskiy",
"Yury Karpovich",
"Yury Stankevich",
"ywill3",
"zamulla",
"zhang2014",
"zhangshengyu",
@ -957,11 +967,13 @@ const char * auto_contributors[] {
"Zhichun Wu",
"Zhipeng",
"zhukai",
"Zijie Lu",
"zlx19950903",
"Zoran Pandovski",
"zvonand",
"zvrr",
"zvvr",
"zxc111",
"zzsmdfj",
"Артем Стрельцов",
"Владислав Тихонов",
@ -980,6 +992,7 @@ const char * auto_contributors[] {
"张风啸",
"徐炘",
"曲正鹏",
"未来星___费",
"极客青年",
"谢磊",
"贾顺名(Jarvis)",


@ -153,11 +153,6 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
if (arg_num < args.size())
throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
/// ExpressionAnalyzer will be created in InterpreterSelectQuery that will meet these `Identifier` when processing the request.
/// We need to mark them as the name of the database or table, because the default value is column.
for (auto ast : args)
setIdentifierSpecial(ast);
if (!cluster_name.empty())
{
/// Use an existing cluster from the main config


@ -10,5 +10,8 @@
<profile>default</profile>
<quota>default</quota>
</default>
<include incl="users_1" />
<include incl="users_2" />
</users>
</yandex>


@ -1,5 +1,5 @@
<yandex>
<include_from>/etc/clickhouse-server/config.d/max_query_size.xml</include_from>
<include_from>/etc/clickhouse-server/config.d/include_from_source.xml</include_from>
<profiles>
<default>
<max_query_size incl="mqs" />
@ -11,5 +11,8 @@
<profile>default</profile>
<quota>default</quota>
</default>
<include incl="users_1" />
<include incl="users_2" />
</users>
</yandex>


@ -11,5 +11,7 @@
<profile>default</profile>
<quota>default</quota>
</default>
<include incl="node_does_not_exist" />
</users>
</yandex>


@ -10,5 +10,8 @@
<profile>default</profile>
<quota>default</quota>
</default>
<include from_zk="/users_from_zk_1" />
<include from_zk="/users_from_zk_2" />
</users>
</yandex>


@ -0,0 +1,17 @@
<yandex>
<mqs>99999</mqs>
<users_1>
<user_1>
<password></password>
<profile>default</profile>
</user_1>
</users_1>
<users_2>
<user_2>
<password></password>
<profile>default</profile>
</user_2>
</users_2>
</yandex>


@ -1,3 +0,0 @@
<yandex>
<mqs>99999</mqs>
</yandex>


@ -8,11 +8,11 @@ node2 = cluster.add_instance('node2', user_configs=['configs/config_env.xml'],
env_variables={"MAX_QUERY_SIZE": "55555"})
node3 = cluster.add_instance('node3', user_configs=['configs/config_zk.xml'], with_zookeeper=True)
node4 = cluster.add_instance('node4', user_configs=['configs/config_incl.xml'],
main_configs=['configs/max_query_size.xml']) # include value 77777
main_configs=['configs/include_from_source.xml']) # include value 77777
node5 = cluster.add_instance('node5', user_configs=['configs/config_allow_databases.xml'])
node6 = cluster.add_instance('node6', user_configs=['configs/config_include_from_env.xml'],
env_variables={"INCLUDE_FROM_ENV": "/etc/clickhouse-server/config.d/max_query_size.xml"},
main_configs=['configs/max_query_size.xml'])
env_variables={"INCLUDE_FROM_ENV": "/etc/clickhouse-server/config.d/include_from_source.xml"},
main_configs=['configs/include_from_source.xml'])
@pytest.fixture(scope="module")
@ -20,6 +20,8 @@ def start_cluster():
try:
def create_zk_roots(zk):
zk.create(path="/setting/max_query_size", value=b"77777", makepath=True)
zk.create(path="/users_from_zk_1", value=b"<user_1><password></password><profile>default</profile></user_1>", makepath=True)
zk.create(path="/users_from_zk_2", value=b"<user_2><password></password><profile>default</profile></user_2>", makepath=True)
cluster.add_zookeeper_startup_command(create_zk_roots)
@ -37,6 +39,18 @@ def test_config(start_cluster):
assert node6.query("select value from system.settings where name = 'max_query_size'") == "99999\n"
def test_include_config(start_cluster):
# <include incl="source tag" />
assert node4.query("select 1")
assert node4.query("select 1", user="user_1")
assert node4.query("select 1", user="user_2")
# <include from_zk="zk path" />
assert node3.query("select 1")
assert node3.query("select 1", user="user_1")
assert node3.query("select 1", user="user_2")
def test_allow_databases(start_cluster):
node5.query("CREATE DATABASE db1")
node5.query(


@ -2,7 +2,7 @@
hello 1 3 world
9
9 (0,1)
key v1 v2 v3 v4 v5
key tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 1) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 2) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 3) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 4) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 5)
4 10 20 10 20 30
3 70 20 10 20 30
2 11 20 10 20 30

View File

@ -22,3 +22,9 @@ foobar.com
foobar.com
foobar.com
xx.blogspot.co.at
-- www
www.foo
foo
-- vector
xx.blogspot.co.at

View File

@ -29,3 +29,11 @@ select cutToFirstSignificantSubdomainCustom('http://foobar.com', 'public_suffix_
select cutToFirstSignificantSubdomainCustom('http://foobar.com/foo', 'public_suffix_list');
select cutToFirstSignificantSubdomainCustom('http://bar.foobar.com/foo', 'public_suffix_list');
select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at', 'public_suffix_list');
select '-- www';
select cutToFirstSignificantSubdomainCustomWithWWW('http://www.foo', 'public_suffix_list');
select cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_list');
select '-- vector';
select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1);
select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1);
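
As the reference output above shows, the WithWWW variant keeps the leading www label while the plain variant strips it. A minimal sketch of the contrast, assuming a custom TLD list named public_suffix_list is declared in the server config:

-- expected results taken from the reference file above
SELECT cutToFirstSignificantSubdomainCustomWithWWW('http://www.foo', 'public_suffix_list');  -- www.foo
SELECT cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_list');         -- foo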

View File

@ -1 +1 @@
select _ut_1 from (select untuple((1,2)));
select * from (select untuple((1,2)));

View File

@ -1 +1,6 @@
yyy
1
1
1
1
1
1

View File

@ -1,9 +1,14 @@
drop table if exists join_tbl;
DROP TABLE IF EXISTS join_tbl;
create table join_tbl (`id` String, `name` String) engine Join(any, left, id);
CREATE TABLE join_tbl (`id` String, `name` String, lcname LowCardinality(String)) ENGINE = Join(any, left, id);
insert into join_tbl values ('xxx', 'yyy');
INSERT INTO join_tbl VALUES ('xxx', 'yyy', 'yyy');
select joinGet('join_tbl', 'name', toLowCardinality('xxx'));
SELECT joinGet('join_tbl', 'name', 'xxx') == 'yyy';
SELECT joinGet('join_tbl', 'name', toLowCardinality('xxx')) == 'yyy';
SELECT joinGet('join_tbl', 'name', toLowCardinality(materialize('xxx'))) == 'yyy';
SELECT joinGet('join_tbl', 'lcname', 'xxx') == 'yyy';
SELECT joinGet('join_tbl', 'lcname', toLowCardinality('xxx')) == 'yyy';
SELECT joinGet('join_tbl', 'lcname', toLowCardinality(materialize('xxx'))) == 'yyy';
drop table if exists join_tbl;
DROP TABLE IF EXISTS join_tbl;
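
The rewritten test asserts that joinGet resolves plain String and LowCardinality(String) keys alike, whether constant or materialized, against a Join-engine table. A minimal standalone sketch of the same pattern; the table name jg_demo is hypothetical:

-- hypothetical sketch of the joinGet / LowCardinality pattern exercised above
CREATE TABLE jg_demo (id String, name String) ENGINE = Join(any, left, id);
INSERT INTO jg_demo VALUES ('xxx', 'yyy');
SELECT joinGet('jg_demo', 'name', 'xxx');                                 -- yyy
SELECT joinGet('jg_demo', 'name', toLowCardinality(materialize('xxx')));  -- yyy
DROP TABLE jg_demo;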

View File

@ -0,0 +1,2 @@
b
{'1':1} 1 0

View File

@ -0,0 +1,12 @@
DROP TABLE IF EXISTS map_lc;
SET allow_experimental_map_type = 1;
CREATE TABLE map_lc
(
`kv` Map(LowCardinality(String), LowCardinality(String))
)
ENGINE = Memory;
INSERT INTO map_lc select map('a', 'b');
SELECT kv['a'] FROM map_lc;
DROP TABLE map_lc;
SELECT map(toFixedString('1',1),1) AS m, m[toFixedString('1',1)],m[toFixedString('1',2)];
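
Per the reference output above ({'1':1} 1 0), a FixedString key only matches a map element of the same length; the lookup with toFixedString('1', 2) misses and falls back to the default value. A minimal sketch of just that access:

-- assumed behavior, taken from the reference output above: length mismatch returns the default
SELECT map(toFixedString('1', 1), 1)[toFixedString('1', 2)];  -- 0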

View File

@ -33,3 +33,7 @@
1
1
1
1
1
2D000000000000000A
001011010000000000000000000000000000000000000000000000000000000000001010

View File

@ -37,3 +37,9 @@ select bin(unbin('0')) == '00000000';
select hex('') == bin('');
select unhex('') == unbin('');
select unhex('0') == unbin('0');
-- hex and bin support AggregateFunction
select hex(sumState(number)) == hex(toString(sumState(number))) from numbers(10);
select hex(avgState(number)) == hex(toString(avgState(number))) from numbers(99);
select hex(avgState(number)) from numbers(10);
select bin(avgState(number)) from numbers(10);
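
The new cases assert that hex() and bin() applied directly to an aggregate function state equal hex()/bin() of its string serialization. A minimal sketch of the equivalence being tested:

-- both sides hex the serialized sum state, so the comparison yields 1
SELECT hex(sumState(number)) == hex(toString(sumState(number))) FROM numbers(10);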

View File

@ -0,0 +1,15 @@
-- regression test for the following query:
--
-- select * from remote('127.1', system.one, dummy)
--
-- that produced the following error before:
--
-- Unknown column: dummy, there are only columns .
--
-- NOTE: wrapping the column into any function worked even before the fix.
select * from remote('127.1', system.one, dummy) format Null;
select * from remote('127.1', system.one, identity(dummy)) format Null;
select * from remote('127.1', view(select * from system.one), identity(dummy)) format Null;
select * from remote('127.{1,2}', view(select * from system.one), identity(dummy)) format Null;
select * from remote('127.1', view(select * from system.one), dummy) format Null;
select * from remote('127.{1,2}', view(select * from system.one), dummy) format Null;

View File

@ -0,0 +1 @@
foo.com

View File

@ -0,0 +1,2 @@
select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('foo.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1;
select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('bar.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1;

View File

@ -0,0 +1,2 @@
SET validate_polygons = 0;
SELECT pointInPolygon((-inf, 1023), [(10.000100135803223, 10000000000.), (inf, 0.9998999834060669), (1.1920928955078125e-7, 100.0000991821289), (1.000100016593933, 100.0000991821289)]);
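
Disabling validate_polygons lets the degenerate polygon above through without an exception. For contrast, a minimal hypothetical sketch of a well-formed call with validation left at its default:

-- the unit square contains (0.5, 0.5), so this returns 1
SELECT pointInPolygon((0.5, 0.5), [(0., 0.), (1., 0.), (1., 1.), (0., 1.)]);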

View File

@ -0,0 +1,10 @@
dictGet
Value
Value
Value
Value
dictHas
1
1
1
1

View File

@ -0,0 +1,26 @@
DROP TABLE IF EXISTS test_dictionary_source;
CREATE TABLE test_dictionary_source (key String, value String) ENGINE=TinyLog;
INSERT INTO test_dictionary_source VALUES ('Key', 'Value');
DROP DICTIONARY IF EXISTS test_dictionary;
CREATE DICTIONARY test_dictionary(key String, value String)
PRIMARY KEY key
LAYOUT(COMPLEX_KEY_HASHED())
SOURCE(CLICKHOUSE(TABLE 'test_dictionary_source'))
LIFETIME(0);
SELECT 'dictGet';
SELECT dictGet('test_dictionary', 'value', tuple('Key'));
SELECT dictGet('test_dictionary', 'value', tuple(materialize('Key')));
SELECT dictGet('test_dictionary', 'value', 'Key');
SELECT dictGet('test_dictionary', 'value', materialize('Key'));
SELECT 'dictHas';
SELECT dictHas('test_dictionary', tuple('Key'));
SELECT dictHas('test_dictionary', tuple(materialize('Key')));
SELECT dictHas('test_dictionary', 'Key');
SELECT dictHas('test_dictionary', materialize('Key'));
DROP DICTIONARY test_dictionary;
DROP TABLE test_dictionary_source;
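
The test demonstrates that a complex-key dictionary with a single key column now accepts the key either wrapped in a tuple or as a bare value, for both dictGet and dictHas. A minimal sketch of the two equivalent call shapes against the dictionary defined above:

-- both forms should return 'Value'
SELECT dictGet('test_dictionary', 'value', tuple('Key'));
SELECT dictGet('test_dictionary', 'value', 'Key');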

View File

@ -0,0 +1 @@
100.0000991821289 \N \N 1 1024 \N

View File

@ -0,0 +1 @@
SELECT untuple(tuple(100.0000991821289)), NULL, untuple((toDateTime(9223372036854775806, -1, NULL, NULL, toDateTime(NULL, NULL)), * EXCEPT b)), NULL FROM (SELECT 1 AS a, 1024, NULL AS b);

View File

@ -1,7 +1,10 @@
v21.7.2.7-stable 2021-07-09
v21.6.7.57-stable 2021-07-09
v21.6.6.51-stable 2021-07-02
v21.6.5.37-stable 2021-06-19
v21.6.4.26-stable 2021-06-11
v21.6.3.14-stable 2021-06-04
v21.5.9.4-stable 2021-07-10
v21.5.8.21-stable 2021-07-02
v21.5.7.9-stable 2021-06-22
v21.5.6.6-stable 2021-05-29
@ -11,6 +14,7 @@ v21.4.6.55-stable 2021-04-30
v21.4.5.46-stable 2021-04-24
v21.4.4.30-stable 2021-04-16
v21.4.3.21-stable 2021-04-12
v21.3.15.4-stable 2021-07-10
v21.3.14.1-lts 2021-07-01
v21.3.13.9-lts 2021-06-22
v21.3.12.2-lts 2021-05-25
