Merge branch 'master' into consistent_metadata4

alesapin 2020-06-10 20:20:46 +03:00
commit c21fac11cc
55 changed files with 1421 additions and 353 deletions

.gitmodules (vendored, 8 lines changed)

@ -157,6 +157,14 @@
[submodule "contrib/openldap"]
path = contrib/openldap
url = https://github.com/openldap/openldap.git
[submodule "contrib/cassandra"]
path = contrib/cassandra
url = https://github.com/ClickHouse-Extras/cpp-driver.git
branch = clickhouse
[submodule "contrib/libuv"]
path = contrib/libuv
url = https://github.com/ClickHouse-Extras/libuv.git
branch = clickhouse
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git


@ -360,6 +360,7 @@ include (cmake/find/fastops.cmake)
include (cmake/find/orc.cmake)
include (cmake/find/avro.cmake)
include (cmake/find/msgpack.cmake)
include (cmake/find/cassandra.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)


@ -15,5 +15,6 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse Online Meetup (in Russian)](https://events.yandex.ru/events/click-house-onlajn-vs-18-06-2020) on June 18, 2020.
* [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date.
* [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date.


@ -0,0 +1,26 @@
option(ENABLE_CASSANDRA "Enable Cassandra" ${ENABLE_LIBRARIES})
if (ENABLE_CASSANDRA)
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv")
message (ERROR "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive")
elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra")
message (ERROR "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive")
else()
set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv")
set (CASSANDRA_INCLUDE_DIR
"${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/")
if (USE_STATIC_LIBRARIES)
set (LIBUV_LIBRARY uv_a)
set (CASSANDRA_LIBRARY cassandra_static)
else()
set (LIBUV_LIBRARY uv)
set (CASSANDRA_LIBRARY cassandra)
endif()
set (USE_CASSANDRA 1)
set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra")
endif()
endif()
message (STATUS "Using cassandra=${USE_CASSANDRA}: ${CASSANDRA_INCLUDE_DIR} : ${CASSANDRA_LIBRARY}")
message (STATUS "Using libuv: ${LIBUV_ROOT_DIR} : ${LIBUV_LIBRARY}")


@ -295,4 +295,10 @@ if (USE_FASTOPS)
add_subdirectory (fastops-cmake)
endif()
if (USE_CASSANDRA)
add_subdirectory (libuv)
add_subdirectory (cassandra)
endif()
add_subdirectory (fmtlib-cmake)

contrib/aws (vendored, 2 lines changed)

@ -1 +1 @@
Subproject commit f7d9ce39f41323300044567be007c233338bb94a
Subproject commit 17e10c0fc77f22afe890fa6d1b283760e5edaa56

contrib/cassandra (vendored submodule, 1 line changed)

@ -0,0 +1 @@
Subproject commit a49b4e0e2696a4b8ef286a5b9538d1cbe8490509

contrib/libuv (vendored submodule, 1 line changed)

@ -0,0 +1 @@
Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28


@ -94,7 +94,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay)
if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
if ! wget --spider --quiet --prefer-family=IPv6 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi


@ -0,0 +1,7 @@
version: '2.3'
services:
cassandra1:
image: cassandra
restart: always
ports:
- 9043:9042


@ -347,9 +347,11 @@ create table query_metric_stats engine File(TSVWithNamesAndTypes,
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
-- immediately, so for now we pretend they don't exist. We don't want to
-- remove them altogether because we want to be able to detect regressions,
-- but the right way to do this is not yet clear.
-- immediately (on the same order of time as noise). We compute average
-- run time between old and new version, and if it is below a threshold,
-- we just skip the query. If there is a significant regression, the
-- average will be above threshold, we'll process it normally and will
-- detect the regression.
(left + right) / 2 < 0.02 as short,
not short and abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
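
For reference, the skip rule described in the comment above boils down to a single threshold check on the averaged run time. A minimal C++ sketch of the same classification (the 0.02 s constant comes from the expression above; the struct and field names here are hypothetical):

```cpp
#include <cmath>

// Mirrors the report query above: queries whose average run time across the old
// and new server is below 0.02 s are marked "short" and skipped; the rest are
// flagged as changed when the relative difference exceeds both thresholds.
struct QueryTimes
{
    double left;            // average run time on the old server, seconds
    double right;           // average run time on the new server, seconds
    double diff;            // relative difference between the two versions
    double stat_threshold;  // noise level estimated from repeated runs
};

bool isShort(const QueryTimes & q)
{
    return (q.left + q.right) / 2 < 0.02;
}

bool isChangedFail(const QueryTimes & q, double report_threshold)
{
    return !isShort(q) && std::abs(q.diff) > report_threshold && std::abs(q.diff) > q.stat_threshold;
}
```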
@ -409,11 +411,11 @@ create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json')
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query_display_name
select left, right, diff, stat_threshold, changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query_display_name
select left, right, diff, stat_threshold, unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
@ -421,9 +423,39 @@ create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
select test, query_index from queries where unstable_show or changed_show
;
create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
select test, sum(unstable_fail) u, sum(changed_fail) c, u + c s from queries
group by test having s > 0 order by s desc;
create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.tsv') as
select test, queries, average_time_change from (
select test, count(*) queries,
sum(left) as left, sum(right) as right,
(right - left) / right average_time_change
from queries
group by test
order by abs(average_time_change) desc
)
;
create table unstable_tests_tsv engine File(TSV, 'report/unstable-tests.tsv') as
select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed
from queries
group by test
order by total_unstable + total_changed desc
;
create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.tsv') as
select test,
queries,
coalesce(total_unstable, 0) total_unstable,
coalesce(total_changed, 0) total_changed,
total_unstable + total_changed total_bad,
coalesce(toString(floor(average_time_change, 3)), '??') average_time_change_str
from test_time_changes_tsv
full join unstable_tests_tsv
using test
where (abs(average_time_change) > 0.05 and queries > 5)
or (total_bad > 0)
order by total_bad desc, average_time_change desc
settings join_use_nulls = 1
;
create table query_time engine Memory as select *
from file('analyze/client-times.tsv', TSV,
@ -464,8 +496,8 @@ create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail,
left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_display_name
from queries order by test, query_display_name;
stat_threshold, test, query_index, query_display_name
from queries order by test, query_index;
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
@ -582,7 +614,7 @@ create table metric_devation engine File(TSVWithNamesAndTypes,
union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm
group by test, query_index, metric
having d > 0.5
having d > 0.5 and q[3] > 5
) metrics
left join query_display_names using (test, query_index)
order by test, query_index, d desc


@ -207,7 +207,8 @@ if args.report == 'main':
'p&nbsp;<&nbsp;0.001 threshold', # 3
# Failed # 4
'Test', # 5
'Query', # 6
'#', # 6
'Query', # 7
]
print(tableHeader(columns))
@ -248,7 +249,8 @@ if args.report == 'main':
'p&nbsp;<&nbsp;0.001 threshold', #3
# Failed #4
'Test', #5
'Query' #6
'#', #6
'Query' #7
]
print(tableStart('Unstable queries'))
@ -272,9 +274,9 @@ if args.report == 'main':
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
printSimpleTable('Tests with most unstable queries',
['Test', 'Unstable', 'Changed perf', 'Total not OK'],
tsvRows('report/bad-tests.tsv'))
printSimpleTable('Test performance changes',
['Test', 'Queries', 'Unstable', 'Changed perf', 'Total not OK', 'Avg relative time diff'],
tsvRows('report/test-perf-changes.tsv'))
def print_test_times():
global slow_average_tests
@ -391,7 +393,8 @@ elif args.report == 'all-queries':
'Times speedup / slowdown', #5
'p&nbsp;<&nbsp;0.001 threshold', #6
'Test', #7
'Query', #8
'#', #8
'Query', #9
]
print(tableStart('All query times'))


@ -10,7 +10,7 @@ Reading is automatically parallelized. During a read, the table indexes on remot
The Distributed engine accepts parameters:
- the cluster name in the servers config file
- the cluster name in the server's config file
- the name of a remote database
@ -31,13 +31,13 @@ Example:
Distributed(logs, default, hits[, sharding_key[, policy_name]])
```
Data will be read from all servers in the logs cluster, from the default.hits table located on every server in the cluster.
Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster.
Data is not only read but is partially processed on the remote servers (to the extent that this is possible).
For example, for a query with GROUP BY, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.
Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase().
logs The cluster name in the servers config file.
logs - The cluster name in the server's config file.
Clusters are set like this:
@ -75,15 +75,15 @@ Clusters are set like this:
</remote_servers>
```
Here a cluster is defined with the name logs that consists of two shards, each of which contains two replicas.
Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas.
Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards).
Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas).
Cluster names must not contain dots.
The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
- `host` The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesnt start. If you change the DNS record, restart the server.
- `port` The TCP port for messenger activity (tcp\_port in the config, usually set to 9000). Do not confuse it with http\_port.
- `host` - The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn't start. If you change the DNS record, restart the server.
- `port` - The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http\_port.
- `user` - Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `password` - The password for connecting to a remote server (not masked). Default value: empty string.
- `secure` - Use ssl for connection, usually you also should define `port` = 9440. Server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and have correct certificates.
@ -97,44 +97,44 @@ You can specify just one of the shards (in this case, query processing should be
You can specify as many clusters as you wish in the configuration.
To view your clusters, use the system.clusters table.
To view your clusters, use the `system.clusters` table.
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the clusters servers).
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster's servers).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you dont need to create a Distributed table use the remote table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don't need to create a Distributed table - use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
There are two methods for writing data to a cluster:
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table “looks at”. This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table "looks at". This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesnt mean anything in this case.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn't mean anything in this case.
Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19.
Each shard can have the internal\_replication parameter defined in the config file.
Each shard can have the `internal_replication` parameter defined in the config file.
If this parameter is set to true, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table "looks at" replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If it is set to false (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from prev\_weight to prev\_weights + weight, where prev\_weights is the total weight of the shards with the smallest number, and weight is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weight` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression rand() for random distribution of data, or UserID for distribution by the remainder from dividing the users ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user's ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
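
A minimal C++ sketch of the weighted shard selection rule described above (the weights and the sharding expression value are hypothetical inputs; this illustrates the rule, not ClickHouse's internal code):

```cpp
#include <cstdint>
#include <vector>

// Returns the shard index for a row: take the sharding expression value modulo the
// total weight, then find the shard whose half-interval
// [prev_weights, prev_weights + weight) contains that remainder.
size_t pickShard(const std::vector<uint64_t> & weights, uint64_t sharding_value)
{
    uint64_t total_weight = 0;
    for (uint64_t w : weights)
        total_weight += w;

    uint64_t remainder = sharding_value % total_weight;

    uint64_t prev_weights = 0;
    for (size_t i = 0; i < weights.size(); ++i)
    {
        if (remainder < prev_weights + weights[i])
            return i;
        prev_weights += weights[i];
    }
    return weights.size() - 1; // not reached when total_weight is consistent
}

// With weights {9, 10}: remainders 0..8 go to the first shard and 9..18 go to the
// second, matching the [0, 9) and [9, 19) ranges from the example above.
```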
A simple reminder from the division is a limited solution for sharding and isnt always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
A simple remainder from the division is a limited solution for sharding and isn't always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you dont have to transfer the old data to it. You can write new data with a heavier weight the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don't have to transfer the old data to it. You can write new data with a heavier weight - the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
You should be concerned about the sharding scheme in the following cases:
- Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as weve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we've done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into "layers", where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the broken subdirectory and no longer used.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
When the max\_parallel\_replicas option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
## Virtual Columns {#virtual-columns}


@ -625,4 +625,43 @@ Setting fields:
- `storage_type` - The structure of internal Redis storage used for working with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`.
- `db_index` - The specific numeric index of the Redis logical database. May be omitted, default value is 0.
### Cassandra {#dicts-external_dicts_dict_sources-cassandra}
Example of settings:
```xml
<source>
<cassandra>
<host>localhost</host>
<port>9042</port>
<user>username</user>
<password>qwerty123</password>
<keyspace>database_name</keyspace>
<column_family>table_name</column_family>
<allow_filtering>1</allow_filtering>
<partition_key_prefix>1</partition_key_prefix>
<consistency>One</consistency>
<where>"SomeColumn" = 42</where>
<max_threads>8</max_threads>
</cassandra>
</source>
```
Setting fields:
- `host` - The Cassandra host or a comma-separated list of hosts.
- `port` - The port on the Cassandra servers. If not specified, the default port is used.
- `user` - Name of the Cassandra user.
- `password` - Password of the Cassandra user.
- `keyspace` - Name of the keyspace (database).
- `column_family` - Name of the column family (table).
- `allow_filtering` - Flag that allows or disallows potentially expensive conditions on clustering key columns. Default value is 1.
- `partition_key_prefix` - Number of partition key columns in the primary key of the Cassandra table.
Required for composite key dictionaries. The order of key columns in the dictionary definition must be the same as in Cassandra.
Default value is 1 (the first key column is the partition key and the other key columns are clustering keys).
- `consistency` - Consistency level. Possible values: `One`, `Two`, `Three`,
`All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default is `One`.
- `where` - Optional selection criteria.
- `max_threads` - The maximum number of threads to use for loading data from multiple partitions in composite key dictionaries.
[Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_sources/) <!--hide-->


@ -352,6 +352,11 @@ if (USE_OPENCL)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${OpenCL_INCLUDE_DIRS})
endif ()
if (USE_CASSANDRA)
dbms_target_link_libraries(PUBLIC ${CASSANDRA_LIBRARY})
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR})
endif()
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_CONVERSION_INCLUDE_DIR})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR})


@ -495,6 +495,7 @@ namespace ErrorCodes
extern const int ALTER_OF_COLUMN_IS_FORBIDDEN = 524;
extern const int INCORRECT_DISK_INDEX = 525;
extern const int UNKNOWN_VOLUME_TYPE = 526;
extern const int CASSANDRA_INTERNAL_ERROR = 527;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;


@ -9,5 +9,6 @@
#cmakedefine01 USE_BROTLI
#cmakedefine01 USE_UNWIND
#cmakedefine01 USE_OPENCL
#cmakedefine01 USE_CASSANDRA
#cmakedefine01 USE_GRPC
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY


@ -21,6 +21,10 @@ target_link_libraries(clickhouse_dictionaries
string_utils
)
if(USE_CASSANDRA)
target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${CASSANDRA_INCLUDE_DIR})
endif()
add_subdirectory(Embedded)
target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${SPARSEHASH_INCLUDE_DIR})


@ -0,0 +1,274 @@
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#if USE_CASSANDRA
#include <utility>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Core/ExternalResultDescription.h>
#include <IO/ReadHelpers.h>
#include "CassandraBlockInputStream.h"
namespace DB
{
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
}
CassandraBlockInputStream::CassandraBlockInputStream(
const CassSessionShared & session_,
const String & query_str,
const Block & sample_block,
size_t max_block_size_)
: session(session_)
, statement(query_str.c_str(), /*parameters count*/ 0)
, max_block_size(max_block_size_)
, has_more_pages(cass_true)
{
description.init(sample_block);
cassandraCheck(cass_statement_set_paging_size(statement, max_block_size));
}
void CassandraBlockInputStream::insertValue(IColumn & column, ValueType type, const CassValue * cass_value)
{
switch (type)
{
case ValueType::vtUInt8:
{
cass_int8_t value;
cass_value_get_int8(cass_value, &value);
assert_cast<ColumnUInt8 &>(column).insertValue(static_cast<UInt8>(value));
break;
}
case ValueType::vtUInt16:
{
cass_int16_t value;
cass_value_get_int16(cass_value, &value);
assert_cast<ColumnUInt16 &>(column).insertValue(static_cast<UInt16>(value));
break;
}
case ValueType::vtUInt32:
{
cass_int32_t value;
cass_value_get_int32(cass_value, &value);
assert_cast<ColumnUInt32 &>(column).insertValue(static_cast<UInt32>(value));
break;
}
case ValueType::vtUInt64:
{
cass_int64_t value;
cass_value_get_int64(cass_value, &value);
assert_cast<ColumnUInt64 &>(column).insertValue(static_cast<UInt64>(value));
break;
}
case ValueType::vtInt8:
{
cass_int8_t value;
cass_value_get_int8(cass_value, &value);
assert_cast<ColumnInt8 &>(column).insertValue(value);
break;
}
case ValueType::vtInt16:
{
cass_int16_t value;
cass_value_get_int16(cass_value, &value);
assert_cast<ColumnInt16 &>(column).insertValue(value);
break;
}
case ValueType::vtInt32:
{
cass_int32_t value;
cass_value_get_int32(cass_value, &value);
assert_cast<ColumnInt32 &>(column).insertValue(value);
break;
}
case ValueType::vtInt64:
{
cass_int64_t value;
cass_value_get_int64(cass_value, &value);
assert_cast<ColumnInt64 &>(column).insertValue(value);
break;
}
case ValueType::vtFloat32:
{
cass_float_t value;
cass_value_get_float(cass_value, &value);
assert_cast<ColumnFloat32 &>(column).insertValue(value);
break;
}
case ValueType::vtFloat64:
{
cass_double_t value;
cass_value_get_double(cass_value, &value);
assert_cast<ColumnFloat64 &>(column).insertValue(value);
break;
}
case ValueType::vtString:
{
const char * value = nullptr;
size_t value_length;
cass_value_get_string(cass_value, &value, &value_length);
assert_cast<ColumnString &>(column).insertData(value, value_length);
break;
}
case ValueType::vtDate:
{
cass_uint32_t value;
cass_value_get_uint32(cass_value, &value);
assert_cast<ColumnUInt16 &>(column).insertValue(static_cast<UInt16>(value));
break;
}
case ValueType::vtDateTime:
{
cass_int64_t value;
cass_value_get_int64(cass_value, &value);
assert_cast<ColumnUInt32 &>(column).insertValue(static_cast<UInt32>(value / 1000));
break;
}
case ValueType::vtUUID:
{
CassUuid value;
cass_value_get_uuid(cass_value, &value);
std::array<char, CASS_UUID_STRING_LENGTH> uuid_str;
cass_uuid_string(value, uuid_str.data());
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(uuid_str.data(), uuid_str.size()));
break;
}
}
}
void CassandraBlockInputStream::readPrefix()
{
result_future = cass_session_execute(*session, statement);
}
Block CassandraBlockInputStream::readImpl()
{
if (!has_more_pages)
return {};
MutableColumns columns = description.sample_block.cloneEmptyColumns();
cassandraWaitAndCheck(result_future);
CassResultPtr result = cass_future_get_result(result_future);
assert(cass_result_column_count(result) == columns.size());
assertTypes(result);
has_more_pages = cass_result_has_more_pages(result);
if (has_more_pages)
{
cassandraCheck(cass_statement_set_paging_state(statement, result));
result_future = cass_session_execute(*session, statement);
}
CassIteratorPtr rows_iter = cass_iterator_from_result(result); /// Points to rows[-1]
while (cass_iterator_next(rows_iter))
{
const CassRow * row = cass_iterator_get_row(rows_iter);
for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx)
{
const CassValue * val = cass_row_get_column(row, col_idx);
if (cass_value_is_null(val))
columns[col_idx]->insertDefault();
else if (description.types[col_idx].second)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[col_idx]);
insertValue(column_nullable.getNestedColumn(), description.types[col_idx].first, val);
column_nullable.getNullMapData().emplace_back(0);
}
else
insertValue(*columns[col_idx], description.types[col_idx].first, val);
}
}
assert(cass_result_row_count(result) == columns.front()->size());
return description.sample_block.cloneWithColumns(std::move(columns));
}
void CassandraBlockInputStream::assertTypes(const CassResultPtr & result)
{
if (!assert_types)
return;
size_t column_count = cass_result_column_count(result);
for (size_t i = 0; i < column_count; ++i)
{
CassValueType expected = CASS_VALUE_TYPE_UNKNOWN;
String expected_text;
/// Cassandra does not support unsigned integers (cass_uint32_t is for Date)
switch (description.types[i].first)
{
case ExternalResultDescription::ValueType::vtInt8:
case ExternalResultDescription::ValueType::vtUInt8:
expected = CASS_VALUE_TYPE_TINY_INT;
expected_text = "tinyint";
break;
case ExternalResultDescription::ValueType::vtInt16:
case ExternalResultDescription::ValueType::vtUInt16:
expected = CASS_VALUE_TYPE_SMALL_INT;
expected_text = "smallint";
break;
case ExternalResultDescription::ValueType::vtUInt32:
case ExternalResultDescription::ValueType::vtInt32:
expected = CASS_VALUE_TYPE_INT;
expected_text = "int";
break;
case ExternalResultDescription::ValueType::vtInt64:
case ExternalResultDescription::ValueType::vtUInt64:
expected = CASS_VALUE_TYPE_BIGINT;
expected_text = "bigint";
break;
case ExternalResultDescription::ValueType::vtFloat32:
expected = CASS_VALUE_TYPE_FLOAT;
expected_text = "float";
break;
case ExternalResultDescription::ValueType::vtFloat64:
expected = CASS_VALUE_TYPE_DOUBLE;
expected_text = "double";
break;
case ExternalResultDescription::ValueType::vtString:
expected = CASS_VALUE_TYPE_TEXT;
expected_text = "text, ascii or varchar";
break;
case ExternalResultDescription::ValueType::vtDate:
expected = CASS_VALUE_TYPE_DATE;
expected_text = "date";
break;
case ExternalResultDescription::ValueType::vtDateTime:
expected = CASS_VALUE_TYPE_TIMESTAMP;
expected_text = "timestamp";
break;
case ExternalResultDescription::ValueType::vtUUID:
expected = CASS_VALUE_TYPE_UUID;
expected_text = "uuid";
break;
}
CassValueType got = cass_result_column_type(result, i);
if (got != expected)
{
if (expected == CASS_VALUE_TYPE_TEXT && (got == CASS_VALUE_TYPE_ASCII || got == CASS_VALUE_TYPE_VARCHAR))
continue;
const auto & column_name = description.sample_block.getColumnsWithTypeAndName()[i].name;
throw Exception("Type mismatch for column " + column_name + ": expected Cassandra type " + expected_text,
ErrorCodes::TYPE_MISMATCH);
}
}
assert_types = false;
}
}
#endif
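
The readImpl() above follows the standard paging pattern of the DataStax C/C++ driver pulled in by this submodule: execute the statement, drain one page of rows, feed the paging state back into the same statement, and execute again until no more pages remain. A reduced standalone sketch of that pattern (contact point, keyspace, table and column are hypothetical; error handling is omitted for brevity):

```cpp
#include <cassandra.h>
#include <cstdio>

int main()
{
    CassCluster * cluster = cass_cluster_new();
    cass_cluster_set_contact_points(cluster, "127.0.0.1");

    CassSession * session = cass_session_new();
    CassFuture * connect_future = cass_session_connect(session, cluster);
    cass_future_wait(connect_future);
    cass_future_free(connect_future);

    // One statement is reused across pages; the page size bounds each block of rows.
    CassStatement * statement = cass_statement_new("SELECT id FROM test_ks.test_cf", 0);
    cass_statement_set_paging_size(statement, 8192);

    cass_bool_t has_more_pages = cass_true;
    while (has_more_pages)
    {
        CassFuture * result_future = cass_session_execute(session, statement);
        const CassResult * result = cass_future_get_result(result_future);

        CassIterator * rows = cass_iterator_from_result(result);
        while (cass_iterator_next(rows))
        {
            const CassRow * row = cass_iterator_get_row(rows);
            cass_int64_t id = 0;
            cass_value_get_int64(cass_row_get_column(row, 0), &id);
            std::printf("%lld\n", static_cast<long long>(id));
        }
        cass_iterator_free(rows);

        has_more_pages = cass_result_has_more_pages(result);
        if (has_more_pages)
            cass_statement_set_paging_state(statement, result); // carry the cursor into the next page

        cass_result_free(result);
        cass_future_free(result_future);
    }

    cass_statement_free(statement);
    cass_session_free(session);
    cass_cluster_free(cluster);
    return 0;
}
```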


@ -0,0 +1,47 @@
#pragma once
#include <Dictionaries/CassandraHelpers.h>
#if USE_CASSANDRA
#include <Core/Block.h>
#include <DataStreams/IBlockInputStream.h>
#include <Core/ExternalResultDescription.h>
namespace DB
{
class CassandraBlockInputStream final : public IBlockInputStream
{
public:
CassandraBlockInputStream(
const CassSessionShared & session_,
const String & query_str,
const Block & sample_block,
size_t max_block_size);
String getName() const override { return "Cassandra"; }
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
void readPrefix() override;
private:
using ValueType = ExternalResultDescription::ValueType;
Block readImpl() override;
static void insertValue(IColumn & column, ValueType type, const CassValue * cass_value);
void assertTypes(const CassResultPtr & result);
CassSessionShared session;
CassStatementPtr statement;
CassFuturePtr result_future;
const size_t max_block_size;
ExternalResultDescription description;
cass_bool_t has_more_pages;
bool assert_types = true;
};
}
#endif


@ -0,0 +1,211 @@
#include "CassandraDictionarySource.h"
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
namespace DB
{
namespace ErrorCodes
{
extern const int SUPPORT_IS_DISABLED;
extern const int NOT_IMPLEMENTED;
}
void registerDictionarySourceCassandra(DictionarySourceFactory & factory)
{
auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct,
[[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
[[maybe_unused]] const std::string & config_prefix,
[[maybe_unused]] Block & sample_block,
const Context & /* context */,
bool /*check_config*/) -> DictionarySourcePtr
{
#if USE_CASSANDRA
setupCassandraDriverLibraryLogging(CASS_LOG_INFO);
return std::make_unique<CassandraDictionarySource>(dict_struct, config, config_prefix + ".cassandra", sample_block);
#else
throw Exception{"Dictionary source of type `cassandra` is disabled because ClickHouse was built without cassandra support.",
ErrorCodes::SUPPORT_IS_DISABLED};
#endif
};
factory.registerSource("cassandra", create_table_source);
}
}
#if USE_CASSANDRA
#include <IO/WriteHelpers.h>
#include <Common/SipHash.h>
#include "CassandraBlockInputStream.h"
#include <common/logger_useful.h>
#include <DataStreams/UnionBlockInputStream.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int INVALID_CONFIG_PARAMETER;
}
CassandraSettings::CassandraSettings(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix)
: host(config.getString(config_prefix + ".host"))
, port(config.getUInt(config_prefix + ".port", 0))
, user(config.getString(config_prefix + ".user", ""))
, password(config.getString(config_prefix + ".password", ""))
, db(config.getString(config_prefix + ".keyspace"))
, table(config.getString(config_prefix + ".column_family"))
, allow_filtering(config.getBool(config_prefix + ".allow_filtering", false))
, partition_key_prefix(config.getUInt(config_prefix + ".partition_key_prefix", 1))
, max_threads(config.getUInt(config_prefix + ".max_threads", 8))
, where(config.getString(config_prefix + ".where", ""))
{
setConsistency(config.getString(config_prefix + ".consistency", "One"));
}
void CassandraSettings::setConsistency(const String & config_str)
{
if (config_str == "One")
consistency = CASS_CONSISTENCY_ONE;
else if (config_str == "Two")
consistency = CASS_CONSISTENCY_TWO;
else if (config_str == "Three")
consistency = CASS_CONSISTENCY_THREE;
else if (config_str == "All")
consistency = CASS_CONSISTENCY_ALL;
else if (config_str == "EachQuorum")
consistency = CASS_CONSISTENCY_EACH_QUORUM;
else if (config_str == "Quorum")
consistency = CASS_CONSISTENCY_QUORUM;
else if (config_str == "LocalQuorum")
consistency = CASS_CONSISTENCY_LOCAL_QUORUM;
else if (config_str == "LocalOne")
consistency = CASS_CONSISTENCY_LOCAL_ONE;
else if (config_str == "Serial")
consistency = CASS_CONSISTENCY_SERIAL;
else if (config_str == "LocalSerial")
consistency = CASS_CONSISTENCY_LOCAL_SERIAL;
else /// CASS_CONSISTENCY_ANY is only valid for writes
throw Exception("Unsupported consistency level: " + config_str, ErrorCodes::INVALID_CONFIG_PARAMETER);
}
static const size_t max_block_size = 8192;
CassandraDictionarySource::CassandraDictionarySource(
const DictionaryStructure & dict_struct_,
const CassandraSettings & settings_,
const Block & sample_block_)
: log(&Poco::Logger::get("CassandraDictionarySource"))
, dict_struct(dict_struct_)
, settings(settings_)
, sample_block(sample_block_)
, query_builder(dict_struct, settings.db, settings.table, settings.where, IdentifierQuotingStyle::DoubleQuotes)
{
cassandraCheck(cass_cluster_set_contact_points(cluster, settings.host.c_str()));
if (settings.port)
cassandraCheck(cass_cluster_set_port(cluster, settings.port));
cass_cluster_set_credentials(cluster, settings.user.c_str(), settings.password.c_str());
cassandraCheck(cass_cluster_set_consistency(cluster, settings.consistency));
}
CassandraDictionarySource::CassandraDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
Block & sample_block_)
: CassandraDictionarySource(
dict_struct_,
CassandraSettings(config, config_prefix),
sample_block_)
{
}
void CassandraDictionarySource::maybeAllowFiltering(String & query) const
{
if (!settings.allow_filtering)
return;
query.pop_back(); /// remove semicolon
query += " ALLOW FILTERING;";
}
BlockInputStreamPtr CassandraDictionarySource::loadAll()
{
String query = query_builder.composeLoadAllQuery();
maybeAllowFiltering(query);
LOG_INFO(log, "Loading all using query: {}", query);
return std::make_shared<CassandraBlockInputStream>(getSession(), query, sample_block, max_block_size);
}
std::string CassandraDictionarySource::toString() const
{
return "Cassandra: " + settings.db + '.' + settings.table;
}
BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
String query = query_builder.composeLoadIdsQuery(ids);
maybeAllowFiltering(query);
LOG_INFO(log, "Loading ids using query: {}", query);
return std::make_shared<CassandraBlockInputStream>(getSession(), query, sample_block, max_block_size);
}
BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
if (requested_rows.empty())
throw Exception("No rows requested", ErrorCodes::LOGICAL_ERROR);
/// TODO is there a better way to load data by complex keys?
std::unordered_map<UInt64, std::vector<size_t>> partitions;
for (const auto & row : requested_rows)
{
SipHash partition_key;
for (size_t i = 0; i < settings.partition_key_prefix; ++i)
key_columns[i]->updateHashWithValue(row, partition_key);
partitions[partition_key.get64()].push_back(row);
}
BlockInputStreams streams;
for (const auto & partition : partitions)
{
String query = query_builder.composeLoadKeysQuery(key_columns, partition.second, ExternalQueryBuilder::CASSANDRA_SEPARATE_PARTITION_KEY, settings.partition_key_prefix);
maybeAllowFiltering(query);
LOG_INFO(log, "Loading keys for partition hash {} using query: {}", partition.first, query);
streams.push_back(std::make_shared<CassandraBlockInputStream>(getSession(), query, sample_block, max_block_size));
}
if (streams.size() == 1)
return streams.front();
return std::make_shared<UnionBlockInputStream>(streams, nullptr, settings.max_threads);
}
BlockInputStreamPtr CassandraDictionarySource::loadUpdatedAll()
{
throw Exception("Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED);
}
CassSessionShared CassandraDictionarySource::getSession()
{
/// Reuse connection if exists, create new one if not
auto session = maybe_session.lock();
if (session)
return session;
std::lock_guard lock(connect_mutex);
session = maybe_session.lock();
if (session)
return session;
session = std::make_shared<CassSessionPtr>();
CassFuturePtr future = cass_session_connect(*session, cluster);
cassandraWaitAndCheck(future);
maybe_session = session;
return session;
}
}
#endif
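
getSession() above keeps the most recent connection alive only while some input stream still uses it, by caching a weak_ptr and re-checking it under a mutex. A stripped-down sketch of the same caching shape with a generic connection type (Connection and makeConnection() are hypothetical stand-ins for the Cassandra session setup done in the real code):

```cpp
#include <memory>
#include <mutex>

// Generic shape of the connection reuse in getSession(): callers share one
// connection while any of them keeps it alive; once the last shared_ptr is gone,
// the next caller transparently reconnects.
struct Connection {};

std::shared_ptr<Connection> makeConnection()
{
    return std::make_shared<Connection>(); // the real code connects to the cluster here
}

class ConnectionCache
{
public:
    std::shared_ptr<Connection> get()
    {
        // Fast path: reuse the existing connection if it is still alive.
        if (auto connection = cached.lock())
            return connection;

        std::lock_guard lock(mutex);

        // Another thread may have connected while we were waiting for the lock.
        if (auto connection = cached.lock())
            return connection;

        auto connection = makeConnection();
        cached = connection;
        return connection;
    }

private:
    std::mutex mutex;
    std::weak_ptr<Connection> cached;
};
```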


@ -0,0 +1,89 @@
#pragma once
#include <Dictionaries/CassandraHelpers.h>
#if USE_CASSANDRA
#include "DictionaryStructure.h"
#include "IDictionarySource.h"
#include "ExternalQueryBuilder.h"
#include <Core/Block.h>
#include <Poco/Logger.h>
#include <mutex>
namespace DB
{
struct CassandraSettings
{
String host;
UInt16 port;
String user;
String password;
String db;
String table;
CassConsistency consistency;
bool allow_filtering;
/// TODO get information about key from the driver
size_t partition_key_prefix;
size_t max_threads;
String where;
CassandraSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
void setConsistency(const String & config_str);
};
class CassandraDictionarySource final : public IDictionarySource
{
public:
CassandraDictionarySource(
const DictionaryStructure & dict_struct,
const CassandraSettings & settings_,
const Block & sample_block);
CassandraDictionarySource(
const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
Block & sample_block);
BlockInputStreamPtr loadAll() override;
bool supportsSelectiveLoad() const override { return true; }
bool isModified() const override { return true; }
bool hasUpdateField() const override { return false; }
DictionarySourcePtr clone() const override
{
return std::make_unique<CassandraDictionarySource>(dict_struct, settings, sample_block);
}
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
BlockInputStreamPtr loadUpdatedAll() override;
String toString() const override;
private:
void maybeAllowFiltering(String & query) const;
CassSessionShared getSession();
Poco::Logger * log;
const DictionaryStructure dict_struct;
const CassandraSettings settings;
Block sample_block;
ExternalQueryBuilder query_builder;
std::mutex connect_mutex;
CassClusterPtr cluster;
CassSessionWeak maybe_session;
};
}
#endif


@ -0,0 +1,68 @@
#include <Dictionaries/CassandraHelpers.h>
#if USE_CASSANDRA
#include <Common/Exception.h>
#include <common/logger_useful.h>
#include <mutex>
namespace DB
{
namespace ErrorCodes
{
extern const int CASSANDRA_INTERNAL_ERROR;
}
void cassandraCheck(CassError code)
{
if (code != CASS_OK)
throw Exception("Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code),
ErrorCodes::CASSANDRA_INTERNAL_ERROR);
}
void cassandraWaitAndCheck(CassFuturePtr & future)
{
auto code = cass_future_error_code(future); /// Waits if not ready
if (code == CASS_OK)
return;
/// `future` owns `message` and will free it on destruction
const char * message;
size_t message_len;
cass_future_error_message(future, &message, & message_len);
std::string full_message = "Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code) + ": " + message;
throw Exception(full_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR);
}
static std::once_flag setup_logging_flag;
void setupCassandraDriverLibraryLogging(CassLogLevel level)
{
std::call_once(setup_logging_flag, [level]()
{
Poco::Logger * logger = &Poco::Logger::get("CassandraDriverLibrary");
cass_log_set_level(level);
if (level != CASS_LOG_DISABLED)
cass_log_set_callback(cassandraLogCallback, logger);
});
}
void cassandraLogCallback(const CassLogMessage * message, void * data)
{
Poco::Logger * logger = static_cast<Poco::Logger *>(data);
if (message->severity == CASS_LOG_CRITICAL || message->severity == CASS_LOG_ERROR)
LOG_ERROR(logger, message->message);
else if (message->severity == CASS_LOG_WARN)
LOG_WARNING(logger, message->message);
else if (message->severity == CASS_LOG_INFO)
LOG_INFO(logger, message->message);
else if (message->severity == CASS_LOG_DEBUG)
LOG_DEBUG(logger, message->message);
else if (message->severity == CASS_LOG_TRACE)
LOG_TRACE(logger, message->message);
}
}
#endif


@ -0,0 +1,84 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#if USE_CASSANDRA
#include <cassandra.h> // Y_IGNORE
#include <utility>
#include <memory>
namespace DB
{
namespace Cassandra
{
template<typename CassT>
CassT * defaultCtor() { return nullptr; }
/// RAII wrapper for raw pointers to objects from cassandra driver library
template<typename CassT, auto Dtor, auto Ctor = defaultCtor<CassT>>
class ObjectHolder
{
CassT * ptr = nullptr;
public:
template<typename... Args>
ObjectHolder(Args &&... args) : ptr(Ctor(std::forward<Args>(args)...)) {}
ObjectHolder(CassT * ptr_) : ptr(ptr_) {}
ObjectHolder(const ObjectHolder &) = delete;
ObjectHolder & operator = (const ObjectHolder &) = delete;
ObjectHolder(ObjectHolder && rhs) noexcept : ptr(rhs.ptr) { rhs.ptr = nullptr; }
ObjectHolder & operator = (ObjectHolder && rhs) noexcept
{
if (ptr)
Dtor(ptr);
ptr = rhs.ptr;
rhs.ptr = nullptr;
return *this;
}
~ObjectHolder()
{
if (ptr)
Dtor(ptr);
}
/// For implicit conversion when passing object to driver library functions
operator CassT * () { return ptr; }
operator const CassT * () const { return ptr; }
};
}
/// These objects are created on pointer construction
using CassClusterPtr = Cassandra::ObjectHolder<CassCluster, cass_cluster_free, cass_cluster_new>;
using CassStatementPtr = Cassandra::ObjectHolder<CassStatement, cass_statement_free, cass_statement_new>;
using CassSessionPtr = Cassandra::ObjectHolder<CassSession, cass_session_free, cass_session_new>;
/// Share connections between streams. Executing statements in one session object is thread-safe
using CassSessionShared = std::shared_ptr<CassSessionPtr>;
using CassSessionWeak = std::weak_ptr<CassSessionPtr>;
/// The following objects are created inside Cassandra driver library,
/// but must be freed by user code
using CassFuturePtr = Cassandra::ObjectHolder<CassFuture, cass_future_free>;
using CassResultPtr = Cassandra::ObjectHolder<const CassResult, cass_result_free>;
using CassIteratorPtr = Cassandra::ObjectHolder<CassIterator, cass_iterator_free>;
/// Checks return code, throws exception on error
void cassandraCheck(CassError code);
void cassandraWaitAndCheck(CassFuturePtr & future);
/// By default driver library prints logs to stderr.
/// It should be redirected (or, at least, disabled) before calling other functions from the library.
void setupCassandraDriverLibraryLogging(CassLogLevel level);
void cassandraLogCallback(const CassLogMessage * message, void * data);
}
#endif
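
A short usage sketch for the holders declared above, mirroring how CassandraBlockInputStream uses them: CassStatementPtr forwards its constructor arguments to cass_statement_new, while CassFuturePtr and CassResultPtr simply wrap pointers returned by the driver and free them when they go out of scope (the query text here is hypothetical):

```cpp
#include <Dictionaries/CassandraHelpers.h>

#if USE_CASSANDRA
namespace DB
{

void exampleQuery(const CassSessionShared & session)
{
    /// Created via cass_statement_new("SELECT ...", 0); freed via cass_statement_free.
    CassStatementPtr statement("SELECT key FROM ks.cf", /*parameters count*/ 0);

    /// The implicit conversion operators pass the raw pointers to the driver functions.
    CassFuturePtr future = cass_session_execute(*session, statement);
    cassandraWaitAndCheck(future);

    /// Freed via cass_result_free at the end of the scope.
    CassResultPtr result = cass_future_get_result(future);
}

}
#endif
```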


@ -63,6 +63,13 @@ void ExternalQueryBuilder::writeQuoted(const std::string & s, WriteBuffer & out)
std::string ExternalQueryBuilder::composeLoadAllQuery() const
{
WriteBufferFromOwnString out;
composeLoadAllQuery(out);
writeChar(';', out);
return out.str();
}
void ExternalQueryBuilder::composeLoadAllQuery(WriteBuffer & out) const
{
writeString("SELECT ", out);
if (dict_struct.id)
@ -149,24 +156,26 @@ std::string ExternalQueryBuilder::composeLoadAllQuery() const
writeString(" WHERE ", out);
writeString(where, out);
}
writeChar(';', out);
return out.str();
}
std::string ExternalQueryBuilder::composeUpdateQuery(const std::string & update_field, const std::string & time_point) const
{
std::string out = composeLoadAllQuery();
std::string update_query;
WriteBufferFromOwnString out;
composeLoadAllQuery(out);
if (!where.empty())
update_query = " AND " + update_field + " >= '" + time_point + "'";
writeString(" AND ", out);
else
update_query = " WHERE " + update_field + " >= '" + time_point + "'";
writeString(" WHERE ", out);
return out.insert(out.size() - 1, update_query); /// This is done to insert "update_query" before "out"'s semicolon
writeQuoted(update_field, out);
writeString(" >= '", out);
writeString(time_point, out);
writeChar('\'', out);
writeChar(';', out);
return out.str();
}
@ -241,7 +250,7 @@ std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector<UInt64>
std::string
ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const std::vector<size_t> & requested_rows, LoadKeysMethod method)
ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const std::vector<size_t> & requested_rows, LoadKeysMethod method, size_t partition_key_prefix)
{
if (!dict_struct.key)
throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD};
@ -284,9 +293,13 @@ ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const st
if (!where.empty())
{
writeString("(", out);
if (method != CASSANDRA_SEPARATE_PARTITION_KEY)
writeString("(", out);
writeString(where, out);
writeString(") AND (", out);
if (method != CASSANDRA_SEPARATE_PARTITION_KEY)
writeString(") AND (", out);
else
writeString(" AND ", out);
}
if (method == AND_OR_CHAIN)
@ -298,28 +311,33 @@ ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const st
writeString(" OR ", out);
first = false;
composeKeyCondition(key_columns, row, out);
writeString("(", out);
composeKeyCondition(key_columns, row, out, 0, key_columns.size());
writeString(")", out);
}
}
else /* if (method == IN_WITH_TUPLES) */
else if (method == IN_WITH_TUPLES)
{
writeString(composeKeyTupleDefinition(), out);
writeString(" IN (", out);
first = true;
for (const auto row : requested_rows)
{
if (!first)
writeString(", ", out);
first = false;
composeKeyTuple(key_columns, row, out);
}
writeString(")", out);
composeInWithTuples(key_columns, requested_rows, out, 0, key_columns.size());
}
else /* if (method == CASSANDRA_SEPARATE_PARTITION_KEY) */
{
/// CQL does not allow using OR conditions
/// and does not allow using multi-column IN expressions with partition key columns.
/// So we have to use multiple queries with conditions like
/// (partition_key_1 = val1 AND partition_key_2 = val2 ...) AND (clustering_key_1, ...) IN ((val3, ...), ...)
/// for each partition key.
/// `partition_key_prefix` is a number of columns from partition key.
/// All `requested_rows` must have the same values of partition key.
composeKeyCondition(key_columns, requested_rows.at(0), out, 0, partition_key_prefix);
if (partition_key_prefix && partition_key_prefix < key_columns.size())
writeString(" AND ", out);
if (partition_key_prefix < key_columns.size())
composeInWithTuples(key_columns, requested_rows, out, partition_key_prefix, key_columns.size());
}
if (!where.empty())
if (!where.empty() && method != CASSANDRA_SEPARATE_PARTITION_KEY)
{
writeString(")", out);
}
@ -330,13 +348,11 @@ ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const st
}
void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out) const
void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out,
size_t beg, size_t end) const
{
writeString("(", out);
const auto keys_size = key_columns.size();
auto first = true;
for (const auto i : ext::range(0, keys_size))
for (const auto i : ext::range(beg, end))
{
if (!first)
writeString(" AND ", out);
@ -346,45 +362,60 @@ void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, cons
const auto & key_description = (*dict_struct.key)[i];
/// key_i=value_i
writeString(key_description.name, out);
writeQuoted(key_description.name, out);
writeString("=", out);
key_description.type->serializeAsTextQuoted(*key_columns[i], row, out, format_settings);
}
}
void ExternalQueryBuilder::composeInWithTuples(const Columns & key_columns, const std::vector<size_t> & requested_rows,
WriteBuffer & out, size_t beg, size_t end)
{
composeKeyTupleDefinition(out, beg, end);
writeString(" IN (", out);
bool first = true;
for (const auto row : requested_rows)
{
if (!first)
writeString(", ", out);
first = false;
composeKeyTuple(key_columns, row, out, beg, end);
}
writeString(")", out);
}
std::string ExternalQueryBuilder::composeKeyTupleDefinition() const
void ExternalQueryBuilder::composeKeyTupleDefinition(WriteBuffer & out, size_t beg, size_t end) const
{
if (!dict_struct.key)
throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD};
std::string result{"("};
writeChar('(', out);
auto first = true;
for (const auto & key : *dict_struct.key)
for (const auto i : ext::range(beg, end))
{
if (!first)
result += ", ";
writeString(", ", out);
first = false;
result += key.name;
writeQuoted((*dict_struct.key)[i].name, out);
}
result += ")";
return result;
writeChar(')', out);
}
void ExternalQueryBuilder::composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out) const
void ExternalQueryBuilder::composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out, size_t beg, size_t end) const
{
writeString("(", out);
const auto keys_size = key_columns.size();
auto first = true;
for (const auto i : ext::range(0, keys_size))
for (const auto i : ext::range(beg, end))
{
if (!first)
writeString(", ", out);


@ -42,30 +42,39 @@ struct ExternalQueryBuilder
std::string composeLoadIdsQuery(const std::vector<UInt64> & ids);
/** Generate a query to load data by set of composite keys.
* There are two methods of specification of composite keys in WHERE:
* There are three methods of specification of composite keys in WHERE:
* 1. (x = c11 AND y = c12) OR (x = c21 AND y = c22) ...
* 2. (x, y) IN ((c11, c12), (c21, c22), ...)
* 3. (x = c1 AND (y, z) IN ((c2, c3), ...))
*/
enum LoadKeysMethod
{
AND_OR_CHAIN,
IN_WITH_TUPLES,
CASSANDRA_SEPARATE_PARTITION_KEY,
};
std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector<size_t> & requested_rows, LoadKeysMethod method);
std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector<size_t> & requested_rows, LoadKeysMethod method, size_t partition_key_prefix = 0);
private:
const FormatSettings format_settings;
void composeLoadAllQuery(WriteBuffer & out) const;
/// In the following methods, `beg` and `end` specify which key columns to write into the expression
/// Expression in form (x = c1 AND y = c2 ...)
void composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out) const;
void composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out, size_t beg, size_t end) const;
/// Expression in form (x, y, ...) IN ((c1, c2, ...), ...)
void composeInWithTuples(const Columns & key_columns, const std::vector<size_t> & requested_rows, WriteBuffer & out, size_t beg, size_t end);
/// Expression in form (x, y, ...)
std::string composeKeyTupleDefinition() const;
void composeKeyTupleDefinition(WriteBuffer & out, size_t beg, size_t end) const;
/// Expression in form (c1, c2, ...)
void composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out) const;
void composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out, size_t beg, size_t end) const;
/// Write string with specified quoting style.
void writeQuoted(const std::string & s, WriteBuffer & out) const;

View File

@ -13,6 +13,7 @@ void registerDictionaries()
registerDictionarySourceClickHouse(source_factory);
registerDictionarySourceMongoDB(source_factory);
registerDictionarySourceRedis(source_factory);
registerDictionarySourceCassandra(source_factory);
registerDictionarySourceXDBC(source_factory);
registerDictionarySourceJDBC(source_factory);
registerDictionarySourceExecutable(source_factory);

View File

@ -9,6 +9,7 @@ void registerDictionarySourceFile(DictionarySourceFactory & source_factory);
void registerDictionarySourceMysql(DictionarySourceFactory & source_factory);
void registerDictionarySourceClickHouse(DictionarySourceFactory & source_factory);
void registerDictionarySourceMongoDB(DictionarySourceFactory & source_factory);
void registerDictionarySourceCassandra(DictionarySourceFactory & source_factory);
void registerDictionarySourceRedis(DictionarySourceFactory & source_factory);
void registerDictionarySourceXDBC(DictionarySourceFactory & source_factory);
void registerDictionarySourceJDBC(DictionarySourceFactory & source_factory);
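The new Cassandra source is registered alongside the existing ones and presumably builds on the DataStax cpp-driver vendored as contrib/cassandra. For reference, the driver's canonical connect-and-query flow looks roughly like this; it is a standalone sketch of the driver API with a placeholder contact point, not ClickHouse's CassandraDictionarySource or CassandraBlockInputStream code.

#include <cassandra.h>
#include <cstdio>

int main()
{
    /// Connect to a single contact point (placeholder address).
    CassCluster * cluster = cass_cluster_new();
    cass_cluster_set_contact_points(cluster, "127.0.0.1");

    CassSession * session = cass_session_new();
    CassFuture * connect_future = cass_session_connect(session, cluster);

    if (cass_future_error_code(connect_future) == CASS_OK)
    {
        /// Run a trivial query and read one text column from the first row.
        CassStatement * statement = cass_statement_new("SELECT release_version FROM system.local", 0);
        CassFuture * result_future = cass_session_execute(session, statement);

        if (cass_future_error_code(result_future) == CASS_OK)
        {
            const CassResult * result = cass_future_get_result(result_future);
            const CassRow * row = cass_result_first_row(result);

            const char * version;
            size_t version_length;
            cass_value_get_string(cass_row_get_column_by_name(row, "release_version"), &version, &version_length);
            printf("Cassandra version: %.*s\n", (int) version_length, version);

            cass_result_free(result);
        }

        cass_statement_free(statement);
        cass_future_free(result_future);
    }

    cass_future_free(connect_future);
    cass_session_free(session);
    cass_cluster_free(cluster);
    return 0;
}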

View File

@ -17,6 +17,9 @@ SRCS(
CacheDictionary_generate1.cpp
CacheDictionary_generate2.cpp
CacheDictionary_generate3.cpp
CassandraBlockInputStream.cpp
CassandraDictionarySource.cpp
CassandraHelpers.cpp
ClickHouseDictionarySource.cpp
ComplexKeyCacheDictionary.cpp
ComplexKeyCacheDictionary_createAttributeWithType.cpp

View File

@ -2,6 +2,8 @@
#include <utility>
#include <IO/HTTPCommon.h>
#include <IO/S3/PocoHTTPResponseStream.h>
#include <IO/S3/PocoHTTPResponseStream.cpp>
#include <aws/core/http/HttpRequest.h>
#include <aws/core/http/HttpResponse.h>
#include <aws/core/http/standard/StandardHttpResponse.h>
@ -149,8 +151,7 @@ void PocoHTTPClient::MakeRequestInternal(
response->SetClientErrorMessage(error_message);
}
else
/// TODO: Do not copy whole stream.
Poco::StreamCopier::copyStream(response_body_stream, response->GetResponseBody());
response->GetResponseStream().SetUnderlyingStream(std::make_shared<PocoHTTPResponseStream>(session, response_body_stream));
break;
}

View File

@ -21,10 +21,12 @@ std::shared_ptr<Aws::Http::HttpRequest> PocoHTTPClientFactory::CreateHttpRequest
}
std::shared_ptr<Aws::Http::HttpRequest> PocoHTTPClientFactory::CreateHttpRequest(
const Aws::Http::URI & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory & streamFactory) const
const Aws::Http::URI & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory &) const
{
auto request = Aws::MakeShared<Aws::Http::Standard::StandardHttpRequest>("PocoHTTPClientFactory", uri, method);
request->SetResponseStreamFactory(streamFactory);
/// Don't create the default response stream. The actual response stream is set later in PocoHTTPClient.
request->SetResponseStreamFactory(null_factory);
return request;
}

View File

@ -4,22 +4,25 @@
namespace Aws::Http
{
class HttpClient;
class HttpRequest;
class HttpClient;
class HttpRequest;
}
namespace DB::S3
{
class PocoHTTPClientFactory : public Aws::Http::HttpClientFactory
{
public:
~PocoHTTPClientFactory() override = default;
[[nodiscard]] std::shared_ptr<Aws::Http::HttpClient> CreateHttpClient(const Aws::Client::ClientConfiguration & clientConfiguration) const override;
[[nodiscard]] std::shared_ptr<Aws::Http::HttpClient>
CreateHttpClient(const Aws::Client::ClientConfiguration & clientConfiguration) const override;
[[nodiscard]] std::shared_ptr<Aws::Http::HttpRequest>
CreateHttpRequest(const Aws::String & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory & streamFactory) const override;
[[nodiscard]] std::shared_ptr<Aws::Http::HttpRequest>
CreateHttpRequest(const Aws::Http::URI & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory & streamFactory) const override;
private:
const Aws::IOStreamFactory null_factory = []() { return nullptr; };
};
}

View File

@ -0,0 +1,12 @@
#include "PocoHTTPResponseStream.h"
#include <utility>
namespace DB::S3
{
PocoHTTPResponseStream::PocoHTTPResponseStream(std::shared_ptr<Poco::Net::HTTPClientSession> session_, std::istream & response_stream_)
: Aws::IStream(response_stream_.rdbuf()), session(std::move(session_))
{
}
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <aws/core/utils/stream/ResponseStream.h>
#include <Poco/Net/HTTPClientSession.h>
namespace DB::S3
{
/**
 * Wrapper around IStream that exposes the response stream and keeps alive the HTTP session that owns it.
*/
class PocoHTTPResponseStream : public Aws::IStream
{
public:
PocoHTTPResponseStream(std::shared_ptr<Poco::Net::HTTPClientSession> session_, std::istream & response_stream_);
private:
/// The Poco HTTP session owns the response stream, so it must be kept alive while the stream is read.
std::shared_ptr<Poco::Net::HTTPClientSession> session;
};
}
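Instead of copying the whole response body (the removed Poco::StreamCopier::copyStream call), the client now hands out the response stream directly, and the wrapper's only extra job is to keep the HTTP session, which owns that stream, alive while the caller reads. A minimal standalone illustration of the same idiom, using standard-library stand-ins rather than Poco or AWS types:

#include <iostream>
#include <istream>
#include <memory>
#include <sstream>
#include <string>

/// Stand-in for the object that owns the underlying response stream
/// (Poco::Net::HTTPClientSession in the real code).
struct FakeSession
{
    std::istringstream body{"response body bytes"};
    ~FakeSession() { std::cout << "session closed\n"; }
};

/// Same idiom as PocoHTTPResponseStream: an std::istream that reads through the
/// owner's stream buffer (rdbuf) and keeps the owner alive for its own lifetime.
class HoldingResponseStream : public std::istream
{
public:
    explicit HoldingResponseStream(std::shared_ptr<FakeSession> session_)
        : std::istream(session_->body.rdbuf()), session(std::move(session_))
    {
    }

private:
    std::shared_ptr<FakeSession> session;
};

int main()
{
    HoldingResponseStream stream(std::make_shared<FakeSession>());

    /// The caller can consume the body lazily; the session is not destroyed
    /// until `stream` itself goes out of scope.
    std::string word;
    while (stream >> word)
        std::cout << word << '\n';

    return 0;
}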

View File

@ -19,6 +19,7 @@ import pprint
import psycopg2
import pymongo
import pymysql
import cassandra.cluster
from dicttoxml import dicttoxml
from kazoo.client import KazooClient
from kazoo.exceptions import KazooException
@ -108,6 +109,7 @@ class ClickHouseCluster:
self.base_zookeeper_cmd = None
self.base_mysql_cmd = []
self.base_kafka_cmd = []
self.base_cassandra_cmd = []
self.pre_zookeeper_commands = []
self.instances = {}
self.with_zookeeper = False
@ -119,6 +121,7 @@ class ClickHouseCluster:
self.with_mongo = False
self.with_net_trics = False
self.with_redis = False
self.with_cassandra = False
self.with_minio = False
self.minio_host = "minio1"
@ -147,7 +150,7 @@ class ClickHouseCluster:
def add_instance(self, name, config_dir=None, main_configs=None, user_configs=None, macros=None,
with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None,
with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False,
with_redis=False, with_minio=False,
with_redis=False, with_minio=False, with_cassandra=False,
hostname=None, env_variables=None, image="yandex/clickhouse-integration-test",
stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None,
zookeeper_docker_compose_path=None, zookeeper_use_tmpfs=True):
@ -169,7 +172,7 @@ class ClickHouseCluster:
instance = ClickHouseInstance(
self, self.base_dir, name, config_dir, main_configs or [], user_configs or [], macros or {},
with_zookeeper,
self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio,
self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio, with_cassandra,
self.base_configs_dir, self.server_bin_path,
self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname,
env_variables=env_variables or {}, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address,
@ -265,6 +268,12 @@ class ClickHouseCluster:
self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_minio.yml')]
cmds.append(self.base_minio_cmd)
if with_cassandra and not self.with_cassandra:
self.with_cassandra = True
self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_cassandra.yml')])
self.base_cassandra_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_cassandra.yml')]
return instance
def get_instance_docker_id(self, instance_name):
@ -451,6 +460,18 @@ class ClickHouseCluster:
logging.warning("Can't connect to SchemaRegistry: %s", str(ex))
time.sleep(1)
def wait_cassandra_to_start(self, timeout=30):
cass_client = cassandra.cluster.Cluster(["localhost"], port="9043")
start = time.time()
while time.time() - start < timeout:
try:
cass_client.connect()
logging.info("Connected to Cassandra")
return
except Exception as ex:
logging.warning("Can't connect to Cassandra: %s", str(ex))
time.sleep(1)
def start(self, destroy_dirs=True):
if self.is_up:
return
@ -527,6 +548,10 @@ class ClickHouseCluster:
logging.info("Trying to connect to Minio...")
self.wait_minio_to_start()
if self.with_cassandra and self.base_cassandra_cmd:
subprocess_check_call(self.base_cassandra_cmd + ['up', '-d', '--force-recreate'])
self.wait_cassandra_to_start()
clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate']
logging.info("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd)))
subprocess_check_call(clickhouse_start_cmd)
@ -656,7 +681,7 @@ class ClickHouseInstance:
def __init__(
self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros,
with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio,
with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio, with_cassandra,
base_configs_dir, server_bin_path, odbc_bridge_bin_path,
clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None,
image="yandex/clickhouse-integration-test",
@ -686,6 +711,7 @@ class ClickHouseInstance:
self.with_mongo = with_mongo
self.with_redis = with_redis
self.with_minio = with_minio
self.with_cassandra = with_cassandra
self.path = p.join(self.cluster.instances_dir, name)
self.docker_compose_path = p.join(self.path, 'docker_compose.yml')

View File

@ -2,11 +2,13 @@
import warnings
import pymysql.cursors
import pymongo
import cassandra.cluster
import redis
import aerospike
from tzlocal import get_localzone
import datetime
import os
import uuid
class ExternalSource(object):
@ -405,6 +407,73 @@ class SourceHTTPS(SourceHTTPBase):
def _get_schema(self):
return "https"
class SourceCassandra(ExternalSource):
TYPE_MAPPING = {
'UInt8': 'tinyint',
'UInt16': 'smallint',
'UInt32': 'int',
'UInt64': 'bigint',
'Int8': 'tinyint',
'Int16': 'smallint',
'Int32': 'int',
'Int64': 'bigint',
'UUID': 'uuid',
'Date': 'date',
'DateTime': 'timestamp',
'String': 'text',
'Float32': 'float',
'Float64': 'double'
}
def __init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password):
ExternalSource.__init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password)
self.structure = dict()
def get_source_str(self, table_name):
return '''
<cassandra>
<host>{host}</host>
<port>{port}</port>
<keyspace>test</keyspace>
<column_family>{table}</column_family>
<allow_filtering>1</allow_filtering>
<where>"Int64_" &lt; 1000000000000000000</where>
</cassandra>
'''.format(
host=self.docker_hostname,
port=self.docker_port,
table=table_name,
)
def prepare(self, structure, table_name, cluster):
self.client = cassandra.cluster.Cluster([self.internal_hostname], port=self.internal_port)
self.session = self.client.connect()
self.session.execute("create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};")
self.session.execute('drop table if exists test."{}"'.format(table_name))
self.structure[table_name] = structure
columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()]
keys = ['"' + col.name + '"' for col in structure.keys]
query = 'create table test."{name}" ({columns}, primary key ({pk}));'.format(
name=table_name, columns=', '.join(columns), pk=', '.join(keys))
self.session.execute(query)
self.prepared = True
def get_value_to_insert(self, value, type):
if type == 'UUID':
return uuid.UUID(value)
elif type == 'DateTime':
local_datetime = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
return get_localzone().localize(local_datetime)
return value
def load_data(self, data, table_name):
names_and_types = [(field.name, field.field_type) for field in self.structure[table_name].get_all_fields()]
columns = ['"' + col[0] + '"' for col in names_and_types]
insert = 'insert into test."{table}" ({columns}) values ({args})'.format(
table=table_name, columns=','.join(columns), args=','.join(['%s']*len(columns)))
for row in data:
values = [self.get_value_to_insert(row.get_value_by_name(col[0]), col[1]) for col in names_and_types]
self.session.execute(insert, values)
class SourceRedis(ExternalSource):
def __init__(

View File

@ -4,7 +4,7 @@ import os
from helpers.cluster import ClickHouseCluster
from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout
from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed
from external_sources import SourceMongo, SourceMongoURI, SourceHTTP, SourceHTTPS, SourceRedis
from external_sources import SourceMongo, SourceMongoURI, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra
import math
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@ -117,6 +117,7 @@ LAYOUTS = [
]
SOURCES = [
SourceCassandra("Cassandra", "localhost", "9043", "cassandra1", "9042", "", ""),
SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"),
SourceMongoURI("MongoDB_URI", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"),
SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"),
@ -131,7 +132,7 @@ SOURCES = [
DICTIONARIES = []
# Key-value dictionaries with onle one possible field for key
# Key-value dictionaries with only one possible field for key
SOURCES_KV = [
SourceRedis("RedisSimple", "localhost", "6380", "redis1", "6379", "", "", storage_type="simple"),
SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "", storage_type="hash_map"),
@ -183,7 +184,7 @@ def setup_module(module):
for fname in os.listdir(dict_configs_path):
main_configs.append(os.path.join(dict_configs_path, fname))
cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True, with_redis=True)
node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True, with_redis=True, with_cassandra=True)
cluster.add_instance('clickhouse1')

View File

@ -1,95 +1,92 @@
<test>
<test max_ignored_relative_change="0.3">
<preconditions>
<table_exists>test.hits</table_exists>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<query>select min(Title) from test.hits where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(Title) from test.hits where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(Title) from test.hits where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(Title) from test.hits where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(URL) from test.hits where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(URL) from test.hits where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(URL) from test.hits where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(URL) from test.hits where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(Referer) from test.hits where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(Referer) from test.hits where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(Referer) from test.hits where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(Referer) from test.hits where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(FlashMinor2) from test.hits where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(FlashMinor2) from test.hits where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(FlashMinor2) from test.hits where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(FlashMinor2) from test.hits where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(MobilePhoneModel) from test.hits where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(MobilePhoneModel) from test.hits where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(MobilePhoneModel) from test.hits where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(MobilePhoneModel) from test.hits where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(Params) from test.hits where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(Params) from test.hits where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(Params) from test.hits where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(Params) from test.hits where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(SearchPhrase) from test.hits where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SearchPhrase) from test.hits where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SearchPhrase) from test.hits where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SearchPhrase) from test.hits where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(SocialAction) from test.hits where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SocialAction) from test.hits where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SocialAction) from test.hits where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SocialAction) from test.hits where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(SocialSourcePage) from test.hits where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SocialSourcePage) from test.hits where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SocialSourcePage) from test.hits where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SocialSourcePage) from test.hits where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(ParamOrderID) from test.hits where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(ParamOrderID) from test.hits where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(ParamOrderID) from test.hits where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(ParamOrderID) from test.hits where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatServiceName) from test.hits where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatServiceName) from test.hits where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatServiceName) from test.hits where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatServiceName) from test.hits where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatCampaignID) from test.hits where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatCampaignID) from test.hits where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatCampaignID) from test.hits where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatCampaignID) from test.hits where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatAdID) from test.hits where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatAdID) from test.hits where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatAdID) from test.hits where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatAdID) from test.hits where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatSourceID) from test.hits where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatSourceID) from test.hits where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatSourceID) from test.hits where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatSourceID) from test.hits where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMSource) from test.hits where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMSource) from test.hits where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMSource) from test.hits where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMSource) from test.hits where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMMedium) from test.hits where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMMedium) from test.hits where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMMedium) from test.hits where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMMedium) from test.hits where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMCampaign) from test.hits where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMCampaign) from test.hits where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMCampaign) from test.hits where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMCampaign) from test.hits where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMContent) from test.hits where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMContent) from test.hits where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMContent) from test.hits where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMContent) from test.hits where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMTerm) from test.hits where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMTerm) from test.hits where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMTerm) from test.hits where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMTerm) from test.hits where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(FromTag) from test.hits where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(FromTag) from test.hits where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(FromTag) from test.hits where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(FromTag) from test.hits where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<!-- SocialAction is always empty in hits_100m_single, don't test it -->
<query>select min(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
</test>

View File

@ -1,22 +1,14 @@
<test>
<query>SELECT max(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000000)</query>
<stop_conditions>
<all_of>
<iterations>10</iterations>
</all_of>
</stop_conditions>
<query>SELECT min(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000000)</query>
<query>SELECT sum(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000000)</query>
<query>SELECT max(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000)</query>
<query>SELECT min(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(500000000)</query>
<query>SELECT min(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000)</query>
<query>SELECT max(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(500000000)</query>
<query>SELECT sum(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000)</query>
<query>SELECT min(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(120000000)</query>
<query>SELECT max(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(120000000)</query>
<query>SELECT max(((((number) * 10) * -2) * 3) * 2) + min(((((number) * 10) * -2) * 3) * 2) FROM numbers(120000000)</query>
<query>SELECT max(((((number) * 10) * -2) * 3) * 2) + min(((((number) * 10) * -2) * 3) * 2) FROM numbers(500000000)</query>
</test>

View File

@ -1,8 +1,5 @@
<test>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore([[1], [2]][number % 2 + 2])</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore([[], [2]][number % 2 + 2])</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore([[], []][number % 2 + 2])</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore([[1], [2]][number % 2 + 2])</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], [2]][number % 2 + 2])</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], []][number % 2 + 2])</query>
</test>

View File

@ -1,10 +1,11 @@
<test>
<preconditions>
<table_exists>hits_100m_single</table_exists>
<table_exists>hits_10m_single</table_exists>
</preconditions>
<settings>
<max_threads>1</max_threads>
</settings>
<substitutions>
<substitution>
@ -18,7 +19,7 @@
</substitution>
</substitutions>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(base64Encode({string}))</query>
<query>SELECT count() FROM hits_100m_single WHERE base64Decode(base64Encode({string})) != {string}</query>
<query>SELECT count() FROM hits_100m_single WHERE tryBase64Decode(base64Encode({string})) != {string}</query>
<query>SELECT count() FROM hits_10m_single WHERE NOT ignore(base64Encode({string}))</query>
<query>SELECT count() FROM hits_10m_single WHERE base64Decode(base64Encode({string})) != {string}</query>
<query>SELECT count() FROM hits_10m_single WHERE tryBase64Decode(base64Encode({string})) != {string}</query>
</test>

View File

@ -1,12 +1,12 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
<table_exists>hits_10m_single</table_exists>
</preconditions>
<query>SELECT count() FROM test.hits WHERE NOT ignore(basename(URL))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(basename(Referer))</query>
<settings>
<max_threads>1</max_threads>
</settings>
<query>SELECT count() FROM hits_10m_single WHERE NOT ignore(basename(URL))</query>
<query>SELECT count() FROM hits_10m_single WHERE NOT ignore(basename(Referer))</query>
</test>

View File

@ -1,11 +1,13 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
</preconditions>
<settings>
<max_threads>1</max_threads>
</settings>
<!-- test.hits is too small for a stable test, but there is no ClientIP6 column in hits_100m_single. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(bitAnd(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000')))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(bitOr(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000')))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(bitXor(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000')))</query>

View File

@ -1,6 +1,4 @@
<test>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(number + 1)))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(0xabcd0123cdef4567)))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(number + 1)))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(0xabcd0123cdef4567)))</query>
</test>

View File

@ -33,7 +33,7 @@
<substitution>
<name>num_rows</name>
<values>
<value>10000000</value>
<value>20000000</value>
</values>
</substitution>
</substitutions>

View File

@ -1,7 +1,4 @@
<test>
<test max_ignored_relative_change="0.3">
<preconditions>
<table_exists>hits_100m_single</table_exists>
<table_exists>hits_10m_single</table_exists>

View File

@ -3,7 +3,7 @@
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 10000000);</query>

View File

@ -1,92 +1,76 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WatchID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(JavaEnable)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(GoodEvent)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CounterID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ClientIP)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RegionID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(UserID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CounterClass)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(OS)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(UserAgent)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Refresh)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsRobot)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResolutionWidth)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResolutionHeight)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResolutionDepth)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FlashMajor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FlashMinor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(NetMajor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(NetMinor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(UserAgentMajor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CookieEnable)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(JavascriptEnable)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsMobile)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(MobilePhone)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IPNetworkID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(TraficSourceID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SearchEngineID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SearchPhrase)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(AdvEngineID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsArtifical)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WindowClientWidth)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WindowClientHeight)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ClientTimeZone)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion1)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion2)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion3)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion4)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CodeVersion)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsLink)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsDownload)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsNotBounce)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FUniqID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(HID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsOldCounter)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsEvent)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsParameter)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DontCountHits)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WithHash)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Age)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Sex)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Income)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Interests)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Robotness)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RemoteIP)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WindowName)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(OpenerName)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(HistoryLength)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(HTTPError)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SendTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DNSTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ConnectTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResponseStartTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResponseEndTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FetchTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RedirectTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DOMInteractiveTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DOMContentLoadedTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DOMCompleteTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(LoadEventStartTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(LoadEventEndTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(NSToDOMContentLoadedTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FirstPaintTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RedirectCount)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SocialSourceNetworkID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ParamPrice)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ParamCurrencyID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(HasGCLID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RefererHash)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(URLHash)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CLID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(YCLID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RequestNum)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RequestTry)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WatchID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(JavaEnable)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(GoodEvent)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CounterID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ClientIP)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RegionID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(UserID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CounterClass)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(OS)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(UserAgent)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Refresh)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResolutionWidth)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResolutionHeight)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResolutionDepth)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FlashMajor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FlashMinor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(NetMajor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(NetMinor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(UserAgentMajor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CookieEnable)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(JavascriptEnable)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsMobile)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(MobilePhone)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IPNetworkID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(TraficSourceID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SearchEngineID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SearchPhrase)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(AdvEngineID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsArtifical)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WindowClientWidth)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WindowClientHeight)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ClientTimeZone)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SilverlightVersion1)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SilverlightVersion2)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SilverlightVersion3)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SilverlightVersion4)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CodeVersion)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsLink)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsDownload)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsNotBounce)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FUniqID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(HID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsOldCounter)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsEvent)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsParameter)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DontCountHits)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WithHash)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Age)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Sex)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Income)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Interests)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Robotness)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RemoteIP)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WindowName)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(OpenerName)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(HistoryLength)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(HTTPError)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SendTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DNSTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ConnectTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResponseStartTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResponseEndTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FetchTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SocialSourceNetworkID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ParamPrice)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ParamCurrencyID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(HasGCLID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RefererHash)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(URLHash)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CLID)) SETTINGS max_threads = 1</query>
</test>

View File

@ -1,6 +1,4 @@
<test max_ignored_relative_change="0.4">
<create_query>CREATE TABLE IF NOT EXISTS table_{format} (x UInt64) ENGINE = File(`{format}`)</create_query>
<substitutions>
<substitution>
<name>format</name>
@ -13,22 +11,30 @@
<value>JSONEachRow</value>
<value>TSKV</value>
<value>RowBinary</value>
<value>Native</value>
<value>XML</value>
<value>Parquet</value>
<value>ODBCDriver2</value>
<value>Null</value>
<value>MySQLWire</value>
</values>
</substitution>
</substitutions>
<preconditions>
<table_exists>test.hits</table_exists>
</preconditions>
<substitutions>
<substitution>
<name>format_fast</name>
<values>
<value>Native</value>
<value>Null</value>
</values>
</substitution>
</substitutions>
<query>INSERT INTO table_{format} SELECT number FROM numbers(10000000)</query>
<create_query>CREATE TABLE IF NOT EXISTS table_{format} (x UInt64) ENGINE = File(`{format}`)</create_query>
<create_query>CREATE TABLE IF NOT EXISTS table_{format_fast} (x UInt64) ENGINE = File(`{format_fast}`)</create_query>
<query>INSERT INTO table_{format} SELECT number FROM numbers(10000000)</query>
<query>INSERT INTO table_{format_fast} SELECT number FROM numbers(20000000)</query>
<drop_query>DROP TABLE IF EXISTS table_{format}</drop_query>
<drop_query>DROP TABLE IF EXISTS table_{format_fast}</drop_query>
</test>
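
For context, a minimal sketch of what a single expanded instance of the format test above would run, assuming the usual substitution behaviour of the performance-test harness; the Parquet value is picked from the list above purely for illustration:

-- Illustrative expansion for one substitution value (Parquet); the harness
-- repeats the same statements for every <value> listed in the substitution.
CREATE TABLE IF NOT EXISTS table_Parquet (x UInt64) ENGINE = File(`Parquet`);
INSERT INTO table_Parquet SELECT number FROM numbers(10000000);
DROP TABLE IF EXISTS table_Parquet;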

View File

@ -4,10 +4,10 @@
INSERT INTO polygons
WITH number + 1 AS radius
SELECT [arrayMap(x -> (cos(x / 90. * pi()) * radius, sin(x / 90. * pi()) * radius), range(180))]
FROM numbers(100000)
FROM numbers(1000000)
</create_query>
<query>SELECT pointInPolygon((100, 100), polygon) FROM polygons</query>
<query>SELECT pointInPolygon((100, 100), polygon) FROM polygons FORMAT Null</query>
<drop_query>DROP TABLE IF EXISTS polygons</drop_query>
</test>
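
In the polygon test above, the arrayMap lambda samples a circle: x / 90. * pi() radians is a 2-degree step, so range(180) yields a 180-vertex approximation of a circle of radius number + 1 per row. A minimal sketch of the same construction at a coarser step, shown only to make the arithmetic visible (the 4-point range is illustrative):

-- Four vertices at 2-degree spacing on the unit circle
-- (x / 90. * pi() radians for x = 0, 1, 2, 3).
SELECT arrayMap(x -> (cos(x / 90. * pi()), sin(x / 90. * pi())), range(4)) AS vertices;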

View File

@ -1,16 +1,16 @@
<test>
<preconditions>
<!-- We use a smaller or larger table depending on the result set of a particular
query, so that it runs neither too fast nor too slow. -->
<table_exists>hits_10m_single</table_exists>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<query>SELECT count() FROM hits_100m_single WHERE UserID IN (SELECT UserID FROM hits_100m_single WHERE AdvEngineID != 0)</query>
<!-- UserID is in the primary key; run in one thread so that the query is not too fast. -->
<query>SELECT count() FROM hits_100m_single WHERE UserID IN (SELECT UserID FROM hits_100m_single WHERE AdvEngineID != 0) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_10m_single WHERE UserID IN (SELECT UserID FROM hits_10m_single)</query>
<query>SELECT count() FROM hits_10m_single WHERE SearchPhrase IN (SELECT SearchPhrase FROM hits_10m_single)</query>
<query>SELECT count() FROM hits_10m_single WHERE URL IN (SELECT URL FROM hits_10m_single WHERE AdvEngineID != 0)</query>
<query>SELECT count() FROM hits_100m_single WHERE URL IN (SELECT URL FROM hits_100m_single WHERE AdvEngineID != 0)</query>
<query>SELECT count() FROM hits_10m_single WHERE URL IN (SELECT URL FROM hits_10m_single WHERE SearchEngineID != 0)</query>
<query>SELECT count() FROM hits_10m_single WHERE RegionID IN (SELECT RegionID FROM hits_10m_single)</query>
<query>SELECT count() FROM hits_100m_single WHERE RegionID IN (SELECT RegionID FROM hits_100m_single)</query>
</test>

File diff suppressed because one or more lines are too long

View File

@ -7,7 +7,6 @@
<substitution>
<name>scale</name>
<values>
<value>100000</value>
<value>1000000</value>
</values>
</substitution>

View File

@ -55,7 +55,7 @@
<query>SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10</query>
<query>SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10</query>
<query>SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_100m_single GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10</query>
<query>SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate &lt;= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10</query>
<query>SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate &lt;= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10 SETTINGS max_threads = 1</query>
<query>SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate &lt;= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10</query>
<query>SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate &lt;= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000</query>
<query>SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate &lt;= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000</query>

View File

@ -21,7 +21,7 @@ BUILD_TARGETS=clickhouse
BUILD_TYPE=Debug
ENABLE_EMBEDDED_COMPILER=0
CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_REDIS=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0 -D ENABLE_SSL=0 -D ENABLE_POCO_NETSSL=0"
CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_REDIS=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0 -D ENABLE_SSL=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_CASSANDRA=0"
[[ $(uname) == "FreeBSD" ]] && COMPILER_PACKAGE_VERSION=devel && export COMPILER_PATH=/usr/local/bin