Merge branch 'master' into segmentator-fix

This commit is contained in:
Sergei Trifonov 2023-03-24 16:19:57 +01:00 committed by GitHub
commit a05aa5c1c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
255 changed files with 10160 additions and 2846 deletions

View File

@ -353,12 +353,14 @@ if (COMPILER_CLANG)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main")
# The LLVM MachO linker (ld64.lld) generates by default unwind info in 'compact' format which the internal unwinder doesn't support
# and the server will not come up ('invalid compact unwind encoding'). Disable it.
# You will see warning during the build "ld64.lld: warning: Option `-no_compact_unwind' is undocumented. Should lld implement it?".
# Yes, ld64.lld does not document the option, likely for compat with Apple's system ld after which ld64.lld is modeled after and
# which also does not document it.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no_compact_unwind")
# The LLVM MachO linker (ld64.lld, used in native builds) generates by default unwind info in 'compact' format which the internal
# unwinder doesn't support and the server will not come up ('invalid compact unwind encoding'). Disable it. You will see warning
# during the build "ld64.lld: warning: Option `-no_compact_unwind' is undocumented. Should lld implement it?". Yes, ld64.lld does
# not document the option, likely for compat with Apple's system ld, which ld64.lld is modeled after and which also does not
# document it.
if (NOT CMAKE_CROSSCOMPILING)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no_compact_unwind")
endif ()
endif()
# Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead.

@ -1 +1 @@
Subproject commit d80af319f5f047067b956b2fe93a6c00038c1e0d
Subproject commit 4bfaeb31dd0ef13f025221f93c138974a3e0a22a

View File

@ -31,6 +31,40 @@
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//
//-----------------------------------------------------------------------------
// Block read - on little-endian machines this is a single load,
// while on big-endian or unknown machines the byte accesses should
// still get optimized into the most efficient instruction.
static inline uint32_t getblock ( const uint32_t * p )
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return *p;
#else
const uint8_t *c = (const uint8_t *)p;
return (uint32_t)c[0] |
(uint32_t)c[1] << 8 |
(uint32_t)c[2] << 16 |
(uint32_t)c[3] << 24;
#endif
}
static inline uint64_t getblock ( const uint64_t * p )
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return *p;
#else
const uint8_t *c = (const uint8_t *)p;
return (uint64_t)c[0] |
(uint64_t)c[1] << 8 |
(uint64_t)c[2] << 16 |
(uint64_t)c[3] << 24 |
(uint64_t)c[4] << 32 |
(uint64_t)c[5] << 40 |
(uint64_t)c[6] << 48 |
(uint64_t)c[7] << 56;
#endif
}
//-----------------------------------------------------------------------------
@ -52,7 +86,7 @@ uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
uint32_t k = getblock((const uint32_t *)data);
k *= m;
k ^= k >> r;
@ -105,7 +139,7 @@ uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed )
while(data != end)
{
uint64_t k = *data++;
uint64_t k = getblock(data++);
k *= m;
k ^= k >> r;
@ -151,12 +185,12 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
while(len >= 8)
{
uint32_t k1 = *data++;
uint32_t k1 = getblock(data++);
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
uint32_t k2 = getblock(data++);
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
@ -164,7 +198,7 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
if(len >= 4)
{
uint32_t k1 = *data++;
uint32_t k1 = getblock(data++);
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
@ -215,7 +249,7 @@ uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed )
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
uint32_t k = getblock((const uint32_t *)data);
mmix(h,k);
@ -278,7 +312,7 @@ public:
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
uint32_t k = getblock((const uint32_t *)data);
mmix(m_hash,k);
@ -427,7 +461,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
while(len >= 4)
{
d = *(uint32_t *)data;
d = getblock((const uint32_t *)data);
t = (t >> sr) | (d << sl);
uint32_t k = t;
@ -492,7 +526,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
{
while(len >= 4)
{
uint32_t k = *(uint32_t *)data;
uint32_t k = getblock((const uint32_t *)data);
MIX(h,k,m);

View File

@ -55,14 +55,32 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r )
FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
{
uint32_t res;
memcpy(&res, p + i, sizeof(res));
return res;
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return p[i];
#else
const uint8_t *c = (const uint8_t *)&p[i];
return (uint32_t)c[0] |
(uint32_t)c[1] << 8 |
(uint32_t)c[2] << 16 |
(uint32_t)c[3] << 24;
#endif
}
FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return p[i];
#else
const uint8_t *c = (const uint8_t *)&p[i];
return (uint64_t)c[0] |
(uint64_t)c[1] << 8 |
(uint64_t)c[2] << 16 |
(uint64_t)c[3] << 24 |
(uint64_t)c[4] << 32 |
(uint64_t)c[5] << 40 |
(uint64_t)c[6] << 48 |
(uint64_t)c[7] << 56;
#endif
}
//-----------------------------------------------------------------------------
@ -329,9 +347,13 @@ void MurmurHash3_x64_128 ( const void * key, const size_t len,
h1 += h2;
h2 += h1;
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
#else
((uint64_t*)out)[0] = h2;
((uint64_t*)out)[1] = h1;
#endif
}
//-----------------------------------------------------------------------------

View File

@ -0,0 +1,530 @@
#!/bin/bash
ckhost="localhost"
ckport=("9000" "9001" "9002" "9003")
WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
OUTPUT_DIR="${WORKING_DIR}/output"
LOG_DIR="${OUTPUT_DIR}/log"
RAWDATA_DIR="${WORKING_DIR}/rawdata_dir"
database_dir="${WORKING_DIR}/database_dir"
CLIENT_SCRIPTS_DIR="${WORKING_DIR}/client_scripts"
LOG_PACK_FILE="$(date +%Y-%m-%d-%H-%M-%S)"
QUERY_FILE="queries_ssb.sql"
SERVER_BIND_CMD[0]="numactl -m 0 -N 0"
SERVER_BIND_CMD[1]="numactl -m 0 -N 0"
SERVER_BIND_CMD[2]="numactl -m 1 -N 1"
SERVER_BIND_CMD[3]="numactl -m 1 -N 1"
CLIENT_BIND_CMD=""
SSB_GEN_FACTOR=20
TABLE_NAME="lineorder_flat"
TABLE_ROWS="119994608"
CODEC_CONFIG="lz4 deflate zstd"
# define instance number
inst_num=$1
if [ ! -n "$1" ]; then
echo "Please clarify instance number from 1,2,3 or 4"
exit 1
else
echo "Benchmarking with instance number:$1"
fi
if [ ! -d "$OUTPUT_DIR" ]; then
mkdir $OUTPUT_DIR
fi
if [ ! -d "$LOG_DIR" ]; then
mkdir $LOG_DIR
fi
if [ ! -d "$RAWDATA_DIR" ]; then
mkdir $RAWDATA_DIR
fi
# define different directories
dir_server=("" "_s2" "_s3" "_s4")
ckreadSql="
CREATE TABLE customer
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);
CREATE TABLE lineorder
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
CREATE TABLE part
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;
CREATE TABLE supplier
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;
"
supplier_table="
CREATE TABLE supplier
(
S_SUPPKEY UInt32,
S_NAME String,
S_ADDRESS String,
S_CITY LowCardinality(String),
S_NATION LowCardinality(String),
S_REGION LowCardinality(String),
S_PHONE String
)
ENGINE = MergeTree ORDER BY S_SUPPKEY;
"
part_table="
CREATE TABLE part
(
P_PARTKEY UInt32,
P_NAME String,
P_MFGR LowCardinality(String),
P_CATEGORY LowCardinality(String),
P_BRAND LowCardinality(String),
P_COLOR LowCardinality(String),
P_TYPE LowCardinality(String),
P_SIZE UInt8,
P_CONTAINER LowCardinality(String)
)
ENGINE = MergeTree ORDER BY P_PARTKEY;
"
lineorder_table="
CREATE TABLE lineorder
(
LO_ORDERKEY UInt32,
LO_LINENUMBER UInt8,
LO_CUSTKEY UInt32,
LO_PARTKEY UInt32,
LO_SUPPKEY UInt32,
LO_ORDERDATE Date,
LO_ORDERPRIORITY LowCardinality(String),
LO_SHIPPRIORITY UInt8,
LO_QUANTITY UInt8,
LO_EXTENDEDPRICE UInt32,
LO_ORDTOTALPRICE UInt32,
LO_DISCOUNT UInt8,
LO_REVENUE UInt32,
LO_SUPPLYCOST UInt32,
LO_TAX UInt8,
LO_COMMITDATE Date,
LO_SHIPMODE LowCardinality(String)
)
ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY);
"
customer_table="
CREATE TABLE customer
(
C_CUSTKEY UInt32,
C_NAME String,
C_ADDRESS String,
C_CITY LowCardinality(String),
C_NATION LowCardinality(String),
C_REGION LowCardinality(String),
C_PHONE String,
C_MKTSEGMENT LowCardinality(String)
)
ENGINE = MergeTree ORDER BY (C_CUSTKEY);
"
lineorder_flat_table="
SET max_memory_usage = 20000000000;
CREATE TABLE lineorder_flat
ENGINE = MergeTree
PARTITION BY toYear(LO_ORDERDATE)
ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
SELECT
l.LO_ORDERKEY AS LO_ORDERKEY,
l.LO_LINENUMBER AS LO_LINENUMBER,
l.LO_CUSTKEY AS LO_CUSTKEY,
l.LO_PARTKEY AS LO_PARTKEY,
l.LO_SUPPKEY AS LO_SUPPKEY,
l.LO_ORDERDATE AS LO_ORDERDATE,
l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
l.LO_QUANTITY AS LO_QUANTITY,
l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
l.LO_DISCOUNT AS LO_DISCOUNT,
l.LO_REVENUE AS LO_REVENUE,
l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
l.LO_TAX AS LO_TAX,
l.LO_COMMITDATE AS LO_COMMITDATE,
l.LO_SHIPMODE AS LO_SHIPMODE,
c.C_NAME AS C_NAME,
c.C_ADDRESS AS C_ADDRESS,
c.C_CITY AS C_CITY,
c.C_NATION AS C_NATION,
c.C_REGION AS C_REGION,
c.C_PHONE AS C_PHONE,
c.C_MKTSEGMENT AS C_MKTSEGMENT,
s.S_NAME AS S_NAME,
s.S_ADDRESS AS S_ADDRESS,
s.S_CITY AS S_CITY,
s.S_NATION AS S_NATION,
s.S_REGION AS S_REGION,
s.S_PHONE AS S_PHONE,
p.P_NAME AS P_NAME,
p.P_MFGR AS P_MFGR,
p.P_CATEGORY AS P_CATEGORY,
p.P_BRAND AS P_BRAND,
p.P_COLOR AS P_COLOR,
p.P_TYPE AS P_TYPE,
p.P_SIZE AS P_SIZE,
p.P_CONTAINER AS P_CONTAINER
FROM lineorder AS l
INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
show settings ilike 'max_memory_usage';
"
function insert_data(){
echo "insert_data:$1"
create_table_prefix="clickhouse client --host ${ckhost} --port $2 --multiquery -q"
insert_data_prefix="clickhouse client --query "
case $1 in
all)
clickhouse client --host ${ckhost} --port $2 --multiquery -q"$ckreadSql" && {
${insert_data_prefix} "INSERT INTO customer FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/customer.tbl --port=$2
${insert_data_prefix} "INSERT INTO part FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/part.tbl --port=$2
${insert_data_prefix} "INSERT INTO supplier FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl --port=$2
${insert_data_prefix} "INSERT INTO lineorder FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl --port=$2
}
${create_table_prefix}"${lineorder_flat_table}"
;;
customer)
echo ${create_table_prefix}\"${customer_table}\"
${create_table_prefix}"${customer_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
part)
echo ${create_table_prefix}\"${part_table}\"
${create_table_prefix}"${part_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
supplier)
echo ${create_table_prefix}"${supplier_table}"
${create_table_prefix}"${supplier_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
lineorder)
echo ${create_table_prefix}"${lineorder_table}"
${create_table_prefix}"${lineorder_table}" && {
echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2"
${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2
}
;;
lineorder_flat)
echo ${create_table_prefix}"${lineorder_flat_table}"
${create_table_prefix}"${lineorder_flat_table}"
return 0
;;
*)
exit 0
;;
esac
}
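# check_sql <table> <port>: probe the table with 'select * ... limit 1' to detect whether it exists and is readable.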
function check_sql(){
select_sql="select * from "$1" limit 1"
clickhouse client --host ${ckhost} --port $2 --multiquery -q"${select_sql}"
}
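# check_table: for every instance, insert any missing tables and verify that lineorder_flat contains the expected number of rows.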
function check_table(){
checknum=0
source_tables="customer part supplier lineorder lineorder_flat"
test_tables=${1:-${source_tables}}
echo "Checking table data required in server..."
for i in $(seq 0 $[inst_num-1])
do
for j in `echo ${test_tables}`
do
check_sql $j ${ckport[i]} &> /dev/null || {
let checknum+=1 && insert_data "$j" ${ckport[i]}
}
done
done
for i in $(seq 0 $[inst_num-1])
do
echo "clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q\"select count() from ${TABLE_NAME};\""
var=$(clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"select count() from ${TABLE_NAME};")
if [ $var -eq $TABLE_ROWS ];then
echo "Instance_${i} Table data integrity check OK -> Rows:$var"
else
echo "Instance_${i} Table data integrity check Failed -> Rows:$var"
exit 1
fi
done
if [ $checknum -gt 0 ];then
echo "Need sleep 10s after first table data insertion...$checknum"
sleep 10
fi
}
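# check_instance <port>: wait up to 10 seconds for a clickhouse server to start listening on the given port.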
function check_instance(){
instance_alive=0
for i in {1..10}
do
sleep 1
netstat -nltp | grep ${1} > /dev/null
if [ $? -ne 1 ];then
instance_alive=1
break
fi
done
if [ $instance_alive -eq 0 ];then
echo "check_instance -> clickhouse server instance faild to launch due to 10s timeout!"
exit 1
else
echo "check_instance -> clickhouse server instance launch successfully!"
fi
}
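# start_clickhouse_for_insertion <codec>: start one numactl-bound server per instance, writing server logs to LOG_DIR.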
function start_clickhouse_for_insertion(){
echo "start_clickhouse_for_insertion"
for i in $(seq 0 $[inst_num-1])
do
echo "cd ${database_dir}/$1${dir_server[i]}"
echo "${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&${LOG_DIR}/${1}_${i}_server_log& > /dev/null"
cd ${database_dir}/$1${dir_server[i]}
${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&${LOG_DIR}/${1}_${i}_server_log& > /dev/null
check_instance ${ckport[i]}
done
}
function start_clickhouse_for_stressing(){
echo "start_clickhouse_for_stressing"
for i in $(seq 0 $[inst_num-1])
do
echo "cd ${database_dir}/$1${dir_server[i]}"
echo "${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&/dev/null&"
cd ${database_dir}/$1${dir_server[i]}
${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&/dev/null&
check_instance ${ckport[i]}
done
}
yum -y install git make gcc sudo net-tools &> /dev/null
pip3 install clickhouse_driver numpy &> /dev/null
test -d ${RAWDATA_DIR}/ssb-dbgen || git clone https://github.com/vadimtk/ssb-dbgen.git ${RAWDATA_DIR}/ssb-dbgen && cd ${RAWDATA_DIR}/ssb-dbgen
if [ ! -f ${RAWDATA_DIR}/ssb-dbgen/dbgen ];then
make && {
test -f ${RAWDATA_DIR}/ssb-dbgen/customer.tbl || echo y |./dbgen -s ${SSB_GEN_FACTOR} -T c
test -f ${RAWDATA_DIR}/ssb-dbgen/part.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T p
test -f ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T s
test -f ${RAWDATA_DIR}/ssb-dbgen/date.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T d
test -f ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T l
}
else
test -f ${RAWDATA_DIR}/ssb-dbgen/customer.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T c
test -f ${RAWDATA_DIR}/ssb-dbgen/part.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T p
test -f ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T s
test -f ${RAWDATA_DIR}/ssb-dbgen/date.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T d
test -f ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T l
fi
filenum=`find ${RAWDATA_DIR}/ssb-dbgen/ -name "*.tbl" | wc -l`
if [ $filenum -ne 5 ];then
echo "generate ssb data file *.tbl faild"
exit 1
fi
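# kill_instance: stop all clickhouse servers and verify that none of the benchmark ports are still listening.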
function kill_instance(){
instance_alive=1
for i in {1..2}
do
pkill clickhouse && sleep 5
instance_alive=0
for i in $(seq 0 $[inst_num-1])
do
netstat -nltp | grep ${ckport[i]} > /dev/null
if [ $? -ne 1 ];then
instance_alive=1
break;
fi
done
if [ $instance_alive -eq 0 ];then
break;
fi
done
if [ $instance_alive -eq 0 ];then
echo "kill_instance OK!"
else
echo "kill_instance Failed -> clickhouse server instance still alive due to 10s timeout"
exit 1
fi
}
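# run_test <codec>: full benchmark cycle for one codec - start servers, load and verify data, restart for stressing, run client_stressing_test.py and check its log.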
function run_test(){
is_xml=0
for i in $(seq 0 $[inst_num-1])
do
if [ -f ${database_dir}/${1}${dir_server[i]}/config_${1}${dir_server[i]}.xml ]; then
is_xml=$[is_xml+1]
fi
done
if [ $is_xml -eq $inst_num ];then
echo "Benchmark with $inst_num instance"
start_clickhouse_for_insertion ${1}
for i in $(seq 0 $[inst_num-1])
do
clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"show databases;" >/dev/null
done
if [ $? -eq 0 ];then
check_table
fi
kill_instance
if [ $1 == "deflate" ];then
test -f ${LOG_DIR}/${1}_server_log && deflatemsg=`cat ${LOG_DIR}/${1}_server_log | grep DeflateJobHWPool`
if [ -n "$deflatemsg" ];then
echo ------------------------------------------------------
echo $deflatemsg
echo ------------------------------------------------------
fi
fi
echo "Check table data required in server_${1} -> Done! "
start_clickhouse_for_stressing ${1}
for i in $(seq 0 $[inst_num-1])
do
clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"show databases;" >/dev/null
done
if [ $? -eq 0 ];then
test -d ${CLIENT_SCRIPTS_DIR} && cd ${CLIENT_SCRIPTS_DIR}
echo "Client stressing... "
echo "${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log"
${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log
echo "Completed client stressing, checking log... "
finish_log=`grep "Finished" ${LOG_DIR}/${1}.log | wc -l`
if [ $finish_log -eq 1 ] ;then
kill_instance
test -f ${LOG_DIR}/${1}.log && echo "${1}.log ===> ${LOG_DIR}/${1}.log"
else
kill_instance
echo "No find 'Finished' in client log -> Performance test may fail"
exit 1
fi
else
echo "${1} clickhouse server start fail"
exit 1
fi
else
echo "clickhouse server start fail -> Please check xml files required in ${database_dir} for each instance"
exit 1
fi
}
function clear_log(){
if [ -d "$LOG_DIR" ]; then
cd ${LOG_DIR} && rm -rf *
fi
}
function gather_log_for_codec(){
cd ${OUTPUT_DIR} && mkdir -p ${LOG_PACK_FILE}/${1}
cp -rf ${LOG_DIR} ${OUTPUT_DIR}/${LOG_PACK_FILE}/${1}
}
function pack_log(){
if [ -e "${OUTPUT_DIR}/run.log" ]; then
cp ${OUTPUT_DIR}/run.log ${OUTPUT_DIR}/${LOG_PACK_FILE}/
fi
echo "Please check all log information in ${OUTPUT_DIR}/${LOG_PACK_FILE}"
}
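# setup_check: verify that IAA devices are enabled via accel-config and report library, server and kernel (idxd) information.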
function setup_check(){
iax_dev_num=`accel-config list | grep iax | wc -l`
if [ $iax_dev_num -eq 0 ] ;then
iax_dev_num=`accel-config list | grep iax | wc -l`
if [ $iax_dev_num -eq 0 ] ;then
echo "No IAA devices available -> Please check IAA hardware setup manually!"
exit 1
else
echo "IAA enabled devices number:$iax_dev_num"
fi
else
echo "IAA enabled devices number:$iax_dev_num"
fi
libaccel_version=`accel-config -v`
clickhouser_version=`clickhouse server --version`
kernel_dxd_log=`dmesg | grep dxd`
echo "libaccel_version:$libaccel_version"
echo "clickhouser_version:$clickhouser_version"
echo -e "idxd section in kernel log:\n$kernel_dxd_log"
}
setup_check
export CLICKHOUSE_WATCHDOG_ENABLE=0
for i in ${CODEC_CONFIG[@]}
do
clear_log
codec=${i}
echo "run test------------$codec"
run_test $codec
gather_log_for_codec $codec
done
pack_log
echo "Done."

View File

@ -0,0 +1,278 @@
from operator import eq
import os
import random
import time
import sys
from clickhouse_driver import Client
import numpy as np
import subprocess
import multiprocessing
from multiprocessing import Manager
warmup_runs = 10
calculated_runs = 10
seconds = 30
max_instances_number = 8
retest_number = 3
retest_tolerance = 10
def checkInt(str):
try:
int(str)
return True
except ValueError:
return False
def setup_client(index):
if index < 4:
port_idx = index
else:
port_idx = index + 4
client = Client(
host="localhost",
database="default",
user="default",
password="",
port="900%d" % port_idx,
)
union_mode_query = "SET union_default_mode='DISTINCT'"
client.execute(union_mode_query)
return client
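# Run the query `loop` times on every client to warm up caches before measuring.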
def warm_client(clientN, clientL, query, loop):
for c_idx in range(clientN):
for _ in range(loop):
clientL[c_idx].execute(query)
def read_queries(queries_list):
queries = list()
queries_id = list()
with open(queries_list, "r") as f:
for line in f:
line = line.rstrip()
line = line.split("$")
queries_id.append(line[0])
queries.append(line[1])
return queries_id, queries
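# Execute the query `loop` times on one client, recording per-query latency; prints the client's P95 latency and QPS.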
def run_task(client, cname, query, loop, query_latency):
start_time = time.time()
for i in range(loop):
client.execute(query)
query_latency.append(client.last_query.elapsed)
end_time = time.time()
p95 = np.percentile(query_latency, 95)
print(
"CLIENT: {0} end. -> P95: %f, qps: %f".format(cname)
% (p95, loop / (end_time - start_time))
)
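# Spawn one process per client running run_task concurrently; returns the total wall time and the overall P95 latency (in ms).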
def run_multi_clients(clientN, clientList, query, loop):
client_pids = {}
start_time = time.time()
manager = multiprocessing.Manager()
query_latency_list0 = manager.list()
query_latency_list1 = manager.list()
query_latency_list2 = manager.list()
query_latency_list3 = manager.list()
query_latency_list4 = manager.list()
query_latency_list5 = manager.list()
query_latency_list6 = manager.list()
query_latency_list7 = manager.list()
for c_idx in range(clientN):
client_name = "Role_%d" % c_idx
if c_idx == 0:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list0),
)
elif c_idx == 1:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list1),
)
elif c_idx == 2:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list2),
)
elif c_idx == 3:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list3),
)
elif c_idx == 4:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list4),
)
elif c_idx == 5:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list5),
)
elif c_idx == 6:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list6),
)
elif c_idx == 7:
client_pids[c_idx] = multiprocessing.Process(
target=run_task,
args=(clientList[c_idx], client_name, query, loop, query_latency_list7),
)
else:
print("ERROR: CLIENT number dismatch!!")
exit()
print("CLIENT: %s start" % client_name)
client_pids[c_idx].start()
for c_idx in range(clientN):
client_pids[c_idx].join()
end_time = time.time()
totalT = end_time - start_time
query_latencyTotal = list()
for item in query_latency_list0:
query_latencyTotal.append(item)
for item in query_latency_list1:
query_latencyTotal.append(item)
for item in query_latency_list2:
query_latencyTotal.append(item)
for item in query_latency_list3:
query_latencyTotal.append(item)
for item in query_latency_list4:
query_latencyTotal.append(item)
for item in query_latency_list5:
query_latencyTotal.append(item)
for item in query_latency_list6:
query_latencyTotal.append(item)
for item in query_latency_list7:
query_latencyTotal.append(item)
totalP95 = np.percentile(query_latencyTotal, 95) * 1000
return totalT, totalP95
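# Calibration helper: run the query `loop` times on one client; the measurements are discarded, only the wall time of the whole pass matters.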
def run_task_calculated(client, cname, query, loop):
query_latency = list()
start_time = time.time()
for i in range(loop):
client.execute(query)
query_latency.append(client.last_query.elapsed)
end_time = time.time()
p95 = np.percentile(query_latency, 95)
def run_multi_clients_calculated(clientN, clientList, query, loop):
client_pids = {}
start_time = time.time()
for c_idx in range(clientN):
client_name = "Role_%d" % c_idx
client_pids[c_idx] = multiprocessing.Process(
target=run_task_calculated,
args=(clientList[c_idx], client_name, query, loop),
)
client_pids[c_idx].start()
for c_idx in range(clientN):
client_pids[c_idx].join()
end_time = time.time()
totalT = end_time - start_time
return totalT
if __name__ == "__main__":
client_number = 1
queries = list()
queries_id = list()
if len(sys.argv) != 3:
print(
"usage: python3 client_stressing_test.py [queries_file_path] [client_number]"
)
sys.exit()
else:
queries_list = sys.argv[1]
client_number = int(sys.argv[2])
print(
"queries_file_path: %s, client_number: %d" % (queries_list, client_number)
)
if not os.path.isfile(queries_list) or not os.access(queries_list, os.R_OK):
print("please check the right path for queries file")
sys.exit()
if (
not checkInt(sys.argv[2])
or int(sys.argv[2]) > max_instances_number
or int(sys.argv[2]) < 1
):
print("client_number should be in [1~%d]" % max_instances_number)
sys.exit()
client_list = {}
queries_id, queries = read_queries(queries_list)
for c_idx in range(client_number):
client_list[c_idx] = setup_client(c_idx)
# clear cache
os.system("sync; echo 3 > /proc/sys/vm/drop_caches")
print("###Polit Run Begin")
for i in queries:
warm_client(client_number, client_list, i, 1)
print("###Polit Run End -> Start stressing....")
query_index = 0
for q in queries:
print(
"\n###START -> Index: %d, ID: %s, Query: %s"
% (query_index, queries_id[query_index], q)
)
warm_client(client_number, client_list, q, warmup_runs)
print("###Warm Done!")
for j in range(0, retest_number):
totalT = run_multi_clients_calculated(
client_number, client_list, q, calculated_runs
)
curr_loop = int(seconds * calculated_runs / totalT) + 1
print(
"###Calculation Done! -> loopN: %d, expected seconds:%d"
% (curr_loop, seconds)
)
print("###Stress Running! -> %d iterations......" % curr_loop)
totalT, totalP95 = run_multi_clients(
client_number, client_list, q, curr_loop
)
if totalT > (seconds - retest_tolerance) and totalT < (
seconds + retest_tolerance
):
break
else:
print(
"###totalT:%d is far way from expected seconds:%d. Run again ->j:%d!"
% (totalT, seconds, j)
)
print(
"###Completed! -> ID: %s, clientN: %d, totalT: %.2f s, latencyAVG: %.2f ms, P95: %.2f ms, QPS_Final: %.2f"
% (
queries_id[query_index],
client_number,
totalT,
totalT * 1000 / (curr_loop * client_number),
totalP95,
((curr_loop * client_number) / totalT),
)
)
query_index += 1
print("###Finished!")

View File

@ -0,0 +1,10 @@
Q1.1$SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25;
Q2.1$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
Q2.2$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
Q2.3$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' GROUP BY year,P_BRAND ORDER BY year,P_BRAND;
Q3.1$SELECT C_NATION,S_NATION,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 GROUP BY C_NATION,S_NATION,year ORDER BY year ASC,revenue DESC;
Q3.2$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC;
Q3.3$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC;
Q4.1$SELECT toYear(LO_ORDERDATE) AS year,C_NATION,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,C_NATION ORDER BY year ASC,C_NATION ASC;
Q4.2$SELECT toYear(LO_ORDERDATE) AS year,S_NATION,P_CATEGORY,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,S_NATION,P_CATEGORY ORDER BY year ASC,S_NATION ASC,P_CATEGORY ASC;
Q4.3$SELECT toYear(LO_ORDERDATE) AS year,S_CITY,P_BRAND,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14' GROUP BY year,S_CITY,P_BRAND ORDER BY year ASC,S_CITY ASC,P_BRAND ASC;

View File

@ -0,0 +1,6 @@
WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
if [ ! -d "${WORKING_DIR}/output" ]; then
mkdir ${WORKING_DIR}/output
fi
bash allin1_ssb.sh 2 > ${WORKING_DIR}/output/run.log
echo "Please check log in: ${WORKING_DIR}/output/run.log"

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>deflate_qpl</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8124</http_port>
<tcp_port>9001</tcp_port>
<mysql_port>9005</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>deflate_qpl</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>lz4</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8124</http_port>
<tcp_port>9001</tcp_port>
<mysql_port>9005</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>lz4</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>zstd</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

View File

@ -0,0 +1,49 @@
<!-- This file was generated automatically.
Do not edit it: it is likely to be discarded and generated again before it's read next time.
Files used to generate this file:
config.xml -->
<!-- Config that is used when server is run without config file. --><clickhouse>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8124</http_port>
<tcp_port>9001</tcp_port>
<mysql_port>9005</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<compression>
<case>
<method>zstd</method>
</case>
</compression>
<users>
<default>
<password/>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default/>
</quotas>
</clickhouse>

2
contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2
Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30

View File

@ -128,7 +128,7 @@ function run_tests()
set +e
if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 \
clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
--max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
-j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

View File

@ -10,31 +10,38 @@ import requests
import tempfile
DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"
AVAILABLE_DATASETS = {
'hits': 'hits_v1.tar',
'visits': 'visits_v1.tar',
"hits": "hits_v1.tar",
"visits": "visits_v1.tar",
}
RETRIES_COUNT = 5
def _get_temp_file_name():
return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
return os.path.join(
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
)
def build_url(base_url, dataset):
return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
def dowload_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(RETRIES_COUNT):
try:
with open(path, 'wb') as f:
with open(path, "wb") as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
@ -46,7 +53,11 @@ def dowload_with_progress(url, path):
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
sys.stdout.write(
"\r[{}{}] {}%".format(
"=" * done, " " * (50 - done), percent
)
)
sys.stdout.flush()
break
except Exception as ex:
@ -56,14 +67,21 @@ def dowload_with_progress(url, path):
if os.path.exists(path):
os.remove(path)
else:
raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
raise Exception(
"Cannot download dataset from {}, all retries exceeded".format(url)
)
sys.stdout.write("\n")
logging.info("Downloading finished")
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
with tarfile.open(tar_path, 'r') as comp_file:
logging.info(
"Will unpack data from temp path %s to clickhouse db %s",
tar_path,
clickhouse_path,
)
with tarfile.open(tar_path, "r") as comp_file:
comp_file.extractall(path=clickhouse_path)
logging.info("Unpack finished")
@ -72,15 +90,21 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
description="Simple tool for dowloading datasets for clickhouse from S3")
description="Simple tool for dowloading datasets for clickhouse from S3"
)
parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
parser.add_argument('--url-prefix', default=DEFAULT_URL)
parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
parser.add_argument(
"--dataset-names",
required=True,
nargs="+",
choices=list(AVAILABLE_DATASETS.keys()),
)
parser.add_argument("--url-prefix", default=DEFAULT_URL)
parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")
args = parser.parse_args()
datasets = args.dataset_names
logging.info("Will fetch following datasets: %s", ', '.join(datasets))
logging.info("Will fetch following datasets: %s", ", ".join(datasets))
for dataset in datasets:
logging.info("Processing %s", dataset)
temp_archive_path = _get_temp_file_name()
@ -92,10 +116,11 @@ if __name__ == "__main__":
logging.info("Some exception occured %s", str(ex))
raise
finally:
logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
logging.info(
"Will remove downloaded file %s from filesystem if it exists",
temp_archive_path,
)
if os.path.exists(temp_archive_path):
os.remove(temp_archive_path)
logging.info("Processing of %s finished", dataset)
logging.info("Fetch finished, enjoy your tables!")

View File

@ -170,6 +170,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
fi
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||:
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
# Compress tables.

View File

@ -11,13 +11,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
aspell \
curl \
git \
file \
libxml2-utils \
moreutils \
python3-fuzzywuzzy \
python3-pip \
shellcheck \
yamllint \
&& pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip

View File

@ -0,0 +1,283 @@
---
slug: /en/development/building_and_benchmarking_deflate_qpl
sidebar_position: 73
sidebar_label: Building and Benchmarking DEFLATE_QPL
description: How to build ClickHouse and run benchmarks with the DEFLATE_QPL codec
---
# Build ClickHouse with DEFLATE_QPL
- Make sure your target machine meets the QPL required [Prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
- Pass the following flag to CMake when building ClickHouse, depending on the capabilities of your target machine:
``` bash
cmake -DENABLE_AVX2=1 -DENABLE_QPL=1 ..
```
or
``` bash
cmake -DENABLE_AVX512=1 -DENABLE_QPL=1 ..
```
- For generic requirements, please refer to the general ClickHouse [build instructions](/docs/en/development/build.md)
# Run Benchmark with DEFLATE_QPL
## Files list
The folder `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) gives an example of how to run the benchmark with Python scripts:
`client_scripts` contains the Python scripts for running a typical benchmark, for example:
- `client_stressing_test.py`: The Python script for the query stress test with [1~4] server instances.
- `queries_ssb.sql`: The file listing all queries for the [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema/)
- `allin1_ssb.sh`: This shell script runs the whole benchmark workflow automatically.
`database_files` holds the database files, one directory per codec (lz4/deflate/zstd).
## Run benchmark automatically for Star Schema:
``` bash
$ cd ./benchmark_sample/client_scripts
$ sh run_ssb.sh
```
After completion, please check all the results in the folder `./output/`.
If you run into failures, please run the benchmark manually as described in the sections below.
## Definition
[CLICKHOUSE_EXE] means the path of the ClickHouse executable program.
## Environment
- CPU: Sapphire Rapids
- OS requirements: refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements)
- IAA setup: refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration)
- Install the required Python modules:
``` bash
pip3 install clickhouse_driver numpy
```
[Self-check for IAA]
``` bash
$ accel-config list | grep -P 'iax|state'
```
The expected output looks like this:
``` bash
"dev":"iax1",
"state":"enabled",
"state":"enabled",
```
If nothing is printed, IAA is not ready to work. Please check the IAA setup again.
## Generate raw data
``` bash
$ cd ./benchmark_sample
$ mkdir rawdata_dir && cd rawdata_dir
```
Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows of data with the parameter:
-s 20
The `*.tbl` files are expected to appear under `./benchmark_sample/rawdata_dir/ssb-dbgen`, as sketched below.
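A minimal sketch of this step (assuming you are still inside `rawdata_dir`), mirroring what `allin1_ssb.sh` does; the repository and the `-T` flags are taken from that script:
``` bash
$ git clone https://github.com/vadimtk/ssb-dbgen.git && cd ssb-dbgen
$ make
$ for t in c p s d l; do echo y | ./dbgen -s 20 -T $t; done   # customer, part, supplier, date, lineorder
$ ls *.tbl   # five .tbl files are expected here
```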
## Database setup
Set up the database with the LZ4 codec:
``` bash
$ cd ./database_dir/lz4
$ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
$ [CLICKHOUSE_EXE] client
```
Here you should see the message `Connected to ClickHouse server` in the console, which means the client has successfully set up a connection with the server.
Complete the three steps below, as described in [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema):
- Creating tables in ClickHouse
- Inserting data. Use `./benchmark_sample/rawdata_dir/ssb-dbgen/*.tbl` as the input data (see the sketch after this list).
- Converting the “star schema” to a de-normalized “flat schema”
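A minimal sketch of the insertion step, mirroring the commands in `allin1_ssb.sh` (the default port 9000 of the first instance is assumed):
``` bash
$ cd ./benchmark_sample/rawdata_dir/ssb-dbgen
$ [CLICKHOUSE_EXE] client --query "INSERT INTO customer FORMAT CSV"  < customer.tbl
$ [CLICKHOUSE_EXE] client --query "INSERT INTO part FORMAT CSV"      < part.tbl
$ [CLICKHOUSE_EXE] client --query "INSERT INTO supplier FORMAT CSV"  < supplier.tbl
$ [CLICKHOUSE_EXE] client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
```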
Set up the database with the IAA Deflate codec:
``` bash
$ cd ./database_dir/deflate
$ [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
$ [CLICKHOUSE_EXE] client
```
Complete the same three steps as for LZ4 above.
Set up the database with the ZSTD codec:
``` bash
$ cd ./database_dir/zstd
$ [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
$ [CLICKHOUSE_EXE] client
```
Complete the same three steps as for LZ4 above.
[Self-check]
For each codec (lz4/zstd/deflate), please execute the query below to make sure the databases were created successfully:
```sql
select count() from lineorder_flat
```
You are expected to see the following output:
```sql
┌───count()─┐
│ 119994608 │
└───────────┘
```
[Self-check for IAA Deflate codec]
The first time you execute an insertion or query from the client, the ClickHouse server console is expected to print this log:
```text
Hardware-assisted DeflateQpl codec is ready!
```
If you never see this message, but instead see the log below:
```text
Initialization of hardware-assisted DeflateQpl codec failed
```
then the IAA devices are not ready and you need to check the IAA setup again.
## Benchmark with single instance
- Before starting the benchmark, please disable C6 and set the CPU frequency governor to `performance`:
``` bash
$ cpupower idle-set -d 3
$ cpupower frequency-set -g performance
```
- To eliminate the impact of being memory bound across sockets, we use `numactl` to bind the server to one socket and the client to the other socket.
- Single instance means a single server connected to a single client.
Now run the benchmark for LZ4/Deflate/ZSTD respectively:
LZ4:
``` bash
$ cd ./database_dir/lz4
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > lz4.log
```
IAA deflate:
``` bash
$ cd ./database_dir/deflate
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log
```
ZSTD:
``` bash
$ cd ./database_dir/zstd
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > zstd.log
```
Three logs should now have been produced as expected:
```text
lz4.log
deflate.log
zstd.log
```
How to check performance metrics:
We focus on QPS: please search for the keyword `QPS_Final` and collect the statistics (one way to do this is sketched below).
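For example, one possible way (not part of the provided scripts) to pull the final QPS out of each log:
``` bash
$ grep -H "QPS_Final" lz4.log deflate.log zstd.log
```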
## Benchmark with multi-instances
- To reduce the impact of being memory bound with too many threads, we recommend running the benchmark with multiple instances.
- Multi-instance means multiple (2 or 4) servers, each connected to its own client.
- The cores of one socket need to be divided equally and assigned to the servers respectively.
- For multi-instances, you must create a new folder for each codec and insert the dataset by following steps similar to the single-instance case.
There are 2 differences:
- On the client side, you need to launch clickhouse with the assigned port during table creation and data insertion.
- On the server side, you need to launch clickhouse with the specific xml config file in which the port has been assigned. All customized xml config files for multi-instances are provided under ./server_config.
Here we assume there are 60 cores per socket and take 2 instances as an example.
Launch server for first instance
LZ4:
``` bash
$ cd ./database_dir/lz4
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
```
ZSTD:
``` bash
$ cd ./database_dir/zstd
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
```
IAA Deflate:
``` bash
$ cd ./database_dir/deflate
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
```
[Launch server for second instance]
LZ4:
``` bash
$ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2
$ cp ../../server_config/config_lz4_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null&
```
ZSTD:
``` bash
$ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2
$ cp ../../server_config/config_zstd_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null&
```
IAA Deflate:
``` bash
$ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2
$ cp ../../server_config/config_deflate_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null&
```
Creating tables && Inserting data for second instance
Creating tables:
``` bash
$ [CLICKHOUSE_EXE] client -m --port=9001
```
Inserting data:
``` bash
$ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl --port=9001
```
- [TBL_FILE_NAME] represents the name of a file matching the pattern `*.tbl` under `./benchmark_sample/rawdata_dir/ssb-dbgen`.
- `--port=9001` stands for the assigned port of the server instance, which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, replace it with 9002/9003, which stand for the s3/s4 instances respectively. If you don't assign it, the port is 9000 by default, which is already used by the first instance.
Benchmarking with 2 instances
LZ4:
``` bash
$ cd ./database_dir/lz4
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
$ cd ./database_dir/lz4_s2
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > lz4_2insts.log
```
ZSTD:
``` bash
$ cd ./database_dir/zstd
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
$ cd ./database_dir/zstd_s2
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log
```
IAA deflate
``` bash
$ cd ./database_dir/deflate
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
$ cd ./database_dir/deflate_s2
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log
```
Here the last argument `2` of client_stressing_test.py stands for the number of instances. For more instances, replace it with 3 or 4. This script supports up to 4 instances.
Three logs should now have been produced as expected:
``` text
lz4_2insts.log
deflate_2insts.log
zstd_2insts.log
```
How to check performance metrics:
We focus on QPS: please search for the keyword `QPS_Final` and collect the statistics.
The benchmark setup for 4 instances is similar to the 2-instance setup above.
We recommend using the 2-instance benchmark data as the final report for review.
## Tips
Each time before launching a new clickhouse server, please make sure no background clickhouse process is running; check for and kill any old one:
``` bash
$ ps -aux| grep clickhouse
$ kill -9 [PID]
```
By comparing the query list in ./client_scripts/queries_ssb.sql with the official [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema), you will find that 3 queries are not included: Q1.2/Q1.3/Q3.4. This is because CPU utilization is very low (<10%) for these queries, which means they cannot demonstrate performance differences.

View File

@ -377,8 +377,9 @@ CREATE TABLE table_name
i32 Int32,
s String,
...
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4
INDEX idx1 u64 TYPE bloom_filter GRANULARITY 3,
INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 3,
INDEX idx3 u64 * length(s) TYPE set(1000) GRANULARITY 4
) ENGINE = MergeTree()
...
```
@ -386,8 +387,25 @@ CREATE TABLE table_name
Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries:
``` sql
SELECT count() FROM table WHERE s < 'z'
SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
SELECT count() FROM table WHERE u64 == 10;
SELECT count() FROM table WHERE u64 * i32 >= 1234
SELECT count() FROM table WHERE u64 * length(s) == 1234
```
Data skipping indexes can also be created on composite columns:
```sql
-- on columns of type Map:
INDEX map_key_index mapKeys(map_column) TYPE bloom_filter
INDEX map_value_index mapValues(map_column) TYPE bloom_filter
-- on columns of type Tuple:
INDEX tuple_1_index tuple_column.1 TYPE bloom_filter
INDEX tuple_2_index tuple_column.2 TYPE bloom_filter
-- on columns of type Nested:
INDEX nested_1_index col.nested_col1 TYPE bloom_filter
INDEX nested_2_index col.nested_col2 TYPE bloom_filter
```
### Available Types of Indices {#available-types-of-indices}
@ -432,20 +450,6 @@ Syntax: `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, ran
- An experimental index to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details.
- An experimental inverted index to support full-text search. See [here](invertedindexes.md) for details.
## Example of index creation for Map data type
```
INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1
INDEX map_key_index mapValues(map_column) TYPE bloom_filter GRANULARITY 1
```
``` sql
INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4
INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4
INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4
```
### Functions Support {#functions-support}
Conditions in the `WHERE` clause contain calls of functions that operate on columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes.

View File

@ -15,7 +15,7 @@ Usage examples:
## Usage in ClickHouse Server {#usage-in-clickhouse-server}
``` sql
ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length])
ENGINE = GenerateRandom([random_seed [,max_string_length [,max_array_length]]])
```
The `max_array_length` and `max_string_length` parameters specify maximum length of all

View File

@ -1,7 +0,0 @@
position: 1
label: 'Example Datasets'
collapsible: true
collapsed: true
link:
type: doc
id: en/getting-started/example-datasets/

View File

@ -154,7 +154,7 @@ Arrays are written as a list of comma-separated values in square brackets. Numbe
In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id.
If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing.
Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array.
Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) structures is represented as an array.
For example:
@ -1150,7 +1150,7 @@ Any set of bytes can be output in the strings. Use the `JSONEachRow` format if y
### Usage of Nested Structures {#jsoneachrow-nested}
If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting.
If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting.
For example, consider the following table:
@ -1776,7 +1776,7 @@ message MessageType {
```
ClickHouse tries to find a column named `x.y.z` (or `x_y_z` or `X.y_Z` and so on).
Nested messages are suitable to input or output a [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/nested.md).
Nested messages are suitable for input or output of [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/index.md).
Default values defined in a protobuf schema like this
@ -1978,7 +1978,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`.
- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`.
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`.
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`.
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types during schema inference for the Parquet format. Default value - `false`.

View File

@ -6,7 +6,7 @@ keywords: [clickhouse, network, interfaces, http, tcp, grpc, command-line, clien
description: ClickHouse provides three network interfaces
---
# Interfaces
# Drivers and Interfaces
ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security):

View File

@ -331,7 +331,7 @@ It is also possible to `BACKUP`/`RESTORE` to S3 by configuring an S3 disk in the
<s3>
<volumes>
<main>
<disk>s3</disk>
<disk>s3_plain</disk>
</main>
</volumes>
</s3>
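Once such a disk is configured, a backup can target it by name; a sketch where the table and backup names are placeholders:

``` sql
BACKUP TABLE data TO Disk('s3_plain', 'cloud_backup');
RESTORE TABLE data AS data_restored FROM Disk('s3_plain', 'cloud_backup');
```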

View File

@ -964,7 +964,7 @@ Default value: 1.
### input_format_arrow_import_nested {#input_format_arrow_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
Possible values:
@ -1024,7 +1024,7 @@ Default value: `none`.
### input_format_orc_import_nested {#input_format_orc_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
Possible values:
@ -1073,7 +1073,7 @@ Default value: `none`.
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
Possible values:
@ -1538,6 +1538,6 @@ Default value: `1GiB`.
### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}
Allow types conversion in Native input format between columns from input data and requested columns.
Allow types conversion in Native input format between columns from input data and requested columns.
Enabled by default.

View File

@ -3438,7 +3438,7 @@ Default value: `throw`.
## flatten_nested {#flatten-nested}
Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns.
Sets the data format of [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns.
Possible values:
@ -4049,3 +4049,32 @@ Possible values:
- 1 - enabled
Default value: `0`.
## stop_reading_on_first_cancel {#stop_reading_on_first_cancel}
When set to `true` and the user wants to interrupt a query (for example, using `Ctrl+C` on the client), the query continues execution only on data that has already been read from the table. Afterwards, it returns a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.
**Example without setting on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000)
Cancelling query.
Ok.
Query was cancelled.
0 rows in set. Elapsed: 1.334 sec. Processed 52.65 million rows, 421.23 MB (39.48 million rows/s., 315.85 MB/s.)
```
**Example with setting on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000) SETTINGS stop_reading_on_first_cancel=true
┌──────sum(number)─┐
│ 1355411451286266 │
└──────────────────┘
1 row in set. Elapsed: 1.331 sec. Processed 52.13 million rows, 417.05 MB (39.17 million rows/s., 313.33 MB/s.)
```
Possible values: `true`, `false`
Default value: `false`

View File

@ -1,7 +1,7 @@
---
slug: /en/operations/utilities/
sidebar_position: 56
sidebar_label: Utilities
sidebar_label: List of tools and utilities
pagination_next: 'en/operations/utilities/clickhouse-copier'
---

View File

@ -1,13 +1,33 @@
---
slug: /en/sql-reference/data-types/
sidebar_label: Data Types
sidebar_label: List of data types
sidebar_position: 37
---
# Data Types
# ClickHouse Data Types
ClickHouse can store various kinds of data in table cells.
ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any.
This section describes the supported data types and special considerations for using and/or implementing them if any.
:::note
You can check whether a data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
:::
You can check whether data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
ClickHouse data types include:
- **Integer types**: [signed and unsigned integers](./int-uint.md) (`UInt8`, `UInt16`, `UInt32`, `UInt64`, `UInt128`, `UInt256`, `Int8`, `Int16`, `Int32`, `Int64`, `Int128`, `Int256`)
- **Floating-point numbers**: [floats](./float.md) (`Float32` and `Float64`) and [`Decimal` values](./decimal.md)
- **Boolean**: ClickHouse has a [`Boolean` type](./boolean.md)
- **Strings**: [`String`](./string.md) and [`FixedString`](./fixedstring.md)
- **Dates**: use [`Date`](./date.md) and [`Date32`](./date32.md) for days, and [`DateTime`](./datetime.md) and [`DateTime64`](./datetime64.md) for instances in time
- **JSON**: the [`JSON` object](./json.md) stores a JSON document in a single column
- **UUID**: a performant option for storing [`UUID` values](./uuid.md)
- **Low cardinality types**: use an [`Enum`](./enum.md) when you have a handful of unique values, or use [`LowCardinality`](./lowcardinality.md) when you have up to 10,000 unique values of a column
- **Arrays**: any column can be defined as an [`Array` of values](./array.md)
- **Maps**: use [`Map`](./map.md) for storing key/value pairs
- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results
- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell
- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type.
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type)
- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses
- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon`
- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md)
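As a quick illustration, a hypothetical table combining several of the types listed above:

``` sql
CREATE TABLE types_example
(
    id UInt64,
    price Decimal(18, 2),
    created DateTime,
    tags Array(LowCardinality(String)),
    attributes Map(String, String),
    comment Nullable(String)
)
ENGINE = MergeTree
ORDER BY id;
```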

View File

@ -1,7 +1,105 @@
---
slug: /en/sql-reference/data-types/nested-data-structures/
sidebar_label: Nested Data Structures
sidebar_position: 54
slug: /en/sql-reference/data-types/nested-data-structures/nested
sidebar_position: 57
sidebar_label: Nested(Name1 Type1, Name2 Type2, ...)
---
# Nested Data Structures
# Nested
## Nested(name1 Type1, Name2 Type2, …)
A nested data structure is like a table inside a cell. The parameters of a nested data structure (the column names and types) are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure.
Example:
``` sql
CREATE TABLE test.visits
(
CounterID UInt32,
StartDate Date,
Sign Int8,
IsNew UInt8,
VisitID UInt64,
UserID UInt64,
...
Goals Nested
(
ID UInt32,
Serial UInt32,
EventTime DateTime,
Price Int64,
OrderID String,
CurrencyID UInt32
),
...
) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)
```
This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the visits table can correspond to zero or any number of conversions.
When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not the default), arbitrary levels of nesting are supported.
In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length.
Example:
``` sql
SELECT
Goals.ID,
Goals.EventTime
FROM test.visits
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐
│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │
│ [1073752] │ ['2014-03-17 00:28:25'] │
│ [1073752] │ ['2014-03-17 10:46:20'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │
│ [] │ [] │
│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │
│ [] │ [] │
│ [] │ [] │
│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │
└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘
```
It is easiest to think of a nested data structure as a set of multiple column arrays of the same length.
The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. Example:
``` sql
SELECT
Goal.ID,
Goal.EventTime
FROM test.visits
ARRAY JOIN Goals AS Goal
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goal.ID─┬──────Goal.EventTime─┐
│ 1073752 │ 2014-03-17 16:38:10 │
│ 591325 │ 2014-03-17 16:38:48 │
│ 591325 │ 2014-03-17 16:42:27 │
│ 1073752 │ 2014-03-17 00:28:25 │
│ 1073752 │ 2014-03-17 10:46:20 │
│ 1073752 │ 2014-03-17 13:59:20 │
│ 591325 │ 2014-03-17 22:17:55 │
│ 591325 │ 2014-03-17 22:18:07 │
│ 591325 │ 2014-03-17 22:18:51 │
│ 1073752 │ 2014-03-17 11:37:06 │
└─────────┴─────────────────────┘
```
You can't perform a SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it.
For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length.
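A sketch of such an INSERT against a hypothetical table with a single Nested column (not the `test.visits` table above):

``` sql
CREATE TABLE nested_insert_example
(
    id UInt64,
    goals Nested(ID UInt32, EventTime DateTime)
)
ENGINE = MergeTree
ORDER BY id;

-- Each component column is passed as its own array; both arrays must have the same length.
INSERT INTO nested_insert_example (id, `goals.ID`, `goals.EventTime`)
VALUES (1, [1073752, 591325], ['2014-03-17 16:38:10', '2014-03-17 16:38:48']);
```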
For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way.
The ALTER query for elements in a nested data structure has limitations.

View File

@ -1,105 +0,0 @@
---
slug: /en/sql-reference/data-types/nested-data-structures/nested
sidebar_position: 57
sidebar_label: Nested(Name1 Type1, Name2 Type2, ...)
---
# Nested
## Nested(name1 Type1, Name2 Type2, …)
A nested data structure is like a table inside a cell. The parameters of a nested data structure the column names and types are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure.
Example:
``` sql
CREATE TABLE test.visits
(
CounterID UInt32,
StartDate Date,
Sign Int8,
IsNew UInt8,
VisitID UInt64,
UserID UInt64,
...
Goals Nested
(
ID UInt32,
Serial UInt32,
EventTime DateTime,
Price Int64,
OrderID String,
CurrencyID UInt32
),
...
) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)
```
This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the visits table can correspond to zero or any number of conversions.
When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not by default), arbitrary levels of nesting are supported.
In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length.
Example:
``` sql
SELECT
Goals.ID,
Goals.EventTime
FROM test.visits
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐
│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │
│ [1073752] │ ['2014-03-17 00:28:25'] │
│ [1073752] │ ['2014-03-17 10:46:20'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │
│ [] │ [] │
│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │
│ [] │ [] │
│ [] │ [] │
│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │
│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │
└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘
```
It is easiest to think of a nested data structure as a set of multiple column arrays of the same length.
The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. Example:
``` sql
SELECT
Goal.ID,
Goal.EventTime
FROM test.visits
ARRAY JOIN Goals AS Goal
WHERE CounterID = 101500 AND length(Goals.ID) < 5
LIMIT 10
```
``` text
┌─Goal.ID─┬──────Goal.EventTime─┐
│ 1073752 │ 2014-03-17 16:38:10 │
│ 591325 │ 2014-03-17 16:38:48 │
│ 591325 │ 2014-03-17 16:42:27 │
│ 1073752 │ 2014-03-17 00:28:25 │
│ 1073752 │ 2014-03-17 10:46:20 │
│ 1073752 │ 2014-03-17 13:59:20 │
│ 591325 │ 2014-03-17 22:17:55 │
│ 591325 │ 2014-03-17 22:18:07 │
│ 591325 │ 2014-03-17 22:18:51 │
│ 1073752 │ 2014-03-17 11:37:06 │
└─────────┴─────────────────────┘
```
You cant perform SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it.
For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length.
For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way.
The ALTER query for elements in a nested data structure has limitations.

View File

@ -1232,12 +1232,14 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## formatDateTime
## formatDateTime {#date_time_functions-formatDateTime}
Formats a date or date with time according to the given format string. The format is a constant expression, so you cannot have multiple formats for a single result column.
formatDateTime uses the MySQL datetime format style; refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime).
Alias: `DATE_FORMAT`.
**Syntax**
@ -1257,7 +1259,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
|----------|---------------------------------------------------------|------------|
| %a | abbreviated weekday name (Mon-Sun) | Mon |
| %b | abbreviated month name (Jan-Dec) | Jan |
| %c | month as a decimal number (01-12) | 01 |
| %c | month as an integer number (01-12) | 01 |
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
@ -1273,7 +1275,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %j | day of the year (001-366) | 002 |
| %k | hour in 24h format (00-23) | 22 |
| %l | hour in 12h format (01-12) | 09 |
| %m | month as a decimal number (01-12) | 01 |
| %m | month as an integer number (01-12) | 01 |
| %M | minute (00-59) | 33 |
| %n | new-line character () | |
| %p | AM or PM designation | PM |
@ -1286,7 +1288,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 |
| %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 |
| %V | ISO 8601 week number (01-53) | 01 |
| %w | weekday as a decimal number with Sunday as 0 (0-6) | 2 |
| %w | weekday as an integer number with Sunday as 0 (0-6) | 2 |
| %W | full weekday name (Monday-Sunday) | Monday |
| %y | Year, last two digits (00-99) | 18 |
| %Y | Year | 2018 |
@ -1328,10 +1330,11 @@ Result:
- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax)
## formatDateTimeInJodaSyntax
## formatDateTimeInJodaSyntax {#date_time_functions-formatDateTimeInJodaSyntax}
Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax).
**Replacement fields**

View File

@ -375,7 +375,7 @@ For a case-insensitive search or/and in UTF-8 format use functions `multiSearchA
In all `multiSearch*` functions the number of needles should be less than 2<sup>8</sup> because of implementation specification.
:::
## match(haystack, pattern)
## match(haystack, pattern), haystack REGEXP pattern operator
Checks whether string `haystack` matches the regular expression `pattern`. The pattern is an [re2 regular expression](https://github.com/google/re2/wiki/Syntax) which has a more limited syntax than Perl regular expressions.

View File

@ -1148,6 +1148,85 @@ Result:
└───────────────────────────┴──────────────────────────────┘
```
## parseDateTime {#type_conversion_functions-parseDateTime}
Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format).
This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime).
**Syntax**
``` sql
parseDateTime(str, format[, timezone])
```
**Arguments**
- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
**Returned value(s)**
Returns DateTime values parsed from the input string according to a MySQL-style format string.
**Supported format specifiers**
All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except:
- %f: fractional second
- %Q: Quarter (1-4)
**Example**
``` sql
SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')
┌─parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')─┐
│ 2021-01-04 23:00:00 │
└───────────────────────────────────────────────────────────┘
```
Alias: `TO_TIMESTAMP`.
## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax}
Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax.
This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax).
**Syntax**
``` sql
parseDateTimeInJodaSyntax(str, format[, timezone])
```
**Arguments**
- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
**Returned value(s)**
Returns DateTime values parsed from the input string according to a Joda-style format string.
**Supported format specifiers**
All format specifiers listed in [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax) are supported, except:
- S: fraction of second
- z: time zone
- Z: time zone offset/id
**Example**
``` sql
SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk')
┌─parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk')─┐
│ 2023-02-24 14:53:31 │
└─────────────────────────────────────────────────────────────────────────────────────────┘
```
## parseDateTimeBestEffort
## parseDateTime32BestEffort
@ -1351,7 +1430,6 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that
Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity and returns zero date or zero date time when it encounters a date format that cannot be processed.
## toLowCardinality
Converts the input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of the same data type.

View File

@ -381,8 +381,8 @@ High compression levels are useful for asymmetric scenarios, like compress once,
`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply:
- DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`.
- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions
- DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device
- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions. Refer to [Build ClickHouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details.
- DEFLATE_QPL works best if the system has an Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details.
- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with support for AVX2/AVX512
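A minimal sketch of declaring a column with this codec, assuming a QPL-enabled build and the experimental codecs setting; names are illustrative:

``` sql
SET allow_experimental_codecs = 1;

CREATE TABLE qpl_example
(
    ts DateTime,
    payload String CODEC(DEFLATE_QPL)
)
ENGINE = MergeTree
ORDER BY ts;
```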
### Specialized Codecs

View File

@ -24,9 +24,9 @@ The `DESCRIBE` statement returns a row for each table column with the following
- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression.
- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
All columns in [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
All columns in [Nested](../../sql-reference/data-types/nested-data-structures/index.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
**Example**

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/statements/
sidebar_position: 1
sidebar_label: Statements
sidebar_label: List of statements
---
# ClickHouse SQL Statements
# ClickHouse SQL Statements
Statements represent various kinds of actions you can perform using SQL queries. Each kind of statement has its own syntax and usage details that are described separately:

View File

@ -185,7 +185,7 @@ SETTINGS enable_unaligned_array_join = 1;
## ARRAY JOIN with Nested Data Structure
`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/nested.md):
`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/index.md):
``` sql
CREATE TABLE nested_test

View File

@ -4084,3 +4084,32 @@ ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1;
Sets the character that is interpreted as a suffix after the result set for the [CustomSeparated](../../interfaces/formats.md#format-customseparated) format.
Default value: `''`.
## stop_reading_on_first_cancel {#stop_reading_on_first_cancel}
If set to `true` and the user wants to interrupt a query (for example, using `Ctrl+C` on the client), the query continues execution only on data that has already been read from the table. Afterwards, it returns a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.
**Example with the setting disabled, on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000)
Cancelling query.
Ok.
Query was cancelled.
0 rows in set. Elapsed: 1.334 sec. Processed 52.65 million rows, 421.23 MB (39.48 million rows/s., 315.85 MB/s.)
```
**Example with the setting enabled, on Ctrl+C**
```sql
SELECT sum(number) FROM numbers(10000000000) SETTINGS stop_reading_on_first_cancel=true
┌──────sum(number)─┐
│ 1355411451286266 │
└──────────────────┘
1 row in set. Elapsed: 1.331 sec. Processed 52.13 million rows, 417.05 MB (39.17 million rows/s., 313.33 MB/s.)
```
Possible values: `true`, `false`
Default value: `false`

View File

@ -97,7 +97,7 @@ CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user',
### DDL查询 {#ddl-queries}
MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL query, the query is ignored.
MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL query, the query is ignored.
### Data Replication {#data-replication}

View File

@ -109,7 +109,7 @@ MySQL中的Time 类型会被ClickHouse转换成微秒来存储
### DDL Queries {#ddl-queries}
MySQL DDL statements are converted into the corresponding ClickHouse DDL statements, for example: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL statement, the operation is skipped.
MySQL DDL statements are converted into the corresponding ClickHouse DDL statements, for example: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). If ClickHouse cannot parse a DDL statement, the operation is skipped.
### 数据复制 {#data-replication}

View File

@ -1,5 +1,5 @@
---
slug: /zh/faq/general
slug: /zh/faq/general/overview
---
# Frequently Asked Questions {#chang-jian-wen-ti}

View File

@ -21,8 +21,7 @@ sidebar_label: General
- [How do I contribute code to ClickHouse?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md)
!!! info "Didn't find what you were looking for?"
请查阅 See the [other F.A.Q. categories](../../faq/) or browse the rest of the documentation in the left sidebar
See the [other F.A.Q. categories](../../faq/index.md) or browse the rest of the documentation in the left sidebar
{## [原始文档](https://clickhouse.com/docs/en/faq/general/) ##}

View File

@ -338,6 +338,12 @@ UserID.binURL.bin和EventTime.bin是<font face = "monospace">UserID</font>
:::note
- The last granule (granule 1082) has fewer than 8192 rows.
- As mentioned in the "DDL statement details" at the beginning of this guide, we disabled adaptive index granularity (to simplify the discussion in this guide and make the diagrams and results reproducible).
Therefore, all granules in the example table (except the last one) have the same size.
- For tables with adaptive index granularity (index granularity is adaptive by default), the size of some granules can be less than 8192 rows, depending on the row data size.
- We marked some column values from the primary key columns (<font face = "monospace">UserID</font>, <font face = "monospace">URL</font>) in orange.
These orange-marked column values are the minimum value of each primary key column in each granule. The exception is the last granule (granule 1082 in the diagram above), for which we marked the maximum values.

View File

@ -1,10 +0,0 @@
---
slug: /zh/sql-reference/functions/geo/
sidebar_label: Geo
sidebar_position: 62
title: "Geo Functions"
---
import Content from '@site/docs/en/sql-reference/functions/geo/index.md';
<Content />

View File

@ -1,5 +1,5 @@
---
slug: /zh/sql-reference/statements/alter/
slug: /zh/sql-reference/statements/alter/overview
sidebar_position: 35
sidebar_label: ALTER
---

View File

@ -1,11 +0,0 @@
---
slug: /zh/sql-reference/statements/create/
sidebar_label: CREATE
sidebar_position: 34
---
# CREATE Queries {#create-queries}
CREATE queries include the following subsets:
- [DATABASE](../../../sql-reference/statements/create/database.md)

View File

@ -10,7 +10,7 @@ sidebar_position: 31
- [SELECT](../../sql-reference/statements/select/index.md)
- [INSERT INTO](../../sql-reference/statements/insert-into.md)
- [CREATE](../../sql-reference/statements/create/index.md)
- [CREATE](../../sql-reference/statements/create.md)
- [ALTER](../../sql-reference/statements/alter/index.md)
- [SYSTEM](../../sql-reference/statements/system.md)
- [SHOW](../../sql-reference/statements/show.md)

View File

@ -222,6 +222,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
("user", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user to create")
("group", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_GROUP), "clickhouse group to create")
("noninteractive,y", "run non-interactively")
("link", "create symlink to the binary instead of copying to binary-path")
;
po::variables_map options;
@ -267,8 +269,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// Copy binary to the destination directory.
/// TODO An option to link instead of copy - useful for developers.
fs::path prefix = options["prefix"].as<std::string>();
fs::path bin_dir = prefix / options["binary-path"].as<std::string>();
@ -281,76 +281,129 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
bool old_binary_exists = fs::exists(main_bin_path);
bool already_installed = false;
/// Check if the binary is the same file (already installed).
if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
if (options.count("link"))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
}
/// Check if binary has the same content.
else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
{
fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
main_bin_path.string());
if (filesEqual(binary_self_path.string(), main_bin_path.string()))
if (old_binary_exists)
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
main_bin_path.string(), binary_self_canonical_path.string());
}
}
bool is_symlink = FS::isSymlink(main_bin_path);
fs::path points_to;
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(main_bin_path));
if (already_installed)
{
if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
if (is_symlink && points_to == binary_self_canonical_path)
{
already_installed = true;
}
else
{
if (!is_symlink)
{
fmt::print("File {} already exists but it's not a symlink. Will rename to {}.\n",
main_bin_path.string(), main_bin_old_path.string());
fs::rename(main_bin_path, main_bin_old_path);
}
else if (points_to != main_bin_path)
{
fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n",
main_bin_path.string(), points_to.string(), binary_self_canonical_path.string());
fs::remove(main_bin_path);
}
}
}
if (!already_installed)
{
if (!fs::exists(bin_dir))
{
fmt::print("Creating binary directory {}.\n", bin_dir.string());
fs::create_directories(bin_dir);
}
fmt::print("Creating symlink {} to {}.\n", main_bin_path.string(), binary_self_canonical_path.string());
fs::create_symlink(binary_self_canonical_path, main_bin_path);
if (0 != chmod(binary_self_canonical_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", binary_self_canonical_path.string()), ErrorCodes::SYSTEM_ERROR);
}
}
else
{
if (!fs::exists(bin_dir))
bool is_symlink = FS::isSymlink(main_bin_path);
if (!is_symlink)
{
fmt::print("Creating binary directory {}.\n", bin_dir.string());
fs::create_directories(bin_dir);
/// Check if the binary is the same file (already installed).
if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string());
}
/// Check if binary has the same content.
else if (old_binary_exists && binary_size == fs::file_size(main_bin_path))
{
fmt::print("Found already existing ClickHouse binary at {} having the same size. Will check its contents.\n",
main_bin_path.string());
if (filesEqual(binary_self_path.string(), main_bin_path.string()))
{
already_installed = true;
fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n",
main_bin_path.string(), binary_self_canonical_path.string());
}
}
}
size_t available_space = fs::space(bin_dir).available;
if (available_space < binary_size)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
try
if (already_installed)
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
out.finalize();
if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR);
}
catch (const Exception & e)
else
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
std::cerr << "Install must be run as root: " << formatWithSudo("./clickhouse install") << '\n';
throw;
if (!fs::exists(bin_dir))
{
fmt::print("Creating binary directory {}.\n", bin_dir.string());
fs::create_directories(bin_dir);
}
size_t available_space = fs::space(bin_dir).available;
if (available_space < binary_size)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.",
bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space));
fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string());
try
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
out.finalize();
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
std::cerr << "Install must be run as root: " << formatWithSudo("./clickhouse install") << '\n';
throw;
}
if (old_binary_exists)
{
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
main_bin_path.string(), main_bin_old_path.string());
/// There is file exchange operation in Linux but it's not portable.
fs::rename(main_bin_path, main_bin_old_path);
}
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
fs::rename(main_bin_tmp_path, main_bin_path);
}
if (old_binary_exists)
{
fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n",
main_bin_path.string(), main_bin_old_path.string());
/// There is file exchange operation in Linux but it's not portable.
fs::rename(main_bin_path, main_bin_old_path);
}
fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string());
fs::rename(main_bin_tmp_path, main_bin_path);
}
/// Create symlinks.
@ -384,7 +437,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (is_symlink)
points_to = fs::weakly_canonical(FS::readSymlink(symlink_path));
if (is_symlink && points_to == main_bin_path)
if (is_symlink && (points_to == main_bin_path || (options.count("link") && points_to == binary_self_canonical_path)))
{
need_to_create = false;
}
@ -709,7 +762,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// dpkg or apt installers can ask for non-interactive work explicitly.
const char * debian_frontend_var = getenv("DEBIAN_FRONTEND"); // NOLINT(concurrency-mt-unsafe)
bool noninteractive = debian_frontend_var && debian_frontend_var == std::string_view("noninteractive");
bool noninteractive = (debian_frontend_var && debian_frontend_var == std::string_view("noninteractive"))
|| options.count("noninteractive");
bool is_interactive = !noninteractive && stdin_is_a_tty && stdout_is_a_tty;

View File

@ -703,6 +703,9 @@
actions of previous constraint (defined in other profiles) for the same specific setting, including fields that are not set by new constraint.
It also enables 'changeable_in_readonly' constraint type -->
<settings_constraints_replace_previous>false</settings_constraints_replace_previous>
<!-- Number of seconds since last access a role is stored in the Role Cache -->
<role_cache_expiration_time_seconds>600</role_cache_expiration_time_seconds>
</access_control_improvements>
<!-- Default profile of settings. -->

View File

@ -247,7 +247,7 @@ private:
AccessControl::AccessControl()
: MultipleAccessStorage("user directories"),
context_access_cache(std::make_unique<ContextAccessCache>(*this)),
role_cache(std::make_unique<RoleCache>(*this)),
role_cache(std::make_unique<RoleCache>(*this, 600)),
row_policy_cache(std::make_unique<RowPolicyCache>(*this)),
quota_cache(std::make_unique<QuotaCache>(*this)),
settings_profiles_cache(std::make_unique<SettingsProfilesCache>(*this)),
@ -282,6 +282,8 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration
setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", false));
addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
role_cache = std::make_unique<RoleCache>(*this, config_.getInt("access_control_improvements.role_cache_expiration_time_seconds", 600));
}

View File

@ -56,8 +56,8 @@ namespace
}
RoleCache::RoleCache(const AccessControl & access_control_)
: access_control(access_control_), cache(600000 /* 10 minutes */)
RoleCache::RoleCache(const AccessControl & access_control_, int expiration_time_seconds)
: access_control(access_control_), cache(expiration_time_seconds * 1000 /* 10 minutes by default*/)
{
}

View File

@ -16,7 +16,7 @@ using RolePtr = std::shared_ptr<const Role>;
class RoleCache
{
public:
explicit RoleCache(const AccessControl & access_control_);
explicit RoleCache(const AccessControl & access_control_, int expiration_time_seconds);
~RoleCache();
std::shared_ptr<const EnabledRoles> getEnabledRoles(

View File

@ -111,7 +111,6 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int ALIAS_REQUIRED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_PREWHERE;
extern const int UNKNOWN_TABLE;
}
@ -1578,41 +1577,20 @@ void QueryAnalyzer::collectCompoundExpressionValidIdentifiersForTypoCorrection(
const Identifier & valid_identifier_prefix,
std::unordered_set<Identifier> & valid_identifiers_result)
{
std::vector<std::pair<Identifier, const IDataType *>> identifiers_with_types_to_process;
identifiers_with_types_to_process.emplace_back(valid_identifier_prefix, compound_expression_type.get());
while (!identifiers_with_types_to_process.empty())
IDataType::forEachSubcolumn([&](const auto &, const auto & name, const auto &)
{
auto [identifier, type] = identifiers_with_types_to_process.back();
identifiers_with_types_to_process.pop_back();
Identifier subcolumn_indentifier(name);
size_t new_identifier_size = valid_identifier_prefix.getPartsSize() + subcolumn_indentifier.getPartsSize();
if (identifier.getPartsSize() + 1 > unresolved_identifier.getPartsSize())
continue;
while (const DataTypeArray * array = checkAndGetDataType<DataTypeArray>(type))
type = array->getNestedType().get();
const DataTypeTuple * tuple = checkAndGetDataType<DataTypeTuple>(type);
if (!tuple)
continue;
const auto & tuple_element_names = tuple->getElementNames();
size_t tuple_element_names_size = tuple_element_names.size();
for (size_t i = 0; i < tuple_element_names_size; ++i)
if (new_identifier_size == unresolved_identifier.getPartsSize())
{
const auto & element_name = tuple_element_names[i];
const auto & element_type = tuple->getElements()[i];
auto new_identifier = valid_identifier_prefix;
for (const auto & part : subcolumn_indentifier)
new_identifier.push_back(part);
identifier.push_back(element_name);
valid_identifiers_result.insert(identifier);
identifiers_with_types_to_process.emplace_back(identifier, element_type.get());
identifier.pop_back();
valid_identifiers_result.insert(std::move(new_identifier));
}
}
}, ISerialization::SubstreamData(compound_expression_type->getDefaultSerialization()));
}
/// Get valid identifiers for typo correction from table expression
@ -2374,7 +2352,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromCompoundExpression(const
auto expression_type = compound_expression->getResultType();
if (!nestedIdentifierCanBeResolved(expression_type, nested_path))
if (!expression_type->hasSubcolumn(nested_path.getFullName()))
{
std::unordered_set<Identifier> valid_identifiers;
collectCompoundExpressionValidIdentifiersForTypoCorrection(expression_identifier,
@ -2401,10 +2379,15 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromCompoundExpression(const
getHintsErrorMessageSuffix(hints));
}
auto tuple_element_result = wrapExpressionNodeInTupleElement(compound_expression, nested_path);
resolveFunction(tuple_element_result, scope);
QueryTreeNodePtr get_subcolumn_function = std::make_shared<FunctionNode>("getSubcolumn");
auto & get_subcolumn_function_arguments_nodes = get_subcolumn_function->as<FunctionNode>()->getArguments().getNodes();
return tuple_element_result;
get_subcolumn_function_arguments_nodes.reserve(2);
get_subcolumn_function_arguments_nodes.push_back(compound_expression);
get_subcolumn_function_arguments_nodes.push_back(std::make_shared<ConstantNode>(nested_path.getFullName()));
resolveFunction(get_subcolumn_function, scope);
return get_subcolumn_function;
}
/** Resolve identifier from expression arguments.
@ -3708,8 +3691,15 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
{
auto result_type = expression_query_tree_node->getResultType();
while (const auto * array_type = typeid_cast<const DataTypeArray *>(result_type.get()))
result_type = array_type->getNestedType();
while (true)
{
if (const auto * array_type = typeid_cast<const DataTypeArray *>(result_type.get()))
result_type = array_type->getNestedType();
else if (const auto * map_type = typeid_cast<const DataTypeMap *>(result_type.get()))
result_type = map_type->getNestedType();
else
break;
}
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
if (!tuple_data_type)
@ -3729,11 +3719,11 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
if (!matcher_node_typed.isMatchingColumn(element_name))
continue;
auto tuple_element_function = std::make_shared<FunctionNode>("tupleElement");
tuple_element_function->getArguments().getNodes().push_back(expression_query_tree_node);
tuple_element_function->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(element_name));
auto get_subcolumn_function = std::make_shared<FunctionNode>("getSubcolumn");
get_subcolumn_function->getArguments().getNodes().push_back(expression_query_tree_node);
get_subcolumn_function->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(element_name));
QueryTreeNodePtr function_query_node = tuple_element_function;
QueryTreeNodePtr function_query_node = get_subcolumn_function;
resolveFunction(function_query_node, scope);
qualified_matcher_element_identifier.push_back(element_name);
@ -6865,13 +6855,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.isGroupByAll())
expandGroupByAll(query_node_typed);
if (query_node_typed.hasPrewhere())
assertNoFunctionNodes(query_node_typed.getPrewhere(),
"arrayJoin",
ErrorCodes::ILLEGAL_PREWHERE,
"ARRAY JOIN",
"in PREWHERE");
validateFilters(query_node);
validateAggregates(query_node, { .group_by_use_nulls = scope.group_by_use_nulls });
for (const auto & column : projection_columns)

View File

@ -472,30 +472,6 @@ QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & join_tree_nod
return result;
}
bool nestedIdentifierCanBeResolved(const DataTypePtr & compound_type, IdentifierView nested_identifier)
{
const IDataType * current_type = compound_type.get();
for (const auto & identifier_part : nested_identifier)
{
while (const DataTypeArray * array = checkAndGetDataType<DataTypeArray>(current_type))
current_type = array->getNestedType().get();
const DataTypeTuple * tuple = checkAndGetDataType<DataTypeTuple>(current_type);
if (!tuple)
return false;
auto position = tuple->tryGetPositionByName(identifier_part);
if (!position)
return false;
current_type = tuple->getElements()[*position].get();
}
return true;
}
namespace
{

View File

@ -60,14 +60,6 @@ QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_n
*/
QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & join_tree_node);
/** Returns true if nested identifier can be resolved from compound type.
* Compound type can be tuple or array of tuples.
*
* Example: Compound type: Tuple(nested_path Tuple(nested_path_2 UInt64)). Nested identifier: nested_path_1.nested_path_2.
* Result: true.
*/
bool nestedIdentifierCanBeResolved(const DataTypePtr & compound_type, IdentifierView nested_identifier);
/** Assert that there are no function nodes with specified function name in node children.
* Do not visit subqueries.
*/

View File

@ -17,8 +17,50 @@ namespace ErrorCodes
extern const int NOT_AN_AGGREGATE;
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
extern const int ILLEGAL_PREWHERE;
}
namespace
{
void validateFilter(const QueryTreeNodePtr & filter_node, std::string_view exception_place_message, const QueryTreeNodePtr & query_node)
{
auto filter_node_result_type = filter_node->getResultType();
if (!filter_node_result_type->canBeUsedInBooleanContext())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER,
"Invalid type for filter in {}: {}. In query {}",
exception_place_message,
filter_node_result_type->getName(),
query_node->formatASTForErrorMessage());
}
}
void validateFilters(const QueryTreeNodePtr & query_node)
{
const auto & query_node_typed = query_node->as<QueryNode &>();
if (query_node_typed.hasPrewhere())
{
validateFilter(query_node_typed.getPrewhere(), "PREWHERE", query_node);
assertNoFunctionNodes(query_node_typed.getPrewhere(),
"arrayJoin",
ErrorCodes::ILLEGAL_PREWHERE,
"ARRAY JOIN",
"in PREWHERE");
}
if (query_node_typed.hasWhere())
validateFilter(query_node_typed.getWhere(), "WHERE", query_node);
if (query_node_typed.hasHaving())
validateFilter(query_node_typed.getHaving(), "HAVING", query_node);
}
namespace
{
class ValidateGroupByColumnsVisitor : public ConstInDepthQueryTreeVisitor<ValidateGroupByColumnsVisitor>
{
public:
@ -106,7 +148,9 @@ private:
const QueryTreeNodePtr & query_node;
};
void validateAggregates(const QueryTreeNodePtr & query_node, ValidationParams params)
}
void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidationParams params)
{
const auto & query_node_typed = query_node->as<QueryNode &>();
auto join_tree_node_type = query_node_typed.getJoinTree()->getNodeType();

View File

@ -5,7 +5,10 @@
namespace DB
{
struct ValidationParams
/// Validate PREWHERE, WHERE, HAVING in query node
void validateFilters(const QueryTreeNodePtr & query_node);
struct AggregatesValidationParams
{
bool group_by_use_nulls = false;
};
@ -20,7 +23,7 @@ struct ValidationParams
* PROJECTION.
* 5. Throws exception if there is GROUPING SETS or ROLLUP or CUBE or WITH TOTALS without aggregation.
*/
void validateAggregates(const QueryTreeNodePtr & query_node, ValidationParams params);
void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidationParams params);
/** Assert that there are no function nodes with specified function name in node children.
* Do not visit subqueries.

View File

@ -441,7 +441,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
if (restore_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
restores_thread_pool.scheduleOrThrowOnError(
[this, restore_query, restore_id, backup_name_for_logging, backup_info, restore_settings, restore_coordination, context_in_use]
{
doRestore(

View File

@ -261,21 +261,31 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src)
}
std::atomic_flag exit_on_signal;
std::atomic<Int32> exit_after_signals = 0;
class QueryInterruptHandler : private boost::noncopyable
{
public:
static void start() { exit_on_signal.clear(); }
/// Store how many interrupt signals can be received before stopping the query
/// (by default, stop after the first interrupt signal).
static void start(Int32 signals_before_stop = 1) { exit_after_signals.store(signals_before_stop); }
/// Set value not greater than 0 to mark the query as stopped.
static void stop() { return exit_after_signals.store(0); }
/// Return true if the query was stopped.
static bool stop() { return exit_on_signal.test_and_set(); }
static bool cancelled() { return exit_on_signal.test(); }
/// Query was stopped if it received at least "signals_before_stop" interrupt signals.
static bool try_stop() { return exit_after_signals.fetch_sub(1) <= 0; }
static bool cancelled() { return exit_after_signals.load() <= 0; }
/// Return how many interrupt signals remain before stop.
static Int32 cancelled_status() { return exit_after_signals.load(); }
};
/// This signal handler is set only for SIGINT.
void interruptSignalHandler(int signum)
{
if (QueryInterruptHandler::stop())
if (QueryInterruptHandler::try_stop())
safeExit(128 + signum);
}
@ -850,12 +860,15 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
}
}
const auto & settings = global_context->getSettingsRef();
const Int32 signals_before_stop = settings.stop_reading_on_first_cancel ? 2 : 1;
int retries_left = 10;
while (retries_left)
{
try
{
QueryInterruptHandler::start();
QueryInterruptHandler::start(signals_before_stop);
SCOPE_EXIT({ QueryInterruptHandler::stop(); });
connection->sendQuery(
@ -872,7 +885,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
if (send_external_tables)
sendExternalTables(parsed_query);
receiveResult(parsed_query);
receiveResult(parsed_query, signals_before_stop, settings.stop_reading_on_first_cancel);
break;
}
@ -897,7 +910,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
/// Receives and processes packets coming from server.
/// Also checks if query execution should be cancelled.
void ClientBase::receiveResult(ASTPtr parsed_query)
void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, bool stop_reading_on_first_cancel)
{
// TODO: get the poll_interval from commandline.
const auto receive_timeout = connection_parameters.timeouts.receive_timeout;
@ -921,7 +934,13 @@ void ClientBase::receiveResult(ASTPtr parsed_query)
/// to avoid losing sync.
if (!cancelled)
{
if (QueryInterruptHandler::cancelled())
if (stop_reading_on_first_cancel && QueryInterruptHandler::cancelled_status() == signals_before_stop - 1)
{
connection->sendCancel();
/// First cancel reading request was sent. Next requests will only be with a full cancel
stop_reading_on_first_cancel = false;
}
else if (QueryInterruptHandler::cancelled())
{
cancelQuery();
}

View File

@ -131,7 +131,7 @@ protected:
private:
void receiveResult(ASTPtr parsed_query);
void receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, bool stop_reading_on_first_cancel);
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
void receiveLogsAndProfileEvents(ASTPtr parsed_query);
bool receiveSampleBlock(Block & out, ColumnsDescription & columns_description, ASTPtr parsed_query);

View File

@ -349,12 +349,14 @@ The server successfully detected this situation and will download merged part fr
M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \
M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \
\
M(ReadBufferFromS3Microseconds, "Time spend in reading from S3.") \
M(ReadBufferFromS3InitMicroseconds, "Time spend initializing connection to S3.") \
M(ReadBufferFromS3Microseconds, "Time spent on reading from S3.") \
M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \
M(ReadBufferFromS3Bytes, "Bytes read from S3.") \
M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \
\
M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \
M(WriteBufferFromS3Bytes, "Bytes written to S3.") \
M(WriteBufferFromS3RequestsErrors, "Number of exceptions while writing to S3.") \
\
M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \
\

View File

@ -46,8 +46,8 @@ ThreadPoolImpl<Thread>::ThreadPoolImpl(size_t max_threads_)
template <typename Thread>
ThreadPoolImpl<Thread>::ThreadPoolImpl(size_t max_threads_, size_t max_free_threads_, size_t queue_size_, bool shutdown_on_exception_)
: max_threads(max_threads_)
, max_free_threads(max_free_threads_)
, queue_size(queue_size_)
, max_free_threads(std::min(max_free_threads_, max_threads))
, queue_size(queue_size_ ? std::max(queue_size_, max_threads) : 0 /* zero means the queue is unlimited */)
, shutdown_on_exception(shutdown_on_exception_)
{
}
@ -56,10 +56,26 @@ template <typename Thread>
void ThreadPoolImpl<Thread>::setMaxThreads(size_t value)
{
std::lock_guard lock(mutex);
bool need_start_threads = (value > max_threads);
bool need_finish_free_threads = (value < max_free_threads);
max_threads = value;
max_free_threads = std::min(max_free_threads, max_threads);
/// We have to also adjust queue size, because it limits the number of scheduled and already running jobs in total.
queue_size = std::max(queue_size, max_threads);
queue_size = queue_size ? std::max(queue_size, max_threads) : 0;
jobs.reserve(queue_size);
if (need_start_threads)
{
/// Start new threads while there are more scheduled jobs in the queue and the limit `max_threads` is not reached.
startNewThreadsNoLock();
}
else if (need_finish_free_threads)
{
/// Wake up free threads so they can finish themselves.
new_job_or_shutdown.notify_all();
}
}
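The sizing rules that the constructor and setMaxThreads above maintain, shown in a hedged standalone form (a hypothetical PoolLimits struct, not ThreadPoolImpl itself): max_free_threads never exceeds max_threads, and a non-zero queue_size is raised to at least max_threads because it limits scheduled plus running jobs in total, while zero keeps the queue unlimited.
#include <algorithm>
#include <cassert>
#include <cstddef>

struct PoolLimits
{
    size_t max_threads;
    size_t max_free_threads;
    size_t queue_size;

    void setMaxThreads(size_t value)
    {
        max_threads = value;
        max_free_threads = std::min(max_free_threads, max_threads);
        queue_size = queue_size ? std::max(queue_size, max_threads) : 0;
    }
};

int main()
{
    PoolLimits limits{/*max_threads*/ 8, /*max_free_threads*/ 16, /*queue_size*/ 4};
    limits.setMaxThreads(10);
    assert(limits.max_free_threads == 10);  // clamped to the new max_threads
    assert(limits.queue_size == 10);        // raised to max_threads, since it was non-zero
}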
template <typename Thread>
@ -73,14 +89,22 @@ template <typename Thread>
void ThreadPoolImpl<Thread>::setMaxFreeThreads(size_t value)
{
std::lock_guard lock(mutex);
max_free_threads = value;
bool need_finish_free_threads = (value < max_free_threads);
max_free_threads = std::min(value, max_threads);
if (need_finish_free_threads)
{
/// Wake up free threads so they can finish themselves.
new_job_or_shutdown.notify_all();
}
}
template <typename Thread>
void ThreadPoolImpl<Thread>::setQueueSize(size_t value)
{
std::lock_guard lock(mutex);
queue_size = value;
queue_size = value ? std::max(value, max_threads) : 0;
/// Reserve memory to get rid of allocations
jobs.reserve(queue_size);
}
@ -159,11 +183,42 @@ ReturnType ThreadPoolImpl<Thread>::scheduleImpl(Job job, ssize_t priority, std::
++scheduled_jobs;
}
/// Wake up a free thread to run the new job.
new_job_or_shutdown.notify_one();
return static_cast<ReturnType>(true);
}
template <typename Thread>
void ThreadPoolImpl<Thread>::startNewThreadsNoLock()
{
if (shutdown)
return;
/// Start new threads while there are more scheduled jobs in the queue and the limit `max_threads` is not reached.
while (threads.size() < std::min(scheduled_jobs, max_threads))
{
try
{
threads.emplace_front();
}
catch (...)
{
break; /// failed to start more threads
}
try
{
threads.front() = Thread([this, it = threads.begin()] { worker(it); });
}
catch (...)
{
threads.pop_front();
break; /// failed to start more threads
}
}
}
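The exception-safety idiom used in startNewThreadsNoLock, shown as a hedged standalone sketch (std::list<std::thread> instead of the pool's thread type): reserve the list slot first, construct the thread into it, and roll the slot back if construction throws.
#include <cstddef>
#include <list>
#include <thread>

static void start_threads(std::list<std::thread> & threads, std::size_t wanted)
{
    while (threads.size() < wanted)
    {
        try
        {
            threads.emplace_front();               // reserve a slot; may throw std::bad_alloc
        }
        catch (...)
        {
            break;                                 // could not even get a slot
        }
        try
        {
            threads.front() = std::thread([]{});   // construct the worker into the slot
        }
        catch (...)
        {
            threads.pop_front();                   // roll back the empty slot
            break;
        }
    }
}

int main()
{
    std::list<std::thread> threads;
    start_threads(threads, 2);
    for (auto & t : threads)
        t.join();
}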
template <typename Thread>
void ThreadPoolImpl<Thread>::scheduleOrThrowOnError(Job job, ssize_t priority)
{
@ -185,20 +240,18 @@ void ThreadPoolImpl<Thread>::scheduleOrThrow(Job job, ssize_t priority, uint64_t
template <typename Thread>
void ThreadPoolImpl<Thread>::wait()
{
{
std::unique_lock lock(mutex);
/// Signal here just in case.
/// If threads are waiting on condition variables, but there are some jobs in the queue
/// then it will prevent us from deadlock.
new_job_or_shutdown.notify_all();
job_finished.wait(lock, [this] { return scheduled_jobs == 0; });
std::unique_lock lock(mutex);
/// Signal here just in case.
/// If threads are waiting on condition variables, but there are some jobs in the queue
/// then it will prevent us from deadlock.
new_job_or_shutdown.notify_all();
job_finished.wait(lock, [this] { return scheduled_jobs == 0; });
if (first_exception)
{
std::exception_ptr exception;
std::swap(exception, first_exception);
std::rethrow_exception(exception);
}
if (first_exception)
{
std::exception_ptr exception;
std::swap(exception, first_exception);
std::rethrow_exception(exception);
}
}
@ -219,10 +272,14 @@ void ThreadPoolImpl<Thread>::finalize()
{
std::lock_guard lock(mutex);
shutdown = true;
/// We don't want threads to remove themselves from `threads` anymore, otherwise `thread.join()` will go wrong below in this function.
threads_remove_themselves = false;
}
/// Wake up threads so they can finish themselves.
new_job_or_shutdown.notify_all();
/// Wait for all currently running jobs to finish (we don't wait for all scheduled jobs here like the function wait() does).
for (auto & thread : threads)
thread.join();
@ -268,38 +325,53 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
CurrentMetrics::Increment metric_all_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThread : CurrentMetrics::LocalThread);
/// Remove this thread from `threads` and detach it; this must be done before exiting from this worker.
/// We can't wrap the following lambda function into `SCOPE_EXIT` because it requires `mutex` to be locked.
auto detach_thread = [this, thread_it]
{
/// `mutex` is supposed to be already locked.
if (threads_remove_themselves)
{
thread_it->detach();
threads.erase(thread_it);
}
};
/// We'll run jobs in this worker while there are scheduled jobs and until some special event occurs (e.g. shutdown, or decreasing the number of max_threads).
/// And if `max_free_threads > 0` we keep this number of threads even when there are no jobs for them currently.
while (true)
{
/// This is inside the loop to also reset previous thread names set inside the jobs.
setThreadName("ThreadPool");
Job job;
bool need_shutdown = false;
/// A copy of parent trace context
DB::OpenTelemetry::TracingContextOnThread parent_thead_trace_context;
/// Get a job from the queue.
Job job;
std::exception_ptr exception_from_job;
bool need_shutdown = false;
{
std::unique_lock lock(mutex);
new_job_or_shutdown.wait(lock, [this] { return shutdown || !jobs.empty(); });
new_job_or_shutdown.wait(lock, [&] { return !jobs.empty() || shutdown || (threads.size() > std::min(max_threads, scheduled_jobs + max_free_threads)); });
need_shutdown = shutdown;
if (!jobs.empty())
if (jobs.empty())
{
/// boost::priority_queue does not provide interface for getting non-const reference to an element
/// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job.
job = std::move(const_cast<Job &>(jobs.top().job));
parent_thead_trace_context = std::move(const_cast<DB::OpenTelemetry::TracingContextOnThread &>(jobs.top().thread_trace_context));
jobs.pop();
}
else
{
/// shutdown is true, simply finish the thread.
/// No jobs and either `shutdown` is set or this thread is excessive. The worker will stop.
detach_thread();
return;
}
/// boost::priority_queue does not provide interface for getting non-const reference to an element
/// to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job.
job = std::move(const_cast<Job &>(jobs.top().job));
parent_thead_trace_context = std::move(const_cast<DB::OpenTelemetry::TracingContextOnThread &>(jobs.top().thread_trace_context));
jobs.pop();
}
/// Run the job. We don't run jobs after `shutdown` is set.
if (!need_shutdown)
{
ALLOW_ALLOCATIONS_IN_SCOPE;
@ -326,46 +398,47 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
/// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job destroyed before wait() returns.
job = {};
parent_thead_trace_context.reset();
}
catch (...)
{
thread_trace_context.root_span.addAttribute(std::current_exception());
exception_from_job = std::current_exception();
thread_trace_context.root_span.addAttribute(exception_from_job);
/// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job destroyed before wait() returns.
job = {};
parent_thead_trace_context.reset();
{
std::lock_guard lock(mutex);
if (!first_exception)
first_exception = std::current_exception(); // NOLINT
if (shutdown_on_exception)
shutdown = true;
--scheduled_jobs;
}
job_finished.notify_all();
new_job_or_shutdown.notify_all();
return;
}
parent_thead_trace_context.reset();
}
/// The job is done.
{
std::lock_guard lock(mutex);
if (exception_from_job)
{
if (!first_exception)
first_exception = exception_from_job;
if (shutdown_on_exception)
shutdown = true;
}
--scheduled_jobs;
if (threads.size() > scheduled_jobs + max_free_threads)
if (threads.size() > std::min(max_threads, scheduled_jobs + max_free_threads))
{
thread_it->detach();
threads.erase(thread_it);
/// This thread is excessive. The worker will stop.
detach_thread();
job_finished.notify_all();
if (shutdown)
new_job_or_shutdown.notify_all(); /// `shutdown` was set, wake up other threads so they can finish themselves.
return;
}
}
job_finished.notify_all();
job_finished.notify_all();
if (shutdown)
new_job_or_shutdown.notify_all(); /// `shutdown` was set, wake up other threads so they can finish themselves.
}
}
}
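A numeric illustration of the "excessive thread" predicate used in the worker loop above (plain arithmetic in a standalone sketch, not the pool itself): a worker retires either because max_threads was lowered or because more workers are idle than max_free_threads allows.
#include <algorithm>
#include <cassert>
#include <cstddef>

static bool is_excessive(size_t threads, size_t max_threads, size_t scheduled_jobs, size_t max_free_threads)
{
    return threads > std::min(max_threads, scheduled_jobs + max_free_threads);
}

int main()
{
    assert(is_excessive(8, 4, 8, 2));    // max_threads was lowered from 8 to 4
    assert(is_excessive(8, 16, 1, 2));   // one job left, keep at most 1 + 2 = 3 workers
    assert(!is_excessive(8, 16, 6, 2));  // 6 + 2 = 8 workers are still within the limit
}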

View File

@ -102,6 +102,7 @@ private:
size_t scheduled_jobs = 0;
bool shutdown = false;
bool threads_remove_themselves = true;
const bool shutdown_on_exception = true;
struct JobWithPriority
@ -129,6 +130,9 @@ private:
void worker(typename std::list<Thread>::iterator thread_it);
/// Tries to start new threads if there are scheduled jobs and the limit `max_threads` is not reached. Must be called with `mutex` locked.
void startNewThreadsNoLock();
void finalize();
void onDestroy();
};
@ -260,6 +264,11 @@ public:
return true;
}
std::thread::id get_id() const
{
return state ? state->thread_id.load() : std::thread::id{};
}
protected:
struct State
{

View File

@ -24,6 +24,8 @@ namespace DB
M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
M(Int32, max_connections, 1024, "Max server connections.", 0) \
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \

View File

@ -44,7 +44,7 @@ class IColumn;
M(UInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.", 0) \
M(UInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 mean that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has an effect only if the SELECT part is run in parallel, see 'max_threads' setting.", 0) \
M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \
M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_final_threads, 0, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \
M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
@ -152,6 +152,7 @@ class IColumn;
M(String, cluster_for_parallel_replicas, "default", "Cluster for a shard in which current server is located", 0) \
M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind of MergeTree table.", 0) \
M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
\
M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
\
@ -280,6 +281,7 @@ class IColumn;
\
M(Bool, final, false, "Query with the FINAL modifier by default. If the engine does not support final, it does not have any effect. On queries with multiple tables final is applied only on those that support it. It also works on distributed tables", 0) \
\
M(Bool, stop_reading_on_first_cancel, false, "Allows the query to return a partial result after cancellation.", 0) \
/** Settings for testing hedged requests */ \
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \
@ -413,8 +415,6 @@ class IColumn;
M(UInt64, max_temporary_data_on_disk_size_for_user, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. Zero means unlimited.", 0)\
M(UInt64, max_temporary_data_on_disk_size_for_query, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. Zero means unlimited.", 0)\
\
M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
M(UInt64, backup_keeper_max_retries, 20, "Max retries for keeper operations during backup", 0) \
M(UInt64, backup_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for [Zoo]Keeper operations during backup", 0) \
M(UInt64, backup_keeper_retry_max_backoff_ms, 5000, "Max backoff timeout for [Zoo]Keeper operations during backup", 0) \
@ -760,6 +760,8 @@ class IColumn;
MAKE_OBSOLETE(M, Milliseconds, async_insert_cleanup_timeout_ms, 1000) \
MAKE_OBSOLETE(M, Bool, optimize_fuse_sum_count_avg, 0) \
MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \
MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \
MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \
/** The section above is for obsolete settings. Do not add anything there. */

View File

@ -102,7 +102,7 @@ void IDataType::forEachSubcolumn(
template <typename Ptr>
Ptr IDataType::getForSubcolumn(
const String & subcolumn_name,
std::string_view subcolumn_name,
const SubstreamData & data,
Ptr SubstreamData::*member,
bool throw_if_null) const
@ -120,36 +120,36 @@ Ptr IDataType::getForSubcolumn(
return res;
}
bool IDataType::hasSubcolumn(const String & subcolumn_name) const
bool IDataType::hasSubcolumn(std::string_view subcolumn_name) const
{
return tryGetSubcolumnType(subcolumn_name) != nullptr;
}
DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const
DataTypePtr IDataType::tryGetSubcolumnType(std::string_view subcolumn_name) const
{
auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
return getForSubcolumn<DataTypePtr>(subcolumn_name, data, &SubstreamData::type, false);
}
DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const
DataTypePtr IDataType::getSubcolumnType(std::string_view subcolumn_name) const
{
auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
return getForSubcolumn<DataTypePtr>(subcolumn_name, data, &SubstreamData::type, true);
}
ColumnPtr IDataType::tryGetSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
ColumnPtr IDataType::tryGetSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const
{
auto data = SubstreamData(getDefaultSerialization()).withColumn(column);
return getForSubcolumn<ColumnPtr>(subcolumn_name, data, &SubstreamData::column, false);
}
ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
ColumnPtr IDataType::getSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const
{
auto data = SubstreamData(getDefaultSerialization()).withColumn(column);
return getForSubcolumn<ColumnPtr>(subcolumn_name, data, &SubstreamData::column, true);
}
SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const
SerializationPtr IDataType::getSubcolumnSerialization(std::string_view subcolumn_name, const SerializationPtr & serialization) const
{
auto data = SubstreamData(serialization);
return getForSubcolumn<SerializationPtr>(subcolumn_name, data, &SubstreamData::serialization, true);

View File

@ -79,15 +79,15 @@ public:
/// Data type id. It's used for runtime type checks.
virtual TypeIndex getTypeId() const = 0;
bool hasSubcolumn(const String & subcolumn_name) const;
bool hasSubcolumn(std::string_view subcolumn_name) const;
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const;
DataTypePtr getSubcolumnType(const String & subcolumn_name) const;
DataTypePtr tryGetSubcolumnType(std::string_view subcolumn_name) const;
DataTypePtr getSubcolumnType(std::string_view subcolumn_name) const;
ColumnPtr tryGetSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const;
ColumnPtr getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const;
ColumnPtr tryGetSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const;
ColumnPtr getSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const;
SerializationPtr getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const;
SerializationPtr getSubcolumnSerialization(std::string_view subcolumn_name, const SerializationPtr & serialization) const;
using SubstreamData = ISerialization::SubstreamData;
using SubstreamPath = ISerialization::SubstreamPath;
@ -315,7 +315,7 @@ public:
private:
template <typename Ptr>
Ptr getForSubcolumn(
const String & subcolumn_name,
std::string_view subcolumn_name,
const SubstreamData & data,
Ptr SubstreamData::*member,
bool throw_if_null) const;

View File

@ -928,7 +928,16 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
for (const auto & table_id : tables_to_create)
{
auto table_name = table_id.getTableName();
auto create_query_string = table_name_to_metadata[table_name];
auto metadata_it = table_name_to_metadata.find(table_name);
if (metadata_it == table_name_to_metadata.end())
{
/// getTablesSortedByDependency() may return tables that do not exist or tables from other databases
LOG_WARNING(log, "Got table name {} when resolving table dependencies, "
"but database {} does not have metadata for that table. Ignoring it", table_id.getNameForLogs(), getDatabaseName());
continue;
}
const auto & create_query_string = metadata_it->second;
if (isTableExist(table_name, getContext()))
{
assert(create_query_string == readMetadataFile(table_name));

View File

@ -2,8 +2,10 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Functions/IFunction.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Functions/IFunction.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/TransformDateTime64.h>
@ -60,6 +62,9 @@ public:
const auto * type_ptr = &type;
if (const auto * lc_type = checkAndGetDataType<DataTypeLowCardinality>(type_ptr))
type_ptr = lc_type->getDictionaryType().get();
if (const auto * nullable_type = checkAndGetDataType<DataTypeNullable>(type_ptr))
type_ptr = nullable_type->getNestedType().get();

View File

@ -26,24 +26,24 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
static String getRootNodeName(UserDefinedSQLObjectType object_type)
namespace
{
switch (object_type)
std::string_view getNodePrefix(UserDefinedSQLObjectType object_type)
{
case UserDefinedSQLObjectType::Function:
return "functions";
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
return "function_";
}
UNREACHABLE();
}
UNREACHABLE();
}
static String getRootNodePath(const String & root_path, UserDefinedSQLObjectType object_type)
{
return root_path + "/" + getRootNodeName(object_type);
}
constexpr std::string_view sql_extension = ".sql";
static String getNodePath(const String & root_path, UserDefinedSQLObjectType object_type, const String & object_name)
{
return getRootNodePath(root_path, object_type) + "/" + escapeForFileName(object_name);
String getNodePath(const String & root_path, UserDefinedSQLObjectType object_type, const String & object_name)
{
return root_path + "/" + String{getNodePrefix(object_type)} + escapeForFileName(object_name) + String{sql_extension};
}
}
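A minimal sketch of the new flat naming scheme for user-defined objects in ZooKeeper (the root path and the escaping stub below are hypothetical; the real code uses escapeForFileName and the configured zookeeper_path):
#include <cassert>
#include <string>

static std::string escape_stub(const std::string & name) { return name; }   // stand-in for escapeForFileName

// One flat directory, nodes named "<prefix><escaped_name>.sql" instead of a per-type subdirectory.
static std::string node_path(const std::string & root, const std::string & object_name)
{
    return root + "/" + "function_" + escape_stub(object_name) + ".sql";
}

int main()
{
    assert(node_path("/clickhouse/user_defined", "linear_equation")
           == "/clickhouse/user_defined/function_linear_equation.sql");
}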
@ -119,10 +119,20 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::resetAfterError()
void UserDefinedSQLObjectsLoaderFromZooKeeper::loadObjects()
{
/// loadObjects() is called at startup from Server::main(), so it's better not to fail here if there is no connection to ZooKeeper or any other error occurs.
/// However, the watching thread must be started anyway in case the connection is established later.
if (!objects_loaded)
{
reloadObjects();
try
{
reloadObjects();
}
catch (...)
{
tryLogCurrentException(log, "Failed to load user-defined objects");
}
}
startWatchingThread();
}
@ -188,7 +198,6 @@ void UserDefinedSQLObjectsLoaderFromZooKeeper::createRootNodes(const zkutil::Zoo
{
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/functions", "");
}
bool UserDefinedSQLObjectsLoaderFromZooKeeper::storeObject(
@ -344,17 +353,19 @@ Strings UserDefinedSQLObjectsLoaderFromZooKeeper::getObjectNamesAndSetWatch(
};
Coordination::Stat stat;
const auto path = getRootNodePath(zookeeper_path, object_type);
const auto node_names = zookeeper->getChildrenWatch(path, &stat, object_list_watcher);
const auto node_names = zookeeper->getChildrenWatch(zookeeper_path, &stat, object_list_watcher);
const auto prefix = getNodePrefix(object_type);
Strings object_names;
object_names.reserve(node_names.size());
for (const auto & node_name : node_names)
{
String object_name = unescapeForFileName(node_name);
if (!object_name.empty())
object_names.push_back(std::move(object_name));
if (node_name.starts_with(prefix) && node_name.ends_with(sql_extension))
{
String object_name = unescapeForFileName(node_name.substr(prefix.length(), node_name.length() - prefix.length() - sql_extension.length()));
if (!object_name.empty())
object_names.push_back(std::move(object_name));
}
}
return object_names;

View File

@ -1179,12 +1179,15 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments
|| (res = executeArgument<Int16>(arguments, result_type, builder, input_rows_count))
|| (res = executeArgument<Int32>(arguments, result_type, builder, input_rows_count))
|| (res = executeArgument<Int64>(arguments, result_type, builder, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must have UInt or Int type.", getName());
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must have UInt or Int type", getName());
}
else
{
Field index = (*arguments[1].column)[0];
if (index.getType() != Field::Types::UInt64 && index.getType() != Field::Types::Int64)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must have UInt or Int type", getName());
if (builder)
builder.initSink(input_rows_count);

View File

@ -13,6 +13,7 @@
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/numLiteralChars.h>
#include <IO/WriteHelpers.h>
@ -54,55 +55,19 @@ struct FormatDateTimeTraits
};
template <typename DataType> struct ActionValueTypeMap {};
template <> struct ActionValueTypeMap<DataTypeInt8> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt8> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeInt16> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt16> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeInt32> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt32> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeInt64> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeUInt64> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeDate> { using ActionValueType = UInt16; };
template <> struct ActionValueTypeMap<DataTypeDate32> { using ActionValueType = Int32; };
template <> struct ActionValueTypeMap<DataTypeDateTime> { using ActionValueType = UInt32; };
template <> struct ActionValueTypeMap<DataTypeDateTime64> { using ActionValueType = Int64; };
/// Counts the number of literal characters in Joda format string until the next closing literal
/// sequence single quote. Returns -1 if no literal single quote was found.
/// In Joda format string(https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html)
/// literal content must be quoted with single quote. and two single quote means literal with one single quote.
/// For example:
/// Format string: "'aaaa'", unescaped literal: "aaaa";
/// Format string: "'aa''aa'", unescaped literal: "aa'aa";
/// Format string: "'aaa''aa" is not valid because of missing of end single quote.
Int64 numLiteralChars(const char * cur, const char * end)
{
bool found = false;
Int64 count = 0;
while (cur < end)
{
if (*cur == '\'')
{
if (cur + 1 < end && *(cur + 1) == '\'')
{
count += 2;
cur += 2;
}
else
{
found = true;
break;
}
}
else
{
++count;
++cur;
}
}
return found ? count : -1;
}
template <typename DataType> struct InstructionValueTypeMap {};
template <> struct InstructionValueTypeMap<DataTypeInt8> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeUInt8> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeInt16> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeUInt16> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeInt32> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeUInt32> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeInt64> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeUInt64> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeDate> { using InstructionValueType = UInt16; };
template <> struct InstructionValueTypeMap<DataTypeDate32> { using InstructionValueType = Int32; };
template <> struct InstructionValueTypeMap<DataTypeDateTime> { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap<DataTypeDateTime64> { using InstructionValueType = Int64; };
/// Cast value from integer to string, making sure digits number in result string is no less than total_digits by padding leading '0'.
String padValue(UInt32 val, size_t min_digits)
@ -184,7 +149,7 @@ private:
}
template <typename Time>
class Action
class Instruction
{
public:
/// Using std::function will cause performance degradation in MySQL format by 0.45x.
@ -201,8 +166,8 @@ private:
/// extra_shift is only used in MySQL format syntax. It is always 0 in Joda format syntax.
size_t extra_shift = 0;
/// Action for appending date/time related number in specified format.
explicit Action(Func && func_) : func(std::move(func_)) {}
/// Instruction for appending date/time related number in specified format.
explicit Instruction(Func && func_) : func(std::move(func_)) {}
void perform(char *& dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone)
{
@ -825,8 +790,8 @@ public:
if constexpr (std::is_same_v<DataType, DataTypeDateTime64>)
scale = times->getScale();
using T = typename ActionValueTypeMap<DataType>::ActionValueType;
std::vector<Action<T>> instructions;
using T = typename InstructionValueTypeMap<DataType>::InstructionValueType;
std::vector<Instruction<T>> instructions;
String out_template;
auto result_size = parseFormat(format, instructions, scale, out_template);
@ -898,27 +863,25 @@ public:
}
template <typename T>
size_t parseFormat(const String & format, std::vector<Action<T>> & instructions, UInt32 scale, String & out_template) const
size_t parseFormat(const String & format, std::vector<Instruction<T>> & instructions, UInt32 scale, String & out_template) const
{
static_assert(
format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL || format_syntax == FormatDateTimeTraits::FormatSyntax::Joda,
"format syntax must be one of MySQL or Joda");
if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL)
return parseMySQLFormat(format, instructions, scale, out_template);
else if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::Joda)
return parseJodaFormat(format, instructions, scale, out_template);
else
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Unknown datetime format style {} in function {}",
magic_enum::enum_name(format_syntax),
getName());
return parseJodaFormat(format, instructions, scale, out_template);
}
template <typename T>
size_t parseMySQLFormat(const String & format, std::vector<Action<T>> & instructions, UInt32 scale, String & out_template) const
size_t parseMySQLFormat(const String & format, std::vector<Instruction<T>> & instructions, UInt32 scale, String & out_template) const
{
auto add_extra_shift = [&](size_t amount)
{
if (instructions.empty())
instructions.emplace_back(&Action<T>::mysqlNoop);
instructions.emplace_back(&Instruction<T>::mysqlNoop);
instructions.back().extra_shift += amount;
};
@ -931,7 +894,7 @@ public:
};
const char * pos = format.data();
const char * const end = pos + format.size();
const char * const end = format.data() + format.size();
while (true)
{
@ -953,43 +916,43 @@ public:
{
// Abbreviated weekday [Mon...Sun]
case 'a':
instructions.emplace_back(&Action<T>::mysqlDayOfWeekTextShort);
instructions.emplace_back(&Instruction<T>::mysqlDayOfWeekTextShort);
out_template += "Mon";
break;
// Abbreviated month [Jan...Dec]
case 'b':
instructions.emplace_back(&Action<T>::mysqlMonthOfYearTextShort);
instructions.emplace_back(&Instruction<T>::mysqlMonthOfYearTextShort);
out_template += "Jan";
break;
// Month as a decimal number (01-12)
// Month as an integer number (01-12)
case 'c':
instructions.emplace_back(&Action<T>::mysqlMonth);
instructions.emplace_back(&Instruction<T>::mysqlMonth);
out_template += "00";
break;
// Year, divided by 100, zero-padded
case 'C':
instructions.emplace_back(&Action<T>::mysqlCentury);
instructions.emplace_back(&Instruction<T>::mysqlCentury);
out_template += "00";
break;
// Day of month, zero-padded (01-31)
case 'd':
instructions.emplace_back(&Action<T>::mysqlDayOfMonth);
instructions.emplace_back(&Instruction<T>::mysqlDayOfMonth);
out_template += "00";
break;
// Short MM/DD/YY date, equivalent to %m/%d/%y
case 'D':
instructions.emplace_back(&Action<T>::mysqlAmericanDate);
instructions.emplace_back(&Instruction<T>::mysqlAmericanDate);
out_template += "00/00/00";
break;
// Day of month, space-padded ( 1-31) 23
case 'e':
instructions.emplace_back(&Action<T>::mysqlDayOfMonthSpacePadded);
instructions.emplace_back(&Instruction<T>::mysqlDayOfMonthSpacePadded);
out_template += " 0";
break;
@ -997,86 +960,86 @@ public:
case 'f':
{
/// If the time data type has no fractional part, then we print '0' as the fractional part.
instructions.emplace_back(&Action<T>::mysqlFractionalSecond);
instructions.emplace_back(&Instruction<T>::mysqlFractionalSecond);
out_template += String(std::max<UInt32>(1, scale), '0');
break;
}
// Short YYYY-MM-DD date, equivalent to %Y-%m-%d 2001-08-23
case 'F':
instructions.emplace_back(&Action<T>::mysqlISO8601Date);
instructions.emplace_back(&Instruction<T>::mysqlISO8601Date);
out_template += "0000-00-00";
break;
// Last two digits of year of ISO 8601 week number (see %G)
case 'g':
instructions.emplace_back(&Action<T>::mysqlISO8601Year2);
instructions.emplace_back(&Instruction<T>::mysqlISO8601Year2);
out_template += "00";
break;
// Year of ISO 8601 week number (see %V)
case 'G':
instructions.emplace_back(&Action<T>::mysqlISO8601Year4);
instructions.emplace_back(&Instruction<T>::mysqlISO8601Year4);
out_template += "0000";
break;
// Day of the year (001-366) 235
case 'j':
instructions.emplace_back(&Action<T>::mysqlDayOfYear);
instructions.emplace_back(&Instruction<T>::mysqlDayOfYear);
out_template += "000";
break;
// Month as a decimal number (01-12)
// Month as an integer number (01-12)
case 'm':
instructions.emplace_back(&Action<T>::mysqlMonth);
instructions.emplace_back(&Instruction<T>::mysqlMonth);
out_template += "00";
break;
// ISO 8601 weekday as number with Monday as 1 (1-7)
case 'u':
instructions.emplace_back(&Action<T>::mysqlDayOfWeek);
instructions.emplace_back(&Instruction<T>::mysqlDayOfWeek);
out_template += "0";
break;
// ISO 8601 week number (01-53)
case 'V':
instructions.emplace_back(&Action<T>::mysqlISO8601Week);
instructions.emplace_back(&Instruction<T>::mysqlISO8601Week);
out_template += "00";
break;
// Weekday as a decimal number with Sunday as 0 (0-6) 4
// Weekday as an integer number with Sunday as 0 (0-6) 4
case 'w':
instructions.emplace_back(&Action<T>::mysqlDayOfWeek0To6);
instructions.emplace_back(&Instruction<T>::mysqlDayOfWeek0To6);
out_template += "0";
break;
// Full weekday [Monday...Sunday]
case 'W':
instructions.emplace_back(&Action<T>::mysqlDayOfWeekTextLong);
instructions.emplace_back(&Instruction<T>::mysqlDayOfWeekTextLong);
out_template += "Monday";
break;
// Two digits year
case 'y':
instructions.emplace_back(&Action<T>::mysqlYear2);
instructions.emplace_back(&Instruction<T>::mysqlYear2);
out_template += "00";
break;
// Four digits year
case 'Y':
instructions.emplace_back(&Action<T>::mysqlYear4);
instructions.emplace_back(&Instruction<T>::mysqlYear4);
out_template += "0000";
break;
// Quarter (1-4)
case 'Q':
instructions.template emplace_back(&Action<T>::mysqlQuarter);
instructions.template emplace_back(&Instruction<T>::mysqlQuarter);
out_template += "0";
break;
// Offset from UTC timezone as +hhmm or -hhmm
case 'z':
instructions.emplace_back(&Action<T>::mysqlTimezoneOffset);
instructions.emplace_back(&Instruction<T>::mysqlTimezoneOffset);
out_template += "+0000";
break;
@ -1084,79 +1047,79 @@ public:
// Minute (00-59)
case 'M':
add_instruction_or_extra_shift(&Action<T>::mysqlMinute, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlMinute, 2);
out_template += "00";
break;
// AM or PM
case 'p':
add_instruction_or_extra_shift(&Action<T>::mysqlAMPM, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlAMPM, 2);
out_template += "AM";
break;
// 12-hour HH:MM time, equivalent to %h:%i %p 2:55 PM
case 'r':
add_instruction_or_extra_shift(&Action<T>::mysqlHHMM12, 8);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHHMM12, 8);
out_template += "12:00 AM";
break;
// 24-hour HH:MM time, equivalent to %H:%i 14:55
case 'R':
add_instruction_or_extra_shift(&Action<T>::mysqlHHMM24, 5);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHHMM24, 5);
out_template += "00:00";
break;
// Seconds
case 's':
add_instruction_or_extra_shift(&Action<T>::mysqlSecond, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlSecond, 2);
out_template += "00";
break;
// Seconds
case 'S':
add_instruction_or_extra_shift(&Action<T>::mysqlSecond, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlSecond, 2);
out_template += "00";
break;
// ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S 14:55:02
case 'T':
add_instruction_or_extra_shift(&Action<T>::mysqlISO8601Time, 8);
add_instruction_or_extra_shift(&Instruction<T>::mysqlISO8601Time, 8);
out_template += "00:00:00";
break;
// Hour in 12h format (01-12)
case 'h':
add_instruction_or_extra_shift(&Action<T>::mysqlHour12, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour12, 2);
out_template += "12";
break;
// Hour in 24h format (00-23)
case 'H':
add_instruction_or_extra_shift(&Action<T>::mysqlHour24, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour24, 2);
out_template += "00";
break;
// Minute of hour range [0, 59]
case 'i':
add_instruction_or_extra_shift(&Action<T>::mysqlMinute, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlMinute, 2);
out_template += "00";
break;
// Hour in 12h format (01-12)
case 'I':
add_instruction_or_extra_shift(&Action<T>::mysqlHour12, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour12, 2);
out_template += "12";
break;
// Hour in 24h format (00-23)
case 'k':
add_instruction_or_extra_shift(&Action<T>::mysqlHour24, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour24, 2);
out_template += "00";
break;
// Hour in 12h format (01-12)
case 'l':
add_instruction_or_extra_shift(&Action<T>::mysqlHour12, 2);
add_instruction_or_extra_shift(&Instruction<T>::mysqlHour12, 2);
out_template += "12";
break;
@ -1209,7 +1172,7 @@ public:
}
template <typename T>
size_t parseJodaFormat(const String & format, std::vector<Action<T>> & instructions, UInt32, String &) const
size_t parseJodaFormat(const String & format, std::vector<Instruction<T>> & instructions, UInt32, String &) const
{
/// If the argument was DateTime, add instruction for printing. If it was date, just append default literal
auto add_instruction = [&](auto && func [[maybe_unused]], const String & default_literal [[maybe_unused]])
@ -1217,13 +1180,12 @@ public:
if constexpr (std::is_same_v<T, UInt32> || std::is_same_v<T, Int64>)
instructions.emplace_back(func);
else
instructions.emplace_back(std::bind_front(&Action<T>::template jodaLiteral<String>, default_literal));
instructions.emplace_back(std::bind_front(&Instruction<T>::template jodaLiteral<String>, default_literal));
};
size_t reserve_size = 0;
const char * pos = format.data();
const char * end = pos + format.size();
const char * end = format.data() + format.size();
while (pos < end)
{
const char * cur_token = pos;
@ -1235,7 +1197,7 @@ public:
if (pos + 1 < end && *(pos + 1) == '\'')
{
std::string_view literal(cur_token, 1);
instructions.emplace_back(std::bind_front(&Action<T>::template jodaLiteral<decltype(literal)>, literal));
instructions.emplace_back(std::bind_front(&Instruction<T>::template jodaLiteral<decltype(literal)>, literal));
++reserve_size;
pos += 2;
}
@ -1251,7 +1213,7 @@ public:
{
std::string_view literal(cur_token + i, 1);
instructions.emplace_back(
std::bind_front(&Action<T>::template jodaLiteral<decltype(literal)>, literal));
std::bind_front(&Instruction<T>::template jodaLiteral<decltype(literal)>, literal));
++reserve_size;
if (*(cur_token + i) == '\'')
i += 1;
@ -1272,115 +1234,115 @@ public:
switch (*cur_token)
{
case 'G':
instructions.emplace_back(std::bind_front(&Action<T>::jodaEra, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaEra, repetitions));
reserve_size += repetitions <= 3 ? 2 : 13;
break;
case 'C':
instructions.emplace_back(std::bind_front(&Action<T>::jodaCenturyOfEra, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaCenturyOfEra, repetitions));
/// Year range [1900, 2299]
reserve_size += std::max(repetitions, 2);
break;
case 'Y':
instructions.emplace_back(std::bind_front(&Action<T>::jodaYearOfEra, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaYearOfEra, repetitions));
/// Year range [1900, 2299]
reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4);
break;
case 'x':
instructions.emplace_back(std::bind_front(&Action<T>::jodaWeekYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaWeekYear, repetitions));
/// weekyear range [1900, 2299]
reserve_size += std::max(repetitions, 4);
break;
case 'w':
instructions.emplace_back(std::bind_front(&Action<T>::jodaWeekOfWeekYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaWeekOfWeekYear, repetitions));
/// Week of weekyear range [1, 52]
reserve_size += std::max(repetitions, 2);
break;
case 'e':
instructions.emplace_back(std::bind_front(&Action<T>::jodaDayOfWeek1Based, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaDayOfWeek1Based, repetitions));
/// Day of week range [1, 7]
reserve_size += std::max(repetitions, 1);
break;
case 'E':
instructions.emplace_back(std::bind_front(&Action<T>::jodaDayOfWeekText, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaDayOfWeekText, repetitions));
/// Maximum length of short name is 3, maximum length of full name is 9.
reserve_size += repetitions <= 3 ? 3 : 9;
break;
case 'y':
instructions.emplace_back(std::bind_front(&Action<T>::jodaYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaYear, repetitions));
/// Year range [1900, 2299]
reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4);
break;
case 'D':
instructions.emplace_back(std::bind_front(&Action<T>::jodaDayOfYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaDayOfYear, repetitions));
/// Day of year range [1, 366]
reserve_size += std::max(repetitions, 3);
break;
case 'M':
if (repetitions <= 2)
{
instructions.emplace_back(std::bind_front(&Action<T>::jodaMonthOfYear, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaMonthOfYear, repetitions));
/// Month of year range [1, 12]
reserve_size += 2;
}
else
{
instructions.emplace_back(std::bind_front(&Action<T>::jodaMonthOfYearText, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaMonthOfYearText, repetitions));
/// Maximum length of short name is 3, maximum length of full name is 9.
reserve_size += repetitions <= 3 ? 3 : 9;
}
break;
case 'd':
instructions.emplace_back(std::bind_front(&Action<T>::jodaDayOfMonth, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaDayOfMonth, repetitions));
/// Day of month range [1, 31]
reserve_size += std::max(repetitions, 3);
break;
case 'a':
/// Default half day of day is "AM"
add_instruction(std::bind_front(&Action<T>::jodaHalfDayOfDay, repetitions), "AM");
add_instruction(std::bind_front(&Instruction<T>::jodaHalfDayOfDay, repetitions), "AM");
reserve_size += 2;
break;
case 'K':
/// Default hour of half day is 0
add_instruction(
std::bind_front(&Action<T>::jodaHourOfHalfDay, repetitions), padValue(0, repetitions));
std::bind_front(&Instruction<T>::jodaHourOfHalfDay, repetitions), padValue(0, repetitions));
/// Hour of half day range [0, 11]
reserve_size += std::max(repetitions, 2);
break;
case 'h':
/// Default clock hour of half day is 12
add_instruction(
std::bind_front(&Action<T>::jodaClockHourOfHalfDay, repetitions),
std::bind_front(&Instruction<T>::jodaClockHourOfHalfDay, repetitions),
padValue(12, repetitions));
/// Clock hour of half day range [1, 12]
reserve_size += std::max(repetitions, 2);
break;
case 'H':
/// Default hour of day is 0
add_instruction(std::bind_front(&Action<T>::jodaHourOfDay, repetitions), padValue(0, repetitions));
add_instruction(std::bind_front(&Instruction<T>::jodaHourOfDay, repetitions), padValue(0, repetitions));
/// Hour of day range [0, 23]
reserve_size += std::max(repetitions, 2);
break;
case 'k':
/// Default clock hour of day is 24
add_instruction(std::bind_front(&Action<T>::jodaClockHourOfDay, repetitions), padValue(24, repetitions));
add_instruction(std::bind_front(&Instruction<T>::jodaClockHourOfDay, repetitions), padValue(24, repetitions));
/// Clock hour of day range [1, 24]
reserve_size += std::max(repetitions, 2);
break;
case 'm':
/// Default minute of hour is 0
add_instruction(std::bind_front(&Action<T>::jodaMinuteOfHour, repetitions), padValue(0, repetitions));
add_instruction(std::bind_front(&Instruction<T>::jodaMinuteOfHour, repetitions), padValue(0, repetitions));
/// Minute of hour range [0, 59]
reserve_size += std::max(repetitions, 2);
break;
case 's':
/// Default second of minute is 0
add_instruction(std::bind_front(&Action<T>::jodaSecondOfMinute, repetitions), padValue(0, repetitions));
add_instruction(std::bind_front(&Instruction<T>::jodaSecondOfMinute, repetitions), padValue(0, repetitions));
/// Second of minute range [0, 59]
reserve_size += std::max(repetitions, 2);
break;
case 'S':
/// Default fraction of second is 0
instructions.emplace_back(std::bind_front(&Action<T>::jodaFractionOfSecond, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaFractionOfSecond, repetitions));
/// 'S' repetitions range [0, 9]
reserve_size += repetitions <= 9 ? repetitions : 9;
break;
@ -1388,7 +1350,7 @@ public:
if (repetitions <= 3)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Short name time zone is not yet supported");
instructions.emplace_back(std::bind_front(&Action<T>::jodaTimezone, repetitions));
instructions.emplace_back(std::bind_front(&Instruction<T>::jodaTimezone, repetitions));
/// Longest length of full name of time zone is 32.
reserve_size += 32;
break;
@ -1399,7 +1361,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for {}", String(cur_token, repetitions));
std::string_view literal(cur_token, pos - cur_token);
instructions.emplace_back(std::bind_front(&Action<T>::template jodaLiteral<decltype(literal)>, literal));
instructions.emplace_back(std::bind_front(&Instruction<T>::template jodaLiteral<decltype(literal)>, literal));
reserve_size += pos - cur_token;
break;
}

View File

@ -0,0 +1,67 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
namespace
{
class FunctionGetSubcolumn : public IFunction
{
public:
static constexpr auto name = "getSubcolumn";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionGetSubcolumn>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
auto subcolumn_name = getSubcolumnName(arguments);
return arguments[0].type->getSubcolumnType(subcolumn_name);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
auto subcolumn_name = getSubcolumnName(arguments);
return arguments[0].type->getSubcolumn(subcolumn_name, arguments[0].column);
}
private:
static std::string_view getSubcolumnName(const ColumnsWithTypeAndName & arguments)
{
const auto * column = arguments[1].column.get();
if (!isString(arguments[1].type) || !column || !checkAndGetColumnConstStringOrFixedString(column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"The second argument of function {} should be a constant string with the name of a subcolumn", name);
return column->getDataAt(0).toView();
}
};
}
REGISTER_FUNCTION(GetSubcolumn)
{
factory.registerFunction<FunctionGetSubcolumn>({
R"(
Receives an expression or identifier and a constant string with the name of a subcolumn.
Returns the requested subcolumn extracted from the expression.
)",
Documentation::Examples{{"getSubcolumn", "SELECT getSubcolumn(array_col, 'size0'), getSubcolumn(tuple_col, 'elem_name')"}},
Documentation::Categories{"OtherFunctions"}
});
}
}

View File

@ -0,0 +1,44 @@
#pragma once
#include <base/types.h>
namespace DB
{
/// Counts the number of literal characters in a Joda format string until the next closing literal
/// single quote. Returns -1 if no closing single quote was found.
/// In a Joda format string (https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html)
/// literal content must be quoted with single quotes, and two consecutive single quotes denote one literal single quote.
/// For example:
/// Format string: "'aaaa'", unescaped literal: "aaaa";
/// Format string: "'aa''aa'", unescaped literal: "aa'aa";
/// Format string: "'aaa''aa" is not valid because the closing single quote is missing.
inline Int64 numLiteralChars(const char * cur, const char * end)
{
bool found = false;
Int64 count = 0;
while (cur < end)
{
if (*cur == '\'')
{
if (cur + 1 < end && *(cur + 1) == '\'')
{
count += 2;
cur += 2;
}
else
{
found = true;
break;
}
}
else
{
++count;
++cur;
}
}
return found ? count : -1;
}
}
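A small usage check of numLiteralChars against the examples from the comment above (assumes it is compiled inside the ClickHouse tree so that <Functions/numLiteralChars.h> is available; the cursor is positioned right after the opening single quote):
#include <cassert>
#include <string_view>
#include <Functions/numLiteralChars.h>

int main()
{
    auto count = [](std::string_view s) { return DB::numLiteralChars(s.data(), s.data() + s.size()); };

    assert(count("aaaa'") == 4);    // "'aaaa'"   -> literal "aaaa"
    assert(count("aa''aa'") == 6);  // "'aa''aa'" -> literal "aa'aa", the "''" pair counts as two characters
    assert(count("aaa''aa") == -1); // "'aaa''aa" -> no closing single quote
}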

File diff suppressed because it is too large Load Diff

View File

@ -159,6 +159,8 @@ namespace detail
if (out_stream_callback)
request.setChunkedTransferEncoding(true);
else if (method == Poco::Net::HTTPRequest::HTTP_POST)
request.setContentLength(0); /// No callback - no body
for (auto & [header, value] : http_header_entries)
request.set(header, value);

View File

@ -23,6 +23,8 @@
namespace ProfileEvents
{
extern const Event WriteBufferFromS3Bytes;
extern const Event WriteBufferFromS3Microseconds;
extern const Event WriteBufferFromS3RequestsErrors;
extern const Event S3WriteBytes;
extern const Event S3CreateMultipartUpload;
@ -200,7 +202,11 @@ void WriteBufferFromS3::createMultipartUpload()
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3CreateMultipartUpload);
Stopwatch watch;
auto outcome = client_ptr->CreateMultipartUpload(req);
watch.stop();
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
if (outcome.IsSuccess())
{
@ -208,7 +214,10 @@ void WriteBufferFromS3::createMultipartUpload()
LOG_TRACE(log, "Multipart upload has created. Bucket: {}, Key: {}, Upload id: {}", bucket, key, multipart_upload_id);
}
else
{
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
}
}
void WriteBufferFromS3::writePart()
@ -345,9 +354,13 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task)
ResourceCost cost = task.req.GetContentLength();
ResourceGuard rlock(write_settings.resource_link, cost);
Stopwatch watch;
auto outcome = client_ptr->UploadPart(task.req);
watch.stop();
rlock.unlock(); // Avoid acquiring other locks under resource lock
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
if (outcome.IsSuccess())
{
task.tag = outcome.GetResult().GetETag();
@ -356,6 +369,7 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task)
}
else
{
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
}
@ -391,27 +405,41 @@ void WriteBufferFromS3::completeMultipartUpload()
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload);
Stopwatch watch;
auto outcome = client_ptr->CompleteMultipartUpload(req);
watch.stop();
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
if (outcome.IsSuccess())
{
LOG_TRACE(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, tags.size());
break;
}
else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
/// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
/// BTW, NO_SUCH_UPLOAD is expected error and we shouldn't retry it
LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Upload_id: {}, Parts: {}, will retry", bucket, key, multipart_upload_id, tags.size());
return;
}
else
{
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Tags: {}",
outcome.GetError().GetMessage(), key, bucket, fmt::join(tags.begin(), tags.end(), " "));
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
/// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
/// BTW, NO_SUCH_UPLOAD is expected error and we shouldn't retry it
LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Upload_id: {}, Parts: {}, will retry", bucket, key, multipart_upload_id, tags.size());
}
else
{
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Tags: {}",
outcome.GetError().GetMessage(), key, bucket, fmt::join(tags.begin(), tags.end(), " "));
}
}
}
throw S3Exception(
Aws::S3::S3Errors::NO_SUCH_KEY,
"Message: Multipart upload failed with NO_SUCH_KEY error, retries {}, Key: {}, Bucket: {}",
max_retry, key, bucket);
}
void WriteBufferFromS3::makeSinglepartUpload()
@ -501,30 +529,43 @@ void WriteBufferFromS3::processPutRequest(const PutObjectTask & task)
ResourceCost cost = task.req.GetContentLength();
ResourceGuard rlock(write_settings.resource_link, cost);
Stopwatch watch;
auto outcome = client_ptr->PutObject(task.req);
watch.stop();
rlock.unlock();
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
bool with_pool = static_cast<bool>(schedule);
if (outcome.IsSuccess())
{
LOG_TRACE(log, "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}, WithPool: {}", bucket, key, task.req.GetContentLength(), with_pool);
break;
}
else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
/// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
LOG_INFO(log, "Single part upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Object size: {}, WithPool: {}, will retry", bucket, key, task.req.GetContentLength(), with_pool);
return;
}
else
{
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Object size: {}, WithPool: {}",
outcome.GetError().GetMessage(), key, bucket, task.req.GetContentLength(), with_pool);
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
/// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
LOG_INFO(log, "Single part upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Object size: {}, WithPool: {}, will retry", bucket, key, task.req.GetContentLength(), with_pool);
}
else
{
write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Object size: {}, WithPool: {}",
outcome.GetError().GetMessage(), key, bucket, task.req.GetContentLength(), with_pool);
}
}
}
throw S3Exception(
Aws::S3::S3Errors::NO_SUCH_KEY,
"Message: Single part upload failed with NO_SUCH_KEY error, retries {}, Key: {}, Bucket: {}",
max_retry, key, bucket);
}
void WriteBufferFromS3::waitForReadyBackGroundTasks()
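Both upload paths above share the same retry shape. A minimal sketch of that pattern, not part of the diff, using hypothetical names and no AWS SDK types (try_upload is assumed to perform one attempt and report the outcome):

#include <cstddef>
#include <functional>
#include <stdexcept>

enum class UploadError { None, NoSuchKey, Other };

/// Retry only the NO_SUCH_KEY outcome (observed with MinIO), fail fast on any
/// other error, and give up after max_retry attempts.
void uploadWithRetries(size_t max_retry, const std::function<UploadError()> & try_upload)
{
    for (size_t attempt = 0; attempt < max_retry; ++attempt)
    {
        UploadError error = try_upload();
        if (error == UploadError::None)
            return;                                     /// success, stop retrying
        if (error == UploadError::Other)
            throw std::runtime_error("upload failed");  /// not retryable
        /// NO_SUCH_KEY: the real code logs here, then retries
    }
    throw std::runtime_error("upload failed with NO_SUCH_KEY after all retries");
}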

View File

@ -6,6 +6,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ZLIB_INFLATE_FAILED;
extern const int ARGUMENT_OUT_OF_BOUND;
}
ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
@ -17,6 +18,11 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
: CompressedReadBufferWrapper(std::move(in_), buf_size, existing_memory, alignment)
, eof_flag(false)
{
if (buf_size > max_buffer_size)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Zlib does not support decompression with buffer size greater than {}, got buffer size: {}",
max_buffer_size, buf_size);
zstr.zalloc = nullptr;
zstr.zfree = nullptr;
zstr.opaque = nullptr;
@ -31,10 +37,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
window_bits += 16;
}
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
int rc = inflateInit2(&zstr, window_bits);
#pragma GCC diagnostic pop
if (rc != Z_OK)
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateInit2 failed: {}; zlib version: {}.", zError(rc), ZLIB_VERSION);
@ -61,16 +64,22 @@ bool ZlibInflatingReadBuffer::nextImpl()
{
in->nextIfAtEnd();
zstr.next_in = reinterpret_cast<unsigned char *>(in->position());
zstr.avail_in = static_cast<unsigned>(in->buffer().end() - in->position());
zstr.avail_in = static_cast<BufferSizeType>(std::min(
static_cast<UInt64>(in->buffer().end() - in->position()),
static_cast<UInt64>(max_buffer_size)));
}
/// Init output bytes (the place where decompressed data will be written)
zstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin());
zstr.avail_out = static_cast<unsigned>(internal_buffer.size());
zstr.avail_out = static_cast<BufferSizeType>(internal_buffer.size());
size_t old_total_in = zstr.total_in;
int rc = inflate(&zstr, Z_NO_FLUSH);
/// Move the input stream to the position where reading stopped
in->position() = in->buffer().end() - zstr.avail_in;
size_t bytes_read = zstr.total_in - old_total_in;
in->position() += bytes_read;
/// Change the size of the working buffer (its size equals the internal_buffer size minus the unused output bytes)
working_buffer.resize(internal_buffer.size() - zstr.avail_out);
@ -94,9 +103,10 @@ bool ZlibInflatingReadBuffer::nextImpl()
return true;
}
}
/// If it is neither the end of the stream nor OK, something went wrong; throw an exception
if (rc != Z_OK)
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc));
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflate failed: {}", zError(rc));
}
while (working_buffer.empty());

View File

@ -4,6 +4,7 @@
#include <IO/CompressedReadBufferWrapper.h>
#include <IO/CompressionMethod.h>
#include <limits>
#include <zlib.h>
@ -33,6 +34,11 @@ private:
z_stream zstr;
bool eof_flag;
/// Limit size of buffer because zlib uses
/// UInt32 for sizes of internal buffers.
using BufferSizeType = decltype(zstr.avail_in);
static constexpr auto max_buffer_size = std::numeric_limits<BufferSizeType>::max();
};
}
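A minimal sketch of the size limit introduced above, not part of the diff and assuming only <zlib.h>: z_stream::avail_in and avail_out are of type uInt (typically 32 bits), so any 64-bit byte count has to be clamped before it is handed to zlib.

#include <algorithm>
#include <cstdint>
#include <limits>
#include <zlib.h>

using ZlibBufferSizeType = decltype(z_stream::avail_in);   /// uInt, typically 32 bits
constexpr uint64_t max_zlib_buffer_size = std::numeric_limits<ZlibBufferSizeType>::max();

/// Clamp an arbitrary byte count to what a single z_stream buffer field can describe.
inline ZlibBufferSizeType clampForZlib(uint64_t bytes)
{
    return static_cast<ZlibBufferSizeType>(std::min(bytes, max_zlib_buffer_size));
}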

View File

@ -9,6 +9,7 @@
#include <Functions/materialize.h>
#include <Functions/FunctionsLogical.h>
#include <Functions/CastOverloadResolver.h>
#include <Functions/indexHint.h>
#include <Interpreters/Context.h>
#include <Interpreters/ArrayJoinAction.h>
#include <IO/WriteBufferFromString.h>
@ -188,9 +189,9 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin(const Node & child, std::strin
}
const ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
NodeRawConstPtrs children,
std::string result_name)
const FunctionOverloadResolverPtr & function,
NodeRawConstPtrs children,
std::string result_name)
{
auto [arguments, all_const] = getFunctionArguments(children);
@ -1364,6 +1365,83 @@ void ActionsDAG::mergeInplace(ActionsDAG && second)
first.projected_output = second.projected_output;
}
void ActionsDAG::mergeNodes(ActionsDAG && second)
{
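/// Merge nodes from `second` into this DAG: nodes whose result_name already exists here are reused,
/// the remaining nodes are spliced over from `second`, and spliced INPUT nodes are also appended to `inputs`.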
std::unordered_map<std::string, const ActionsDAG::Node *> node_name_to_node;
for (auto & node : nodes)
node_name_to_node.emplace(node.result_name, &node);
struct Frame
{
ActionsDAG::Node * node = nullptr;
bool visited_children = false;
};
std::unordered_map<const ActionsDAG::Node *, ActionsDAG::Node *> const_node_to_node;
for (auto & node : second.nodes)
const_node_to_node.emplace(&node, &node);
std::vector<Frame> nodes_to_process;
nodes_to_process.reserve(second.getOutputs().size());
for (auto & node : second.getOutputs())
nodes_to_process.push_back({const_node_to_node.at(node), false /*visited_children*/});
std::unordered_set<const ActionsDAG::Node *> nodes_to_move_from_second_dag;
while (!nodes_to_process.empty())
{
auto & node_to_process = nodes_to_process.back();
auto * node = node_to_process.node;
auto node_it = node_name_to_node.find(node->result_name);
if (node_it != node_name_to_node.end())
{
nodes_to_process.pop_back();
continue;
}
if (!node_to_process.visited_children)
{
node_to_process.visited_children = true;
for (auto & child : node->children)
nodes_to_process.push_back({const_node_to_node.at(child), false /*visited_children*/});
/// If node has children process them first
if (!node->children.empty())
continue;
}
for (auto & child : node->children)
child = node_name_to_node.at(child->result_name);
node_name_to_node.emplace(node->result_name, node);
nodes_to_move_from_second_dag.insert(node);
nodes_to_process.pop_back();
}
if (nodes_to_move_from_second_dag.empty())
return;
auto second_nodes_end = second.nodes.end();
for (auto second_node_it = second.nodes.begin(); second_node_it != second_nodes_end;)
{
if (!nodes_to_move_from_second_dag.contains(&(*second_node_it)))
{
++second_node_it;
continue;
}
auto node_to_move_it = second_node_it;
++second_node_it;
nodes.splice(nodes.end(), second.nodes, node_to_move_it);
if (node_to_move_it->type == ActionType::INPUT)
inputs.push_back(&(*node_to_move_it));
}
}
ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
{
/// Split DAG into two parts.
@ -2193,7 +2271,8 @@ bool ActionsDAG::isSortingPreserved(
ActionsDAGPtr ActionsDAG::buildFilterActionsDAG(
const NodeRawConstPtrs & filter_nodes,
const std::unordered_map<std::string, ColumnWithTypeAndName> & node_name_to_input_node_column,
const ContextPtr & context)
const ContextPtr & context,
bool single_output_condition_node)
{
if (filter_nodes.empty())
return nullptr;
@ -2281,13 +2360,35 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG(
NodeRawConstPtrs function_children;
function_children.reserve(node->children.size());
FunctionOverloadResolverPtr function_overload_resolver;
if (node->function_base->getName() == "indexHint")
{
ActionsDAG::NodeRawConstPtrs children;
if (const auto * adaptor = typeid_cast<const FunctionToFunctionBaseAdaptor *>(node->function_base.get()))
{
if (const auto * index_hint = typeid_cast<const FunctionIndexHint *>(adaptor->getFunction().get()))
{
auto index_hint_filter_dag = buildFilterActionsDAG(index_hint->getActions()->getOutputs(),
node_name_to_input_node_column,
context,
false /*single_output_condition_node*/);
auto index_hint_function_clone = std::make_shared<FunctionIndexHint>();
index_hint_function_clone->setActions(std::move(index_hint_filter_dag));
function_overload_resolver = std::make_shared<FunctionToOverloadResolverAdaptor>(std::move(index_hint_function_clone));
}
}
}
for (const auto & child : node->children)
function_children.push_back(node_to_result_node.find(child)->second);
auto [arguments, all_const] = getFunctionArguments(function_children);
auto function_base = function_overload_resolver ? function_overload_resolver->build(arguments) : node->function_base;
result_node = &result_dag->addFunctionImpl(
node->function_base,
function_base,
std::move(function_children),
std::move(arguments),
{},
@ -2307,7 +2408,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG(
for (const auto & node : filter_nodes)
result_dag_outputs.push_back(node_to_result_node.find(node)->second);
if (result_dag_outputs.size() > 1)
if (result_dag_outputs.size() > 1 && single_output_condition_node)
{
auto function_builder = FunctionFactory::instance().get("and", context);
result_dag_outputs = { &result_dag->addFunction(function_builder, result_dag_outputs, {}) };

View File

@ -290,6 +290,9 @@ public:
/// So that pointers to nodes are kept valid.
void mergeInplace(ActionsDAG && second);
/// Merge current nodes with specified dag nodes
void mergeNodes(ActionsDAG && second);
using SplitResult = std::pair<ActionsDAGPtr, ActionsDAGPtr>;
/// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children.
@ -344,15 +347,18 @@ public:
* Additionally, during dag construction, if a node has a name that exists in the node_name_to_input_node_column map argument,
* that node is represented in the final dag as an INPUT node with the specified column.
*
* Result dag has only a single output node:
* If single_output_condition_node = true, the result dag has a single output node:
* 1. If there is a single filter node, the result dag output will contain this node.
* 2. If there are multiple filter nodes, the result dag output will contain a single `and` function node
* whose children will be the filter nodes.
*
* If single_output_condition_node = false, the result dag has multiple output nodes.
*/
static ActionsDAGPtr buildFilterActionsDAG(
const NodeRawConstPtrs & filter_nodes,
const std::unordered_map<std::string, ColumnWithTypeAndName> & node_name_to_input_node_column,
const ContextPtr & context);
const ContextPtr & context,
bool single_output_condition_node = true);
private:
NodeRawConstPtrs getParents(const Node * target) const;
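As a toy illustration of the flag documented above, not part of the diff and using plain strings rather than the ActionsDAG API: with single_output_condition_node = true, several filter outputs are folded under one `and`, otherwise they are returned as separate outputs.

#include <string>
#include <vector>

/// Models only the output shaping; the real code adds an `and` function node to the DAG.
std::vector<std::string> shapeFilterOutputs(std::vector<std::string> filters, bool single_output_condition_node)
{
    if (filters.size() <= 1 || !single_output_condition_node)
        return filters;

    std::string combined = "and(";
    for (size_t i = 0; i < filters.size(); ++i)
    {
        combined += filters[i];
        if (i + 1 != filters.size())
            combined += ", ";
    }
    combined += ")";
    return {combined};
}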

View File

@ -0,0 +1,100 @@
#include <Interpreters/ComparisonTupleEliminationVisitor.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
ASTs splitTuple(const ASTPtr & node)
{
if (const auto * func = node->as<ASTFunction>(); func && func->name == "tuple")
return func->arguments->children;
if (const auto * literal = node->as<ASTLiteral>(); literal && literal->value.getType() == Field::Types::Tuple)
{
ASTs result;
const auto & tuple = literal->value.get<const Tuple &>();
for (const auto & child : tuple)
result.emplace_back(std::make_shared<ASTLiteral>(child));
return result;
}
return {};
}
ASTPtr concatWithAnd(const ASTs & nodes)
{
if (nodes.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot concat empty list of nodes");
if (nodes.size() == 1)
return nodes[0];
auto result = makeASTFunction("and");
result->arguments->children = nodes;
return result;
}
class SplitTupleComparsionExpressionMatcher
{
public:
using Data = ComparisonTupleEliminationMatcher::Data;
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
static void visit(ASTPtr & ast, Data &)
{
auto * func = ast->as<ASTFunction>();
if (!func || func->arguments->children.size() != 2)
return;
if (func->name != "equals" && func->name != "notEquals")
return;
auto lhs = splitTuple(func->arguments->children[0]);
auto rhs = splitTuple(func->arguments->children[1]);
if (lhs.size() != rhs.size() || lhs.empty())
return;
ASTs new_args;
new_args.reserve(lhs.size());
for (size_t i = 0; i < lhs.size(); ++i)
{
new_args.emplace_back(makeASTFunction("equals", lhs[i], rhs[i]));
}
if (func->name == "notEquals")
ast = makeASTFunction("not", concatWithAnd(new_args));
else
ast = concatWithAnd(new_args);
}
};
using SplitTupleComparsionExpressionVisitor = InDepthNodeVisitor<SplitTupleComparsionExpressionMatcher, true>;
}
bool ComparisonTupleEliminationMatcher::needChildVisit(ASTPtr &, const ASTPtr &)
{
return true;
}
void ComparisonTupleEliminationMatcher::visit(ASTPtr & ast, Data & data)
{
auto * select_ast = ast->as<ASTSelectQuery>();
if (!select_ast || !select_ast->where())
return;
if (select_ast->where())
SplitTupleComparsionExpressionVisitor(data).visit(select_ast->refWhere());
}
}
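A toy model of the rewrite performed by the visitor above, not part of the diff and using plain strings rather than the AST API: an element-wise conjunction is built for `equals` and wrapped in `not` for `notEquals`; empty or size-mismatched tuples are left untouched.

#include <string>
#include <vector>

std::string rewriteTupleComparison(const std::vector<std::string> & lhs,
                                   const std::vector<std::string> & rhs,
                                   bool not_equals)
{
    /// The visitor bails out when the tuples are empty or their sizes differ.
    if (lhs.empty() || lhs.size() != rhs.size())
        return {};

    std::string conjunction;
    for (size_t i = 0; i < lhs.size(); ++i)
    {
        if (i != 0)
            conjunction += " AND ";
        conjunction += lhs[i] + " = " + rhs[i];
    }
    return not_equals ? "NOT (" + conjunction + ")" : conjunction;
}

For example, rewriteTupleComparison({"id", "value"}, {"1", "'Value'"}, false) yields id = 1 AND value = 'Value', matching the example in the header that follows.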

View File

@ -0,0 +1,28 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/Aliases.h>
namespace DB
{
class ASTSelectQuery;
struct TableWithColumnNamesAndTypes;
/** Replaces tuple comparisons with multiple comparisons.
*
* Example: SELECT id FROM test_table WHERE (id, value) = (1, 'Value');
* Result: SELECT id FROM test_table WHERE id = 1 AND value = 'Value';
*/
class ComparisonTupleEliminationMatcher
{
public:
struct Data {};
static bool needChildVisit(ASTPtr &, const ASTPtr &);
static void visit(ASTPtr & ast, Data & data);
};
using ComparisonTupleEliminationVisitor = InDepthNodeVisitor<ComparisonTupleEliminationMatcher, true>;
}

View File

@ -1918,8 +1918,13 @@ BackupsWorker & Context::getBackupsWorker() const
const bool allow_concurrent_backups = this->getConfigRef().getBool("backups.allow_concurrent_backups", true);
const bool allow_concurrent_restores = this->getConfigRef().getBool("backups.allow_concurrent_restores", true);
const auto & config = getConfigRef();
const auto & settings = getSettingsRef();
UInt64 backup_threads = config.getUInt64("backup_threads", settings.backup_threads);
UInt64 restore_threads = config.getUInt64("restore_threads", settings.restore_threads);
if (!shared->backups_worker)
shared->backups_worker.emplace(getSettingsRef().backup_threads, getSettingsRef().restore_threads, allow_concurrent_backups, allow_concurrent_restores);
shared->backups_worker.emplace(backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
return *shared->backups_worker;
}

View File

@ -415,7 +415,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
RewriteCountDistinctFunctionVisitor(data_rewrite_countdistinct).visit(query_ptr);
}
JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols);
JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols, options_.is_create_parameterized_view);
bool got_storage_from_query = false;
if (!has_input && !storage)
@ -636,14 +636,14 @@ InterpreterSelectQuery::InterpreterSelectQuery(
Names queried_columns = syntax_analyzer_result->requiredSourceColumns();
const auto & supported_prewhere_columns = storage->supportedPrewhereColumns();
MergeTreeWhereOptimizer{
current_info,
context,
MergeTreeWhereOptimizer where_optimizer{
std::move(column_compressed_sizes),
metadata_snapshot,
queried_columns,
supported_prewhere_columns,
log};
where_optimizer.optimize(current_info, context);
}
}
@ -2874,8 +2874,10 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const st
SortDescription sort_description = getSortDescription(query, context);
const UInt64 limit = getLimitForSorting(query, context);
const auto max_block_size = context->getSettingsRef().max_block_size;
const auto exact_rows_before_limit = context->getSettingsRef().exact_rows_before_limit;
auto merging_sorted = std::make_unique<SortingStep>(query_plan.getCurrentDataStream(), std::move(sort_description), max_block_size, limit);
auto merging_sorted = std::make_unique<SortingStep>(
query_plan.getCurrentDataStream(), std::move(sort_description), max_block_size, limit, exact_rows_before_limit);
merging_sorted->setStepDescription("Merge sorted streams " + description);
query_plan.addStep(std::move(merging_sorted));
}

View File

@ -262,12 +262,12 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_,
{
if (!context_->hasQueryContext())
{
SelectQueryOptions options;
if (is_subquery)
return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock();
else if (is_create_parameterized_view)
return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze()).getSampleBlock();
else
return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock();
options = options.subquery();
if (is_create_parameterized_view)
options = options.createParameterizedView();
return InterpreterSelectWithUnionQuery(query_ptr_, context_, std::move(options.analyze())).getSampleBlock();
}
auto & cache = context_->getSampleBlockCache();
@ -278,21 +278,12 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_,
return cache[key];
}
SelectQueryOptions options;
if (is_subquery)
{
return cache[key]
= InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock();
}
else if (is_create_parameterized_view)
{
return cache[key]
= InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze())
.getSampleBlock();
}
else
{
return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock();
}
options = options.subquery();
if (is_create_parameterized_view)
options = options.createParameterizedView();
return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, std::move(options.analyze())).getSampleBlock();
}

View File

@ -173,13 +173,14 @@ using RenameQualifiedIdentifiersVisitor = InDepthNodeVisitor<RenameQualifiedIden
}
JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query_, bool include_all_columns_)
JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query_, bool include_all_columns_, bool is_create_parameterized_view_)
: context(context_)
, table_expressions(getTableExpressions(select_query_))
, include_all_columns(include_all_columns_)
, left_table_expression(extractTableExpression(select_query_, 0))
, left_db_and_table(getDatabaseAndTable(select_query_, 0))
, select_query(select_query_)
, is_create_parameterized_view(is_create_parameterized_view_)
{}
bool JoinedTables::isLeftTableSubquery() const
@ -239,7 +240,7 @@ bool JoinedTables::resolveTables()
const auto & settings = context->getSettingsRef();
bool include_alias_cols = include_all_columns || settings.asterisk_include_alias_columns;
bool include_materialized_cols = include_all_columns || settings.asterisk_include_materialized_columns;
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, include_alias_cols, include_materialized_cols);
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, include_alias_cols, include_materialized_cols, is_create_parameterized_view);
if (tables_with_columns.size() != table_expressions.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected tables count");

View File

@ -22,7 +22,7 @@ using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
class JoinedTables
{
public:
JoinedTables(ContextPtr context, const ASTSelectQuery & select_query_, bool include_all_columns_ = false);
JoinedTables(ContextPtr context, const ASTSelectQuery & select_query_, bool include_all_columns_ = false, bool is_create_parameterized_view_ = false);
void reset(const ASTSelectQuery & select_query);
@ -53,6 +53,7 @@ private:
ASTPtr left_table_expression;
std::optional<DatabaseAndTableWithAlias> left_db_and_table;
const ASTSelectQuery & select_query;
const bool is_create_parameterized_view;
};
}

View File

@ -8,6 +8,7 @@
#include <Interpreters/ArrayJoinedColumnsVisitor.h>
#include <Interpreters/CollectJoinOnKeysVisitor.h>
#include <Interpreters/ComparisonTupleEliminationVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
#include <Interpreters/ExpressionActions.h> /// getSmallestColumn()
@ -1424,6 +1425,13 @@ void TreeRewriter::normalize(
if (context_->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && settings.normalize_function_names)
FunctionNameNormalizer().visit(query.get());
if (settings.optimize_move_to_prewhere)
{
/// Required for PREWHERE
ComparisonTupleEliminationVisitor::Data data_comparison_tuple_elimination;
ComparisonTupleEliminationVisitor(data_comparison_tuple_elimination).visit(query);
}
/// Common subexpression elimination. Rewrite rules.
QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, is_create_parameterized_view);
QueryNormalizer(normalizer_data).visit(query);

View File

@ -73,18 +73,21 @@ ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number
return nullptr;
}
/// The parameter is_create_parameterized_view is used in getSampleBlock of the subquery.
/// If it is set to true, then query parameters are allowed in the subquery, and that expression is not evaluated.
static NamesAndTypesList getColumnsFromTableExpression(
const ASTTableExpression & table_expression,
ContextPtr context,
NamesAndTypesList & materialized,
NamesAndTypesList & aliases,
NamesAndTypesList & virtuals)
NamesAndTypesList & virtuals,
bool is_create_parameterized_view)
{
NamesAndTypesList names_and_type_list;
if (table_expression.subquery)
{
const auto & subquery = table_expression.subquery->children.at(0);
names_and_type_list = InterpreterSelectWithUnionQuery::getSampleBlock(subquery, context, true).getNamesAndTypesList();
names_and_type_list = InterpreterSelectWithUnionQuery::getSampleBlock(subquery, context, true, is_create_parameterized_view).getNamesAndTypesList();
}
else if (table_expression.table_function)
{
@ -117,7 +120,8 @@ TablesWithColumns getDatabaseAndTablesWithColumns(
const ASTTableExprConstPtrs & table_expressions,
ContextPtr context,
bool include_alias_cols,
bool include_materialized_cols)
bool include_materialized_cols,
bool is_create_parameterized_view)
{
TablesWithColumns tables_with_columns;
@ -129,7 +133,7 @@ TablesWithColumns getDatabaseAndTablesWithColumns(
NamesAndTypesList aliases;
NamesAndTypesList virtuals;
NamesAndTypesList names_and_types = getColumnsFromTableExpression(
*table_expression, context, materialized, aliases, virtuals);
*table_expression, context, materialized, aliases, virtuals, is_create_parameterized_view);
removeDuplicateColumns(names_and_types);

View File

@ -20,7 +20,9 @@ const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, siz
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);
/// The parameter is_create_parameterized_view is used in getSampleBlock of the subquery. It is forwarded to getColumnsFromTableExpression.
/// If it is set to true, then query parameters are allowed in the subquery, and that expression is not evaluated.
TablesWithColumns getDatabaseAndTablesWithColumns(
const ASTTableExprConstPtrs & table_expressions, ContextPtr context, bool include_alias_cols, bool include_materialized_cols);
const ASTTableExprConstPtrs & table_expressions, ContextPtr context, bool include_alias_cols, bool include_materialized_cols, bool is_create_parameterized_view = false);
}

View File

@ -119,7 +119,6 @@ ASTPtr ASTGrantQuery::clone() const
void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const
{
settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (attach_mode ? "ATTACH " : "")
<< (settings.hilite ? hilite_keyword : "") << ((!is_revoke && (replace_access || replace_granted_roles)) ? "REPLACE " : "") << (settings.hilite ? hilite_none : "")
<< (settings.hilite ? hilite_keyword : "") << (is_revoke ? "REVOKE" : "GRANT")
<< (settings.hilite ? IAST::hilite_none : "");
@ -161,6 +160,9 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH GRANT OPTION" << (settings.hilite ? hilite_none : "");
else if (admin_option)
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH ADMIN OPTION" << (settings.hilite ? hilite_none : "");
if (replace_access || replace_granted_roles)
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH REPLACE OPTION" << (settings.hilite ? hilite_none : "");
}
}

View File

@ -2255,6 +2255,7 @@ std::vector<std::pair<const char *, Operator>> ParserExpressionImpl::operators_t
{"ILIKE", Operator("ilike", 8, 2)},
{"NOT LIKE", Operator("notLike", 8, 2)},
{"NOT ILIKE", Operator("notILike", 8, 2)},
{"REGEXP", Operator("match", 8, 2)},
{"IN", Operator("in", 8, 2)},
{"NOT IN", Operator("notIn", 8, 2)},
{"GLOBAL IN", Operator("globalIn", 8, 2)},

View File

@ -10,6 +10,7 @@
#include <Analyzer/TableFunctionNode.h>
#include <Planner/PlannerContext.h>
#include <Planner/PlannerActionsVisitor.h>
namespace DB
{
@ -17,6 +18,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_PREWHERE;
}
namespace
@ -78,23 +80,128 @@ public:
static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
{
return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION);
auto child_node_type = child_node->getNodeType();
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
}
private:
PlannerContext & planner_context;
};
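/// Visitor over the PREWHERE expression: finds the single table (or table function) expression its columns come from
/// and validates that the storage, and each referenced column, supports PREWHERE.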
class CollectPrewhereTableExpressionVisitor : public ConstInDepthQueryTreeVisitor<CollectPrewhereTableExpressionVisitor>
{
public:
explicit CollectPrewhereTableExpressionVisitor(const QueryTreeNodePtr & query_node_)
: query_node(query_node_)
{}
const QueryTreeNodePtr & getPrewhereTableExpression() const
{
return table_expression;
}
void visitImpl(const QueryTreeNodePtr & node)
{
auto * column_node = node->as<ColumnNode>();
if (!column_node)
return;
auto column_source = column_node->getColumnSourceOrNull();
if (!column_source)
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Invalid column {} in PREWHERE. In query {}",
column_node->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());
auto * table_column_source = column_source->as<TableNode>();
auto * table_function_column_source = column_source->as<TableFunctionNode>();
if (!table_column_source && !table_function_column_source)
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Invalid column {} in PREWHERE. Expected column source to be table or table function. Actual {}. In query {}",
column_node->formatASTForErrorMessage(),
column_source->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());
if (table_expression && table_expression.get() != column_source.get())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Invalid column {} in PREWHERE. Expected columns from single table or table function {}. Actual {}. In query {}",
column_node->formatASTForErrorMessage(),
table_expression->formatASTForErrorMessage(),
column_source->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());
if (!table_expression)
{
const auto & storage = table_column_source ? table_column_source->getStorage() : table_function_column_source->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
table_expression = std::move(column_source);
table_supported_prewhere_columns = storage->supportedPrewhereColumns();
}
if (table_supported_prewhere_columns && !table_supported_prewhere_columns->contains(column_node->getColumnName()))
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Table expression {} does not support column {} in PREWHERE. In query {}",
table_expression->formatASTForErrorMessage(),
column_node->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());
}
static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
{
auto child_node_type = child_node->getNodeType();
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
}
private:
QueryTreeNodePtr query_node;
QueryTreeNodePtr table_expression;
std::optional<NameSet> table_supported_prewhere_columns;
};
void checkStorageSupportPrewhere(const QueryTreeNodePtr & table_expression)
{
if (auto * table_node = table_expression->as<TableNode>())
{
auto storage = table_node->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
}
else if (auto * table_function_node = table_expression->as<TableFunctionNode>())
{
auto storage = table_function_node->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Table function storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
}
else
{
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Subquery {} does not support PREWHERE",
table_expression->formatASTForErrorMessage());
}
}
void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext & planner_context)
}
void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr & planner_context)
{
auto & query_node_typed = query_node->as<QueryNode &>();
auto table_expressions_nodes = extractTableExpressions(query_node_typed.getJoinTree());
for (auto & table_expression_node : table_expressions_nodes)
{
auto & table_expression_data = planner_context.getOrCreateTableExpressionData(table_expression_node);
auto & table_expression_data = planner_context->getOrCreateTableExpressionData(table_expression_node);
if (auto * table_node = table_expression_node->as<TableNode>())
{
@ -108,8 +215,60 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext &
}
}
CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context);
collect_source_columns_visitor.visit(query_node);
CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context);
for (auto & node : query_node_typed.getChildren())
{
if (!node || node == query_node_typed.getPrewhere())
continue;
auto node_type = node->getNodeType();
if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)
continue;
collect_source_columns_visitor.visit(node);
}
if (query_node_typed.hasPrewhere())
{
CollectPrewhereTableExpressionVisitor collect_prewhere_table_expression_visitor(query_node);
collect_prewhere_table_expression_visitor.visit(query_node_typed.getPrewhere());
auto prewhere_table_expression = collect_prewhere_table_expression_visitor.getPrewhereTableExpression();
if (!prewhere_table_expression)
{
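/// PREWHERE referenced no columns directly; fall back to the first table expression of the join tree
/// and verify that it supports PREWHERE.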
prewhere_table_expression = table_expressions_nodes[0];
checkStorageSupportPrewhere(prewhere_table_expression);
}
auto & table_expression_data = planner_context->getOrCreateTableExpressionData(prewhere_table_expression);
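/// Remember which columns were required before PREWHERE is visited, so that PREWHERE inputs
/// that are also needed elsewhere can be kept as outputs of the filter dag below.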
const auto & column_names = table_expression_data.getColumnNames();
NameSet required_column_names_without_prewhere(column_names.begin(), column_names.end());
collect_source_columns_visitor.visit(query_node_typed.getPrewhere());
auto prewhere_actions_dag = std::make_shared<ActionsDAG>();
PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/);
auto expression_nodes = visitor.visit(prewhere_actions_dag, query_node_typed.getPrewhere());
if (expression_nodes.size() != 1)
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Invalid PREWHERE. Expected single boolean expression. In query {}",
query_node->formatASTForErrorMessage());
prewhere_actions_dag->getOutputs().push_back(expression_nodes[0]);
for (const auto & prewhere_input_node : prewhere_actions_dag->getInputs())
if (required_column_names_without_prewhere.contains(prewhere_input_node->result_name))
prewhere_actions_dag->getOutputs().push_back(prewhere_input_node);
table_expression_data.setPrewhereFilterActions(std::move(prewhere_actions_dag));
}
}
void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context)
{
CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context);
collect_source_columns_visitor.visit(expression_node);
}
}

View File

@ -12,6 +12,13 @@ namespace DB
*
* ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression.
*/
void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext & planner_context);
void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr & planner_context);
/** Collect source columns for expression node.
* Collected source columns are registered in planner context.
*
* ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression.
*/
void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context);
}

View File

@ -79,26 +79,14 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int TOO_DEEP_SUBQUERIES;
extern const int NOT_IMPLEMENTED;
extern const int ILLEGAL_PREWHERE;
}
/** ClickHouse query planner.
*
* TODO: Support JOIN with JOIN engine.
* TODO: Support VIEWs.
* TODO: JOIN drop unnecessary columns after ON, USING section
* TODO: Support RBAC. Support RBAC for ALIAS columns
* TODO: Support PREWHERE
* TODO: Support DISTINCT
* TODO: Support trivial count optimization
* TODO: Support projections
* TODO: Support read in order optimization
* TODO: UNION storage limits
* TODO: Support max streams
* TODO: Support ORDER BY read in order optimization
* TODO: Support GROUP BY read in order optimization
* TODO: Support Key Condition. Support indexes for IN function.
* TODO: Better support for quota and limits.
* TODO: Support projections.
* TODO: Support trivial count using partition predicates.
* TODO: Support trivial count for table functions.
* TODO: Support indexes for IN function.
*/
namespace
@ -135,37 +123,6 @@ void checkStoragesSupportTransactions(const PlannerContextPtr & planner_context)
}
}
void checkStorageSupportPrewhere(const QueryTreeNodePtr & query_node)
{
auto & query_node_typed = query_node->as<QueryNode &>();
auto table_expression = extractLeftTableExpression(query_node_typed.getJoinTree());
if (auto * table_node = table_expression->as<TableNode>())
{
auto storage = table_node->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
}
else if (auto * table_function_node = table_expression->as<TableFunctionNode>())
{
auto storage = table_function_node->getStorage();
if (!storage->supportsPrewhere())
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Table function storage {} (table {}) does not support PREWHERE",
storage->getName(),
storage->getStorageID().getNameForLogs());
}
else
{
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Subquery {} does not support PREWHERE",
query_node->formatASTForErrorMessage());
}
}
/// Extend lifetime of query context, storages, and table locks
void extendQueryContextAndStoragesLifetime(QueryPlan & query_plan, const PlannerContextPtr & planner_context)
{
@ -568,7 +525,8 @@ void addMergeSortingStep(QueryPlan & query_plan,
auto merging_sorted = std::make_unique<SortingStep>(query_plan.getCurrentDataStream(),
sort_description,
max_block_size,
query_analysis_result.partial_sorting_limit);
query_analysis_result.partial_sorting_limit,
settings.exact_rows_before_limit);
merging_sorted->setStepDescription("Merge sorted streams " + description);
query_plan.addStep(std::move(merging_sorted));
}
@ -1140,18 +1098,6 @@ void Planner::buildPlanForQueryNode()
auto & query_node = query_tree->as<QueryNode &>();
const auto & query_context = planner_context->getQueryContext();
if (query_node.hasPrewhere())
{
checkStorageSupportPrewhere(query_tree);
if (query_node.hasWhere())
query_node.getWhere() = mergeConditionNodes({query_node.getPrewhere(), query_node.getWhere()}, query_context);
else
query_node.getWhere() = query_node.getPrewhere();
query_node.getPrewhere() = {};
}
if (query_node.hasWhere())
{
auto condition_constant = tryExtractConstantFromConditionNode(query_node.getWhere());
@ -1185,8 +1131,8 @@ void Planner::buildPlanForQueryNode()
}
checkStoragesSupportTransactions(planner_context);
collectTableExpressionData(query_tree, *planner_context);
collectSets(query_tree, *planner_context);
collectTableExpressionData(query_tree, planner_context);
auto top_level_identifiers = collectTopLevelColumnIdentifiers(query_tree, planner_context);
auto join_tree_query_plan = buildJoinTreeQueryPlan(query_tree,
@ -1215,6 +1161,12 @@ void Planner::buildPlanForQueryNode()
std::vector<ActionsDAGPtr> result_actions_to_execute;
for (auto & [_, table_expression_data] : planner_context->getTableExpressionNodeToData())
{
if (table_expression_data.getPrewhereFilterActions())
result_actions_to_execute.push_back(table_expression_data.getPrewhereFilterActions());
}
if (query_processing_info.isIntermediateStage())
{
addPreliminarySortOrDistinctOrLimitStepsIfNeeded(query_plan,

View File

@ -44,6 +44,264 @@ namespace ErrorCodes
namespace
{
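/// Helper that computes action dag node names for query tree nodes. When use_column_identifier_as_action_node_name
/// is false, the raw column name is used instead of the planner column identifier (as done for the PREWHERE filter dag).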
class ActionNodeNameHelper
{
public:
ActionNodeNameHelper(QueryTreeNodeToName & node_to_name_,
const PlannerContext & planner_context_,
bool use_column_identifier_as_action_node_name_)
: node_to_name(node_to_name_)
, planner_context(planner_context_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
{
}
String calculateActionNodeName(const QueryTreeNodePtr & node)
{
auto it = node_to_name.find(node);
if (it != node_to_name.end())
return it->second;
String result;
auto node_type = node->getNodeType();
switch (node_type)
{
case QueryTreeNodeType::COLUMN:
{
const ColumnIdentifier * column_identifier = nullptr;
if (use_column_identifier_as_action_node_name)
column_identifier = planner_context.getColumnNodeIdentifierOrNull(node);
if (column_identifier)
{
result = *column_identifier;
}
else
{
const auto & column_node = node->as<ColumnNode &>();
result = column_node.getColumnName();
}
break;
}
case QueryTreeNodeType::CONSTANT:
{
const auto & constant_node = node->as<ConstantNode &>();
result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType());
break;
}
case QueryTreeNodeType::FUNCTION:
{
const auto & function_node = node->as<FunctionNode &>();
String in_function_second_argument_node_name;
if (isNameOfInFunction(function_node.getFunctionName()))
{
const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1);
in_function_second_argument_node_name = planner_context.createSetKey(in_second_argument_node);
}
WriteBufferFromOwnString buffer;
buffer << function_node.getFunctionName();
const auto & function_parameters_nodes = function_node.getParameters().getNodes();
if (!function_parameters_nodes.empty())
{
buffer << '(';
size_t function_parameters_nodes_size = function_parameters_nodes.size();
for (size_t i = 0; i < function_parameters_nodes_size; ++i)
{
const auto & function_parameter_node = function_parameters_nodes[i];
buffer << calculateActionNodeName(function_parameter_node);
if (i + 1 != function_parameters_nodes_size)
buffer << ", ";
}
buffer << ')';
}
const auto & function_arguments_nodes = function_node.getArguments().getNodes();
String function_argument_name;
buffer << '(';
size_t function_arguments_nodes_size = function_arguments_nodes.size();
for (size_t i = 0; i < function_arguments_nodes_size; ++i)
{
if (i == 1 && !in_function_second_argument_node_name.empty())
{
function_argument_name = in_function_second_argument_node_name;
}
else
{
const auto & function_argument_node = function_arguments_nodes[i];
function_argument_name = calculateActionNodeName(function_argument_node);
}
buffer << function_argument_name;
if (i + 1 != function_arguments_nodes_size)
buffer << ", ";
}
buffer << ')';
if (function_node.isWindowFunction())
{
buffer << " OVER (";
buffer << calculateWindowNodeActionName(function_node.getWindowNode());
buffer << ')';
}
result = buffer.str();
break;
}
case QueryTreeNodeType::LAMBDA:
{
auto lambda_hash = node->getTreeHash();
result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second);
break;
}
default:
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid action query tree node {}", node->formatASTForErrorMessage());
}
}
node_to_name.emplace(node, result);
return result;
}
static String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type)
{
auto constant_name = applyVisitor(FieldVisitorToString(), constant_literal);
return constant_name + "_" + constant_type->getName();
}
static String calculateConstantActionNodeName(const Field & constant_literal)
{
return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal));
}
String calculateWindowNodeActionName(const QueryTreeNodePtr & node)
{
auto & window_node = node->as<WindowNode &>();
WriteBufferFromOwnString buffer;
if (window_node.hasPartitionBy())
{
buffer << "PARTITION BY ";
auto & partition_by_nodes = window_node.getPartitionBy().getNodes();
size_t partition_by_nodes_size = partition_by_nodes.size();
for (size_t i = 0; i < partition_by_nodes_size; ++i)
{
auto & partition_by_node = partition_by_nodes[i];
buffer << calculateActionNodeName(partition_by_node);
if (i + 1 != partition_by_nodes_size)
buffer << ", ";
}
}
if (window_node.hasOrderBy())
{
if (window_node.hasPartitionBy())
buffer << ' ';
buffer << "ORDER BY ";
auto & order_by_nodes = window_node.getOrderBy().getNodes();
size_t order_by_nodes_size = order_by_nodes.size();
for (size_t i = 0; i < order_by_nodes_size; ++i)
{
auto & sort_node = order_by_nodes[i]->as<SortNode &>();
buffer << calculateActionNodeName(sort_node.getExpression());
auto sort_direction = sort_node.getSortDirection();
buffer << (sort_direction == SortDirection::ASCENDING ? " ASC" : " DESC");
auto nulls_sort_direction = sort_node.getNullsSortDirection();
if (nulls_sort_direction)
buffer << " NULLS " << (nulls_sort_direction == sort_direction ? "LAST" : "FIRST");
if (auto collator = sort_node.getCollator())
buffer << " COLLATE " << collator->getLocale();
if (sort_node.withFill())
{
buffer << " WITH FILL";
if (sort_node.hasFillFrom())
buffer << " FROM " << calculateActionNodeName(sort_node.getFillFrom());
if (sort_node.hasFillTo())
buffer << " TO " << calculateActionNodeName(sort_node.getFillTo());
if (sort_node.hasFillStep())
buffer << " STEP " << calculateActionNodeName(sort_node.getFillStep());
}
if (i + 1 != order_by_nodes_size)
buffer << ", ";
}
}
auto & window_frame = window_node.getWindowFrame();
if (!window_frame.is_default)
{
if (window_node.hasPartitionBy() || window_node.hasOrderBy())
buffer << ' ';
buffer << window_frame.type << " BETWEEN ";
if (window_frame.begin_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameBeginOffsetNode());
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
buffer << " AND ";
if (window_frame.end_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameEndOffsetNode());
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
}
return buffer.str();
}
private:
std::unordered_map<QueryTreeNodePtr, std::string> & node_to_name;
const PlannerContext & planner_context;
bool use_column_identifier_as_action_node_name = true;
};
class ActionsScopeNode
{
public:
@ -165,7 +423,9 @@ private:
class PlannerActionsVisitorImpl
{
public:
PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, const PlannerContextPtr & planner_context_);
PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag,
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_);
ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node);
@ -189,10 +449,14 @@ private:
std::vector<ActionsScopeNode> actions_stack;
std::unordered_map<QueryTreeNodePtr, std::string> node_to_node_name;
const PlannerContextPtr planner_context;
ActionNodeNameHelper action_node_name_helper;
};
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, const PlannerContextPtr & planner_context_)
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag,
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_)
: planner_context(planner_context_)
, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
{
actions_stack.emplace_back(std::move(actions_dag), nullptr);
}
@ -236,7 +500,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitColumn(const QueryTreeNodePtr & node)
{
auto column_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name);
auto column_node_name = action_node_name_helper.calculateActionNodeName(node);
const auto & column_node = node->as<ColumnNode &>();
Int64 actions_stack_size = static_cast<Int64>(actions_stack.size() - 1);
@ -386,7 +650,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node)
{
const auto & function_node = node->as<FunctionNode &>();
auto function_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name);
auto function_node_name = action_node_name_helper.calculateActionNodeName(node);
auto index_hint_actions_dag = std::make_shared<ActionsDAG>();
auto & index_hint_actions_dag_outputs = index_hint_actions_dag->getOutputs();
@ -428,7 +692,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
if (isNameOfInFunction(function_node.getFunctionName()))
in_function_second_argument_node_name_with_level = makeSetForInFunction(node);
auto function_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name);
auto function_node_name = action_node_name_helper.calculateActionNodeName(node);
/* Aggregate functions, window functions, and GROUP BY expressions were already analyzed in the previous steps.
* If we have already visited some expression, we don't need to revisit it or its arguments again.
@ -516,266 +780,57 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
}
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_)
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_)
: planner_context(planner_context_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
{}
ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node)
{
PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context);
PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name);
return actions_visitor_impl.visit(expression_node);
}
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name)
String calculateActionNodeName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name)
{
auto it = node_to_name.find(node);
if (it != node_to_name.end())
return it->second;
String result;
auto node_type = node->getNodeType();
switch (node_type)
{
case QueryTreeNodeType::COLUMN:
{
const auto * column_identifier = planner_context.getColumnNodeIdentifierOrNull(node);
if (column_identifier)
{
result = *column_identifier;
}
else
{
const auto & column_node = node->as<ColumnNode &>();
result = column_node.getColumnName();
}
break;
}
case QueryTreeNodeType::CONSTANT:
{
const auto & constant_node = node->as<ConstantNode &>();
result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType());
break;
}
case QueryTreeNodeType::FUNCTION:
{
const auto & function_node = node->as<FunctionNode &>();
String in_function_second_argument_node_name;
if (isNameOfInFunction(function_node.getFunctionName()))
{
const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1);
in_function_second_argument_node_name = planner_context.createSetKey(in_second_argument_node);
}
WriteBufferFromOwnString buffer;
buffer << function_node.getFunctionName();
const auto & function_parameters_nodes = function_node.getParameters().getNodes();
if (!function_parameters_nodes.empty())
{
buffer << '(';
size_t function_parameters_nodes_size = function_parameters_nodes.size();
for (size_t i = 0; i < function_parameters_nodes_size; ++i)
{
const auto & function_parameter_node = function_parameters_nodes[i];
buffer << calculateActionNodeName(function_parameter_node, planner_context, node_to_name);
if (i + 1 != function_parameters_nodes_size)
buffer << ", ";
}
buffer << ')';
}
const auto & function_arguments_nodes = function_node.getArguments().getNodes();
String function_argument_name;
buffer << '(';
size_t function_arguments_nodes_size = function_arguments_nodes.size();
for (size_t i = 0; i < function_arguments_nodes_size; ++i)
{
if (i == 1 && !in_function_second_argument_node_name.empty())
{
function_argument_name = in_function_second_argument_node_name;
}
else
{
const auto & function_argument_node = function_arguments_nodes[i];
function_argument_name = calculateActionNodeName(function_argument_node, planner_context, node_to_name);
}
buffer << function_argument_name;
if (i + 1 != function_arguments_nodes_size)
buffer << ", ";
}
buffer << ')';
if (function_node.isWindowFunction())
{
buffer << " OVER (";
buffer << calculateWindowNodeActionName(function_node.getWindowNode(), planner_context, node_to_name);
buffer << ')';
}
result = buffer.str();
break;
}
case QueryTreeNodeType::LAMBDA:
{
auto lambda_hash = node->getTreeHash();
result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second);
break;
}
default:
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid action query tree node {}", node->formatASTForErrorMessage());
}
}
node_to_name.emplace(node, result);
return result;
ActionNodeNameHelper helper(node_to_name, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateActionNodeName(node);
}
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context)
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name)
{
QueryTreeNodeToName empty_map;
return calculateActionNodeName(node, planner_context, empty_map);
ActionNodeNameHelper helper(empty_map, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateActionNodeName(node);
}
String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type)
{
auto constant_name = applyVisitor(FieldVisitorToString(), constant_literal);
return constant_name + "_" + constant_type->getName();
return ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal, constant_type);
}
String calculateConstantActionNodeName(const Field & constant_literal)
{
return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal));
return ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal);
}
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name)
String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name)
{
auto & window_node = node->as<WindowNode &>();
WriteBufferFromOwnString buffer;
if (window_node.hasPartitionBy())
{
buffer << "PARTITION BY ";
auto & partition_by_nodes = window_node.getPartitionBy().getNodes();
size_t partition_by_nodes_size = partition_by_nodes.size();
for (size_t i = 0; i < partition_by_nodes_size; ++i)
{
auto & partition_by_node = partition_by_nodes[i];
buffer << calculateActionNodeName(partition_by_node, planner_context, node_to_name);
if (i + 1 != partition_by_nodes_size)
buffer << ", ";
}
}
if (window_node.hasOrderBy())
{
if (window_node.hasPartitionBy())
buffer << ' ';
buffer << "ORDER BY ";
auto & order_by_nodes = window_node.getOrderBy().getNodes();
size_t order_by_nodes_size = order_by_nodes.size();
for (size_t i = 0; i < order_by_nodes_size; ++i)
{
auto & sort_node = order_by_nodes[i]->as<SortNode &>();
buffer << calculateActionNodeName(sort_node.getExpression(), planner_context, node_to_name);
auto sort_direction = sort_node.getSortDirection();
buffer << (sort_direction == SortDirection::ASCENDING ? " ASC" : " DESC");
auto nulls_sort_direction = sort_node.getNullsSortDirection();
if (nulls_sort_direction)
buffer << " NULLS " << (nulls_sort_direction == sort_direction ? "LAST" : "FIRST");
if (auto collator = sort_node.getCollator())
buffer << " COLLATE " << collator->getLocale();
if (sort_node.withFill())
{
buffer << " WITH FILL";
if (sort_node.hasFillFrom())
buffer << " FROM " << calculateActionNodeName(sort_node.getFillFrom(), planner_context, node_to_name);
if (sort_node.hasFillTo())
buffer << " TO " << calculateActionNodeName(sort_node.getFillTo(), planner_context, node_to_name);
if (sort_node.hasFillStep())
buffer << " STEP " << calculateActionNodeName(sort_node.getFillStep(), planner_context, node_to_name);
}
if (i + 1 != order_by_nodes_size)
buffer << ", ";
}
}
auto & window_frame = window_node.getWindowFrame();
if (!window_frame.is_default)
{
if (window_node.hasPartitionBy() || window_node.hasOrderBy())
buffer << ' ';
buffer << window_frame.type << " BETWEEN ";
if (window_frame.begin_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameBeginOffsetNode(), planner_context, node_to_name);
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
buffer << " AND ";
if (window_frame.end_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameEndOffsetNode(), planner_context, node_to_name);
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
}
return buffer.str();
ActionNodeNameHelper helper(node_to_name, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateWindowNodeActionName(node);
}
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context)
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name)
{
QueryTreeNodeToName empty_map;
return calculateWindowNodeActionName(node, planner_context, empty_map);
ActionNodeNameHelper helper(empty_map, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateWindowNodeActionName(node);
}
}

View File

@ -23,7 +23,7 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
* Preconditions:
* 1. Table expression data for table expression nodes is collected in planner context.
* For a column node that has a column table expression source, the identifier for the column name in table expression data
* is used as the action dag node name.
* is used as the action dag node name, if use_column_identifier_as_action_node_name = true.
* 2. Sets for IN functions are already collected in planner context.
*
* During actions build, there is special handling for following functions:
@ -33,7 +33,7 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
class PlannerActionsVisitor
{
public:
explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_);
explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true);
/** Add actions necessary to calculate expression node into expression dag.
* Necessary actions are not added in actions dag output.
@ -43,21 +43,27 @@ public:
private:
const PlannerContextPtr planner_context;
bool use_column_identifier_as_action_node_name = true;
};
/** Calculate the query tree expression node action dag name and add it into the node-to-name map.
* If the node already exists in the map, the name from the map is used.
*
* For a column node, the column node identifier from the planner context is used.
* For a column node, the column node identifier from the planner context is used, if use_column_identifier_as_action_node_name = true.
*/
using QueryTreeNodeToName = std::unordered_map<QueryTreeNodePtr, String>;
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name);
String calculateActionNodeName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name = true);
/** Calculate query tree expression node action dag name.
*
* For a column node, the column node identifier from the planner context is used.
* For a column node, the column node identifier from the planner context is used, if use_column_identifier_as_action_node_name = true.
*/
String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context);
String calculateActionNodeName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
bool use_column_identifier_as_action_node_name = true);
/// Calculate action node name for constant
String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type);
@ -67,12 +73,19 @@ String calculateConstantActionNodeName(const Field & constant_literal);
/** Calculate action node name for window node.
* A window node action name can only be part of a window function action name.
* For a column node, the column node identifier from the planner context is used, if use_column_identifier_as_action_node_name = true.
*/
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name);
String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name = true);
/** Calculate action node name for window node.
* A window node action name can only be part of a window function action name.
* For a column node, the column node identifier from the planner context is used, if use_column_identifier_as_action_node_name = true.
*/
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context);
String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
bool use_column_identifier_as_action_node_name = true);
}

Some files were not shown because too many files have changed in this diff.