Merge branch 'master' into fix_aggregation_ttl

This commit is contained in:
mergify[bot] 2021-06-30 09:15:46 +00:00 committed by GitHub
commit 1799804243
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
190 changed files with 3469 additions and 1009 deletions

View File

@ -119,11 +119,16 @@ private:
}
public:
/// We use Int64 instead of time_t because time_t is mapped to different types (long or long long)
/// on Linux and Darwin (on both of them, long and long long are 64 bit and behave identically,
/// but they are different types in C++, and this affects function overload resolution).
using Time = Int64;
/// The order of fields matters for alignment and sizeof.
struct Values
{
/// time_t at beginning of the day.
Int64 date;
/// Time at beginning of the day.
Time date;
/// Properties of the day.
UInt16 year;
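A minimal standalone sketch (not part of the patch) of the platform difference the `Time` alias above works around: `time_t` and `int64_t` are both 64-bit signed integers on Linux and Darwin, but they are not guaranteed to be the same C++ type, so overload sets written in terms of `Int64` can bind differently depending on the OS.

```cpp
#include <cstdint>
#include <ctime>
#include <iostream>
#include <type_traits>

// Sketch: time_t may be `long` or `long long` depending on the platform's
// C library, while int64_t is pinned to one of the two. Same size and
// behavior, yet distinct C++ types, which is what affects overload resolution.
int main()
{
    std::cout << std::boolalpha
              << "time_t is the same C++ type as int64_t: "
              << std::is_same_v<std::time_t, std::int64_t> << '\n'
              << "sizeof(time_t) == 8: " << (sizeof(std::time_t) == 8) << '\n';
}
```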
@ -182,20 +187,20 @@ private:
LUTIndex years_months_lut[DATE_LUT_YEARS * 12];
/// UTC offset at beginning of the Unix epoch. The same as unix timestamp of 1970-01-01 00:00:00 local time.
time_t offset_at_start_of_epoch;
Time offset_at_start_of_epoch;
/// UTC offset at the beginning of the first supported year.
time_t offset_at_start_of_lut;
Time offset_at_start_of_lut;
bool offset_is_whole_number_of_hours_during_epoch;
/// Time zone name.
std::string time_zone;
inline LUTIndex findIndex(time_t t) const
inline LUTIndex findIndex(Time t) const
{
/// First guess.
Int64 guess = (t / 86400) + daynum_offset_epoch;
Time guess = (t / 86400) + daynum_offset_epoch;
/// For negative time_t the integer division was rounded up, so the guess is offset by one.
/// For negative Time the integer division was rounded up, so the guess is offset by one.
if (unlikely(t < 0))
--guess;
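The decrement above compensates for C++ integer division truncating toward zero. A standalone sketch of the same floor-division idea (the real function then verifies the guess against the LUT, which is omitted here):

```cpp
#include <cassert>
#include <cstdint>

// Sketch: t / 86400 rounds toward zero, so for t < 0 it overshoots by one day
// unless t is an exact day boundary. The patch decrements unconditionally for
// t < 0 because the result is only a first guess corrected against the LUT;
// this standalone version computes the exact floor instead.
int64_t dayOfTimestamp(int64_t t)
{
    int64_t guess = t / 86400;
    if (t < 0 && t % 86400 != 0)
        --guess;
    return guess;
}

int main()
{
    assert(dayOfTimestamp(0) == 0);        // 1970-01-01 00:00:00
    assert(dayOfTimestamp(86399) == 0);    // last second of day 0
    assert(dayOfTimestamp(-1) == -1);      // one second before the epoch
    assert(dayOfTimestamp(-86400) == -1);  // exactly one day before the epoch
}
```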
@ -227,7 +232,7 @@ private:
return LUTIndex{static_cast<UInt32>(d + daynum_offset_epoch) & date_lut_mask};
}
inline LUTIndex toLUTIndex(time_t t) const
inline LUTIndex toLUTIndex(Time t) const
{
return findIndex(t);
}
@ -280,7 +285,7 @@ public:
/// Round down to start of monday.
template <typename DateOrTime>
inline time_t toFirstDayOfWeek(DateOrTime v) const
inline Time toFirstDayOfWeek(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return lut[i - (lut[i].day_of_week - 1)].date;
@ -295,7 +300,7 @@ public:
/// Round down to start of month.
template <typename DateOrTime>
inline time_t toFirstDayOfMonth(DateOrTime v) const
inline Time toFirstDayOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return lut[i - (lut[i].day_of_month - 1)].date;
@ -332,13 +337,13 @@ public:
}
template <typename DateOrTime>
inline time_t toFirstDayOfQuarter(DateOrTime v) const
inline Time toFirstDayOfQuarter(DateOrTime v) const
{
return toDate(toFirstDayOfQuarterIndex(v));
}
/// Round down to start of year.
inline time_t toFirstDayOfYear(time_t t) const
inline Time toFirstDayOfYear(Time t) const
{
return lut[years_lut[lut[findIndex(t)].year - DATE_LUT_MIN_YEAR]].date;
}
@ -355,14 +360,14 @@ public:
return toDayNum(toFirstDayNumOfYearIndex(v));
}
inline time_t toFirstDayOfNextMonth(time_t t) const
inline Time toFirstDayOfNextMonth(Time t) const
{
LUTIndex index = findIndex(t);
index += 32 - lut[index].day_of_month;
return lut[index - (lut[index].day_of_month - 1)].date;
}
inline time_t toFirstDayOfPrevMonth(time_t t) const
inline Time toFirstDayOfPrevMonth(Time t) const
{
LUTIndex index = findIndex(t);
index -= lut[index].day_of_month;
@ -389,16 +394,16 @@ public:
/** Round to start of day, then shift by the specified number of days.
*/
inline time_t toDateAndShift(time_t t, Int32 days) const
inline Time toDateAndShift(Time t, Int32 days) const
{
return lut[findIndex(t) + days].date;
}
inline time_t toTime(time_t t) const
inline Time toTime(Time t) const
{
const LUTIndex index = findIndex(t);
time_t res = t - lut[index].date;
Time res = t - lut[index].date;
if (res >= lut[index].time_at_offset_change())
res += lut[index].amount_of_offset_change();
@ -406,11 +411,11 @@ public:
return res - offset_at_start_of_epoch; /// Starting at 1970-01-01 00:00:00 local time.
}
inline unsigned toHour(time_t t) const
inline unsigned toHour(Time t) const
{
const LUTIndex index = findIndex(t);
time_t time = t - lut[index].date;
Time time = t - lut[index].date;
if (time >= lut[index].time_at_offset_change())
time += lut[index].amount_of_offset_change();
@ -426,7 +431,7 @@ public:
* then subtract the former from the latter to get the offset result.
* The boundaries when meets DST(daylight saving time) change should be handled very carefully.
*/
inline time_t timezoneOffset(time_t t) const
inline Time timezoneOffset(Time t) const
{
const LUTIndex index = findIndex(t);
@ -434,7 +439,7 @@ public:
/// Because the "amount_of_offset_change" in LUT entry only exists in the change day, it's costly to scan it from the very begin.
/// but we can figure out all the accumulated offsets from 1970-01-01 to that day just by get the whole difference between lut[].date,
/// and then, we can directly subtract multiple 86400s to get the real DST offsets for the leap seconds is not considered now.
time_t res = (lut[index].date - lut[daynum_offset_epoch].date) % 86400;
Time res = (lut[index].date - lut[daynum_offset_epoch].date) % 86400;
/// As far as we know, the maximal DST offset is no more than 2 hours, so after the modulo operation the remainder
/// sits within [-offset --> 0 --> offset], which corresponds to moving the clock forward or backward respectively.
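A worked sketch of this computation with illustrative numbers (not taken from a real LUT); the normalization into [-43200, 43200] matches the comment above and is an assumption about the surrounding, partially shown code:

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    // Hypothetical zone at UTC+3 that switches to UTC+4 in summer.
    const int64_t epoch_day_start = -3 * 3600;                  // 1970-01-01 00:00 local
    const int64_t summer_day_start = 15706 * 86400 - 4 * 3600;  // some summer day at UTC+4

    // Every day start differs from the epoch day start by N * 86400 plus the
    // accumulated offset change; the modulo isolates the change itself.
    int64_t res = (summer_day_start - epoch_day_start) % 86400;  // 82800
    res = res > 43200 ? res - 86400 : res;   // -3600: the clock moved forward one hour

    std::cout << res << '\n';
}
```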
@ -448,7 +453,7 @@ public:
}
inline unsigned toSecond(time_t t) const
inline unsigned toSecond(Time t) const
{
auto res = t % 60;
if (likely(res >= 0))
@ -456,7 +461,7 @@ public:
return res + 60;
}
inline unsigned toMinute(time_t t) const
inline unsigned toMinute(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return (t / 60) % 60;
@ -474,27 +479,27 @@ public:
}
/// NOTE: Assuming timezone offset is a multiple of 15 minutes.
inline time_t toStartOfMinute(time_t t) const { return roundDown(t, 60); }
inline time_t toStartOfFiveMinute(time_t t) const { return roundDown(t, 300); }
inline time_t toStartOfFifteenMinutes(time_t t) const { return roundDown(t, 900); }
inline Time toStartOfMinute(Time t) const { return roundDown(t, 60); }
inline Time toStartOfFiveMinute(Time t) const { return roundDown(t, 300); }
inline Time toStartOfFifteenMinutes(Time t) const { return roundDown(t, 900); }
inline time_t toStartOfTenMinutes(time_t t) const
inline Time toStartOfTenMinutes(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return t / 600 * 600;
/// More complex logic is needed for Nepal - it has a 05:45 offset. Australia/Eucla is also unfortunate.
Int64 date = find(t).date;
Time date = find(t).date;
return date + (t - date) / 600 * 600;
}
/// NOTE: Assuming timezone transitions are multiples of hours. Lord Howe Island in Australia is a notable exception.
inline time_t toStartOfHour(time_t t) const
inline Time toStartOfHour(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return t / 3600 * 3600;
Int64 date = find(t).date;
Time date = find(t).date;
return date + (t - date) / 3600 * 3600;
}
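A sketch of why both functions round relative to the local day start rather than to absolute multiples; the zone and timestamps are made up for illustration:

```cpp
#include <cassert>
#include <cstdint>

// Rounding t itself to a multiple of 3600 gives a UTC hour boundary, which is
// not a local hour boundary in zones like Asia/Kathmandu (UTC+05:45).
// Rounding the distance from the local day start keeps local hour boundaries.
int64_t toStartOfHourNaive(int64_t t) { return t / 3600 * 3600; }

int64_t toStartOfHourLocal(int64_t t, int64_t day_start)
{
    return day_start + (t - day_start) / 3600 * 3600;
}

int main()
{
    // Hypothetical day start for a +05:45 zone, some 100 days past the epoch.
    const int64_t day_start = 100 * 86400 - (5 * 3600 + 45 * 60);
    const int64_t t = day_start + 3 * 3600 + 10 * 60;  // 03:10 local time

    assert(toStartOfHourLocal(t, day_start) == day_start + 3 * 3600);  // 03:00 local
    assert((toStartOfHourNaive(t) - day_start) % 3600 != 0);           // not a local hour boundary
}
```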
@ -506,11 +511,11 @@ public:
* because the same calendar day starts/ends at different timestamps in different time zones)
*/
inline time_t fromDayNum(DayNum d) const { return lut[toLUTIndex(d)].date; }
inline time_t fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }
inline Time fromDayNum(DayNum d) const { return lut[toLUTIndex(d)].date; }
inline Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }
template <typename DateOrTime>
inline time_t toDate(DateOrTime v) const { return lut[toLUTIndex(v)].date; }
inline Time toDate(DateOrTime v) const { return lut[toLUTIndex(v)].date; }
template <typename DateOrTime>
inline unsigned toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }
@ -578,7 +583,7 @@ public:
return toDayNum(toFirstDayNumOfISOYearIndex(v));
}
inline time_t toFirstDayOfISOYear(time_t t) const
inline Time toFirstDayOfISOYear(Time t) const
{
return lut[toFirstDayNumOfISOYearIndex(t)].date;
}
@ -773,7 +778,7 @@ public:
}
/// We count all hour-length intervals, unrelated to offset changes.
inline time_t toRelativeHourNum(time_t t) const
inline Time toRelativeHourNum(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return t / 3600;
@ -784,18 +789,18 @@ public:
}
template <typename DateOrTime>
inline time_t toRelativeHourNum(DateOrTime v) const
inline Time toRelativeHourNum(DateOrTime v) const
{
return toRelativeHourNum(lut[toLUTIndex(v)].date);
}
inline time_t toRelativeMinuteNum(time_t t) const
inline Time toRelativeMinuteNum(Time t) const
{
return (t + DATE_LUT_ADD) / 60 - (DATE_LUT_ADD / 60);
}
template <typename DateOrTime>
inline time_t toRelativeMinuteNum(DateOrTime v) const
inline Time toRelativeMinuteNum(DateOrTime v) const
{
return toRelativeMinuteNum(lut[toLUTIndex(v)].date);
}
@ -842,14 +847,14 @@ public:
return ExtendedDayNum(4 + (d - 4) / days * days);
}
inline time_t toStartOfDayInterval(ExtendedDayNum d, UInt64 days) const
inline Time toStartOfDayInterval(ExtendedDayNum d, UInt64 days) const
{
if (days == 1)
return toDate(d);
return lut[toLUTIndex(ExtendedDayNum(d / days * days))].date;
}
inline time_t toStartOfHourInterval(time_t t, UInt64 hours) const
inline Time toStartOfHourInterval(Time t, UInt64 hours) const
{
if (hours == 1)
return toStartOfHour(t);
@ -867,7 +872,7 @@ public:
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
{
/// Align to new hour numbers before rounding.
@ -892,7 +897,7 @@ public:
return values.date + time;
}
inline time_t toStartOfMinuteInterval(time_t t, UInt64 minutes) const
inline Time toStartOfMinuteInterval(Time t, UInt64 minutes) const
{
if (minutes == 1)
return toStartOfMinute(t);
@ -909,7 +914,7 @@ public:
return roundDown(t, seconds);
}
inline time_t toStartOfSecondInterval(time_t t, UInt64 seconds) const
inline Time toStartOfSecondInterval(Time t, UInt64 seconds) const
{
if (seconds == 1)
return t;
@ -934,14 +939,14 @@ public:
return toDayNum(makeLUTIndex(year, month, day_of_month));
}
inline time_t makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
inline Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
{
return lut[makeLUTIndex(year, month, day_of_month)].date;
}
/** Does not accept daylight saving time as argument: in case of ambiguity, it chooses the greater timestamp.
*/
inline time_t makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
inline Time makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
{
size_t index = makeLUTIndex(year, month, day_of_month);
UInt32 time_offset = hour * 3600 + minute * 60 + second;
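The hunk cuts off before the return. A hedged sketch of how the function plausibly completes, mirroring the offset adjustment visible in `toTime` above; the struct and field names are simplified stand-ins, and the subtraction direction is an assumption consistent with "chooses the greater timestamp":

```cpp
#include <cstdint>

// Simplified stand-in for one LUT entry (hypothetical field names).
struct DayEntry
{
    int64_t date;                    // timestamp at the beginning of the day
    int64_t time_at_offset_change;   // seconds into the day when the offset changes
    int64_t amount_of_offset_change; // signed change in seconds
};

// Sketch: wall-clock time is converted to seconds since the day start, then
// compensated if it falls past the offset change, so ambiguous wall-clock
// times resolve to the greater timestamp.
int64_t makeDateTimeSketch(const DayEntry & day, uint32_t hour, uint32_t minute, uint32_t second)
{
    int64_t time_offset = hour * 3600 + minute * 60 + second;
    if (time_offset >= day.time_at_offset_change)
        time_offset -= day.amount_of_offset_change;
    return day.date + time_offset;
}

int main()
{
    // Hypothetical day where clocks move back one hour: 02:30 occurs twice.
    DayEntry day{0, 2 * 3600, -3600};
    // With the adjustment, 02:30 maps to the second (greater) occurrence.
    return makeDateTimeSketch(day, 2, 30, 0) == (2 * 3600 + 30 * 60) + 3600 ? 0 : 1;
}
```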
@ -969,7 +974,7 @@ public:
return values.year * 10000 + values.month * 100 + values.day_of_month;
}
inline time_t YYYYMMDDToDate(UInt32 num) const
inline Time YYYYMMDDToDate(UInt32 num) const
{
return makeDate(num / 10000, num / 100 % 100, num % 100);
}
@ -1000,13 +1005,13 @@ public:
TimeComponents time;
};
inline DateComponents toDateComponents(time_t t) const
inline DateComponents toDateComponents(Time t) const
{
const Values & values = getValues(t);
return { values.year, values.month, values.day_of_month };
}
inline DateTimeComponents toDateTimeComponents(time_t t) const
inline DateTimeComponents toDateTimeComponents(Time t) const
{
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
@ -1017,7 +1022,7 @@ public:
res.date.month = values.month;
res.date.day = values.day_of_month;
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
time += values.amount_of_offset_change();
@ -1042,7 +1047,7 @@ public:
}
inline UInt64 toNumYYYYMMDDhhmmss(time_t t) const
inline UInt64 toNumYYYYMMDDhhmmss(Time t) const
{
DateTimeComponents components = toDateTimeComponents(t);
@ -1055,7 +1060,7 @@ public:
+ UInt64(components.date.year) * 10000000000;
}
inline time_t YYYYMMDDhhmmssToTime(UInt64 num) const
inline Time YYYYMMDDhhmmssToTime(UInt64 num) const
{
return makeDateTime(
num / 10000000000,
@ -1069,12 +1074,12 @@ public:
/// Adding calendar intervals.
/// Implementation specific behaviour when delta is too big.
inline NO_SANITIZE_UNDEFINED time_t addDays(time_t t, Int64 delta) const
inline NO_SANITIZE_UNDEFINED Time addDays(Time t, Int64 delta) const
{
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
time += values.amount_of_offset_change();
@ -1086,7 +1091,7 @@ public:
return lut[new_index].date + time;
}
inline NO_SANITIZE_UNDEFINED time_t addWeeks(time_t t, Int64 delta) const
inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const
{
return addDays(t, delta * 7);
}
@ -1131,14 +1136,14 @@ public:
/// If the resulting month has fewer days than the source month, then saturation can happen.
/// Example: 31 Aug + 1 month = 30 Sep.
inline time_t NO_SANITIZE_UNDEFINED addMonths(time_t t, Int64 delta) const
inline Time NO_SANITIZE_UNDEFINED addMonths(Time t, Int64 delta) const
{
const auto result_day = addMonthsIndex(t, delta);
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
time += values.amount_of_offset_change();
@ -1153,7 +1158,7 @@ public:
return toDayNum(addMonthsIndex(d, delta));
}
inline time_t NO_SANITIZE_UNDEFINED addQuarters(time_t t, Int64 delta) const
inline Time NO_SANITIZE_UNDEFINED addQuarters(Time t, Int64 delta) const
{
return addMonths(t, delta * 3);
}
@ -1180,14 +1185,14 @@ public:
}
/// Saturation can occur if 29 Feb is mapped to non-leap year.
inline time_t addYears(time_t t, Int64 delta) const
inline Time addYears(Time t, Int64 delta) const
{
auto result_day = addYearsIndex(t, delta);
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
time += values.amount_of_offset_change();
@ -1203,7 +1208,7 @@ public:
}
inline std::string timeToString(time_t t) const
inline std::string timeToString(Time t) const
{
DateTimeComponents components = toDateTimeComponents(t);
@ -1228,7 +1233,7 @@ public:
return s;
}
inline std::string dateToString(time_t t) const
inline std::string dateToString(Time t) const
{
const Values & values = getValues(t);

View File

@ -1,4 +1,7 @@
# This strings autochanged from release_lib.sh:
# These variables are autochanged by release_lib.sh:
# NOTE: has nothing in common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54453)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 8)

View File

@ -12,7 +12,6 @@ mkdir root
pushd root
mkdir lib lib64 etc tmp root
cp ${BUILD_DIR}/programs/clickhouse .
cp ${SRC_DIR}/programs/server/{config,users}.xml .
cp /lib/x86_64-linux-gnu/{libc.so.6,libdl.so.2,libm.so.6,libpthread.so.0,librt.so.1,libnss_dns.so.2,libresolv.so.2} lib
cp /lib64/ld-linux-x86-64.so.2 lib64
cp /etc/resolv.conf ./etc

View File

@ -113,6 +113,7 @@ function start_server
echo "ClickHouse server pid '$server_pid' started and responded"
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print

View File

@ -103,6 +103,7 @@ function fuzz
kill -0 $server_pid
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-integration-test .
FROM yandex/clickhouse-test-base
SHELL ["/bin/bash", "-c"]
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get -y install \
tzdata \
@ -20,7 +22,9 @@ RUN apt-get update \
krb5-user \
iproute2 \
lsof \
g++
g++ \
default-jre
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
@ -30,6 +34,19 @@ RUN apt-get clean
# Install MySQL ODBC driver
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
# Unfortunately this is required for a single test that converts data from ZooKeeper to clickhouse-keeper.
# ZooKeeper is not started by default, but consumes some space in containers.
# 777 perms used to allow anybody to start/stop ZooKeeper
ENV ZOOKEEPER_VERSION='3.6.3'
RUN curl -O "https://mirrors.estointernet.in/apache/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
RUN tar -zxvf apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz && mv apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && chmod -R 777 /opt/zookeeper && rm apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz
RUN echo $'tickTime=2500 \n\
dataDir=/zookeeper \n\
clientPort=2181 \n\
maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg
RUN mkdir /zookeeper && chmod -R 777 /zookeeper
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -409,10 +409,10 @@ create view right_query_log as select *
'$(cat "right-query-log.tsv.columns")');
create view query_logs as
select 0 version, query_id, ProfileEvents.Names, ProfileEvents.Values,
select 0 version, query_id, ProfileEvents.keys, ProfileEvents.values,
query_duration_ms, memory_usage from left_query_log
union all
select 1 version, query_id, ProfileEvents.Names, ProfileEvents.Values,
select 1 version, query_id, ProfileEvents.keys, ProfileEvents.values,
query_duration_ms, memory_usage from right_query_log
;
@ -424,7 +424,7 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-
with (
-- sumMapState with the list of all keys with '-0.' values. Negative zero is because
-- sumMap removes keys with positive zeros.
with (select groupUniqArrayArray(ProfileEvents.Names) from query_logs) as all_names
with (select groupUniqArrayArray(ProfileEvents.keys) from query_logs) as all_names
select arrayReduce('sumMapState', [(all_names, arrayMap(x->-0., all_names))])
) as all_metrics
select test, query_index, version, query_id,
@ -433,8 +433,8 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-
[
all_metrics,
arrayReduce('sumMapState',
[(ProfileEvents.Names,
arrayMap(x->toFloat64(x), ProfileEvents.Values))]
[(ProfileEvents.keys,
arrayMap(x->toFloat64(x), ProfileEvents.values))]
),
arrayReduce('sumMapState', [(
['client_time', 'server_time', 'memory_usage'],
@ -1005,7 +1005,7 @@ create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select
test, query_index, query_id,
ProfileEvents.Values value, ProfileEvents.Names metric
ProfileEvents.values value, ProfileEvents.keys metric
from query_log array join ProfileEvents
join unstable_query_runs using (query_id)
;
@ -1280,7 +1280,7 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
then
echo Database for test results is not specified, will not upload them.
return 0
fi
fi
set +x # Don't show password in the log
client=(clickhouse-client

View File

@ -1,4 +1,6 @@
#!/bin/bash
# shellcheck disable=SC2094
# shellcheck disable=SC2086
set -x
@ -37,6 +39,17 @@ function stop()
function start()
{
# Rename existing log file - it will be more convenient to read separate files for separate server runs.
if [ -f '/var/log/clickhouse-server/clickhouse-server.log' ]
then
log_file_counter=1
while [ -f "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}" ]
do
log_file_counter=$((log_file_counter + 1))
done
mv '/var/log/clickhouse-server/clickhouse-server.log' "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}"
fi
counter=0
until clickhouse-client --query "SELECT 1"
do
@ -55,6 +68,7 @@ function start()
done
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
@ -140,7 +154,11 @@ zgrep -Fa "########################################" /test_output/* > /dev/null
&& echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
# Put logs into /test_output/
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz
for log_file in /var/log/clickhouse-server/clickhouse-server.log*
do
pigz < "${log_file}" > /test_output/"$(basename ${log_file})".gz
done
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
mv /var/log/clickhouse-server/stderr.log /test_output/
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:

View File

@ -0,0 +1,3 @@
wget 'https://builds.clickhouse.tech/master/freebsd/clickhouse'
chmod a+x ./clickhouse
sudo ./clickhouse install

View File

@ -0,0 +1,3 @@
wget 'https://builds.clickhouse.tech/master/macos-aarch64/clickhouse'
chmod a+x ./clickhouse
./clickhouse

View File

@ -0,0 +1,3 @@
wget 'https://builds.clickhouse.tech/master/macos/clickhouse'
chmod a+x ./clickhouse
./clickhouse

View File

@ -49,6 +49,7 @@ When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](.
| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) |
| ENUM | [Enum](../../sql-reference/data-types/enum.md) |
| STRING | [String](../../sql-reference/data-types/string.md) |
| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |

View File

@ -107,9 +107,10 @@ sudo ./clickhouse install
For non-Linux operating systems and for the AArch64 CPU architecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay).
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
- [MacOS x86_64](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.tech/master/macos-aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD x86_64](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
- [Linux AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data.

View File

@ -154,5 +154,6 @@ toc_title: Adopters
| <a href="https://www.hydrolix.io/" class="favicon">Hydrolix</a> | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) |
| <a href="https://www.argedor.com/en/clickhouse/" class="favicon">Argedor</a> | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) |
| <a href="https://signoz.io/" class="favicon">SigNoz</a> | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) |
| <a href="https://chelpipegroup.com/" class="favicon">ChelPipe Group</a> | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -34,14 +34,14 @@ initial_port: 47588
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
quota_key:
http_user_agent:
quota_key:
elapsed: 0.000582537
is_cancelled: 0
read_rows: 0
@ -53,12 +53,10 @@ memory_usage: 0
peak_memory_usage: 0
query: SELECT * from system.processes LIMIT 10 FORMAT Vertical;
thread_ids: [67]
ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ContextLock','RWLockAcquiredReadLocks']
ProfileEvents.Values: [1,1,36,1,10,1,89,16,1]
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage']
Settings.Values: ['0','in_order','1','10000000000']
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'}
1 rows in set. Elapsed: 0.002 sec.
1 rows in set. Elapsed: 0.002 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/processes) <!--hide-->

View File

@ -84,12 +84,10 @@ Columns:
- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP header `X-Forwarded-For` passed in the HTTP query.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics. Their description can be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events).
- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — Settings that were changed when the client ran the query. To enable logging of settings changes, set the `log_query_settings` parameter to 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.
@ -109,68 +107,49 @@ SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDa
``` text
Row 1:
──────
type: QueryFinish
event_date: 2021-03-18
event_time: 2021-03-18 20:54:18
event_time_microseconds: 2021-03-18 20:54:18.676686
query_start_time: 2021-03-18 20:54:18
query_start_time_microseconds: 2021-03-18 20:54:18.673934
query_duration_ms: 2
read_rows: 100
read_bytes: 800
written_rows: 0
written_bytes: 0
result_rows: 2
result_bytes: 4858
memory_usage: 0
current_database: default
query: SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]), week(toDate('2000-12-05')), CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)), avgOrDefaultIf(number, number % 2), sumOrNull(number), toTypeName(sumOrNull(number)), countIf(toDate('2000-12-05') + number as d, toDayOfYear(d) % 2) FROM numbers(100)
normalized_query_hash: 17858008518552525706
query_kind: Select
databases: ['_table_function']
tables: ['_table_function.numbers']
columns: ['_table_function.numbers.number']
exception_code: 0
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
current_database: default
query: INSERT INTO test1 VALUES
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
address: ::ffff:127.0.0.1
port: 37486
initial_user: default
initial_query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
initial_address: ::ffff:127.0.0.1
initial_port: 37486
interface: 1
os_user: sevirov
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse
client_revision: 54447
client_version_major: 21
client_version_minor: 4
client_version_patch: 1
http_method: 0
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
address: ::ffff:127.0.0.1
port: 33452
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_address: ::ffff:127.0.0.1
initial_port: 33452
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54449
log_comment:
thread_ids: [587,11939]
ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','TableFunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes']
ProfileEvents.Values: [1,1,36,1,10,2,1048680,1,4096,36,1,110,100,800,77,1,3137,1476,1101,8,2577,8192]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: ['groupBitAnd','avg','sum','count','uniq']
used_aggregate_function_combinators: ['OrDefault','If','OrNull','Array']
used_database_engines: []
used_data_type_families: ['String','Array','Int32','Nullable']
used_dictionaries: []
used_formats: []
used_functions: ['toWeek','CAST','arrayFlatten','toTypeName','toDayOfYear','addDays','array','toDate','modulo','substring','plus']
used_storages: []
used_table_functions: ['numbers']
revision: 54440
thread_ids: []
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'}
```
**See Also**

View File

@ -58,8 +58,7 @@ Columns:
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events).
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column.
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics for this thread. Their description can be found in the table [system.events](#system_tables-events).
**Example**
@ -98,17 +97,16 @@ initial_port: 33452
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
quota_key:
http_user_agent:
quota_key:
revision: 54440
ProfileEvents.Names: ['Query','InsertQuery','FileOpen','WriteBufferFromFileDescriptorWrite','WriteBufferFromFileDescriptorWriteBytes','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','FunctionExecute','CreatedWriteBufferOrdinary','DiskWriteElapsedMicroseconds','NetworkReceiveElapsedMicroseconds','NetworkSendElapsedMicroseconds','InsertedRows','InsertedBytes','SelectedRows','SelectedBytes','MergeTreeDataWriterRows','MergeTreeDataWriterUncompressedBytes','MergeTreeDataWriterCompressedBytes','MergeTreeDataWriterBlocks','MergeTreeDataWriterBlocksAlreadySorted','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSReadChars','OSWriteChars']
ProfileEvents.Values: [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47,1,12,1,12,1,12,189,1,1,10,2,70853,2748,49,2747,45056,422,1520]
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
```
**See Also**

View File

@ -4,14 +4,14 @@ The `median*` functions are the aliases for the corresponding `quantile*` functi
Functions:
- `median` — Alias for [quantile](#quantile).
- `medianDeterministic` — Alias for [quantileDeterministic](#quantiledeterministic).
- `medianExact` — Alias for [quantileExact](#quantileexact).
- `medianExactWeighted` — Alias for [quantileExactWeighted](#quantileexactweighted).
- `medianTiming` — Alias for [quantileTiming](#quantiletiming).
- `medianTimingWeighted` — Alias for [quantileTimingWeighted](#quantiletimingweighted).
- `medianTDigest` — Alias for [quantileTDigest](#quantiletdigest).
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](#quantiletdigestweighted).
- `median` — Alias for [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
- `medianDeterministic` — Alias for [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic).
- `medianExact` — Alias for [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact).
- `medianExactWeighted` — Alias for [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md#quantileexactweighted).
- `medianTiming` — Alias for [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming).
- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted).
- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
**Example**

View File

@ -2,7 +2,9 @@
toc_priority: 202
---
# quantileExact {#quantileexact}
# quantileExact Functions {#quantileexact-functions}
## quantileExact {#quantileexact}
Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
@ -49,7 +51,7 @@ Result:
└───────────────────────┘
```
# quantileExactLow {#quantileexactlow}
## quantileExactLow {#quantileexactlow}
Similar to `quantileExact`, this computes the exact [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
@ -66,7 +68,7 @@ SELECT quantileExactLow(0.1)(number) FROM numbers(10)
│ 1 │
└───────────────────────────────┘
```
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
**Syntax**
@ -107,12 +109,11 @@ Result:
│ 4 │
└──────────────────────────┘
```
# quantileExactHigh {#quantileexacthigh}
## quantileExactHigh {#quantileexacthigh}
Similar to `quantileExact`, this computes the exact [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
All the passed values are combined into an array, which is then fully sorted,
to get the exact value. The sorting [algorithm's](https://en.cppreference.com/w/cpp/algorithm/sort) complexity is `O(N·log(N))`, where `N = std::distance(first, last)` comparisons.
All the passed values are combined into an array, which is then fully sorted, to get the exact value. The sorting [algorithm's](https://en.cppreference.com/w/cpp/algorithm/sort) complexity is `O(N·log(N))`, where `N = std::distance(first, last)` comparisons.
The return value depends on the quantile level and the number of elements in the selection, i.e. if the level is 0.5, then the function returns the higher median value for an even number of elements and the middle median value for an odd number of elements. Median is calculated similarly to the [median_high](https://docs.python.org/3/library/statistics.html#statistics.median_high) implementation which is used in python. For all other levels, the element at the index corresponding to the value of `level * size_of_array` is returned.
@ -158,6 +159,111 @@ Result:
│ 5 │
└───────────────────────────┘
```
## quantileExactExclusive {#quantileexactexclusive}
Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
This function is equivalent to the [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba) Excel function ([type R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
When using multiple `quantileExactExclusive` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive) function.
**Syntax**
``` sql
quantileExactExclusive(level)(expr)
```
**Arguments**
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Parameters**
- `level` — Level of quantile. Optional. Possible values: (0, 1) — bounds not included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md).
**Returned value**
- Quantile of the specified level.
Type:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
CREATE TABLE num AS numbers(1000);
SELECT quantileExactExclusive(0.6)(x) FROM (SELECT number AS x FROM num);
```
Result:
``` text
┌─quantileExactExclusive(0.6)(x)─┐
│ 599.6 │
└────────────────────────────────┘
```
## quantileExactInclusive {#quantileexactinclusive}
Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
This function is equivalent to the [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed) Excel function ([type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
When using multiple `quantileExactInclusive` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactinclusive) function.
**Syntax**
``` sql
quantileExactInclusive(level)(expr)
```
**Arguments**
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Parameters**
- `level` — Level of quantile. Optional. Possible values: [0, 1] — bounds included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md).
**Returned value**
- Quantile of the specified level.
Type:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
CREATE TABLE num AS numbers(1000);
SELECT quantileExactInclusive(0.6)(x) FROM (SELECT number AS x FROM num);
```
Result:
``` text
┌─quantileExactInclusive(0.6)(x)─┐
│ 599.4 │
└────────────────────────────────┘
```
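For readers cross-checking the two results above, here is a standalone sketch of the R6 and R7 estimators referenced via the Wikipedia links. This is illustrative arithmetic, not ClickHouse source, and it assumes the level keeps the fractional rank strictly inside the sample (no boundary clamping):

```cpp
#include <cstdio>
#include <vector>

// R6 ("exclusive", PERCENTILE.EXC) and R7 ("inclusive", PERCENTILE.INC)
// quantile estimates over sorted data, reproducing the documented results
// for numbers 0..999 at level 0.6.
double quantileR6(const std::vector<double> & sorted, double level)
{
    double h = (sorted.size() + 1) * level;   // 1-based fractional rank
    size_t lo = static_cast<size_t>(h);
    return sorted[lo - 1] + (h - lo) * (sorted[lo] - sorted[lo - 1]);
}

double quantileR7(const std::vector<double> & sorted, double level)
{
    double h = (sorted.size() - 1) * level + 1;
    size_t lo = static_cast<size_t>(h);
    return sorted[lo - 1] + (h - lo) * (sorted[lo] - sorted[lo - 1]);
}

int main()
{
    std::vector<double> xs(1000);
    for (size_t i = 0; i < xs.size(); ++i)
        xs[i] = static_cast<double>(i);       // 0..999, already sorted

    std::printf("exclusive: %g\n", quantileR6(xs, 0.6));  // 599.6, as in the example above
    std::printf("inclusive: %g\n", quantileR7(xs, 0.6));  // 599.4, as in the example above
}
```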
**See Also**
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)

View File

@ -2,8 +2,114 @@
toc_priority: 201
---
# quantiles {#quantiles}
# quantiles Functions {#quantiles-functions}
## quantiles {#quantiles}
Syntax: `quantiles(level1, level2, …)(x)`
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
## quantilesExactExclusive {#quantilesexactexclusive}
Exactly computes the [quantiles](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
This function is equivalent to the [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba) Excel function ([type R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
Works more efficiently with sets of levels than [quantileExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
**Syntax**
``` sql
quantilesExactExclusive(level1, level2, ...)(expr)
```
**Arguments**
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Parameters**
- `level` — Levels of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md).
**Returned value**
- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
Type of array values:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
CREATE TABLE num AS numbers(1000);
SELECT quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num);
```
Result:
``` text
┌─quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐
│ [249.25,499.5,749.75,899.9,949.9499999999999,989.99,998.999] │
└─────────────────────────────────────────────────────────────────────┘
```
## quantilesExactInclusive {#quantilesexactinclusive}
Exactly computes the [quantiles](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
This function is equivalent to the [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed) Excel function ([type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
Works more efficiently with sets of levels than [quantileExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactinclusive).
**Syntax**
``` sql
quantilesExactInclusive(level1, level2, ...)(expr)
```
**Arguments**
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Parameters**
- `level` — Levels of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md).
**Returned value**
- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
Type of array values:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
CREATE TABLE num AS numbers(1000);
SELECT quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num);
```
Result:
``` text
┌─quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐
│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │
└─────────────────────────────────────────────────────────────────────┘
```

View File

@ -0,0 +1,48 @@
---
toc_priority: 55
toc_title: s3Cluster
---
# s3Cluster Table Function {#s3Cluster-table-function}
Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, expands the asterisks in the S3 file path, and dispatches each file dynamically. On a worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
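A conceptual sketch of that dispatch loop, with made-up class and file names (ClickHouse's actual implementation differs):

```cpp
#include <mutex>
#include <optional>
#include <string>
#include <vector>

// Sketch: the initiator expands the glob into concrete file tasks once, and
// each worker repeatedly asks for the next task until the queue is empty.
class Initiator
{
public:
    explicit Initiator(std::vector<std::string> files) : tasks(std::move(files)) {}

    std::optional<std::string> nextTask()
    {
        std::lock_guard<std::mutex> lock(mutex);
        if (next == tasks.size())
            return std::nullopt;      // all tasks finished
        return tasks[next++];
    }

private:
    std::mutex mutex;
    std::vector<std::string> tasks;   // result of expanding '*' in the S3 path
    size_t next = 0;
};

int main()
{
    Initiator initiator({"data/clickhouse/part1.csv", "data/database/part2.csv"});
    while (auto task = initiator.nextTask())
        ;  // a worker would download and process *task here
}
```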
**Syntax**
``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
```
**Arguments**
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `source` — URL to a file or a set of files. Supports the following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
**Returned value**
A table with the specified structure for reading or writing data in the specified file.
**Examples**
Select the data from all files in the cluster `cluster_simple`:
``` sql
SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon);
```
Count the total number of rows in all files in the cluster `cluster_simple`:
``` sql
SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))');
```
!!! warning "Warning"
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**See Also**
- [S3 engine](../../engines/table-engines/integrations/s3.md)
- [s3 table function](../../sql-reference/table-functions/s3.md)

View File

@ -625,10 +625,8 @@ ClickHouse creates this table. [query_log](server-configur
- `quota_key` (String) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` (UInt32) — ClickHouse revision.
- `thread_numbers` (Array(UInt32)) — Number of threads that are participating in query execution.
- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. Their description can be found in the table [system.events](#system_tables-events)
- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
- `Settings.Names` (Array(String)) — Names of settings that were changed when the client ran the query. To enable logging of settings changes, set the `log_query_settings` parameter to 1.
- `Settings.Values` (Array(String)) — Values of settings that are listed in the `Settings.Names` column.
- `ProfileEvents` (Map(String, UInt64)) — ProfileEvents that measure different metrics. Their description can be found in the table [system.events](#system_tables-events)
- `Settings` (Map(String, String)) — Settings that were changed when the client ran the query.
Depending on the query status, one or two rows are added to the `query_log` table for each query:
@ -698,8 +696,7 @@ ClickHouse creates this table. [query_thread_log](server-c
- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request.
- `quota_key` (String) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` (UInt32) — ClickHouse revision.
- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics for this thread. Their description can be found in the table [system.events](#system_tables-events)
- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column.
- `ProfileEvents` (Map(String, UInt64)) — ProfileEvents that measure different metrics for this thread. Their description can be found in the table [system.events](#system_tables-events)
By default, logs are added to the table at 7.5-second intervals. You can set this interval in the [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) server setting (see the `flush_interval_milliseconds` parameter). To force flushing logs from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.

View File

@ -84,12 +84,10 @@ ClickHouse does not delete data from the table automati
- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — The `X-Forwarded-For` HTTP header passed in the HTTP query.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. Their description can be found in the table [system.events](#system_tables-events)
- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — Settings that were changed when the client ran the query. To enable logging of settings changes, set the `log_query_settings` parameter to 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. An arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if no comment was specified.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — IDs of the threads participating in query execution.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. Their description can be found in the table [system.events](#system_tables-events)
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of the metrics listed in the `ProfileEvents.Names` column.
- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of the settings that were changed when the client ran the query. To enable logging of settings changes, set the `log_query_settings` parameter to 1.
- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of the settings listed in the `Settings.Names` column.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of the `aggregate functions` used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of the `aggregate function combinators` used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of the `database engines` used during query execution.
@ -109,68 +107,49 @@ SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDa
``` text
Row 1:
──────
type: QueryFinish
event_date: 2021-03-18
event_time: 2021-03-18 20:54:18
event_time_microseconds: 2021-03-18 20:54:18.676686
query_start_time: 2021-03-18 20:54:18
query_start_time_microseconds: 2021-03-18 20:54:18.673934
query_duration_ms: 2
read_rows: 100
read_bytes: 800
written_rows: 0
written_bytes: 0
result_rows: 2
result_bytes: 4858
memory_usage: 0
current_database: default
query: SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]), week(toDate('2000-12-05')), CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)), avgOrDefaultIf(number, number % 2), sumOrNull(number), toTypeName(sumOrNull(number)), countIf(toDate('2000-12-05') + number as d, toDayOfYear(d) % 2) FROM numbers(100)
normalized_query_hash: 17858008518552525706
query_kind: Select
databases: ['_table_function']
tables: ['_table_function.numbers']
columns: ['_table_function.numbers.number']
exception_code: 0
type: QueryStart
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
current_database: default
query: INSERT INTO test1 VALUES
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
address: ::ffff:127.0.0.1
port: 37486
initial_user: default
initial_query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c
initial_address: ::ffff:127.0.0.1
initial_port: 37486
interface: 1
os_user: sevirov
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse
client_revision: 54447
client_version_major: 21
client_version_minor: 4
client_version_patch: 1
http_method: 0
is_initial_query: 1
user: default
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
address: ::ffff:127.0.0.1
port: 33452
initial_user: default
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_address: ::ffff:127.0.0.1
initial_port: 33452
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
http_referer:
forwarded_for:
quota_key:
revision: 54449
log_comment:
thread_ids: [587,11939]
ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','TableFunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes']
ProfileEvents.Values: [1,1,36,1,10,2,1048680,1,4096,36,1,110,100,800,77,1,3137,1476,1101,8,2577,8192]
Settings.Names: ['load_balancing','max_memory_usage']
Settings.Values: ['random','10000000000']
used_aggregate_functions: ['groupBitAnd','avg','sum','count','uniq']
used_aggregate_function_combinators: ['OrDefault','If','OrNull','Array']
used_database_engines: []
used_data_type_families: ['String','Array','Int32','Nullable']
used_dictionaries: []
used_formats: []
used_functions: ['toWeek','CAST','arrayFlatten','toTypeName','toDayOfYear','addDays','array','toDate','modulo','substring','plus']
used_storages: []
used_table_functions: ['numbers']
revision: 54440
thread_ids: []
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'}
```
**See also**

View File

@ -57,8 +57,7 @@ ClickHouse не удаляет данные из таблицы автомати
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — the `UserAgent` HTTP header.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — the "quota key" from the [quotas](quotas.md) settings (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — counters that measure different metrics for this thread. Their descriptions can be found in the [system.events](#system_tables-events) table.
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — values of the metrics for this thread listed in the `ProfileEvents.Names` column.
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — counters that measure different metrics for this thread. Their descriptions can be found in the [system.events](#system_tables-events) table.
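As with `system.query_log`, a per-thread counter can be read from the `Map` by key; a hypothetical query:

``` sql
SELECT
    thread_id,
    ProfileEvents['RealTimeMicroseconds'] AS real_time_us
FROM system.query_thread_log
ORDER BY event_time DESC
LIMIT 5;
```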
**Example**
@ -97,17 +96,16 @@ initial_port: 33452
interface: 1
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
quota_key:
http_user_agent:
quota_key:
revision: 54440
ProfileEvents.Names: ['Query','InsertQuery','FileOpen','WriteBufferFromFileDescriptorWrite','WriteBufferFromFileDescriptorWriteBytes','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','FunctionExecute','CreatedWriteBufferOrdinary','DiskWriteElapsedMicroseconds','NetworkReceiveElapsedMicroseconds','NetworkSendElapsedMicroseconds','InsertedRows','InsertedBytes','SelectedRows','SelectedBytes','MergeTreeDataWriterRows','MergeTreeDataWriterUncompressedBytes','MergeTreeDataWriterCompressedBytes','MergeTreeDataWriterBlocks','MergeTreeDataWriterBlocksAlreadySorted','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSReadChars','OSWriteChars']
ProfileEvents.Values: [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47,1,12,1,12,1,12,189,1,1,10,2,70853,2748,49,2747,45056,422,1520]
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
```
**See also**

View File

@ -2,7 +2,9 @@
toc_priority: 202
---
# quantileExact {#quantileexact}
# quantileExact functions {#quantileexact-functions}
## quantileExact {#quantileexact}
Exactly computes the [quantile](https://ru.wikipedia.org/wiki/Квантиль) of a numeric data sequence.
@ -23,7 +25,6 @@ quantileExact(level)(expr)
- `level` — quantile level. Optional. A constant floating-point value from 0 to 1. We recommend using a `level` value in the range `[0.01, 0.99]`. Default value: 0.5. With `level=0.5` the function computes the [median](https://ru.wikipedia.org/wiki/Медиана_(статистика)).
- `expr` — expression over column values that returns data of [numeric types](../../../sql-reference/data-types/index.md#data_types) or of the [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md) types.
**Returned value**
- Quantile of the specified level.
@ -50,7 +51,7 @@ SELECT quantileExact(number) FROM numbers(10)
└───────────────────────┘
```
# quantileExactLow {#quantileexactlow}
## quantileExactLow {#quantileexactlow}
Similar to `quantileExact`, this function computes the exact [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
@ -67,7 +68,7 @@ SELECT quantileExactLow(0.1)(number) FROM numbers(10)
│ 1 │
└───────────────────────────────┘
```
When several `quantile*` functions with different levels are used in one query, their internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
**Syntax**
@ -83,7 +84,6 @@ quantileExact(level)(expr)
- `level` — quantile level. Optional parameter. A constant floating-point value from 0 to 1. We recommend using a `level` value in the range `[0.01, 0.99]`. Default value: 0.5. With `level=0.5` the function computes the [median](https://en.wikipedia.org/wiki/Median).
- `expr` — expression over column values that returns data of [numeric types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Returned value**
- Quantile of the specified level.
@ -109,7 +109,7 @@ SELECT quantileExactLow(number) FROM numbers(10)
│ 4 │
└──────────────────────────┘
```
# quantileExactHigh {#quantileexacthigh}
## quantileExactHigh {#quantileexacthigh}
Similar to `quantileExact`, this function computes the exact [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
@ -134,7 +134,6 @@ quantileExactHigh(level)(expr)
- `level` — quantile level. Optional parameter. A constant floating-point value from 0 to 1. We recommend using a `level` value in the range `[0.01, 0.99]`. Default value: 0.5. With `level=0.5` the function computes the [median](https://en.wikipedia.org/wiki/Median).
- `expr` — expression over column values that returns data of [numeric types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Returned value**
- Quantile of the specified level.
@ -161,8 +160,111 @@ SELECT quantileExactHigh(number) FROM numbers(10)
└───────────────────────────┘
```
## quantileExactExclusive {#quantileexactexclusive}
Exactly computes the [quantile](https://ru.wikipedia.org/wiki/Квантиль) of a numeric data sequence.
To get the exact result, all passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of passed values. For a small number of values, the function is quite efficient.
This function is equivalent to the Excel function [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba), [type R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
When several `quantileExactExclusive` functions with different `level` values are called in one query, they are computed independently of each other. In such cases, use the [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive) function; the query will run more efficiently.
**Syntax**
``` sql
quantileExactExclusive(level)(expr)
```
**Arguments**
- `expr` — expression over column values that returns data of [numeric types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Parameters**
- `level` — quantile level. Optional parameter. Possible values: (0, 1) — the boundary values are not included. Default value: 0.5. With `level=0.5` the function computes the [median](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md).
**Returned value**
- Quantile of the specified level.
Type:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
CREATE TABLE num AS numbers(1000);
SELECT quantileExactExclusive(0.6)(x) FROM (SELECT number AS x FROM num);
```
Result:
``` text
┌─quantileExactExclusive(0.6)(x)─┐
│ 599.6 │
└────────────────────────────────┘
```
## quantileExactInclusive {#quantileexactinclusive}
Exactly computes the [quantile](https://ru.wikipedia.org/wiki/Квантиль) of a numeric data sequence.
To get the exact result, all passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of passed values. For a small number of values, the function is quite efficient.
This function is equivalent to the Excel function [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed), [type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
When several `quantileExactInclusive` functions with different `level` values are called in one query, they are computed independently of each other. In such cases, use the [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactinclusive) function; the query will run more efficiently.
**Syntax**
``` sql
quantileExactInclusive(level)(expr)
```
**Arguments**
- `expr` — expression over column values that returns data of [numeric types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Parameters**
- `level` — quantile level. Optional parameter. Possible values: [0, 1] — the boundary values are included. Default value: 0.5. With `level=0.5` the function computes the [median](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md).
**Returned value**
- Quantile of the specified level.
Type:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
CREATE TABLE num AS numbers(1000);
SELECT quantileExactInclusive(0.6)(x) FROM (SELECT number AS x FROM num);
```
Result:
``` text
┌─quantileExactInclusive(0.6)(x)─┐
│ 599.4 │
└────────────────────────────────┘
```
**See also**
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -2,9 +2,114 @@
toc_priority: 201
---
# quantiles {#quantiles}
# Functions for multiple quantiles {#quantiles-functions}
Syntax: `quantiles(level1, level2, …)(x)`
## quantiles {#quantiles}
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
Syntax: `quantiles(level1, level2, …)(x)`
All quantile functions have corresponding functions that compute several quantiles at once: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions compute all the quantiles of the listed levels in one pass and return an array of the resulting values.
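For example, a single pass over the data computes all requested levels at once (a minimal sketch):

``` sql
SELECT quantiles(0.25, 0.5, 0.75)(number) AS q
FROM numbers(100);
```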
## quantilesExactExclusive {#quantilesexactexclusive}
Exactly computes the [quantiles](https://ru.wikipedia.org/wiki/Квантиль) of a numeric data sequence.
To get the exact result, all passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of passed values. For a small number of values, the function is quite efficient.
This function is equivalent to the Excel function [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba), [type R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
Works more efficiently with sets of levels than [quantileExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
**Syntax**
``` sql
quantilesExactExclusive(level1, level2, ...)(expr)
```
**Arguments**
- `expr` — expression over column values that returns data of [numeric types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Parameters**
- `level` — quantile levels. Possible values: (0, 1) — the boundary values are not included. [Float](../../../sql-reference/data-types/float.md).
**Returned values**
- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
Type of array values:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
CREATE TABLE num AS numbers(1000);
SELECT quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num);
```
Result:
``` text
┌─quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐
│ [249.25,499.5,749.75,899.9,949.9499999999999,989.99,998.999] │
└─────────────────────────────────────────────────────────────────────┘
```
## quantilesExactInclusive {#quantilesexactinclusive}
Exactly computes the [quantiles](https://ru.wikipedia.org/wiki/Квантиль) of a numeric data sequence.
To get the exact result, all passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of passed values. For a small number of values, the function is quite efficient.
This function is equivalent to the Excel function [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed), [type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
Works more efficiently with sets of levels than [quantileExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactinclusive).
**Syntax**
``` sql
quantilesExactInclusive(level1, level2, ...)(expr)
```
**Arguments**
- `expr` — expression over column values that returns data of [numeric types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Parameters**
- `level` — quantile levels. Possible values: [0, 1] — the boundary values are included. [Float](../../../sql-reference/data-types/float.md).
**Returned values**
- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
Type of array values:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
CREATE TABLE num AS numbers(1000);
SELECT quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num);
```
Result:
``` text
┌─quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐
│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │
└─────────────────────────────────────────────────────────────────────┘
```

View File

@ -0,0 +1,48 @@
---
toc_priority: 55
toc_title: s3Cluster
---
# s3Cluster table function {#s3Cluster-table-function}
Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in the specified cluster. On the initiator node, the function creates a connection to all nodes in the cluster, expands the '*' wildcards in the S3 file path, and dispatches each file dynamically. On a worker node, it asks the initiator for the next task and processes it; this repeats until all tasks are finished.
**Syntax**
``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
```
**Arguments**
- `cluster_name` — name of the cluster that is used to build a set of addresses and connection parameters for remote and local servers.
- `source` — URL of a file or of a set of files. Supports the following wildcards: `*`, `?`, `{'abc','def'}` and `{N..M}`, where `N` and `M` are numbers and `abc` and `def` are strings. For more information, see [Wildcards in path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
- `access_key_id` and `secret_access_key` — keys that specify the credentials to use with the endpoint. Optional.
- `format` — the [format](../../interfaces/formats.md#formats) of the file.
- `structure` — structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
**Returned value**
A table with the specified structure for reading or writing data in the specified file.
**Examples**
Selecting the data from all files of the `cluster_simple` cluster:
``` sql
SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon);
```
Counting the total number of rows in all files of the `cluster_simple` cluster:
``` sql
SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))');
```
!!! warning "Warning"
    If your list of files contains number ranges with leading zeros, use the construction with braces for each digit separately, or use `?`.
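For example, to read hypothetical files `file-000.csv` through `file-999.csv`, expand each digit separately instead of writing `{000..999}` (credentials as in the examples above):

``` sql
SELECT count(*)
FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/file-{0..9}{0..9}{0..9}.csv', 'minio', 'minio123', 'CSV', 'name String, value UInt32');
```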
**See also**
- [S3 table engine](../../engines/table-engines/integrations/s3.md)
- [s3 table function](../../sql-reference/table-functions/s3.md)

View File

@ -154,9 +154,6 @@ def build(args):
    if not args.skip_website:
        website.build_website(args)

    if not args.skip_test_templates:
        test.test_templates(args.website_dir)

    if not args.skip_docs:
        generate_cmake_flags_files()
@ -197,7 +194,6 @@ if __name__ == '__main__':
    arg_parser.add_argument('--skip-blog', action='store_true')
    arg_parser.add_argument('--skip-git-log', action='store_true')
    arg_parser.add_argument('--skip-docs', action='store_true')
    arg_parser.add_argument('--skip-test-templates', action='store_true')
    arg_parser.add_argument('--test-only', action='store_true')
    arg_parser.add_argument('--minify', action='store_true')
    arg_parser.add_argument('--htmlproofer', action='store_true')

View File

@ -7,36 +7,6 @@ import bs4
import subprocess
def test_template(template_path):
    if template_path.endswith('amp.html'):
        # Inline CSS/JS is ok for AMP pages
        return

    logging.debug(f'Running tests for {template_path} template')
    with open(template_path, 'r') as f:
        soup = bs4.BeautifulSoup(
            f,
            features='html.parser'
        )
        for tag in soup.find_all():
            style_attr = tag.attrs.get('style')
            assert not style_attr, f'Inline CSS is prohibited, found {style_attr} in {template_path}'

            if tag.name == 'script':
                if tag.attrs.get('type') == 'application/ld+json':
                    continue
                for content in tag.contents:
                    assert not content, f'Inline JavaScript is prohibited, found "{content}" in {template_path}'


def test_templates(base_dir):
    logging.info('Running tests for templates')
    for root, _, filenames in os.walk(base_dir):
        for filename in filenames:
            if filename.endswith('.html'):
                test_template(os.path.join(root, filename))


def test_single_page(input_path, lang):
    with open(input_path) as f:
        soup = bs4.BeautifulSoup(

View File

@ -54,7 +54,7 @@ SELECT * FROM file_engine_table
## Usage in clickhouse-local {#zai-clickhouse-local-zhong-de-shi-yong}
When using [ツ环板-ョツ嘉ッツ偲](../../../engines/table-engines/special/file.md), the File engine accepts a file path argument in addition to `Format`. Standard input/output streams can be specified using numbers or human-readable names, such as `0` or `stdin`, `1` or `stdout`.
When using [clickhouse-local](../../../operations/utilities/clickhouse-local.md), the File engine accepts a file path argument in addition to `Format`. Standard input/output streams can be specified using numbers or names, such as `0` or `stdin`, `1` or `stdout`.
**For example:**
``` bash

View File

@ -796,7 +796,7 @@ ClickHouse使用ZooKeeper存储复制表副本的元数据。 如果未使用复
The `index` attribute specifies the node order when trying to connect to the ZooKeeper cluster.
- `session_timeout` — Maximum timeout for the client session in milliseconds.
- `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) 隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄.陇.貌路.隆拢脳枚脢虏.麓脢for脱 Optional.
- `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for the znodes used by the ClickHouse server. Optional.
- `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional.
**Configuration example**
@ -823,7 +823,7 @@ ClickHouse使用ZooKeeper存储复制表副本的元数据。 如果未使用复
**See also**
- [Replication](../../engines/table-engines/mergetree-family/replication.md)
- [ZooKeeper programmer guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html)
- [ZooKeeper administration guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html)
## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}

View File

@ -16,7 +16,7 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
**Example**
``` sql
SELECT * FROM system.data_type_families WHERE alias_to = 'String'
SELECT * FROM system.data_type_families WHERE alias_to = 'String';
```
``` text

View File

@ -76,14 +76,14 @@ ClickHouse不会自动从表中删除数据。更多详情请看 [introduction](
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version.
- `http_method` (UInt8) — HTTP method that initiated the query. Possible values:
- 0 — the query was launched from the TCP interface.
- 1 — `GET`
- 1 — `GET`
- 2 — `POST`
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — the `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — the "quota key" set in the [quotas](../../operations/quotas.md) configuration (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — numbers of the threads participating in query execution.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — counters that measure different metrics. Their descriptions can be found in the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — values of the metrics listed in the `ProfileEvents.Names` column.
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — counters that measure different metrics. Their descriptions can be found in the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — names of settings that were changed when the client ran the query. To enable logging of changes to settings, set the `log_query_settings` parameter to 1.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — IDs of the threads participating in query execution.
- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — names of settings that were changed when the client ran the query. To enable logging of changes to settings, set the `log_query_settings` parameter to 1.
- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — values of the settings listed in the `Settings.Names` column.
**Example**
@ -133,10 +133,8 @@ http_user_agent:
quota_key:
revision: 54434
thread_ids: []
ProfileEvents.Names: []
ProfileEvents.Values: []
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage']
Settings.Values: ['0','random','1','10000000000']
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'}
```
**See also**

View File

@ -61,8 +61,7 @@ ClickHouse不会自动从表中删除数据。 看 [导言](../../operations/sys
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — the `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — the "quota key" specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — counters that measure different metrics for this thread. Their descriptions can be found in the [system.events](#system_tables-events) table.
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — values of the metrics for this thread listed in the `ProfileEvents.Names` column.
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — counters that measure different metrics for this thread. Their descriptions can be found in the [system.events](#system_tables-events) table.
**Example**
@ -108,8 +107,7 @@ http_method: 0
http_user_agent:
quota_key:
revision: 54434
ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds']
ProfileEvents.Values: [1,97,81,5,81]
ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
...
```

View File

@ -50,11 +50,15 @@ option (ENABLE_CLICKHOUSE_GIT_IMPORT "A tool to analyze Git repositories"
option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_KEEPER_CONVERTER "Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot" ${ENABLE_CLICKHOUSE_ALL})
if (NOT USE_NURAFT)
# RECONFIGURE_MESSAGE_LEVEL should not be used here,
# since USE_NURAFT is set to OFF for FreeBSD and Darwin.
message (STATUS "clickhouse-keeper will not be built (lack of NuRaft)")
message (STATUS "clickhouse-keeper and clickhouse-keeper-converter will not be built (lack of NuRaft)")
set(ENABLE_CLICKHOUSE_KEEPER OFF)
set(ENABLE_CLICKHOUSE_KEEPER_CONVERTER OFF)
endif()
if (CLICKHOUSE_SPLIT_BINARY)
@ -150,6 +154,12 @@ else()
message(STATUS "ClickHouse keeper mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
message(STATUS "ClickHouse keeper-converter mode: ON")
else()
message(STATUS "ClickHouse keeper-converter mode: OFF")
endif()
if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES))
set(CLICKHOUSE_ONE_SHARED ON)
endif()
@ -222,6 +232,10 @@ if (ENABLE_CLICKHOUSE_KEEPER)
add_subdirectory (keeper)
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
add_subdirectory (keeper-converter)
endif()
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
add_subdirectory (odbc-bridge)
endif ()
@ -231,9 +245,51 @@ if (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE)
endif ()
if (CLICKHOUSE_ONE_SHARED)
add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${CLICKHOUSE_KEEPER_SOURCES})
target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK} ${CLICKHOUSE_KEEPER_LINK})
target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_GIT_IMPORT_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE} ${CLICKHOUSE_KEEPER_INCLUDE})
add_library(clickhouse-lib SHARED
${CLICKHOUSE_SERVER_SOURCES}
${CLICKHOUSE_CLIENT_SOURCES}
${CLICKHOUSE_LOCAL_SOURCES}
${CLICKHOUSE_BENCHMARK_SOURCES}
${CLICKHOUSE_COPIER_SOURCES}
${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES}
${CLICKHOUSE_COMPRESSOR_SOURCES}
${CLICKHOUSE_FORMAT_SOURCES}
${CLICKHOUSE_OBFUSCATOR_SOURCES}
${CLICKHOUSE_GIT_IMPORT_SOURCES}
${CLICKHOUSE_ODBC_BRIDGE_SOURCES}
${CLICKHOUSE_KEEPER_SOURCES}
${CLICKHOUSE_KEEPER_CONVERTER_SOURCES})
target_link_libraries(clickhouse-lib
${CLICKHOUSE_SERVER_LINK}
${CLICKHOUSE_CLIENT_LINK}
${CLICKHOUSE_LOCAL_LINK}
${CLICKHOUSE_BENCHMARK_LINK}
${CLICKHOUSE_COPIER_LINK}
${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK}
${CLICKHOUSE_COMPRESSOR_LINK}
${CLICKHOUSE_FORMAT_LINK}
${CLICKHOUSE_OBFUSCATOR_LINK}
${CLICKHOUSE_GIT_IMPORT_LINK}
${CLICKHOUSE_ODBC_BRIDGE_LINK}
${CLICKHOUSE_KEEPER_LINK}
${CLICKHOUSE_KEEPER_CONVERTER_LINK})
target_include_directories(clickhouse-lib
${CLICKHOUSE_SERVER_INCLUDE}
${CLICKHOUSE_CLIENT_INCLUDE}
${CLICKHOUSE_LOCAL_INCLUDE}
${CLICKHOUSE_BENCHMARK_INCLUDE}
${CLICKHOUSE_COPIER_INCLUDE}
${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE}
${CLICKHOUSE_COMPRESSOR_INCLUDE}
${CLICKHOUSE_FORMAT_INCLUDE}
${CLICKHOUSE_OBFUSCATOR_INCLUDE}
${CLICKHOUSE_GIT_IMPORT_INCLUDE}
${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}
${CLICKHOUSE_KEEPER_INCLUDE}
${CLICKHOUSE_KEEPER_CONVERTER_INCLUDE})
set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "")
install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
endif()
@ -264,6 +320,10 @@ if (CLICKHOUSE_SPLIT_BINARY)
list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-keeper)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-keeper-converter)
endif ()
set_target_properties(${CLICKHOUSE_ALL_TARGETS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_ALL_TARGETS})
@ -314,6 +374,9 @@ else ()
if (ENABLE_CLICKHOUSE_KEEPER)
clickhouse_target_link_split_lib(clickhouse keeper)
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
clickhouse_target_link_split_lib(clickhouse keeper-converter)
endif()
if (ENABLE_CLICKHOUSE_INSTALL)
clickhouse_target_link_split_lib(clickhouse install)
endif ()
@ -374,6 +437,11 @@ else ()
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
add_custom_target (clickhouse-keeper-converter ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-converter DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-converter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
endif ()
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)

View File

@ -1380,9 +1380,19 @@ private:
have_error = true;
}
const auto * exception = server_exception ? server_exception.get() : client_exception.get();
// Sometimes you may get TOO_DEEP_RECURSION from the server,
// and TOO_DEEP_RECURSION should not fail the fuzzer check.
if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION)
{
have_error = false;
server_exception.reset();
client_exception.reset();
return true;
}
if (have_error)
{
const auto * exception = server_exception ? server_exception.get() : client_exception.get();
fmt::print(stderr, "Error on processing query '{}': {}\n", ast_to_process->formatForErrorMessage(), exception->message());
// Try to reconnect after errors, for two reasons:

View File

@ -17,3 +17,4 @@
#cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE
#cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE
#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER
#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CONVERTER

View File

@ -0,0 +1,9 @@
set (CLICKHOUSE_KEEPER_CONVERTER_SOURCES KeeperConverter.cpp)
set (CLICKHOUSE_KEEPER_CONVERTER_LINK
PRIVATE
boost::program_options
dbms
)
clickhouse_program_add(keeper-converter)

View File

@ -0,0 +1,61 @@
#include <iostream>
#include <optional>
#include <boost/program_options.hpp>
#include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/ZooKeeperDataReader.h>
#include <Common/TerminalSize.h>
#include <Poco/ConsoleChannel.h>
#include <Poco/AutoPtr.h>
#include <Poco/Logger.h>
#include <common/logger_useful.h>
int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
{
using namespace DB;
namespace po = boost::program_options;
po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("help,h", "produce help message")
("zookeeper-logs-dir", po::value<std::string>(), "Path to directory with ZooKeeper logs")
("zookeeper-snapshots-dir", po::value<std::string>(), "Path to directory with ZooKeeper snapshots")
("output-dir", po::value<std::string>(), "Directory to place output clickhouse-keeper snapshot")
;
po::variables_map options;
po::store(po::command_line_parser(argc, argv).options(desc).run(), options);
Poco::AutoPtr<Poco::ConsoleChannel> console_channel(new Poco::ConsoleChannel);
Poco::Logger * logger = &Poco::Logger::get("KeeperConverter");
logger->setChannel(console_channel);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " --zookeeper-logs-dir /var/lib/zookeeper/data/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/data/version-2 --output-dir /var/lib/clickhouse/coordination/snapshots" << std::endl;
std::cout << desc << std::endl;
return 0;
}
try
{
DB::KeeperStorage storage(500, "");
DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as<std::string>(), logger);
DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as<std::string>(), logger);
DB::SnapshotMetadataPtr snapshot_meta = std::make_shared<DB::SnapshotMetadata>(storage.getZXID(), 1, std::make_shared<nuraft::cluster_config>());
DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta);
DB::KeeperSnapshotManager manager(options["output-dir"].as<std::string>(), 1);
auto snp = manager.serializeSnapshotToBuffer(snapshot);
auto path = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID());
std::cout << "Snapshot serialized to path:" << path << std::endl;
}
catch (...)
{
std::cerr << getCurrentExceptionMessage(true) << '\n';
return getCurrentExceptionCode();
}
return 0;
}

View File

@ -0,0 +1,2 @@
int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
int main(int argc_, char ** argv_) { return mainEntryClickHouseKeeperConverter(argc_, argv_); }

View File

@ -59,6 +59,9 @@ int mainEntryClickHouseGitImport(int argc, char ** argv);
#if ENABLE_CLICKHOUSE_KEEPER
int mainEntryClickHouseKeeper(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_INSTALL
int mainEntryClickHouseInstall(int argc, char ** argv);
int mainEntryClickHouseStart(int argc, char ** argv);
@ -119,6 +122,9 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
#if ENABLE_CLICKHOUSE_KEEPER
{"keeper", mainEntryClickHouseKeeper},
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
{"keeper-converter", mainEntryClickHouseKeeperConverter},
#endif
#if ENABLE_CLICKHOUSE_INSTALL
{"install", mainEntryClickHouseInstall},
{"start", mainEntryClickHouseStart},

View File

@ -115,6 +115,8 @@ void ODBCBlockInputStream::insertValue(
assert_cast<ColumnFloat64 &>(column).insertValue(row.get<double>(idx));
break;
case ValueType::vtFixedString:[[fallthrough]];
case ValueType::vtEnum8:
case ValueType::vtEnum16:
case ValueType::vtString:
assert_cast<ColumnString &>(column).insert(row.get<std::string>(idx));
break;

View File

@ -239,6 +239,53 @@ void ZooKeeperListResponse::writeImpl(WriteBuffer & out) const
Coordination::write(stat, out);
}
void ZooKeeperSetACLRequest::writeImpl(WriteBuffer & out) const
{
Coordination::write(path, out);
Coordination::write(acls, out);
Coordination::write(version, out);
}
void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in)
{
Coordination::read(path, in);
Coordination::read(acls, in);
Coordination::read(version, in);
}
void ZooKeeperSetACLResponse::writeImpl(WriteBuffer & out) const
{
Coordination::write(stat, out);
}
void ZooKeeperSetACLResponse::readImpl(ReadBuffer & in)
{
Coordination::read(stat, in);
}
void ZooKeeperGetACLRequest::readImpl(ReadBuffer & in)
{
Coordination::read(path, in);
}
void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const
{
Coordination::write(path, out);
}
void ZooKeeperGetACLResponse::writeImpl(WriteBuffer & out) const
{
Coordination::write(acl, out);
Coordination::write(stat, out);
}
void ZooKeeperGetACLResponse::readImpl(ReadBuffer & in)
{
Coordination::read(acl, in);
Coordination::read(stat, in);
}
void ZooKeeperCheckRequest::writeImpl(WriteBuffer & out) const
{
Coordination::write(path, out);
@ -454,6 +501,8 @@ ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return std::ma
ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const { return std::make_shared<ZooKeeperCheckResponse>(); }
ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const { return std::make_shared<ZooKeeperMultiResponse>(requests); }
ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return std::make_shared<ZooKeeperCloseResponse>(); }
ZooKeeperResponsePtr ZooKeeperSetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperSetACLResponse>(); }
ZooKeeperResponsePtr ZooKeeperGetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperGetACLResponse>(); }
void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const
{
@ -545,6 +594,8 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory()
registerZooKeeperRequest<OpNum::Check, ZooKeeperCheckRequest>(*this);
registerZooKeeperRequest<OpNum::Multi, ZooKeeperMultiRequest>(*this);
registerZooKeeperRequest<OpNum::SessionID, ZooKeeperSessionIDRequest>(*this);
registerZooKeeperRequest<OpNum::GetACL, ZooKeeperGetACLRequest>(*this);
registerZooKeeperRequest<OpNum::SetACL, ZooKeeperSetACLRequest>(*this);
}
}

View File

@ -183,6 +183,9 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest
bool isReadRequest() const override { return false; }
size_t bytesSize() const override { return CreateRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); }
/// During recovery from log we don't rehash ACLs
bool need_to_hash_acls = true;
};
struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse
@ -350,6 +353,48 @@ struct ZooKeeperErrorResponse final : ErrorResponse, ZooKeeperResponse
size_t bytesSize() const override { return ErrorResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
};
struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest
{
OpNum getOpNum() const override { return OpNum::SetACL; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; }
size_t bytesSize() const override { return SetACLRequest::bytesSize() + sizeof(xid); }
bool need_to_hash_acls = true;
};
struct ZooKeeperSetACLResponse final : SetACLResponse, ZooKeeperResponse
{
void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override;
OpNum getOpNum() const override { return OpNum::SetACL; }
size_t bytesSize() const override { return SetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
};
struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest
{
OpNum getOpNum() const override { return OpNum::GetACL; }
void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override;
ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return true; }
size_t bytesSize() const override { return GetACLRequest::bytesSize() + sizeof(xid); }
};
struct ZooKeeperGetACLResponse final : GetACLResponse, ZooKeeperResponse
{
void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override;
OpNum getOpNum() const override { return OpNum::GetACL; }
size_t bytesSize() const override { return GetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
};
struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
{
OpNum getOpNum() const override { return OpNum::Multi; }

View File

@ -22,6 +22,8 @@ static const std::unordered_set<int32_t> VALID_OPERATIONS =
static_cast<int32_t>(OpNum::Multi),
static_cast<int32_t>(OpNum::Auth),
static_cast<int32_t>(OpNum::SessionID),
static_cast<int32_t>(OpNum::SetACL),
static_cast<int32_t>(OpNum::GetACL),
};
std::string toString(OpNum op_num)
@ -58,6 +60,10 @@ std::string toString(OpNum op_num)
return "Auth";
case OpNum::SessionID:
return "SessionID";
case OpNum::SetACL:
return "SetACL";
case OpNum::GetACL:
return "GetACL";
}
int32_t raw_op = static_cast<int32_t>(op_num);
throw Exception("Operation " + std::to_string(raw_op) + " is unknown", Error::ZUNIMPLEMENTED);

View File

@ -23,6 +23,8 @@ enum class OpNum : int32_t
Exists = 3,
Get = 4,
Set = 5,
GetACL = 6,
SetACL = 7,
SimpleList = 8,
Sync = 9,
Heartbeat = 11,

View File

@ -9,6 +9,14 @@ void write(size_t x, WriteBuffer & out)
writeBinary(x, out);
}
#ifdef __APPLE__
void write(uint64_t x, WriteBuffer & out)
{
x = __builtin_bswap64(x);
writeBinary(x, out);
}
#endif
void write(int64_t x, WriteBuffer & out)
{
x = __builtin_bswap64(x);
@ -63,6 +71,14 @@ void write(const Error & x, WriteBuffer & out)
write(static_cast<int32_t>(x), out);
}
#ifdef __APPLE__
void read(uint64_t & x, ReadBuffer & in)
{
readBinary(x, in);
x = __builtin_bswap64(x);
}
#endif
void read(size_t & x, ReadBuffer & in)
{
readBinary(x, in);

View File

@ -14,6 +14,12 @@ namespace Coordination
using namespace DB;
void write(size_t x, WriteBuffer & out);
/// uint64_t != size_t on darwin
#ifdef __APPLE__
void write(uint64_t x, WriteBuffer & out);
#endif
void write(int64_t x, WriteBuffer & out);
void write(int32_t x, WriteBuffer & out);
void write(OpNum x, WriteBuffer & out);
@ -39,6 +45,9 @@ void write(const std::vector<T> & arr, WriteBuffer & out)
}
void read(size_t & x, ReadBuffer & in);
#ifdef __APPLE__
void read(uint64_t & x, ReadBuffer & in);
#endif
void read(int64_t & x, ReadBuffer & in);
void read(int32_t & x, ReadBuffer & in);
void read(OpNum & x, ReadBuffer & in);

View File

@ -2,6 +2,8 @@
// .h autogenerated by cmake!
// NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
// only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
#cmakedefine VERSION_REVISION @VERSION_REVISION@
#cmakedefine VERSION_NAME "@VERSION_NAME@"
#define DBMS_NAME VERSION_NAME

View File

@ -99,6 +99,10 @@ namespace
node.acl_id = acl_map.convertACLs(acls);
}
/// A strange ACL id sometimes appears during deserialization from ZooKeeper
if (node.acl_id == std::numeric_limits<uint64_t>::max())
node.acl_id = 0;
acl_map.addUsage(node.acl_id);
readBinary(node.is_sequental, in);
@ -217,12 +221,14 @@ SnapshotMetadataPtr KeeperStorageSnapshot::deserialize(KeeperStorage & storage,
if (current_version >= SnapshotVersion::V1)
{
size_t acls_map_size;
readBinary(acls_map_size, in);
size_t current_map_size = 0;
while (current_map_size < acls_map_size)
{
uint64_t acl_id;
readBinary(acl_id, in);
size_t acls_size;
readBinary(acls_size, in);
Coordination::ACLs acls;
@ -345,11 +351,23 @@ KeeperSnapshotManager::KeeperSnapshotManager(const std::string & snapshots_path_
for (const auto & p : fs::directory_iterator(snapshots_path))
{
if (startsWith(p.path(), "tmp_")) /// Unfinished tmp files
const auto & path = p.path();
if (!path.has_filename())
continue;
if (startsWith(path.filename(), "tmp_")) /// Unfinished tmp files
{
std::filesystem::remove(p);
continue;
}
/// Not a snapshot file
if (!startsWith(path.filename(), "snapshot_"))
{
continue;
}
size_t snapshot_up_to = getSnapshotPathUpToLogIdx(p.path());
existing_snapshots[snapshot_up_to] = p.path();
}

View File

@ -57,7 +57,7 @@ static String generateDigest(const String & userdata)
{
std::vector<String> user_password;
boost::split(user_password, userdata, [](char c) { return c == ':'; });
return user_password[0] + ":" + base64Encode(getSHA1(user_password[1]));
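/// Note: ZooKeeper's "digest" scheme hashes the whole "user:password" string, not only the password part.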
return user_password[0] + ":" + base64Encode(getSHA1(userdata));
}
static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector<KeeperStorage::AuthID> & session_auths)
@ -77,8 +77,10 @@ static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, c
return true;
for (const auto & session_auth : session_auths)
{
if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id)
return true;
}
}
}
@ -88,7 +90,8 @@ static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, c
static bool fixupACL(
const std::vector<Coordination::ACL> & request_acls,
const std::vector<KeeperStorage::AuthID> & current_ids,
std::vector<Coordination::ACL> & result_acls)
std::vector<Coordination::ACL> & result_acls,
bool hash_acls)
{
if (request_acls.empty())
return true;
@ -121,7 +124,8 @@ static bool fixupACL(
return false;
valid_found = true;
new_acl.id = generateDigest(new_acl.id);
if (hash_acls)
new_acl.id = generateDigest(new_acl.id);
result_acls.push_back(new_acl);
}
}
@ -263,12 +267,13 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
}
else
{
auto & session_auth_ids = storage.session_and_auth[session_id];
KeeperStorage::Node created_node;
Coordination::ACLs node_acls;
if (!fixupACL(request.acls, session_auth_ids, node_acls))
if (!fixupACL(request.acls, session_auth_ids, node_acls, request.need_to_hash_acls))
{
response.error = Coordination::Error::ZINVALIDACL;
return {response_ptr, {}};
@ -280,6 +285,7 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
created_node.acl_id = acl_id;
created_node.stat.czxid = zxid;
created_node.stat.mzxid = zxid;
created_node.stat.pzxid = zxid;
created_node.stat.ctime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1);
created_node.stat.mtime = created_node.stat.ctime;
created_node.stat.numChildren = 0;
@ -302,12 +308,15 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
}
auto child_path = getBaseName(path_created);
container.updateValue(parent_path, [child_path] (KeeperStorage::Node & parent)
int64_t prev_parent_zxid;
container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid] (KeeperStorage::Node & parent)
{
/// Increment sequential number even if node is not sequential
++parent.seq_num;
parent.children.insert(child_path);
++parent.stat.cversion;
prev_parent_zxid = parent.stat.pzxid;
parent.stat.pzxid = zxid;
++parent.stat.numChildren;
});
@ -317,7 +326,7 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
if (request.is_ephemeral)
ephemerals[session_id].emplace(path_created);
undo = [&storage, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id]
undo = [&storage, prev_parent_zxid, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id]
{
storage.container.erase(path_created);
storage.acl_map.removeUsage(acl_id);
@ -325,11 +334,12 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
if (is_ephemeral)
storage.ephemerals[session_id].erase(path_created);
storage.container.updateValue(parent_path, [child_path] (KeeperStorage::Node & undo_parent)
storage.container.updateValue(parent_path, [child_path, prev_parent_zxid] (KeeperStorage::Node & undo_parent)
{
--undo_parent.stat.cversion;
--undo_parent.stat.numChildren;
--undo_parent.seq_num;
undo_parent.stat.pzxid = prev_parent_zxid;
undo_parent.children.erase(child_path);
});
};
@ -536,6 +546,7 @@ struct KeeperStorageSetRequest final : public KeeperStorageRequest
}
else if (request.version == -1 || request.version == it->value.stat.version)
{
auto prev_node = it->value;
auto itr = container.updateValue(request.path, [zxid, request] (KeeperStorage::Node & value)
@ -667,6 +678,111 @@ struct KeeperStorageCheckRequest final : public KeeperStorageRequest
}
};
struct KeeperStorageSetACLRequest final : public KeeperStorageRequest
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
{
auto & container = storage.container;
auto it = container.find(zk_request->getPath());
if (it == container.end())
return true;
const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id);
if (node_acls.empty())
return true;
const auto & session_auths = storage.session_and_auth[session_id];
return checkACL(Coordination::ACL::Admin, node_acls, session_auths);
}
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id) const override
{
auto & container = storage.container;
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperSetACLResponse & response = dynamic_cast<Coordination::ZooKeeperSetACLResponse &>(*response_ptr);
Coordination::ZooKeeperSetACLRequest & request = dynamic_cast<Coordination::ZooKeeperSetACLRequest &>(*zk_request);
auto it = container.find(request.path);
if (it == container.end())
{
response.error = Coordination::Error::ZNONODE;
}
else if (request.version != -1 && request.version != it->value.stat.aversion)
{
response.error = Coordination::Error::ZBADVERSION;
}
else
{
auto & session_auth_ids = storage.session_and_auth[session_id];
Coordination::ACLs node_acls;
if (!fixupACL(request.acls, session_auth_ids, node_acls, request.need_to_hash_acls))
{
response.error = Coordination::Error::ZINVALIDACL;
return {response_ptr, {}};
}
uint64_t acl_id = storage.acl_map.convertACLs(node_acls);
storage.acl_map.addUsage(acl_id);
storage.container.updateValue(request.path, [acl_id] (KeeperStorage::Node & node)
{
node.acl_id = acl_id;
++node.stat.aversion;
});
response.stat = it->value.stat;
response.error = Coordination::Error::ZOK;
}
/// It cannot be used inside a multi-transaction?
return { response_ptr, {} };
}
};
struct KeeperStorageGetACLRequest final : public KeeperStorageRequest
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
{
auto & container = storage.container;
auto it = container.find(zk_request->getPath());
if (it == container.end())
return true;
const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id);
if (node_acls.empty())
return true;
const auto & session_auths = storage.session_and_auth[session_id];
/// Curiously, GetACL requires more permissions than SetACL...
return checkACL(Coordination::ACL::Admin | Coordination::ACL::Read, node_acls, session_auths);
}
using KeeperStorageRequest::KeeperStorageRequest;
std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/) const override
{
Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
Coordination::ZooKeeperGetACLResponse & response = dynamic_cast<Coordination::ZooKeeperGetACLResponse &>(*response_ptr);
Coordination::ZooKeeperGetACLRequest & request = dynamic_cast<Coordination::ZooKeeperGetACLRequest &>(*zk_request);
auto & container = storage.container;
auto it = container.find(request.path);
if (it == container.end())
{
response.error = Coordination::Error::ZNONODE;
}
else
{
response.stat = it->value.stat;
response.acl = storage.acl_map.convertNumber(it->value.acl_id);
}
return {response_ptr, {}};
}
};
struct KeeperStorageMultiRequest final : public KeeperStorageRequest
{
bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
@ -893,10 +1009,12 @@ KeeperWrapperFactory::KeeperWrapperFactory()
registerKeeperRequestWrapper<Coordination::OpNum::SimpleList, KeeperStorageListRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Check, KeeperStorageCheckRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::Multi, KeeperStorageMultiRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::SetACL, KeeperStorageSetACLRequest>(*this);
registerKeeperRequestWrapper<Coordination::OpNum::GetACL, KeeperStorageGetACLRequest>(*this);
}
KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, std::optional<int64_t> new_last_zxid)
KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, std::optional<int64_t> new_last_zxid, bool check_acl)
{
KeeperStorage::ResponsesForSessions results;
if (new_last_zxid)
@ -954,7 +1072,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
KeeperStorageRequestPtr storage_request = KeeperWrapperFactory::instance().get(zk_request);
Coordination::ZooKeeperResponsePtr response;
if (!storage_request->checkAuth(*this, session_id))
if (check_acl && !storage_request->checkAuth(*this, session_id))
{
response = zk_request->makeResponse();
/// Original ZooKeeper always throws no auth, even when user provided some credentials

View File

@ -116,7 +116,7 @@ public:
session_expiry_queue.update(session_id, session_timeout_ms);
}
ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, std::optional<int64_t> new_last_zxid);
ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, std::optional<int64_t> new_last_zxid, bool check_acl = true);
void finalize();

View File

@ -0,0 +1,555 @@
#include <Coordination/ZooKeeperDataReader.h>
#include <filesystem>
#include <cstdlib>
#include <IO/ReadHelpers.h>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <IO/ReadBufferFromFile.h>
#include <string>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int CORRUPTED_DATA;
}
static String parentPath(const String & path)
{
auto rslash_pos = path.rfind('/');
if (rslash_pos > 0)
return path.substr(0, rslash_pos);
return "/";
}
static std::string getBaseName(const String & path)
{
size_t basename_start = path.rfind('/');
return std::string{&path[basename_start + 1], path.length() - basename_start - 1};
}
int64_t getZxidFromName(const std::string & filename)
{
std::filesystem::path path(filename);
std::string extension = path.extension();
char * end;
int64_t zxid = std::strtoul(extension.data() + 1, &end, 16);
return zxid;
}
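As a quick illustration of the helpers above, a hedged sketch of their expected results (assumes <cassert>; not part of the original file):
static void illustratePathHelpers()
{
    assert(parentPath("/zk/node") == "/zk");
    assert(parentPath("/zk") == "/");                  /// rslash_pos == 0 falls back to the root
    assert(getBaseName("/zk/node") == "node");
    assert(getZxidFromName("snapshot.1a2") == 0x1a2);  /// the file extension is parsed as hex
}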
void deserializeSnapshotMagic(ReadBuffer & in)
{
int32_t magic_header, version;
int64_t dbid;
Coordination::read(magic_header, in);
Coordination::read(version, in);
if (version != 2)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot deserialize ZooKeeper data other than version 2, got version {}", version);
Coordination::read(dbid, in);
static constexpr int32_t SNP_HEADER = 1514885966; /// "ZKSN"
if (magic_header != SNP_HEADER)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header);
}
int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in)
{
int32_t count;
Coordination::read(count, in);
int64_t max_session_id = 0;
while (count > 0)
{
int64_t session_id;
int32_t timeout;
Coordination::read(session_id, in);
Coordination::read(timeout, in);
storage.addSessionID(session_id, timeout);
max_session_id = std::max(session_id, max_session_id);
count--;
}
return max_session_id;
}
void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in)
{
int32_t count;
Coordination::read(count, in);
while (count > 0)
{
int64_t map_index;
Coordination::read(map_index, in);
Coordination::ACLs acls;
int32_t acls_len;
Coordination::read(acls_len, in);
while (acls_len > 0)
{
Coordination::ACL acl;
Coordination::read(acl.permissions, in);
Coordination::read(acl.scheme, in);
Coordination::read(acl.id, in);
acls.push_back(acl);
acls_len--;
}
storage.acl_map.addMapping(map_index, acls);
count--;
}
}
int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * log)
{
int64_t max_zxid = 0;
std::string path;
Coordination::read(path, in);
size_t count = 0;
while (path != "/")
{
KeeperStorage::Node node{};
Coordination::read(node.data, in);
Coordination::read(node.acl_id, in);
/// Deserialize stat
Coordination::read(node.stat.czxid, in);
Coordination::read(node.stat.mzxid, in);
/// For some reason the ZXID specified in the filename can be smaller
/// than the actual ZXID from the nodes. In this case we will use the ZXID from the nodes.
max_zxid = std::max(max_zxid, node.stat.mzxid);
Coordination::read(node.stat.ctime, in);
Coordination::read(node.stat.mtime, in);
Coordination::read(node.stat.version, in);
Coordination::read(node.stat.cversion, in);
Coordination::read(node.stat.aversion, in);
Coordination::read(node.stat.ephemeralOwner, in);
Coordination::read(node.stat.pzxid, in);
if (!path.empty())
{
node.stat.dataLength = node.data.length();
node.seq_num = node.stat.cversion;
storage.container.insertOrReplace(path, node);
if (node.stat.ephemeralOwner != 0)
storage.ephemerals[node.stat.ephemeralOwner].insert(path);
storage.acl_map.addUsage(node.acl_id);
}
Coordination::read(path, in);
count++;
if (count % 1000 == 0)
LOG_INFO(log, "Deserialized nodes from snapshot: {}", count);
}
for (const auto & itr : storage.container)
{
if (itr.key != "/")
{
auto parent_path = parentPath(itr.key);
storage.container.updateValue(parent_path, [&path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); value.stat.numChildren++; });
}
}
return max_zxid;
}
void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, Poco::Logger * log)
{
LOG_INFO(log, "Deserializing storage snapshot {}", snapshot_path);
int64_t zxid = getZxidFromName(snapshot_path);
ReadBufferFromFile reader(snapshot_path);
deserializeSnapshotMagic(reader);
LOG_INFO(log, "Magic deserialized, looks OK");
auto max_session_id = deserializeSessionAndTimeout(storage, reader);
LOG_INFO(log, "Sessions and timeouts deserialized");
storage.session_id_counter = max_session_id;
deserializeACLMap(storage, reader);
LOG_INFO(log, "ACLs deserialized");
LOG_INFO(log, "Deserializing data from snapshot");
int64_t zxid_from_nodes = deserializeStorageData(storage, reader, log);
storage.zxid = std::max(zxid, zxid_from_nodes);
LOG_INFO(log, "Finished, snapshot ZXID {}", storage.zxid);
}
void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, Poco::Logger * log)
{
namespace fs = std::filesystem;
std::map<int64_t, std::string> existing_snapshots;
for (const auto & p : fs::directory_iterator(path))
{
const auto & log_path = p.path();
if (!log_path.has_filename() || !startsWith(log_path.filename(), "snapshot."))
continue;
int64_t zxid = getZxidFromName(log_path);
existing_snapshots[zxid] = p.path();
}
LOG_INFO(log, "Totally have {} snapshots, will use latest", existing_snapshots.size());
/// Deserialize only from the latest snapshot
if (!existing_snapshots.empty())
deserializeKeeperStorageFromSnapshot(storage, existing_snapshots.rbegin()->second, log);
else
throw Exception(ErrorCodes::CORRUPTED_DATA, "No snapshots found on path {}. At least one snapshot must exist.", path);
}
void deserializeLogMagic(ReadBuffer & in)
{
int32_t magic_header, version;
int64_t dbid;
Coordination::read(magic_header, in);
Coordination::read(version, in);
Coordination::read(dbid, in);
static constexpr int32_t LOG_HEADER = 1514884167; /// "ZKLG"
if (magic_header != LOG_HEADER)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Incorrect magic header in file, expected {}, got {}", LOG_HEADER, magic_header);
if (version != 2)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot deserialize ZooKeeper data other than version 2, got version {}", version);
}
/// For some reason ZooKeeper stores slightly different records in the log than
/// in requests. For example:
/// class CreateTxn {
/// ustring path;
/// buffer data;
/// vector<org.apache.zookeeper.data.ACL> acl;
/// boolean ephemeral;
/// int parentCVersion;
/// }
/// But Create Request:
/// class CreateRequest {
/// ustring path;
/// buffer data;
/// vector<org.apache.zookeeper.data.ACL> acl;
/// int flags;
/// }
///
/// However, the type is the same OpNum...
///
/// Also there is a comment in ZooKeeper's code base about log structure, but
/// it's almost completely incorrect. Actual ZooKeeper log structure starting from version 3.6+:
///
/// Magic Header: "ZKLG" + 4 byte version + 8 byte dbid.
/// After that come the serialized transactions, in the following format:
/// 8 byte checksum
/// 4 byte transaction length
/// 8 byte session_id (author of the transaction)
/// 4 byte user XID
/// 8 byte ZXID
/// 8 byte transaction time
/// 4 byte transaction type (OpNum)
/// [Transaction body depending on transaction type]
/// 12 bytes tail (starting from 3.6+): 4 byte version + 8 byte checksum of data tree
/// 1 byte -- 0x42
///
/// Transaction body is quite simple for all kinds of transactions except
/// Multitransactions. Their structure is the following:
/// 4 byte sub transactions count
/// 4 byte sub transaction length
/// [Transaction body depending on transaction type]
/// and so on
///
/// Gotchas:
///
/// 1) For some reason ZooKeeper stores ErrorTxns in the log. This is
/// reasonable for Multitransactions, but why it stores standalone errors
/// is not clear.
///
/// 2) For some reason there is no 12-byte tail (version + checksum of
/// the tree) after a standalone ErrorTxn.
///
/// 3) The strangest thing: in one of our production logs (about 1.2GB in
/// size) we found a Multitransaction with two sub-transactions: Error1
/// and Error2, both with OpCode -1. A normal Error transaction is 4 bytes long
/// (for the error code), but Error1 was 550 bytes long. Even more
/// strangely, these 550 bytes were obviously part of a Create transaction,
/// but the operation code was -1. We added debug prints to the original
/// ZooKeeper (3.6.3) and found that it just reads the 550 bytes of this "Error"
/// transaction, takes the first 4 bytes as an error code (it was 79, a
/// non-existent code) and skips all remaining 546 bytes. NOTE: this looks like a bug
/// in ZooKeeper.
///
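To make the layout above concrete, a hedged C++ sketch of the per-record header as deserializeTxn below reads it (the struct name and grouping are illustrative; the fields are actually read one by one with Coordination::read):
struct TxnRecordHeader
{
    int64_t checksum;    /// 0 means zero padding up to the end of the file
    int32_t txn_len;     /// length of everything from session_id through the transaction body
    int64_t session_id;  /// author of the transaction
    int32_t xid;         /// user XID
    int64_t zxid;
    int64_t time;        /// transaction time
    int32_t type;        /// OpNum; the body that follows depends on it
};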
namespace
{
Coordination::ZooKeeperRequestPtr deserializeCreateTxn(ReadBuffer & in)
{
std::shared_ptr<Coordination::ZooKeeperCreateRequest> result = std::make_shared<Coordination::ZooKeeperCreateRequest>();
Coordination::read(result->path, in);
Coordination::read(result->data, in);
Coordination::read(result->acls, in);
Coordination::read(result->is_ephemeral, in);
result->need_to_hash_acls = false;
/// How should we use it? It should just be incremented on request execution.
int32_t parent_c_version;
Coordination::read(parent_c_version, in);
return result;
}
Coordination::ZooKeeperRequestPtr deserializeDeleteTxn(ReadBuffer & in)
{
std::shared_ptr<Coordination::ZooKeeperRemoveRequest> result = std::make_shared<Coordination::ZooKeeperRemoveRequest>();
Coordination::read(result->path, in);
return result;
}
Coordination::ZooKeeperRequestPtr deserializeSetTxn(ReadBuffer & in)
{
std::shared_ptr<Coordination::ZooKeeperSetRequest> result = std::make_shared<Coordination::ZooKeeperSetRequest>();
Coordination::read(result->path, in);
Coordination::read(result->data, in);
Coordination::read(result->version, in);
/// The log stores version + 1 (the resulting node version, not the request version)
result->version -= 1;
return result;
}
Coordination::ZooKeeperRequestPtr deserializeCheckVersionTxn(ReadBuffer & in)
{
std::shared_ptr<Coordination::ZooKeeperCheckRequest> result = std::make_shared<Coordination::ZooKeeperCheckRequest>();
Coordination::read(result->path, in);
Coordination::read(result->version, in);
return result;
}
Coordination::ZooKeeperRequestPtr deserializeCreateSession(ReadBuffer & in)
{
std::shared_ptr<Coordination::ZooKeeperSessionIDRequest> result = std::make_shared<Coordination::ZooKeeperSessionIDRequest>();
int32_t timeout;
Coordination::read(timeout, in);
result->session_timeout_ms = timeout;
return result;
}
Coordination::ZooKeeperRequestPtr deserializeCloseSession(ReadBuffer & in)
{
std::shared_ptr<Coordination::ZooKeeperCloseRequest> result = std::make_shared<Coordination::ZooKeeperCloseRequest>();
std::vector<std::string> data;
Coordination::read(data, in);
return result;
}
Coordination::ZooKeeperRequestPtr deserializeErrorTxn(ReadBuffer & in)
{
int32_t error;
Coordination::read(error, in);
return nullptr;
}
Coordination::ZooKeeperRequestPtr deserializeSetACLTxn(ReadBuffer & in)
{
std::shared_ptr<Coordination::ZooKeeperSetACLRequest> result = std::make_shared<Coordination::ZooKeeperSetACLRequest>();
Coordination::read(result->path, in);
Coordination::read(result->acls, in);
Coordination::read(result->version, in);
/// The log stores version + 1 (the resulting node version, not the request version)
result->version -= 1;
result->need_to_hash_acls = false;
return result;
}
Coordination::ZooKeeperRequestPtr deserializeMultiTxn(ReadBuffer & in);
Coordination::ZooKeeperRequestPtr deserializeTxnImpl(ReadBuffer & in, bool subtxn)
{
int32_t type;
Coordination::read(type, in);
Coordination::ZooKeeperRequestPtr result = nullptr;
int32_t sub_txn_length = 0;
if (subtxn)
Coordination::read(sub_txn_length, in);
int64_t in_count_before = in.count();
switch (type)
{
case 1:
result = deserializeCreateTxn(in);
break;
case 2:
result = deserializeDeleteTxn(in);
break;
case 5:
result = deserializeSetTxn(in);
break;
case 7:
result = deserializeSetACLTxn(in);
break;
case 13:
result = deserializeCheckVersionTxn(in);
break;
case 14:
result = deserializeMultiTxn(in);
break;
case -10:
result = deserializeCreateSession(in);
break;
case -11:
result = deserializeCloseSession(in);
break;
case -1:
result = deserializeErrorTxn(in);
break;
default:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented operation {}", type);
}
if (subtxn)
{
int64_t bytes_read = in.count() - in_count_before;
if (bytes_read < sub_txn_length)
in.ignore(sub_txn_length - bytes_read);
}
return result;
}
Coordination::ZooKeeperRequestPtr deserializeMultiTxn(ReadBuffer & in)
{
int32_t length;
Coordination::read(length, in);
std::shared_ptr<Coordination::ZooKeeperMultiRequest> result = std::make_shared<Coordination::ZooKeeperMultiRequest>();
while (length > 0)
{
auto subrequest = deserializeTxnImpl(in, true);
result->requests.push_back(subrequest);
length--;
}
return result;
}
bool isErrorRequest(Coordination::ZooKeeperRequestPtr request)
{
return request == nullptr;
}
bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request)
{
if (request == nullptr)
return true;
for (const auto & subrequest : dynamic_cast<Coordination::ZooKeeperMultiRequest *>(request.get())->requests) //-V522
if (subrequest == nullptr)
return true;
return false;
}
}
bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * /*log*/)
{
int64_t checksum;
Coordination::read(checksum, in);
/// Zero padding is possible until the end of the file
if (checksum == 0)
return false;
int32_t txn_len;
Coordination::read(txn_len, in);
int64_t count_before = in.count();
int64_t session_id;
Coordination::read(session_id, in);
int32_t xid;
Coordination::read(xid, in);
int64_t zxid;
Coordination::read(zxid, in);
int64_t time;
Coordination::read(time, in);
Coordination::ZooKeeperRequestPtr request = deserializeTxnImpl(in, false);
/// Skip all other bytes
int64_t bytes_read = in.count() - count_before;
if (bytes_read < txn_len)
in.ignore(txn_len - bytes_read);
/// We don't need to apply error requests
if (isErrorRequest(request))
return true;
request->xid = xid;
if (zxid > storage.zxid)
{
/// Separate processing of session id requests
if (request->getOpNum() == Coordination::OpNum::SessionID)
{
const Coordination::ZooKeeperSessionIDRequest & session_id_request = dynamic_cast<const Coordination::ZooKeeperSessionIDRequest &>(*request);
storage.getSessionID(session_id_request.session_timeout_ms);
}
else
{
/// Skip failed multirequests
if (request->getOpNum() == Coordination::OpNum::Multi && hasErrorsInMultiRequest(request))
return true;
storage.processRequest(request, session_id, zxid, /* check_acl = */ false);
}
}
return true;
}
void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, Poco::Logger * log)
{
ReadBufferFromFile reader(log_path);
LOG_INFO(log, "Deserializing log {}", log_path);
deserializeLogMagic(reader);
LOG_INFO(log, "Header looks OK");
size_t counter = 0;
while (!reader.eof() && deserializeTxn(storage, reader, log))
{
counter++;
if (counter % 1000 == 0)
LOG_INFO(log, "Deserialized txns log: {}", counter);
int8_t forty_two;
Coordination::read(forty_two, reader);
if (forty_two != 0x42)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Forty two check byte ({}) is not equal 0x42", forty_two);
}
LOG_INFO(log, "Finished {} deserialization, totally read {} records", log_path, counter);
}
void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, Poco::Logger * log)
{
namespace fs = std::filesystem;
std::map<int64_t, std::string> existing_logs;
for (const auto & p : fs::directory_iterator(path))
{
const auto & log_path = p.path();
if (!log_path.has_filename() || !startsWith(log_path.filename(), "log."))
continue;
int64_t zxid = getZxidFromName(log_path);
existing_logs[zxid] = p.path();
}
LOG_INFO(log, "Totally have {} logs", existing_logs.size());
for (auto [zxid, log_path] : existing_logs)
{
if (zxid > storage.zxid)
deserializeLogAndApplyToStorage(storage, log_path, log);
else
LOG_INFO(log, "Skipping log {}, it's ZXID {} is smaller than storages ZXID {}", log_path, zxid, storage.zxid);
}
}
}

View File

@ -0,0 +1,17 @@
#pragma once
#include <string>
#include <Coordination/KeeperStorage.h>
#include <common/logger_useful.h>
namespace DB
{
void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, Poco::Logger * log);
void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, Poco::Logger * log);
void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, Poco::Logger * log);
void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, Poco::Logger * log);
}
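Taken together, a conversion driver might look like this minimal sketch (the KeeperStorage constructor arguments, the directory path and the logger name are assumptions for illustration, not taken from this change):
KeeperStorage storage(/* tick_time_ms = */ 500, /* superdigest = */ "");   /// assumed constructor signature
auto * log = &Poco::Logger::get("KeeperConverter");
/// Restore the latest snapshot first, then replay only logs with newer ZXIDs.
deserializeKeeperStorageFromSnapshotsDir(storage, "/var/lib/zookeeper/version-2", log);
deserializeLogsAndApplyToStorage(storage, "/var/lib/zookeeper/version-2", log);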

View File

@ -85,7 +85,13 @@
#define DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH 54448
/// Version of ClickHouse TCP protocol. Increment it manually when you change the protocol.
/// Version of ClickHouse TCP protocol.
///
/// Should be incremented manually on protocol changes.
///
/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing in common with VERSION_REVISION;
/// the latter is just a number for the server version (one number instead of a commit SHA),
/// kept for simplicity (sometimes it may be more convenient in some use cases).
#define DBMS_TCP_PROTOCOL_VERSION 54449
#define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449

View File

@ -67,9 +67,9 @@ void ExternalResultDescription::init(const Block & sample_block_)
else if (which.isUUID())
types.emplace_back(ValueType::vtUUID, is_nullable);
else if (which.isEnum8())
types.emplace_back(ValueType::vtString, is_nullable);
types.emplace_back(ValueType::vtEnum8, is_nullable);
else if (which.isEnum16())
types.emplace_back(ValueType::vtString, is_nullable);
types.emplace_back(ValueType::vtEnum16, is_nullable);
else if (which.isDateTime64())
types.emplace_back(ValueType::vtDateTime64, is_nullable);
else if (which.isDecimal32())

View File

@ -22,6 +22,8 @@ struct ExternalResultDescription
vtInt64,
vtFloat32,
vtFloat64,
vtEnum8,
vtEnum16,
vtString,
vtDate,
vtDateTime,

View File

@ -298,7 +298,6 @@ namespace MySQLReplication
}
/// Types that are not used in the binlog event:
/// MYSQL_TYPE_ENUM
/// MYSQL_TYPE_SET
/// MYSQL_TYPE_TINY_BLOB
/// MYSQL_TYPE_MEDIUM_BLOB
@ -562,6 +561,22 @@ namespace MySQLReplication
row.push_back(dispatch((meta >> 8) & 0xFF, meta & 0xFF, read_decimal));
break;
}
case MYSQL_TYPE_ENUM:
{
if ((meta & 0xFF) == 1)
{
UInt8 val = 0;
payload.readStrict(reinterpret_cast<char *>(&val), 1);
row.push_back(Field{UInt8{val}});
}
else
{
UInt16 val = 0;
payload.readStrict(reinterpret_cast<char *>(&val), 2);
row.push_back(Field{UInt16{val}});
}
break;
}
case MYSQL_TYPE_VARCHAR:
case MYSQL_TYPE_VAR_STRING:
{

View File

@ -0,0 +1,53 @@
#pragma once
#include <map>
#include <list>
#include <optional>
#include <string>
#include <set>
#include <initializer_list>
#include <DataTypes/IDataType.h>
#include <Core/Names.h>
namespace DB
{
class NameAndAliasPair
{
public:
NameAndAliasPair(const String & name_, const DataTypePtr & type_, const String & expression_)
: name(name_)
, type(type_)
, expression(expression_)
{}
String name;
DataTypePtr type;
String expression;
};
/// This is needed to use structured bindings for NameAndAliasPair
/// const auto & [name, type, expression] = name_and_alias
template <int I>
decltype(auto) get(const NameAndAliasPair & name_and_alias)
{
if constexpr (I == 0)
return name_and_alias.name;
else if constexpr (I == 1)
return name_and_alias.type;
else if constexpr (I == 2)
return name_and_alias.expression;
}
using NamesAndAliases = std::vector<NameAndAliasPair>;
}
namespace std
{
template <> struct tuple_size<DB::NameAndAliasPair> : std::integral_constant<size_t, 3> {};
template <> struct tuple_element<0, DB::NameAndAliasPair> { using type = DB::String; };
template <> struct tuple_element<1, DB::NameAndAliasPair> { using type = DB::DataTypePtr; };
template <> struct tuple_element<2, DB::NameAndAliasPair> { using type = DB::String; };
}
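For illustration, a minimal sketch of the structured bindings these specializations enable (assumes <cassert> and the DataTypeString header; the alias itself is made up):
inline void exampleNameAndAliasBindings()
{
    DB::NamesAndAliases aliases;
    aliases.emplace_back("value_string", std::make_shared<DB::DataTypeString>(), "toString(value)");
    for (const auto & [name, type, expression] : aliases)   /// bindings map to get<0..2> above
        assert(name == "value_string" && type->getName() == "String" && expression == "toString(value)");
}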

View File

@ -2,6 +2,7 @@
#include <Poco/Util/AbstractConfiguration.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <Common/typeid_cast.h>
#include <string.h>
#include <boost/program_options/options_description.hpp>
@ -56,40 +57,28 @@ void Settings::loadSettingsFromConfig(const String & path, const Poco::Util::Abs
}
}
void Settings::dumpToArrayColumns(IColumn * column_names_, IColumn * column_values_, bool changed_only)
void Settings::dumpToMapColumn(IColumn * column, bool changed_only)
{
/// Convert the pointer and do a simple sanity check
auto * column_names = (column_names_) ? &typeid_cast<ColumnArray &>(*column_names_) : nullptr;
auto * column_values = (column_values_) ? &typeid_cast<ColumnArray &>(*column_values_) : nullptr;
auto * column_map = column ? &typeid_cast<ColumnMap &>(*column) : nullptr;
if (!column_map)
return;
size_t count = 0;
auto & offsets = column_map->getNestedColumn().getOffsets();
auto & tuple_column = column_map->getNestedData();
auto & key_column = tuple_column.getColumn(0);
auto & value_column = tuple_column.getColumn(1);
size_t size = 0;
for (const auto & setting : all(changed_only ? SKIP_UNCHANGED : SKIP_NONE))
{
if (column_names)
{
auto name = setting.getName();
column_names->getData().insertData(name.data(), name.size());
}
if (column_values)
column_values->getData().insert(setting.getValueString());
++count;
auto name = setting.getName();
key_column.insertData(name.data(), name.size());
value_column.insert(setting.getValueString());
size++;
}
if (column_names)
{
auto & offsets = column_names->getOffsets();
offsets.push_back(offsets.back() + count);
}
/// Nested columns case
bool the_same_offsets = column_names && column_values && column_names->getOffsetsPtr() == column_values->getOffsetsPtr();
if (column_values && !the_same_offsets)
{
auto & offsets = column_values->getOffsets();
offsets.push_back(offsets.back() + count);
}
offsets.push_back(offsets.back() + size);
}
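A hedged usage sketch of the new interface (the column construction is illustrative and assumes the DataTypeMap/DataTypeString headers are included):
Settings settings;
auto map_type = std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>());
MutableColumnPtr column = map_type->createColumn();                 /// a ColumnMap underneath
settings.dumpToMapColumn(column.get(), /* changed_only = */ false); /// one map entry per setting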
void Settings::addProgramOptions(boost::program_options::options_description & options)

View File

@ -441,7 +441,7 @@ class IColumn;
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, allow_experimental_map_type, false, "Allow data type Map", 0) \
M(Bool, allow_experimental_map_type, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_window_functions, false, "Allow experimental window functions", 0) \
M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
@ -596,8 +596,8 @@ struct Settings : public BaseSettings<SettingsTraits>
/// Load settings from configuration file, at "path" prefix in configuration.
void loadSettingsFromConfig(const String & path, const Poco::Util::AbstractConfiguration & config);
/// Dumps profile events to two columns of type Array(String)
void dumpToArrayColumns(IColumn * column_names, IColumn * column_values, bool changed_only = true);
/// Dumps settings to a column of type Map(String, String)
void dumpToMapColumn(IColumn * column, bool changed_only = true);
/// Adds program options to set the settings from a command line.
/// (Don't forget to call notify() on the `variables_map` after parsing it!)

View File

@ -243,6 +243,8 @@ namespace
insertNumber<Float64>(column, value, name);
break;
case ValueType::vtEnum8:
case ValueType::vtEnum16:
case ValueType::vtString:
{
if (value.type() == Poco::MongoDB::ElementTraits<ObjectId::Ptr>::TypeId)

View File

@ -157,6 +157,8 @@ void PostgreSQLBlockInputStream::insertValue(IColumn & column, std::string_view
assert_cast<ColumnFloat64 &>(column).insertValue(pqxx::from_string<double>(value));
break;
case ValueType::vtFixedString:[[fallthrough]];
case ValueType::vtEnum8:
case ValueType::vtEnum16:
case ValueType::vtString:
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
break;

View File

@ -262,6 +262,9 @@ void registerDataTypeEnum(DataTypeFactory & factory)
factory.registerDataType("Enum8", createExact<DataTypeEnum<Int8>>);
factory.registerDataType("Enum16", createExact<DataTypeEnum<Int16>>);
factory.registerDataType("Enum", create);
/// MySQL
factory.registerAlias("ENUM", "Enum", DataTypeFactory::CaseInsensitive);
}
}

View File

@ -288,21 +288,35 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
ErrorCodes::NO_COMMON_TYPE);
if (have_datetime64 == 0)
{
for (const auto & type : types)
{
if (isDateTime(type))
return type;
}
return std::make_shared<DataTypeDateTime>();
}
UInt8 max_scale = 0;
size_t max_scale_date_time_index = 0;
for (const auto & t : types)
for (size_t i = 0; i < types.size(); ++i)
{
if (const auto * dt64 = typeid_cast<const DataTypeDateTime64 *>(t.get()))
const auto & type = types[i];
if (const auto * date_time64_type = typeid_cast<const DataTypeDateTime64 *>(type.get()))
{
const auto scale = dt64->getScale();
if (scale > max_scale)
const auto scale = date_time64_type->getScale();
if (scale >= max_scale)
{
max_scale_date_time_index = i;
max_scale = scale;
}
}
}
return std::make_shared<DataTypeDateTime64>(max_scale);
return types[max_scale_date_time_index];
}
}
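For example, under the changed logic the function returns the existing DateTime64 type object with the maximal scale (the `>=` comparison keeps the last such type), presumably so that any time zone stored in it is preserved instead of constructing a fresh DateTime64. A hedged sketch:
DataTypes types{std::make_shared<DataTypeDateTime64>(3), std::make_shared<DataTypeDateTime64>(6)};
DataTypePtr supertype = getLeastSupertype(types);   /// types[1], i.e. DateTime64(6)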

View File

@ -110,6 +110,8 @@ void CassandraBlockInputStream::insertValue(IColumn & column, ValueType type, co
assert_cast<ColumnFloat64 &>(column).insertValue(value);
break;
}
case ValueType::vtEnum8:
case ValueType::vtEnum16:
case ValueType::vtString:
{
const char * value = nullptr;

View File

@ -91,6 +91,8 @@ namespace DB
case ValueType::vtFloat64:
insert<Float64>(column, string_value);
break;
case ValueType::vtEnum8:
case ValueType::vtEnum16:
case ValueType::vtString:
assert_cast<ColumnString &>(column).insert(parse<String>(string_value));
break;

View File

@ -10,6 +10,7 @@
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnFixedString.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
@ -157,6 +158,14 @@ namespace
assert_cast<ColumnFloat64 &>(column).insertValue(value.getDouble());
read_bytes_size += 8;
break;
case ValueType::vtEnum8:
assert_cast<ColumnInt8 &>(column).insertValue(assert_cast<const DataTypeEnum<Int8> &>(data_type).castToValue(value.data()).get<Int8>());
read_bytes_size += assert_cast<ColumnInt8 &>(column).byteSize();
break;
case ValueType::vtEnum16:
assert_cast<ColumnInt16 &>(column).insertValue(assert_cast<const DataTypeEnum<Int16> &>(data_type).castToValue(value.data()).get<Int16>());
read_bytes_size += assert_cast<ColumnInt16 &>(column).byteSize();
break;
case ValueType::vtString:
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
read_bytes_size += assert_cast<ColumnString &>(column).byteSize();

View File

@ -4,6 +4,7 @@
#include <Core/DecimalFunctions.h>
#include <Common/Exception.h>
#include <common/DateLUTImpl.h>
#include <common/DateLUT.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <Functions/FunctionHelpers.h>
@ -863,19 +864,27 @@ struct DateTimeTransformImpl
{
using Op = Transformer<typename FromDataType::FieldType, typename ToDataType::FieldType, Transform>;
size_t time_zone_argument_position = 1;
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
time_zone_argument_position = 2;
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_argument_position, 0);
const ColumnPtr source_col = arguments[0].column;
if (const auto * sources = checkAndGetColumn<typename FromDataType::ColumnType>(source_col.get()))
{
auto mutable_result_col = result_type->createColumn();
auto * col_to = assert_cast<typename ToDataType::ColumnType *>(mutable_result_col.get());
Op::vector(sources->getData(), col_to->getData(), time_zone, transform);
WhichDataType result_data_type(result_type);
if (result_data_type.isDateTime() || result_data_type.isDateTime64())
{
const auto & time_zone = dynamic_cast<const TimezoneMixin &>(*result_type).getTimeZone();
Op::vector(sources->getData(), col_to->getData(), time_zone, transform);
}
else
{
size_t time_zone_argument_position = 1;
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
time_zone_argument_position = 2;
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_argument_position, 0);
Op::vector(sources->getData(), col_to->getData(), time_zone, transform);
}
return mutable_result_col;
}

View File

@ -15,7 +15,7 @@ ADDINCL(
contrib/libs/libdivide
contrib/libs/rapidjson/include
contrib/libs/xxhash
contrib/restricted/murmurhash
GLOBAL contrib/restricted/murmurhash
)
PEERDIR(

View File

@ -14,7 +14,7 @@ ADDINCL(
contrib/libs/libdivide
contrib/libs/rapidjson/include
contrib/libs/xxhash
contrib/restricted/murmurhash
GLOBAL contrib/restricted/murmurhash
)
PEERDIR(

View File

@ -11,17 +11,16 @@
namespace DB
{
Block AsynchronousMetricLogElement::createBlock()
NamesAndTypesList AsynchronousMetricLogElement::getNamesAndTypes()
{
ColumnsWithTypeAndName columns;
columns.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
columns.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
columns.emplace_back(std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds");
columns.emplace_back(std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "name");
columns.emplace_back(std::make_shared<DataTypeFloat64>(), "value");
return Block(columns);
return
{
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"name", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"value", std::make_shared<DataTypeFloat64>(),}
};
}

View File

@ -12,8 +12,8 @@
namespace DB
{
typedef double AsynchronousMetricValue;
typedef std::unordered_map<std::string, AsynchronousMetricValue> AsynchronousMetricValues;
using AsynchronousMetricValue = double;
using AsynchronousMetricValues = std::unordered_map<std::string, AsynchronousMetricValue>;
/** AsynchronousMetricLog is a log of metric values measured at regular time interval.
*/
@ -27,7 +27,8 @@ struct AsynchronousMetricLogElement
double value;
static std::string name() { return "AsynchronousMetricLog"; }
static Block createBlock();
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases() { return {}; }
void appendToBlock(MutableColumns & columns) const;
};

View File

@ -457,6 +457,14 @@ struct ContextSharedPart
{
auto lock = std::lock_guard(mutex);
/** Compiled expressions stored in cache need to be destroyed before destruction of static objects.
* Because the CHJIT instance can be a static object.
*/
#if USE_EMBEDDED_COMPILER
if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache())
cache->reset();
#endif
/// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
/// TODO: Get rid of this.

View File

@ -18,21 +18,21 @@ namespace DB
std::weak_ptr<CrashLog> CrashLog::crash_log;
Block CrashLogElement::createBlock()
NamesAndTypesList CrashLogElement::getNamesAndTypes()
{
return
{
{std::make_shared<DataTypeDate>(), "event_date"},
{std::make_shared<DataTypeDateTime>(), "event_time"},
{std::make_shared<DataTypeUInt64>(), "timestamp_ns"},
{std::make_shared<DataTypeInt32>(), "signal"},
{std::make_shared<DataTypeUInt64>(), "thread_id"},
{std::make_shared<DataTypeString>(), "query_id"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "trace"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "trace_full"},
{std::make_shared<DataTypeString>(), "version"},
{std::make_shared<DataTypeUInt32>(), "revision"},
{std::make_shared<DataTypeString>(), "build_id"},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"timestamp_ns", std::make_shared<DataTypeUInt64>()},
{"signal", std::make_shared<DataTypeInt32>()},
{"thread_id", std::make_shared<DataTypeUInt64>()},
{"query_id", std::make_shared<DataTypeString>()},
{"trace", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
{"trace_full", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"version", std::make_shared<DataTypeString>()},
{"revision", std::make_shared<DataTypeUInt32>()},
{"build_id", std::make_shared<DataTypeString>()},
};
}
@ -60,7 +60,6 @@ void CrashLogElement::appendToBlock(MutableColumns & columns) const
}
void collectCrashLog(Int32 signal, UInt64 thread_id, const String & query_id, const StackTrace & stack_trace)
{
using namespace DB;

View File

@ -24,7 +24,8 @@ struct CrashLogElement
Array trace_full;
static std::string name() { return "CrashLog"; }
static Block createBlock();
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases() { return {}; }
void appendToBlock(MutableColumns & columns) const;
};

View File

@ -1504,7 +1504,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
{
prewhere_info = std::make_shared<PrewhereDAGInfo>(actions, query.prewhere()->getColumnName(settings));
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName(settings));
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
{
@ -1725,7 +1725,6 @@ void ExpressionAnalysisResult::checkActions() const
check_actions(prewhere_info->prewhere_actions);
check_actions(prewhere_info->alias_actions);
check_actions(prewhere_info->remove_columns_actions);
}
}

View File

@ -239,7 +239,7 @@ struct ExpressionAnalysisResult
/// Columns will be removed after prewhere actions execution.
NameSet columns_to_remove_after_prewhere;
PrewhereDAGInfoPtr prewhere_info;
PrewhereInfoPtr prewhere_info;
FilterDAGInfoPtr filter_info;
ConstantFilterDescription prewhere_constant_filter_description;
ConstantFilterDescription where_constant_filter_description;

View File

@ -204,6 +204,7 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
if (table_join->dictionary_reader)
{
LOG_DEBUG(log, "Performing join over dict");
data->type = Type::DICT;
std::get<MapsOne>(data->maps).create(Type::DICT);
chooseMethod(key_columns, key_sizes); /// init key_sizes
@ -319,30 +320,23 @@ public:
using Mapped = RowRef;
using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped, true>;
KeyGetterForDict(const ColumnRawPtrs & key_columns_, const Sizes &, void *)
: key_columns(key_columns_)
{}
FindResult findKey(const TableJoin & table_join, size_t row, const Arena &)
KeyGetterForDict(const TableJoin & table_join, const ColumnRawPtrs & key_columns)
{
const DictionaryReader & reader = *table_join.dictionary_reader;
if (!read_result)
{
reader.readKeys(*key_columns[0], read_result, found, positions);
result.block = &read_result;
table_join.dictionary_reader->readKeys(*key_columns[0], read_result, found, positions);
if (table_join.forceNullableRight())
for (auto & column : read_result)
if (table_join.rightBecomeNullable(column.type))
JoinCommon::convertColumnToNullable(column);
}
for (ColumnWithTypeAndName & column : read_result)
if (table_join.rightBecomeNullable(column.type))
JoinCommon::convertColumnToNullable(column);
}
FindResult findKey(void *, size_t row, const Arena &)
{
result.block = &read_result;
result.row_num = positions[row];
return FindResult(&result, found[row], 0);
}
private:
const ColumnRawPtrs & key_columns;
Block read_result;
Mapped result;
ColumnVector<UInt8>::Container found;
@ -851,6 +845,7 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse
/// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS).
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map, bool need_filter, bool has_null_map>
NO_INLINE IColumn::Filter joinRightColumns(
KeyGetter && key_getter,
const Map & map,
AddedColumns & added_columns,
const ConstNullMapPtr & null_map [[maybe_unused]],
@ -880,8 +875,6 @@ NO_INLINE IColumn::Filter joinRightColumns(
if constexpr (need_replication)
added_columns.offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows);
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(added_columns.key_columns, added_columns.key_sizes);
IColumn::Offset current_offset = 0;
for (size_t i = 0; i < rows; ++i)
@ -980,35 +973,51 @@ NO_INLINE IColumn::Filter joinRightColumns(
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename KeyGetter, typename Map>
IColumn::Filter joinRightColumnsSwitchNullability(
const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map, JoinStuff::JoinUsedFlags & used_flags)
KeyGetter && key_getter,
const Map & map,
AddedColumns & added_columns,
const ConstNullMapPtr & null_map,
JoinStuff::JoinUsedFlags & used_flags)
{
if (added_columns.need_filter)
{
if (null_map)
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, true>(map, added_columns, null_map, used_flags);
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, true>(
std::forward<KeyGetter>(key_getter), map, added_columns, null_map, used_flags);
else
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, false>(map, added_columns, nullptr, used_flags);
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, true, false>(
std::forward<KeyGetter>(key_getter), map, added_columns, nullptr, used_flags);
}
else
{
if (null_map)
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, true>(map, added_columns, null_map, used_flags);
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, true>(
std::forward<KeyGetter>(key_getter), map, added_columns, null_map, used_flags);
else
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, false>(map, added_columns, nullptr, used_flags);
return joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, false, false>(
std::forward<KeyGetter>(key_getter), map, added_columns, nullptr, used_flags);
}
}
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename Maps>
IColumn::Filter switchJoinRightColumns(
const Maps & maps_, AddedColumns & added_columns, HashJoin::Type type, const ConstNullMapPtr & null_map, JoinStuff::JoinUsedFlags & used_flags)
const Maps & maps_,
AddedColumns & added_columns,
HashJoin::Type type,
const ConstNullMapPtr & null_map,
JoinStuff::JoinUsedFlags & used_flags)
{
constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
switch (type)
{
#define M(TYPE) \
case HashJoin::Type::TYPE: \
return joinRightColumnsSwitchNullability<KIND, STRICTNESS,\
typename KeyGetterForType<HashJoin::Type::TYPE, const std::remove_reference_t<decltype(*maps_.TYPE)>>::Type>(\
*maps_.TYPE, added_columns, null_map, used_flags);
{ \
using KeyGetter = typename KeyGetterForType<HashJoin::Type::TYPE, const std::remove_reference_t<decltype(*maps_.TYPE)>>::Type; \
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(added_columns.key_columns, added_columns.key_sizes); \
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetter>( \
std::move(key_getter), *maps_.TYPE, added_columns, null_map, used_flags); \
}
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
@ -1025,8 +1034,12 @@ IColumn::Filter dictionaryJoinRightColumns(const TableJoin & table_join, AddedCo
STRICTNESS == ASTTableJoin::Strictness::Semi ||
STRICTNESS == ASTTableJoin::Strictness::Anti))
{
assert(added_columns.key_columns.size() == 1);
JoinStuff::JoinUsedFlags flags;
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(table_join, added_columns, null_map, flags);
KeyGetterForDict key_getter(table_join, added_columns.key_columns);
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(
std::move(key_getter), nullptr, added_columns, null_map, flags);
}
throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR);

View File

@ -302,6 +302,35 @@ ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns)
return columns_list;
}
ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns, const NamesAndAliases & alias_columns)
{
std::shared_ptr<ASTExpressionList> columns_list = std::static_pointer_cast<ASTExpressionList>(formatColumns(columns));
for (const auto & alias_column : alias_columns)
{
const auto column_declaration = std::make_shared<ASTColumnDeclaration>();
column_declaration->name = alias_column.name;
ParserDataType type_parser;
String type_name = alias_column.type->getName();
const char * type_pos = type_name.data();
const char * type_end = type_pos + type_name.size();
column_declaration->type = parseQuery(type_parser, type_pos, type_end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
column_declaration->default_specifier = "ALIAS";
const auto & alias = alias_column.expression;
const char * alias_pos = alias.data();
const char * alias_end = alias_pos + alias.size();
ParserExpression expression_parser;
column_declaration->default_expression = parseQuery(expression_parser, alias_pos, alias_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
columns_list->children.emplace_back(column_declaration);
}
return columns_list;
}
ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns)
{
auto columns_list = std::make_shared<ASTExpressionList>();
@ -646,23 +675,6 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat
}
}
}
if (!create.attach && !settings.allow_experimental_map_type)
{
for (const auto & name_and_type_pair : properties.columns.getAllPhysical())
{
WhichDataType which(*name_and_type_pair.type);
if (which.isMap())
{
const auto & type_name = name_and_type_pair.type->getName();
String message = "Cannot create table with column '" + name_and_type_pair.name + "' which type is '"
+ type_name + "' because experimental Map type is not allowed. "
+ "Set 'allow_experimental_map_type = 1' setting to enable";
throw Exception(message, ErrorCodes::ILLEGAL_COLUMN);
}
}
}
}
void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const

View File

@ -1,12 +1,12 @@
#pragma once
#include <Core/NamesAndAliases.h>
#include <Access/AccessRightsElement.h>
#include <Interpreters/IInterpreter.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/ConstraintsDescription.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <Common/ThreadPool.h>
namespace DB
@ -31,8 +31,8 @@ public:
/// List of columns and their types in AST.
static ASTPtr formatColumns(const NamesAndTypesList & columns);
static ASTPtr formatColumns(const NamesAndTypesList & columns, const NamesAndAliases & alias_columns);
static ASTPtr formatColumns(const ColumnsDescription & columns);
static ASTPtr formatIndices(const IndicesDescription & indices);
static ASTPtr formatConstraints(const ConstraintsDescription & constraints);
static ASTPtr formatProjections(const ProjectionsDescription & projections);

View File

@ -958,11 +958,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
if (expressions.prewhere_info)
{
if (expressions.prewhere_info->row_level_filter_actions)
if (expressions.prewhere_info->row_level_filter)
{
auto row_level_filter_step = std::make_unique<FilterStep>(
query_plan.getCurrentDataStream(),
expressions.prewhere_info->row_level_filter_actions,
expressions.prewhere_info->row_level_filter,
expressions.prewhere_info->row_level_column_name,
false);
@ -978,18 +978,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
prewhere_step->setStepDescription("PREWHERE");
query_plan.addStep(std::move(prewhere_step));
// To remove additional columns in dry run
// For example, sample column which can be removed in this stage
// TODO There seems to be no place initializing remove_columns_actions
if (expressions.prewhere_info->remove_columns_actions)
{
auto remove_columns = std::make_unique<ExpressionStep>(
query_plan.getCurrentDataStream(), expressions.prewhere_info->remove_columns_actions);
remove_columns->setStepDescription("Remove unnecessary columns after PREWHERE");
query_plan.addStep(std::move(remove_columns));
}
}
}
else
@ -1479,33 +1467,29 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(
if (prewhere_info.alias_actions)
{
pipe.addSimpleTransform(
[&](const Block & header) { return std::make_shared<ExpressionTransform>(header, prewhere_info.alias_actions); });
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(header,
std::make_shared<ExpressionActions>(prewhere_info.alias_actions));
});
}
if (prewhere_info.row_level_filter)
{
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<FilterTransform>(header, prewhere_info.row_level_filter, prewhere_info.row_level_column_name, true);
return std::make_shared<FilterTransform>(header,
std::make_shared<ExpressionActions>(prewhere_info.row_level_filter),
prewhere_info.row_level_column_name, true);
});
}
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<FilterTransform>(
header, prewhere_info.prewhere_actions, prewhere_info.prewhere_column_name, prewhere_info.remove_prewhere_column);
header, std::make_shared<ExpressionActions>(prewhere_info.prewhere_actions),
prewhere_info.prewhere_column_name, prewhere_info.remove_prewhere_column);
});
// To remove additional columns
// In some cases, we did not read any marks so that the pipeline.streams is empty
// Thus, some columns in prewhere are not removed as expected
// This leads to mismatched header in distributed table
if (prewhere_info.remove_columns_actions)
{
pipe.addSimpleTransform(
[&](const Block & header) { return std::make_shared<ExpressionTransform>(header, prewhere_info.remove_columns_actions); });
}
}
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
@ -1560,7 +1544,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
if (does_storage_support_prewhere && settings.optimize_move_to_prewhere)
{
/// Execute row level filter in prewhere as a part of "move to prewhere" optimization.
expressions.prewhere_info = std::make_shared<PrewhereDAGInfo>(
expressions.prewhere_info = std::make_shared<PrewhereInfo>(
std::move(expressions.filter_info->actions),
std::move(expressions.filter_info->column_name));
expressions.prewhere_info->prewhere_actions->projectInput(false);
@ -1572,9 +1556,9 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
else
{
/// Add row level security actions to prewhere.
expressions.prewhere_info->row_level_filter_actions = std::move(expressions.filter_info->actions);
expressions.prewhere_info->row_level_filter = std::move(expressions.filter_info->actions);
expressions.prewhere_info->row_level_column_name = std::move(expressions.filter_info->column_name);
expressions.prewhere_info->row_level_filter_actions->projectInput(false);
expressions.prewhere_info->row_level_filter->projectInput(false);
expressions.filter_info = nullptr;
}
}
@ -1613,9 +1597,9 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns().getNames();
required_columns_from_prewhere.insert(prewhere_required_columns.begin(), prewhere_required_columns.end());
if (prewhere_info->row_level_filter_actions)
if (prewhere_info->row_level_filter)
{
auto row_level_required_columns = prewhere_info->row_level_filter_actions->getRequiredColumns().getNames();
auto row_level_required_columns = prewhere_info->row_level_filter->getRequiredColumns().getNames();
required_columns_from_prewhere.insert(row_level_required_columns.begin(), row_level_required_columns.end());
}
}
@ -1898,28 +1882,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
auto & prewhere_info = analysis_result.prewhere_info;
if (prewhere_info)
{
auto actions_settings = ExpressionActionsSettings::fromContext(context, CompileExpressions::yes);
query_info.prewhere_info = std::make_shared<PrewhereInfo>();
query_info.prewhere_info->prewhere_actions
= std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions, actions_settings);
if (prewhere_info->row_level_filter_actions)
query_info.prewhere_info->row_level_filter
= std::make_shared<ExpressionActions>(prewhere_info->row_level_filter_actions, actions_settings);
if (prewhere_info->alias_actions)
query_info.prewhere_info->alias_actions
= std::make_shared<ExpressionActions>(prewhere_info->alias_actions, actions_settings);
if (prewhere_info->remove_columns_actions)
query_info.prewhere_info->remove_columns_actions
= std::make_shared<ExpressionActions>(prewhere_info->remove_columns_actions, actions_settings);
query_info.prewhere_info->prewhere_column_name = prewhere_info->prewhere_column_name;
query_info.prewhere_info->remove_prewhere_column = prewhere_info->remove_prewhere_column;
query_info.prewhere_info->row_level_column_name = prewhere_info->row_level_column_name;
query_info.prewhere_info->need_filter = prewhere_info->need_filter;
}
query_info.prewhere_info = prewhere_info;
/// Create optimizer with prepared actions.
/// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge.

View File

@ -8,21 +8,21 @@
namespace DB
{
Block MetricLogElement::createBlock()
NamesAndTypesList MetricLogElement::getNamesAndTypes()
{
ColumnsWithTypeAndName columns_with_type_and_name;
NamesAndTypesList columns_with_type_and_name;
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds");
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "milliseconds");
columns_with_type_and_name.emplace_back("event_date", std::make_shared<DataTypeDate>());
columns_with_type_and_name.emplace_back("event_time", std::make_shared<DataTypeDateTime>());
columns_with_type_and_name.emplace_back("event_time_microseconds", std::make_shared<DataTypeDateTime64>(6));
columns_with_type_and_name.emplace_back("milliseconds", std::make_shared<DataTypeUInt64>());
for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i)
{
std::string name;
name += "ProfileEvent_";
name += ProfileEvents::getName(ProfileEvents::Event(i));
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), std::move(name));
columns_with_type_and_name.emplace_back(std::move(name), std::make_shared<DataTypeUInt64>());
}
for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i)
@ -30,10 +30,10 @@ Block MetricLogElement::createBlock()
std::string name;
name += "CurrentMetric_";
name += CurrentMetrics::getName(CurrentMetrics::Metric(i));
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeInt64>(), std::move(name));
columns_with_type_and_name.emplace_back(std::move(name), std::make_shared<DataTypeInt64>());
}
return Block(columns_with_type_and_name);
return columns_with_type_and_name;
}

View File

@ -25,7 +25,8 @@ struct MetricLogElement
std::vector<CurrentMetrics::Metric> current_metrics;
static std::string name() { return "MetricLog"; }
static Block createBlock();
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases() { return {}; }
void appendToBlock(MutableColumns & columns) const;
};

View File

@ -92,22 +92,40 @@ static NamesAndTypesList getColumnsList(const ASTExpressionList * columns_defini
}
ASTPtr data_type = declare_column->data_type;
auto * data_type_function = data_type->as<ASTFunction>();
if (is_unsigned)
if (data_type_function)
{
auto * data_type_function = data_type->as<ASTFunction>();
String type_name_upper = Poco::toUpper(data_type_function->name);
if (data_type_function)
if (is_unsigned)
{
String type_name_upper = Poco::toUpper(data_type_function->name);
/// For example(in MySQL): CREATE TABLE test(column_name INT NOT NULL ... UNSIGNED)
if (type_name_upper.find("INT") != std::string::npos && !endsWith(type_name_upper, "SIGNED")
if (type_name_upper.find("INT") != String::npos && !endsWith(type_name_upper, "SIGNED")
&& !endsWith(type_name_upper, "UNSIGNED"))
data_type_function->name = type_name_upper + " UNSIGNED";
}
}
/// Transforms MySQL ENUM's list of strings to ClickHouse string-integer pairs
/// For example ENUM('a', 'b', 'c') -> ENUM('a'=1, 'b'=2, 'c'=3)
/// Elements at positions beyond 32767 are assigned negative values, starting with -32768.
/// Note: the Enum is transformed to Enum8 if the number of elements is fewer than 128, otherwise to Enum16.
if (type_name_upper.find("ENUM") != String::npos)
{
UInt16 i = 0;
for (ASTPtr & child : data_type_function->arguments->children)
{
auto new_child = std::make_shared<ASTFunction>();
new_child->name = "equals";
auto * literal = child->as<ASTLiteral>();
new_child->arguments = std::make_shared<ASTExpressionList>();
new_child->arguments->children.push_back(std::make_shared<ASTLiteral>(literal->value.get<String>()));
new_child->arguments->children.push_back(std::make_shared<ASTLiteral>(Int16(++i)));
child = new_child;
}
}
}
if (is_nullable)
data_type = makeASTFunction("Nullable", data_type);
@ -564,7 +582,8 @@ ASTs InterpreterAlterImpl::getRewrittenQueries(
if (alter_command->type == MySQLParser::ASTAlterCommand::ADD_COLUMN)
{
const auto & additional_columns_name_and_type = getColumnsList(alter_command->additional_columns);
const auto & additional_columns = InterpreterCreateQuery::formatColumns(additional_columns_name_and_type);
const auto & additional_columns_description = createColumnsDescription(additional_columns_name_and_type, alter_command->additional_columns);
const auto & additional_columns = InterpreterCreateQuery::formatColumns(additional_columns_description);
for (size_t index = 0; index < additional_columns_name_and_type.size(); ++index)
{
@ -658,7 +677,8 @@ ASTs InterpreterAlterImpl::getRewrittenQueries(
if (!alter_command->old_name.empty())
modify_columns.front().name = alter_command->old_name;
rewritten_command->col_decl = InterpreterCreateQuery::formatColumns(modify_columns)->children[0];
const auto & modify_columns_description = createColumnsDescription(modify_columns, alter_command->additional_columns);
rewritten_command->col_decl = InterpreterCreateQuery::formatColumns(modify_columns_description)->children[0];
if (!alter_command->column_name.empty())
{

View File

@ -235,3 +235,25 @@ TEST(MySQLCreateRewritten, QueryWithColumnComments)
std::string(MATERIALIZEMYSQL_TABLE_COLUMNS) +
") ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)");
}
TEST(MySQLCreateRewritten, QueryWithEnum)
{
tryRegisterFunctions();
const auto & context_holder = getContext();
EXPECT_EQ(queryToString(tryRewrittenCreateQuery(
"CREATE TABLE `test_database`.`test_table_1`(`key` INT NOT NULL PRIMARY KEY, `test` ENUM('a','b','c'))", context_holder.context)),
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` Nullable(Enum8('a' = 1, 'b' = 2, 'c' = 3))" +
std::string(MATERIALIZEMYSQL_TABLE_COLUMNS) +
") ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)");
EXPECT_EQ(queryToString(tryRewrittenCreateQuery(
"CREATE TABLE `test_database`.`test_table_1`(`key` INT NOT NULL PRIMARY KEY, `test` ENUM('a','b','c') NOT NULL)", context_holder.context)),
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` Enum8('a' = 1, 'b' = 2, 'c' = 3)" +
std::string(MATERIALIZEMYSQL_TABLE_COLUMNS) +
") ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)");
EXPECT_EQ(queryToString(tryRewrittenCreateQuery(
"CREATE TABLE `test_database`.`test_table_1`(`key` INT NOT NULL PRIMARY KEY, `test` ENUM('a','b','c') COMMENT 'test_comment')", context_holder.context)),
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` Nullable(Enum8('a' = 1, 'b' = 2, 'c' = 3)) COMMENT 'test_comment'" +
std::string(MATERIALIZEMYSQL_TABLE_COLUMNS) +
") ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)");
}

View File

@ -6,6 +6,7 @@
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeUUID.h>
#include <Common/hex.h>
@ -14,13 +15,13 @@
namespace DB
{
Block OpenTelemetrySpanLogElement::createBlock()
NamesAndTypesList OpenTelemetrySpanLogElement::getNamesAndTypes()
{
return {
{std::make_shared<DataTypeUUID>(), "trace_id"},
{std::make_shared<DataTypeUInt64>(), "span_id"},
{std::make_shared<DataTypeUInt64>(), "parent_span_id"},
{std::make_shared<DataTypeString>(), "operation_name"},
{"trace_id", std::make_shared<DataTypeUUID>()},
{"span_id", std::make_shared<DataTypeUInt64>()},
{"parent_span_id", std::make_shared<DataTypeUInt64>()},
{"operation_name", std::make_shared<DataTypeString>()},
// DateTime64 is really unwieldy -- there is no "normal" way to convert
// it to a UInt64 count of microseconds, except:
// 1) reinterpretAsUInt64(reinterpretAsFixedString(date)), which just
@ -31,16 +32,21 @@ Block OpenTelemetrySpanLogElement::createBlock()
// Also subtraction of two DateTime64 points doesn't work, so you can't
// get duration.
// It is much less hassle to just use UInt64 of microseconds.
{std::make_shared<DataTypeUInt64>(), "start_time_us"},
{std::make_shared<DataTypeUInt64>(), "finish_time_us"},
{std::make_shared<DataTypeDate>(), "finish_date"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()),
"attribute.names"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()),
"attribute.values"}
{"start_time_us", std::make_shared<DataTypeUInt64>()},
{"finish_time_us", std::make_shared<DataTypeUInt64>()},
{"finish_date", std::make_shared<DataTypeDate>()},
{"attribute", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>())},
};
}
NamesAndAliases OpenTelemetrySpanLogElement::getNamesAndAliases()
{
return
{
{"attribute.names", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "mapKeys(attribute)"},
{"attribute.values", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "mapKeys(attribute)"}
};
}
void OpenTelemetrySpanLogElement::appendToBlock(MutableColumns & columns) const
{
@ -53,17 +59,16 @@ void OpenTelemetrySpanLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(start_time_us);
columns[i++]->insert(finish_time_us);
columns[i++]->insert(DateLUT::instance().toDayNum(finish_time_us / 1000000).toUnderType());
columns[i++]->insert(attribute_names);
// The user might add some int values, and we will have an Int Field, and the
// insert will fail because the column requires Strings. Convert the fields
// here, because it's hard to remember to convert them in all other places.
Array string_values;
string_values.reserve(attribute_values.size());
for (const auto & value : attribute_values)
Map map(attribute_names.size());
for (size_t attr_idx = 0; attr_idx < map.size(); ++attr_idx)
{
string_values.push_back(toString(value));
map[attr_idx] = Tuple{attribute_names[attr_idx], toString(attribute_values[attr_idx])};
}
columns[i++]->insert(string_values);
columns[i++]->insert(map);
}
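The pairing above depends on normalising every attribute value to a string before it is inserted into the Map(String, String) column. A rough standalone analogy of that normalisation step (plain C++ with std::variant standing in for ClickHouse's Field; all names here are illustrative):

#include <cstdint>
#include <iostream>
#include <string>
#include <variant>
#include <vector>

// Stand-in for a Field that may hold an integer or a string.
using AttrValue = std::variant<int64_t, std::string>;

// Normalise every value to a string, as the column type requires.
static std::string toStringValue(const AttrValue & v)
{
    if (const auto * s = std::get_if<std::string>(&v))
        return *s;
    return std::to_string(std::get<int64_t>(v));
}

int main()
{
    std::vector<std::pair<std::string, AttrValue>> attributes =
        {{"http.status_code", int64_t{200}}, {"http.method", std::string{"GET"}}};

    for (const auto & [name, value] : attributes)
        std::cout << name << " -> " << toStringValue(value) << '\n';
}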

View File

@ -27,7 +27,8 @@ struct OpenTelemetrySpanLogElement : public OpenTelemetrySpan
: OpenTelemetrySpan(span) {}
static std::string name() { return "OpenTelemetrySpanLog"; }
static Block createBlock();
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases();
void appendToBlock(MutableColumns & columns) const;
};

View File

@ -16,7 +16,7 @@
namespace DB
{
Block PartLogElement::createBlock()
NamesAndTypesList PartLogElement::getNamesAndTypes()
{
auto event_type_datatype = std::make_shared<DataTypeEnum8>(
DataTypeEnum8::Values
@ -33,35 +33,34 @@ Block PartLogElement::createBlock()
ColumnsWithTypeAndName columns_with_type_and_name;
return {
{"query_id", std::make_shared<DataTypeString>()},
{"event_type", std::move(event_type_datatype)},
{"event_date", std::make_shared<DataTypeDate>()},
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "query_id"),
columns_with_type_and_name.emplace_back(std::move(event_type_datatype), "event_type"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDate>(), "event_date"),
{"event_time", std::make_shared<DataTypeDateTime>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds"),
{"duration_ms", std::make_shared<DataTypeUInt64>()},
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "duration_ms"),
{"database", std::make_shared<DataTypeString>()},
{"table", std::make_shared<DataTypeString>()},
{"part_name", std::make_shared<DataTypeString>()},
{"partition_id", std::make_shared<DataTypeString>()},
{"path_on_disk", std::make_shared<DataTypeString>()},
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "database"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "table"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "part_name"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "partition_id"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "path_on_disk"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "rows"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "size_in_bytes"), // On disk
{"rows", std::make_shared<DataTypeUInt64>()},
{"size_in_bytes", std::make_shared<DataTypeUInt64>()}, // On disk
/// Merge-specific info
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "merged_from"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "bytes_uncompressed"), // Result bytes
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "read_rows"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "read_bytes"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "peak_memory_usage"),
{"merged_from", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"bytes_uncompressed", std::make_shared<DataTypeUInt64>()}, // Result bytes
{"read_rows", std::make_shared<DataTypeUInt64>()},
{"read_bytes", std::make_shared<DataTypeUInt64>()},
{"peak_memory_usage", std::make_shared<DataTypeUInt64>()},
/// Is there an error during the execution or commit
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt16>(), "error"),
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeString>(), "exception"),
{"error", std::make_shared<DataTypeUInt16>()},
{"exception", std::make_shared<DataTypeString>()},
};
}

View File

@ -52,7 +52,8 @@ struct PartLogElement
static std::string name() { return "PartLog"; }
static Block createBlock();
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases() { return {}; }
void appendToBlock(MutableColumns & columns) const;
};

View File

@ -3,6 +3,7 @@
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
@ -11,14 +12,18 @@ namespace ProfileEvents
{
/// Put implementation here to avoid extra linking dependencies for clickhouse_common_io
void dumpToArrayColumns(const Counters & counters, DB::IColumn * column_names_, DB::IColumn * column_values_, bool nonzero_only)
void dumpToMapColumn(const Counters & counters, DB::IColumn * column, bool nonzero_only)
{
/// Convert ptr and make simple check
auto * column_names = (column_names_) ? &typeid_cast<DB::ColumnArray &>(*column_names_) : nullptr;
auto * column_values = (column_values_) ? &typeid_cast<DB::ColumnArray &>(*column_values_) : nullptr;
auto * column_map = column ? &typeid_cast<DB::ColumnMap &>(*column) : nullptr;
if (!column_map)
return;
auto & offsets = column_map->getNestedColumn().getOffsets();
auto & tuple_column = column_map->getNestedData();
auto & key_column = tuple_column.getColumn(0);
auto & value_column = tuple_column.getColumn(1);
size_t size = 0;
for (Event event = 0; event < Counters::num_counters; ++event)
{
UInt64 value = counters[event].load(std::memory_order_relaxed);
@ -26,34 +31,13 @@ void dumpToArrayColumns(const Counters & counters, DB::IColumn * column_names_,
if (nonzero_only && 0 == value)
continue;
++size;
if (column_names)
{
const char * desc = ProfileEvents::getName(event);
column_names->getData().insertData(desc, strlen(desc));
}
if (column_values)
column_values->getData().insert(value);
const char * desc = ProfileEvents::getName(event);
key_column.insertData(desc, strlen(desc));
value_column.insert(value);
size++;
}
if (column_names)
{
auto & offsets = column_names->getOffsets();
offsets.push_back(offsets.back() + size);
}
if (column_values)
{
/// Nested columns case
bool the_same_offsets = column_names && column_names->getOffsetsPtr().get() == column_values->getOffsetsPtr().get();
if (!the_same_offsets)
{
auto & offsets = column_values->getOffsets();
offsets.push_back(offsets.back() + size);
}
}
offsets.push_back(offsets.back() + size);
}
}
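Physically, a Map column is a flattened list of (key, value) pairs plus a cumulative offsets column marking where each row ends; dumpToMapColumn appends one row in exactly that shape. A simplified model of the layout with plain vectors (illustrative, not the real IColumn interface):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Simplified model of a Map(String, UInt64) column: flattened keys and
// values, plus one cumulative offset per row.
struct MapColumnModel
{
    std::vector<std::string> keys;
    std::vector<uint64_t> values;
    std::vector<uint64_t> offsets;

    void appendRow(const std::vector<std::pair<std::string, uint64_t>> & counters,
                   bool nonzero_only = true)
    {
        size_t size = 0;
        for (const auto & [name, value] : counters)
        {
            if (nonzero_only && value == 0)
                continue;
            keys.push_back(name);
            values.push_back(value);
            ++size;
        }
        const uint64_t prev = offsets.empty() ? 0 : offsets.back();
        offsets.push_back(prev + size);
    }
};

int main()
{
    MapColumnModel col;
    col.appendRow({{"Query", 1}, {"SelectQuery", 1}, {"FailedQuery", 0}});
    std::cout << "row 0 has " << col.offsets[0] << " entries\n"; // prints 2
}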

View File

@ -6,7 +6,7 @@
namespace ProfileEvents
{
/// Dumps profile events to two columns Array(String) and Array(UInt64)
void dumpToArrayColumns(const Counters & counters, DB::IColumn * column_names, DB::IColumn * column_value, bool nonzero_only = true);
/// Dumps profile events to a Map(String, UInt64) column
void dumpToMapColumn(const Counters & counters, DB::IColumn * column, bool nonzero_only = true);
}

View File

@ -10,6 +10,7 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
@ -19,12 +20,13 @@
#include <Common/ClickHouseRevision.h>
#include <Common/IPv6ToBinary.h>
#include <Common/ProfileEvents.h>
#include <Common/typeid_cast.h>
namespace DB
{
Block QueryLogElement::createBlock()
NamesAndTypesList QueryLogElement::getNamesAndTypes()
{
auto query_status_datatype = std::make_shared<DataTypeEnum8>(
DataTypeEnum8::Values
@ -37,86 +39,94 @@ Block QueryLogElement::createBlock()
return
{
{std::move(query_status_datatype), "type"},
{std::make_shared<DataTypeDate>(), "event_date"},
{std::make_shared<DataTypeDateTime>(), "event_time"},
{std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds"},
{std::make_shared<DataTypeDateTime>(), "query_start_time"},
{std::make_shared<DataTypeDateTime64>(6), "query_start_time_microseconds"},
{std::make_shared<DataTypeUInt64>(), "query_duration_ms"},
{"type", std::move(query_status_datatype)},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"query_start_time", std::make_shared<DataTypeDateTime>()},
{"query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"query_duration_ms", std::make_shared<DataTypeUInt64>()},
{std::make_shared<DataTypeUInt64>(), "read_rows"},
{std::make_shared<DataTypeUInt64>(), "read_bytes"},
{std::make_shared<DataTypeUInt64>(), "written_rows"},
{std::make_shared<DataTypeUInt64>(), "written_bytes"},
{std::make_shared<DataTypeUInt64>(), "result_rows"},
{std::make_shared<DataTypeUInt64>(), "result_bytes"},
{std::make_shared<DataTypeUInt64>(), "memory_usage"},
{"read_rows", std::make_shared<DataTypeUInt64>()},
{"read_bytes", std::make_shared<DataTypeUInt64>()},
{"written_rows", std::make_shared<DataTypeUInt64>()},
{"written_bytes", std::make_shared<DataTypeUInt64>()},
{"result_rows", std::make_shared<DataTypeUInt64>()},
{"result_bytes", std::make_shared<DataTypeUInt64>()},
{"memory_usage", std::make_shared<DataTypeUInt64>()},
{std::make_shared<DataTypeString>(), "current_database"},
{std::make_shared<DataTypeString>(), "query"},
{std::make_shared<DataTypeUInt64>(), "normalized_query_hash"},
{std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "query_kind"},
{std::make_shared<DataTypeArray>(
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())), "databases"},
{std::make_shared<DataTypeArray>(
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())), "tables"},
{std::make_shared<DataTypeArray>(
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())), "columns"},
{std::make_shared<DataTypeArray>(
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())), "projections"},
{std::make_shared<DataTypeInt32>(), "exception_code"},
{std::make_shared<DataTypeString>(), "exception"},
{std::make_shared<DataTypeString>(), "stack_trace"},
{"current_database", std::make_shared<DataTypeString>()},
{"query", std::make_shared<DataTypeString>()},
{"normalized_query_hash", std::make_shared<DataTypeUInt64>()},
{"query_kind", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"databases", std::make_shared<DataTypeArray>(
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
{"tables", std::make_shared<DataTypeArray>(
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
{"columns", std::make_shared<DataTypeArray>(
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
{"projections", std::make_shared<DataTypeArray>(
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
{"exception_code", std::make_shared<DataTypeInt32>()},
{"exception", std::make_shared<DataTypeString>()},
{"stack_trace", std::make_shared<DataTypeString>()},
{std::make_shared<DataTypeUInt8>(), "is_initial_query"},
{std::make_shared<DataTypeString>(), "user"},
{std::make_shared<DataTypeString>(), "query_id"},
{DataTypeFactory::instance().get("IPv6"), "address"},
{std::make_shared<DataTypeUInt16>(), "port"},
{std::make_shared<DataTypeString>(), "initial_user"},
{std::make_shared<DataTypeString>(), "initial_query_id"},
{DataTypeFactory::instance().get("IPv6"), "initial_address"},
{std::make_shared<DataTypeUInt16>(), "initial_port"},
{std::make_shared<DataTypeDateTime>(), "initial_query_start_time"},
{std::make_shared<DataTypeDateTime64>(6), "initial_query_start_time_microseconds"},
{std::make_shared<DataTypeUInt8>(), "interface"},
{std::make_shared<DataTypeString>(), "os_user"},
{std::make_shared<DataTypeString>(), "client_hostname"},
{std::make_shared<DataTypeString>(), "client_name"},
{std::make_shared<DataTypeUInt32>(), "client_revision"},
{std::make_shared<DataTypeUInt32>(), "client_version_major"},
{std::make_shared<DataTypeUInt32>(), "client_version_minor"},
{std::make_shared<DataTypeUInt32>(), "client_version_patch"},
{std::make_shared<DataTypeUInt8>(), "http_method"},
{std::make_shared<DataTypeString>(), "http_user_agent"},
{std::make_shared<DataTypeString>(), "http_referer"},
{std::make_shared<DataTypeString>(), "forwarded_for"},
{std::make_shared<DataTypeString>(), "quota_key"},
{"is_initial_query", std::make_shared<DataTypeUInt8>()},
{"user", std::make_shared<DataTypeString>()},
{"query_id", std::make_shared<DataTypeString>()},
{"address", DataTypeFactory::instance().get("IPv6")},
{"port", std::make_shared<DataTypeUInt16>()},
{"initial_user", std::make_shared<DataTypeString>()},
{"initial_query_id", std::make_shared<DataTypeString>()},
{"initial_address", DataTypeFactory::instance().get("IPv6")},
{"initial_port", std::make_shared<DataTypeUInt16>()},
{"initial_query_start_time", std::make_shared<DataTypeDateTime>()},
{"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"interface", std::make_shared<DataTypeUInt8>()},
{"os_user", std::make_shared<DataTypeString>()},
{"client_hostname", std::make_shared<DataTypeString>()},
{"client_name", std::make_shared<DataTypeString>()},
{"client_revision", std::make_shared<DataTypeUInt32>()},
{"client_version_major", std::make_shared<DataTypeUInt32>()},
{"client_version_minor", std::make_shared<DataTypeUInt32>()},
{"client_version_patch", std::make_shared<DataTypeUInt32>()},
{"http_method", std::make_shared<DataTypeUInt8>()},
{"http_user_agent", std::make_shared<DataTypeString>()},
{"http_referer", std::make_shared<DataTypeString>()},
{"forwarded_for", std::make_shared<DataTypeString>()},
{"quota_key", std::make_shared<DataTypeString>()},
{std::make_shared<DataTypeUInt32>(), "revision"},
{"revision", std::make_shared<DataTypeUInt32>()},
{std::make_shared<DataTypeString>(), "log_comment"},
{"log_comment", std::make_shared<DataTypeString>()},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "thread_ids"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "ProfileEvents.Names"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "ProfileEvents.Values"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "Settings.Names"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "Settings.Values"},
{"thread_ids", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
{"Settings", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>())},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_aggregate_functions"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_aggregate_function_combinators"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_database_engines"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_data_type_families"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_dictionaries"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_formats"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_functions"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_storages"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_table_functions"}
{"used_aggregate_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_aggregate_function_combinators", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_database_engines", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_data_type_families", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_dictionaries", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_formats", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_storages", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_table_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}
};
}
NamesAndAliases QueryLogElement::getNamesAndAliases()
{
return
{
{"ProfileEvents.Names", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapKeys(ProfileEvents)"},
{"ProfileEvents.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())}, "mapValues(ProfileEvents)"},
{"Settings.Names", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapKeys(Settings)" },
{"Settings.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapValues(Settings)"}
};
}
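The aliases above keep the old ProfileEvents.Names / ProfileEvents.Values (and Settings.*) array columns queryable without storing them: they are computed from the Map on read. Logically, mapKeys and mapValues just split one row's pairs back into two parallel arrays, roughly as follows (illustrative sketch, not the real function implementations):

#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using Row = std::vector<std::pair<std::string, uint64_t>>;

static std::vector<std::string> mapKeysOf(const Row & m)
{
    std::vector<std::string> keys;
    keys.reserve(m.size());
    for (const auto & kv : m)
        keys.push_back(kv.first);
    return keys;
}

static std::vector<uint64_t> mapValuesOf(const Row & m)
{
    std::vector<uint64_t> values;
    values.reserve(m.size());
    for (const auto & kv : m)
        values.push_back(kv.second);
    return values;
}

int main()
{
    Row row = {{"Query", 1}, {"ReadBytes", 4096}};
    for (const auto & k : mapKeysOf(row))
        std::cout << k << ' ';            // Query ReadBytes
    std::cout << '\n';
    for (auto v : mapValuesOf(row))
        std::cout << v << ' ';            // 1 4096
    std::cout << '\n';
}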
void QueryLogElement::appendToBlock(MutableColumns & columns) const
{
@ -188,26 +198,22 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
if (profile_counters)
{
auto * column_names = columns[i++].get();
auto * column_values = columns[i++].get();
ProfileEvents::dumpToArrayColumns(*profile_counters, column_names, column_values, true);
auto * column = columns[i++].get();
ProfileEvents::dumpToMapColumn(*profile_counters, column, true);
}
else
{
columns[i++]->insertDefault();
columns[i++]->insertDefault();
}
if (query_settings)
{
auto * column_names = columns[i++].get();
auto * column_values = columns[i++].get();
query_settings->dumpToArrayColumns(column_names, column_values, true);
auto * column = columns[i++].get();
query_settings->dumpToMapColumn(column, true);
}
else
{
columns[i++]->insertDefault();
columns[i++]->insertDefault();
}
{

View File

@ -1,9 +1,9 @@
#pragma once
#include <Core/NamesAndAliases.h>
#include <Interpreters/SystemLog.h>
#include <Interpreters/ClientInfo.h>
namespace ProfileEvents
{
class Counters;
@ -83,7 +83,8 @@ struct QueryLogElement
static std::string name() { return "QueryLog"; }
static Block createBlock();
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases();
void appendToBlock(MutableColumns & columns) const;
static void appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i);

View File

@ -5,6 +5,7 @@
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeFactory.h>
@ -18,59 +19,68 @@
namespace DB
{
Block QueryThreadLogElement::createBlock()
NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()
{
return {
{std::make_shared<DataTypeDate>(), "event_date"},
{std::make_shared<DataTypeDateTime>(), "event_time"},
{std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds"},
{std::make_shared<DataTypeDateTime>(), "query_start_time"},
{std::make_shared<DataTypeDateTime64>(6), "query_start_time_microseconds"},
{std::make_shared<DataTypeUInt64>(), "query_duration_ms"},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"query_start_time", std::make_shared<DataTypeDateTime>()},
{"query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"query_duration_ms", std::make_shared<DataTypeUInt64>()},
{std::make_shared<DataTypeUInt64>(), "read_rows"},
{std::make_shared<DataTypeUInt64>(), "read_bytes"},
{std::make_shared<DataTypeUInt64>(), "written_rows"},
{std::make_shared<DataTypeUInt64>(), "written_bytes"},
{std::make_shared<DataTypeInt64>(), "memory_usage"},
{std::make_shared<DataTypeInt64>(), "peak_memory_usage"},
{"read_rows", std::make_shared<DataTypeUInt64>()},
{"read_bytes", std::make_shared<DataTypeUInt64>()},
{"written_rows", std::make_shared<DataTypeUInt64>()},
{"written_bytes", std::make_shared<DataTypeUInt64>()},
{"memory_usage", std::make_shared<DataTypeInt64>()},
{"peak_memory_usage", std::make_shared<DataTypeInt64>()},
{std::make_shared<DataTypeString>(), "thread_name"},
{std::make_shared<DataTypeUInt64>(), "thread_id"},
{std::make_shared<DataTypeUInt64>(), "master_thread_id"},
{std::make_shared<DataTypeString>(), "current_database"},
{std::make_shared<DataTypeString>(), "query"},
{std::make_shared<DataTypeUInt64>(), "normalized_query_hash"},
{"thread_name", std::make_shared<DataTypeString>()},
{"thread_id", std::make_shared<DataTypeUInt64>()},
{"master_thread_id", std::make_shared<DataTypeUInt64>()},
{"current_database", std::make_shared<DataTypeString>()},
{"query", std::make_shared<DataTypeString>()},
{"normalized_query_hash", std::make_shared<DataTypeUInt64>()},
{std::make_shared<DataTypeUInt8>(), "is_initial_query"},
{std::make_shared<DataTypeString>(), "user"},
{std::make_shared<DataTypeString>(), "query_id"},
{DataTypeFactory::instance().get("IPv6"), "address"},
{std::make_shared<DataTypeUInt16>(), "port"},
{std::make_shared<DataTypeString>(), "initial_user"},
{std::make_shared<DataTypeString>(), "initial_query_id"},
{DataTypeFactory::instance().get("IPv6"), "initial_address"},
{std::make_shared<DataTypeUInt16>(), "initial_port"},
{std::make_shared<DataTypeDateTime>(), "initial_query_start_time"},
{std::make_shared<DataTypeDateTime64>(6), "initial_query_start_time_microseconds"},
{std::make_shared<DataTypeUInt8>(), "interface"},
{std::make_shared<DataTypeString>(), "os_user"},
{std::make_shared<DataTypeString>(), "client_hostname"},
{std::make_shared<DataTypeString>(), "client_name"},
{std::make_shared<DataTypeUInt32>(), "client_revision"},
{std::make_shared<DataTypeUInt32>(), "client_version_major"},
{std::make_shared<DataTypeUInt32>(), "client_version_minor"},
{std::make_shared<DataTypeUInt32>(), "client_version_patch"},
{std::make_shared<DataTypeUInt8>(), "http_method"},
{std::make_shared<DataTypeString>(), "http_user_agent"},
{std::make_shared<DataTypeString>(), "http_referer"},
{std::make_shared<DataTypeString>(), "forwarded_for"},
{std::make_shared<DataTypeString>(), "quota_key"},
{"is_initial_query", std::make_shared<DataTypeUInt8>()},
{"user", std::make_shared<DataTypeString>()},
{"query_id", std::make_shared<DataTypeString>()},
{"address", DataTypeFactory::instance().get("IPv6")},
{"port", std::make_shared<DataTypeUInt16>()},
{"initial_user", std::make_shared<DataTypeString>()},
{"initial_query_id", std::make_shared<DataTypeString>()},
{"initial_address", DataTypeFactory::instance().get("IPv6")},
{"initial_port", std::make_shared<DataTypeUInt16>()},
{"initial_query_start_time", std::make_shared<DataTypeDateTime>()},
{"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"interface", std::make_shared<DataTypeUInt8>()},
{"os_user", std::make_shared<DataTypeString>()},
{"client_hostname", std::make_shared<DataTypeString>()},
{"client_name", std::make_shared<DataTypeString>()},
{"client_revision", std::make_shared<DataTypeUInt32>()},
{"client_version_major", std::make_shared<DataTypeUInt32>()},
{"client_version_minor", std::make_shared<DataTypeUInt32>()},
{"client_version_patch", std::make_shared<DataTypeUInt32>()},
{"http_method", std::make_shared<DataTypeUInt8>()},
{"http_user_agent", std::make_shared<DataTypeString>()},
{"http_referer", std::make_shared<DataTypeString>()},
{"forwarded_for", std::make_shared<DataTypeString>()},
{"quota_key", std::make_shared<DataTypeString>()},
{std::make_shared<DataTypeUInt32>(), "revision"},
{"revision", std::make_shared<DataTypeUInt32>()},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "ProfileEvents.Names"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "ProfileEvents.Values"}
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
};
}
NamesAndAliases QueryThreadLogElement::getNamesAndAliases()
{
return
{
{"ProfileEvents.Names", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, "mapKeys(ProfileEvents)"},
{"ProfileEvents.Values", {std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())}, "mapValues(ProfileEvents)"}
};
}
@ -107,14 +117,12 @@ void QueryThreadLogElement::appendToBlock(MutableColumns & columns) const
if (profile_counters)
{
auto * column_names = columns[i++].get();
auto * column_values = columns[i++].get();
dumpToArrayColumns(*profile_counters, column_names, column_values, true);
auto * column = columns[i++].get();
ProfileEvents::dumpToMapColumn(*profile_counters, column, true);
}
else
{
columns[i++]->insertDefault();
columns[i++]->insertDefault();
}
}

View File

@ -49,7 +49,8 @@ struct QueryThreadLogElement
static std::string name() { return "QueryThreadLog"; }
static Block createBlock();
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases();
void appendToBlock(MutableColumns & columns) const;
};

View File

@ -52,7 +52,8 @@ namespace DB
/// fields
static std::string name();
static Block createBlock();
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases();
void appendToBlock(MutableColumns & columns) const;
};
*/
@ -451,10 +452,18 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
/// is called from single thread.
prepareTable();
Block block = LogElement::createBlock();
ColumnsWithTypeAndName log_element_columns;
auto log_element_names_and_types = LogElement::getNamesAndTypes();
for (const auto & name_and_type : log_element_names_and_types)
log_element_columns.emplace_back(name_and_type.type, name_and_type.name);
Block block(std::move(log_element_columns));
MutableColumns columns = block.mutateColumns();
for (const auto & elem : to_flush)
elem.appendToBlock(columns);
block.setColumns(std::move(columns));
/// We write to table indirectly, using InterpreterInsertQuery.
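With createBlock() removed, the (name, type) list is the single source of truth: flushImpl materialises the in-memory header from it, and getCreateTableQuery below formats the same list into the CREATE TABLE statement. A schematic of that single-source pattern (plain C++, illustrative names, with types as strings):

#include <iostream>
#include <string>
#include <vector>

// One schema definition drives both DDL generation and the runtime header.
struct NameAndType { std::string name; std::string type; };

static std::vector<NameAndType> getNamesAndTypes()
{
    return {{"event_date", "Date"}, {"event_time", "DateTime"}, {"message", "String"}};
}

static std::string formatColumnsForCreate(const std::vector<NameAndType> & cols)
{
    std::string out;
    for (const auto & c : cols)
        out += (out.empty() ? "" : ", ") + ("`" + c.name + "` " + c.type);
    return out;
}

int main()
{
    const auto cols = getNamesAndTypes();
    std::cout << "CREATE TABLE system.text_log (" << formatColumnsForCreate(cols) << ")\n";
    std::cout << "header has " << cols.size() << " columns\n";
}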
@ -500,11 +509,14 @@ void SystemLog<LogElement>::prepareTable()
if (table)
{
auto metadata_snapshot = table->getInMemoryMetadataPtr();
const Block expected = LogElement::createBlock();
const Block actual = metadata_snapshot->getSampleBlockNonMaterialized();
auto metadata_columns = table->getInMemoryMetadataPtr()->getColumns();
auto old_query = InterpreterCreateQuery::formatColumns(metadata_columns);
if (!blocksHaveEqualStructure(actual, expected))
auto ordinary_columns = LogElement::getNamesAndTypes();
auto alias_columns = LogElement::getNamesAndAliases();
auto current_query = InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns);
if (old_query->getTreeHash() != current_query->getTreeHash())
{
/// Rename the existing table.
int suffix = 0;
@ -575,10 +587,10 @@ ASTPtr SystemLog<LogElement>::getCreateTableQuery()
create->database = table_id.database_name;
create->table = table_id.table_name;
Block sample = LogElement::createBlock();
auto ordinary_columns = LogElement::getNamesAndTypes();
auto alias_columns = LogElement::getNamesAndAliases();
auto new_columns_list = std::make_shared<ASTColumns>();
new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(sample.getNamesAndTypesList()));
new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns));
create->set(create->columns_list, new_columns_list);
ParserStorage storage_parser;
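prepareTable above now decides whether the on-disk table is still compatible by formatting both column lists and comparing AST tree hashes, renaming the old table aside on mismatch. The decision reduces to comparing two structural fingerprints, roughly like this (illustrative sketch; std::hash over a canonical string stands in for the AST tree hash):

#include <functional>
#include <iostream>
#include <string>

// Sketch of the compatibility check: fingerprint a canonical rendering of
// the column list; a mismatch means "rename the old table, create a new one".
static size_t schemaFingerprint(const std::string & formatted_columns)
{
    return std::hash<std::string>{}(formatted_columns);
}

int main()
{
    const std::string on_disk  = "`event_date` Date, `event_time` DateTime";
    const std::string expected = "`event_date` Date, `event_time` DateTime, `comment` String";

    if (schemaFingerprint(on_disk) != schemaFingerprint(expected))
        std::cout << "schema changed: rename existing table, create a fresh one\n";
    else
        std::cout << "schema matches: reuse existing table\n";
}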

View File

@ -14,7 +14,7 @@
namespace DB
{
Block TextLogElement::createBlock()
NamesAndTypesList TextLogElement::getNamesAndTypes()
{
auto priority_datatype = std::make_shared<DataTypeEnum8>(
DataTypeEnum8::Values
@ -31,23 +31,23 @@ Block TextLogElement::createBlock()
return
{
{std::make_shared<DataTypeDate>(), "event_date"},
{std::make_shared<DataTypeDateTime>(), "event_time"},
{std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds"},
{std::make_shared<DataTypeUInt32>(), "microseconds"},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"microseconds", std::make_shared<DataTypeUInt32>()},
{std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "thread_name"},
{std::make_shared<DataTypeUInt64>(), "thread_id"},
{"thread_name", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"thread_id", std::make_shared<DataTypeUInt64>()},
{std::move(priority_datatype), "level"},
{std::make_shared<DataTypeString>(), "query_id"},
{std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "logger_name"},
{std::make_shared<DataTypeString>(), "message"},
{"level", std::move(priority_datatype)},
{"query_id", std::make_shared<DataTypeString>()},
{"logger_name", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"message", std::make_shared<DataTypeString>()},
{std::make_shared<DataTypeUInt32>(), "revision"},
{"revision", std::make_shared<DataTypeUInt32>()},
{std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "source_file"},
{std::make_shared<DataTypeUInt64>(), "source_line"}
{"source_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"source_line", std::make_shared<DataTypeUInt64>()}
};
}

View File

@ -25,7 +25,8 @@ struct TextLogElement
UInt64 source_line{};
static std::string name() { return "TextLog"; }
static Block createBlock();
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases() { return {}; }
void appendToBlock(MutableColumns & columns) const;
};

View File

@ -21,20 +21,20 @@ const TraceDataType::Values TraceLogElement::trace_values =
{"MemorySample", static_cast<UInt8>(TraceType::MemorySample)},
};
Block TraceLogElement::createBlock()
NamesAndTypesList TraceLogElement::getNamesAndTypes()
{
return
{
{std::make_shared<DataTypeDate>(), "event_date"},
{std::make_shared<DataTypeDateTime>(), "event_time"},
{std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds"},
{std::make_shared<DataTypeUInt64>(), "timestamp_ns"},
{std::make_shared<DataTypeUInt32>(), "revision"},
{std::make_shared<TraceDataType>(trace_values), "trace_type"},
{std::make_shared<DataTypeUInt64>(), "thread_id"},
{std::make_shared<DataTypeString>(), "query_id"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "trace"},
{std::make_shared<DataTypeInt64>(), "size"},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"timestamp_ns", std::make_shared<DataTypeUInt64>()},
{"revision", std::make_shared<DataTypeUInt32>()},
{"trace_type", std::make_shared<TraceDataType>(trace_values)},
{"thread_id", std::make_shared<DataTypeUInt64>()},
{"query_id", std::make_shared<DataTypeString>()},
{"trace", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
{"size", std::make_shared<DataTypeInt64>()},
};
}

Some files were not shown because too many files have changed in this diff.