diff --git a/.gitmodules b/.gitmodules index 0a7a6b4a3f9..1d9d4d25baf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -168,9 +168,6 @@ [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt.git -[submodule "contrib/antlr4-runtime"] - path = contrib/antlr4-runtime - url = https://github.com/ClickHouse-Extras/antlr4-runtime.git [submodule "contrib/sentry-native"] path = contrib/sentry-native url = https://github.com/ClickHouse-Extras/sentry-native.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 9cf8188cc8e..d23e5f540d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -184,10 +184,27 @@ endif () set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy") + +if (NOT OBJCOPY_PATH AND OS_DARWIN) + find_program (BREW_PATH NAMES "brew") + if (BREW_PATH) + execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX) + if (LLVM_PREFIX) + find_program (OBJCOPY_PATH NAMES "llvm-objcopy" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) + endif () + if (NOT OBJCOPY_PATH) + execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX) + if (BINUTILS_PREFIX) + find_program (OBJCOPY_PATH NAMES "objcopy" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) + endif () + endif () + endif () +endif () + if (OBJCOPY_PATH) - message(STATUS "Using objcopy: ${OBJCOPY_PATH}.") + message (STATUS "Using objcopy: ${OBJCOPY_PATH}") else () - message(FATAL_ERROR "Cannot find objcopy.") + message (FATAL_ERROR "Cannot find objcopy.") endif () if (OS_DARWIN) diff --git a/README.md b/README.md index 21eda470f49..496a6357f44 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,8 @@ ClickHouse® is an open-source column-oriented database management system that a * [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster. * [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. -* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-qfort0u8-TWqK4wIP0YSdoDE0btKa1w) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. +* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. - -## Upcoming Events -* [China ClickHouse Community Meetup (online)](http://hdxu.cn/rhbfZ) on 26 June 2021. 
diff --git a/base/common/DateLUT.h b/base/common/DateLUT.h index 378b4360f3b..31fc6b1e24b 100644 --- a/base/common/DateLUT.h +++ b/base/common/DateLUT.h @@ -17,7 +17,7 @@ class DateLUT : private boost::noncopyable { public: /// Return singleton DateLUTImpl instance for the default time zone. - static ALWAYS_INLINE const DateLUTImpl & instance() + static ALWAYS_INLINE const DateLUTImpl & instance() // -V1071 { const auto & date_lut = getInstance(); return *date_lut.default_impl.load(std::memory_order_acquire); diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h index 9e60181e802..2ccad4be348 100644 --- a/base/common/DateLUTImpl.h +++ b/base/common/DateLUTImpl.h @@ -119,11 +119,16 @@ private: } public: + /// We use Int64 instead of time_t because time_t is mapped to different types (long or long long) + /// on Linux and Darwin (on both of them, long and long long are 64-bit and behave identically, + /// but they are distinct types in C++, and this affects function overload resolution). + using Time = Int64; + /// The order of fields matters for alignment and sizeof. struct Values { - /// time_t at beginning of the day. - Int64 date; + /// Time at beginning of the day. + Time date; /// Properties of the day. UInt16 year; @@ -182,20 +187,20 @@ private: LUTIndex years_months_lut[DATE_LUT_YEARS * 12]; /// UTC offset at beginning of the Unix epoch. The same as unix timestamp of 1970-01-01 00:00:00 local time. - time_t offset_at_start_of_epoch; + Time offset_at_start_of_epoch; /// UTC offset at the beginning of the first supported year. - time_t offset_at_start_of_lut; + Time offset_at_start_of_lut; bool offset_is_whole_number_of_hours_during_epoch; /// Time zone name. std::string time_zone; - inline LUTIndex findIndex(time_t t) const + inline LUTIndex findIndex(Time t) const { /// First guess. - Int64 guess = (t / 86400) + daynum_offset_epoch; + Time guess = (t / 86400) + daynum_offset_epoch; - /// For negative time_t the integer division was rounded up, so the guess is offset by one. + /// For negative Time the integer division was rounded up, so the guess is offset by one. if (unlikely(t < 0)) --guess; @@ -227,7 +232,7 @@ private: return LUTIndex{static_cast<UInt32>(d + daynum_offset_epoch) & date_lut_mask}; } - inline LUTIndex toLUTIndex(time_t t) const + inline LUTIndex toLUTIndex(Time t) const { return findIndex(t); } @@ -280,7 +285,7 @@ public: /// Round down to start of Monday. template <typename DateOrTime> - inline time_t toFirstDayOfWeek(DateOrTime v) const + inline Time toFirstDayOfWeek(DateOrTime v) const { const LUTIndex i = toLUTIndex(v); return lut[i - (lut[i].day_of_week - 1)].date; } @@ -295,7 +300,7 @@ public: /// Round down to start of month. template <typename DateOrTime> - inline time_t toFirstDayOfMonth(DateOrTime v) const + inline Time toFirstDayOfMonth(DateOrTime v) const { const LUTIndex i = toLUTIndex(v); return lut[i - (lut[i].day_of_month - 1)].date; } @@ -332,13 +337,13 @@ public: } template <typename DateOrTime> - inline time_t toFirstDayOfQuarter(DateOrTime v) const + inline Time toFirstDayOfQuarter(DateOrTime v) const { return toDate(toFirstDayOfQuarterIndex(v)); } /// Round down to start of year.
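An editorial aside on the new `using Time = Int64` comment above: the overload-resolution hazard it describes is easy to demonstrate in isolation. A minimal sketch (the describe() overloads are invented for illustration and are not part of the ClickHouse sources; exact typedefs vary by platform and SDK):

#include <cstdint>
#include <cstdio>
#include <ctime>

// long and long long are both 64-bit on the relevant targets, yet they are
// distinct types for overload resolution.
void describe(long) { std::puts("long"); }
void describe(long long) { std::puts("long long"); }

int main()
{
    time_t from_clock = std::time(nullptr);
    int64_t from_lut = 0;
    describe(from_clock); // typically picks describe(long) on both glibc and Darwin
    describe(from_lut);   // describe(long) on Linux/glibc, but describe(long long) on Darwin
    return 0;
}

Because DateLUTImpl now declares all of its interfaces with the single alias Time, callers see one consistent 64-bit signature instead of a platform-dependent mix.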
- inline time_t toFirstDayOfYear(time_t t) const + inline Time toFirstDayOfYear(Time t) const { return lut[years_lut[lut[findIndex(t)].year - DATE_LUT_MIN_YEAR]].date; } @@ -355,14 +360,14 @@ return toDayNum(toFirstDayNumOfYearIndex(v)); } - inline time_t toFirstDayOfNextMonth(time_t t) const + inline Time toFirstDayOfNextMonth(Time t) const { LUTIndex index = findIndex(t); index += 32 - lut[index].day_of_month; return lut[index - (lut[index].day_of_month - 1)].date; } - inline time_t toFirstDayOfPrevMonth(time_t t) const + inline Time toFirstDayOfPrevMonth(Time t) const { LUTIndex index = findIndex(t); index -= lut[index].day_of_month; @@ -389,16 +394,16 @@ public: /** Round to start of day, then shift for specified amount of days. */ - inline time_t toDateAndShift(time_t t, Int32 days) const + inline Time toDateAndShift(Time t, Int32 days) const { return lut[findIndex(t) + days].date; } - inline time_t toTime(time_t t) const + inline Time toTime(Time t) const { const LUTIndex index = findIndex(t); - time_t res = t - lut[index].date; + Time res = t - lut[index].date; if (res >= lut[index].time_at_offset_change()) res += lut[index].amount_of_offset_change(); @@ -406,11 +411,11 @@ return res - offset_at_start_of_epoch; /// Starting at 1970-01-01 00:00:00 local time. } - inline unsigned toHour(time_t t) const + inline unsigned toHour(Time t) const { const LUTIndex index = findIndex(t); - time_t time = t - lut[index].date; + Time time = t - lut[index].date; if (time >= lut[index].time_at_offset_change()) time += lut[index].amount_of_offset_change(); @@ -426,7 +431,7 @@ * then subtract the former from the latter to get the offset result. * The boundaries where DST (daylight saving time) changes occur should be handled very carefully. */ - inline time_t timezoneOffset(time_t t) const + inline Time timezoneOffset(Time t) const { const LUTIndex index = findIndex(t); @@ -434,7 +439,7 @@ /// Because "amount_of_offset_change" is only set in the LUT entry of the day of the change, it would be costly to scan from the very beginning; /// but we can figure out all the accumulated offsets from 1970-01-01 to that day just by taking the difference between lut[].date values, /// and then directly subtract multiples of 86400 to get the real DST offset (leap seconds are not considered for now). - time_t res = (lut[index].date - lut[daynum_offset_epoch].date) % 86400; + Time res = (lut[index].date - lut[daynum_offset_epoch].date) % 86400; /// As far as we know, the maximal DST offset can't be more than 2 hours, so after the modulo operation the remainder /// sits between [-offset --> 0 --> offset], which respectively corresponds to moving the clock forward or backward. @@ -448,7 +453,7 @@ } - inline unsigned toSecond(time_t t) const + inline unsigned toSecond(Time t) const { auto res = t % 60; if (likely(res >= 0)) @@ -456,7 +461,7 @@ return res + 60; } - inline unsigned toMinute(time_t t) const + inline unsigned toMinute(Time t) const { if (t >= 0 && offset_is_whole_number_of_hours_during_epoch) return (t / 60) % 60; @@ -474,27 +479,27 @@ } /// NOTE: Assuming timezone offset is a multiple of 15 minutes.
- inline time_t toStartOfMinute(time_t t) const { return roundDown(t, 60); } - inline time_t toStartOfFiveMinute(time_t t) const { return roundDown(t, 300); } - inline time_t toStartOfFifteenMinutes(time_t t) const { return roundDown(t, 900); } + inline Time toStartOfMinute(Time t) const { return roundDown(t, 60); } + inline Time toStartOfFiveMinute(Time t) const { return roundDown(t, 300); } + inline Time toStartOfFifteenMinutes(Time t) const { return roundDown(t, 900); } - inline time_t toStartOfTenMinutes(time_t t) const + inline Time toStartOfTenMinutes(Time t) const { if (t >= 0 && offset_is_whole_number_of_hours_during_epoch) return t / 600 * 600; /// More complex logic is for Nepal - it has offset 05:45. Australia/Eucla is also unfortunate. - Int64 date = find(t).date; + Time date = find(t).date; return date + (t - date) / 600 * 600; } /// NOTE: Assuming timezone transitions are multiples of hours. Lord Howe Island in Australia is a notable exception. - inline time_t toStartOfHour(time_t t) const + inline Time toStartOfHour(Time t) const { if (t >= 0 && offset_is_whole_number_of_hours_during_epoch) return t / 3600 * 3600; - Int64 date = find(t).date; + Time date = find(t).date; return date + (t - date) / 3600 * 3600; } @@ -506,11 +511,11 @@ * because the same calendar day starts/ends at different timestamps in different time zones) */ - inline time_t fromDayNum(DayNum d) const { return lut[toLUTIndex(d)].date; } - inline time_t fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; } + inline Time fromDayNum(DayNum d) const { return lut[toLUTIndex(d)].date; } + inline Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; } template <typename DateOrTime> - inline time_t toDate(DateOrTime v) const { return lut[toLUTIndex(v)].date; } + inline Time toDate(DateOrTime v) const { return lut[toLUTIndex(v)].date; } template <typename DateOrTime> inline unsigned toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; } @@ -578,7 +583,7 @@ return toDayNum(toFirstDayNumOfISOYearIndex(v)); } - inline time_t toFirstDayOfISOYear(time_t t) const + inline Time toFirstDayOfISOYear(Time t) const { return lut[toFirstDayNumOfISOYearIndex(t)].date; } @@ -773,7 +778,7 @@ public: } /// We count all hour-length intervals, unrelated to offset changes.
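A short aside on the rounding idiom shared by toStartOfTenMinutes and toStartOfHour above: for time zones whose UTC offset is not a whole number of hours (Nepal at +05:45, Australia/Eucla at +08:45), plain division by the interval length is wrong, so the slow path measures from the local day start taken from the LUT. A self-contained sketch of the same idiom, where day_start stands in for find(t).date:

#include <cstdint>

using Time = int64_t;

// Round t down to the start of its hour. day_start is the timestamp of local
// midnight for t's day (the role played by find(t).date above).
Time toStartOfHourSketch(Time t, Time day_start, bool offset_is_whole_hours)
{
    if (t >= 0 && offset_is_whole_hours)
        return t / 3600 * 3600;                        // fast path: pure arithmetic
    return day_start + (t - day_start) / 3600 * 3600;  // anchor to local midnight first
}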
- inline time_t toRelativeHourNum(time_t t) const + inline Time toRelativeHourNum(Time t) const { if (t >= 0 && offset_is_whole_number_of_hours_during_epoch) return t / 3600; @@ -784,18 +789,18 @@ } template <typename DateOrTime> - inline time_t toRelativeHourNum(DateOrTime v) const + inline Time toRelativeHourNum(DateOrTime v) const { return toRelativeHourNum(lut[toLUTIndex(v)].date); } - inline time_t toRelativeMinuteNum(time_t t) const + inline Time toRelativeMinuteNum(Time t) const { return (t + DATE_LUT_ADD) / 60 - (DATE_LUT_ADD / 60); } template <typename DateOrTime> - inline time_t toRelativeMinuteNum(DateOrTime v) const + inline Time toRelativeMinuteNum(DateOrTime v) const { return toRelativeMinuteNum(lut[toLUTIndex(v)].date); } @@ -842,14 +847,14 @@ return ExtendedDayNum(4 + (d - 4) / days * days); } - inline time_t toStartOfDayInterval(ExtendedDayNum d, UInt64 days) const + inline Time toStartOfDayInterval(ExtendedDayNum d, UInt64 days) const { if (days == 1) return toDate(d); return lut[toLUTIndex(ExtendedDayNum(d / days * days))].date; } - inline time_t toStartOfHourInterval(time_t t, UInt64 hours) const + inline Time toStartOfHourInterval(Time t, UInt64 hours) const { if (hours == 1) return toStartOfHour(t); @@ -867,7 +872,7 @@ const LUTIndex index = findIndex(t); const Values & values = lut[index]; - time_t time = t - values.date; + Time time = t - values.date; if (time >= values.time_at_offset_change()) { /// Align to new hour numbers before rounding. @@ -892,7 +897,7 @@ return values.date + time; } - inline time_t toStartOfMinuteInterval(time_t t, UInt64 minutes) const + inline Time toStartOfMinuteInterval(Time t, UInt64 minutes) const { if (minutes == 1) return toStartOfMinute(t); @@ -909,7 +914,7 @@ return roundDown(t, seconds); } - inline time_t toStartOfSecondInterval(time_t t, UInt64 seconds) const + inline Time toStartOfSecondInterval(Time t, UInt64 seconds) const { if (seconds == 1) return t; @@ -934,14 +939,14 @@ return toDayNum(makeLUTIndex(year, month, day_of_month)); } - inline time_t makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const + inline Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const { return lut[makeLUTIndex(year, month, day_of_month)].date; } /** Does not accept daylight saving time as argument: in case of ambiguity, it chooses the greater timestamp.
*/ - inline time_t makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const + inline Time makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const { size_t index = makeLUTIndex(year, month, day_of_month); UInt32 time_offset = hour * 3600 + minute * 60 + second; @@ -969,7 +974,7 @@ return values.year * 10000 + values.month * 100 + values.day_of_month; } - inline time_t YYYYMMDDToDate(UInt32 num) const + inline Time YYYYMMDDToDate(UInt32 num) const { return makeDate(num / 10000, num / 100 % 100, num % 100); } @@ -1000,13 +1005,13 @@ TimeComponents time; }; - inline DateComponents toDateComponents(time_t t) const + inline DateComponents toDateComponents(Time t) const { const Values & values = getValues(t); return { values.year, values.month, values.day_of_month }; } - inline DateTimeComponents toDateTimeComponents(time_t t) const + inline DateTimeComponents toDateTimeComponents(Time t) const { const LUTIndex index = findIndex(t); const Values & values = lut[index]; @@ -1017,7 +1022,7 @@ res.date.month = values.month; res.date.day = values.day_of_month; - time_t time = t - values.date; + Time time = t - values.date; if (time >= values.time_at_offset_change()) time += values.amount_of_offset_change(); @@ -1042,7 +1047,7 @@ } - inline UInt64 toNumYYYYMMDDhhmmss(time_t t) const + inline UInt64 toNumYYYYMMDDhhmmss(Time t) const { DateTimeComponents components = toDateTimeComponents(t); @@ -1055,7 +1060,7 @@ + UInt64(components.date.year) * 10000000000; } - inline time_t YYYYMMDDhhmmssToTime(UInt64 num) const + inline Time YYYYMMDDhhmmssToTime(UInt64 num) const { return makeDateTime( num / 10000000000, @@ -1069,12 +1074,12 @@ /// Adding calendar intervals. /// Implementation specific behaviour when delta is too big. - inline NO_SANITIZE_UNDEFINED time_t addDays(time_t t, Int64 delta) const + inline NO_SANITIZE_UNDEFINED Time addDays(Time t, Int64 delta) const { const LUTIndex index = findIndex(t); const Values & values = lut[index]; - time_t time = t - values.date; + Time time = t - values.date; if (time >= values.time_at_offset_change()) time += values.amount_of_offset_change(); @@ -1086,7 +1091,7 @@ return lut[new_index].date + time; } - inline NO_SANITIZE_UNDEFINED time_t addWeeks(time_t t, Int64 delta) const + inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const { return addDays(t, delta * 7); } @@ -1131,14 +1136,14 @@ /// If the resulting month has fewer days than the source month, then saturation can happen. /// Example: 31 Aug + 1 month = 30 Sep. - inline time_t NO_SANITIZE_UNDEFINED addMonths(time_t t, Int64 delta) const + inline Time NO_SANITIZE_UNDEFINED addMonths(Time t, Int64 delta) const { const auto result_day = addMonthsIndex(t, delta); const LUTIndex index = findIndex(t); const Values & values = lut[index]; - time_t time = t - values.date; + Time time = t - values.date; if (time >= values.time_at_offset_change()) time += values.amount_of_offset_change(); @@ -1153,7 +1158,7 @@ return toDayNum(addMonthsIndex(d, delta)); } - inline time_t NO_SANITIZE_UNDEFINED addQuarters(time_t t, Int64 delta) const + inline Time NO_SANITIZE_UNDEFINED addQuarters(Time t, Int64 delta) const { return addMonths(t, delta * 3); } @@ -1180,14 +1185,14 @@ } /// Saturation can occur if 29 Feb is mapped to non-leap year.
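The toNumYYYYMMDDhhmmss / YYYYMMDDhhmmssToTime pair above is plain decimal positional packing; each component owns a fixed block of digits. A worked sketch of just the arithmetic, independent of the LUT:

#include <cstdint>

// 2021-06-28 00:50:15 packs to 20210628005015.
uint64_t packYYYYMMDDhhmmss(uint64_t year, uint64_t month, uint64_t day,
                            uint64_t hour, uint64_t minute, uint64_t second)
{
    return second
        + minute * 100
        + hour * 10000
        + day * 1000000
        + month * 100000000
        + year * 10000000000ULL;
}

// Mirrors the division/modulo chain in YYYYMMDDhhmmssToTime.
void unpackYYYYMMDDhhmmss(uint64_t num, uint64_t parts[6])
{
    parts[0] = num / 10000000000ULL;   // year
    parts[1] = num / 100000000 % 100;  // month
    parts[2] = num / 1000000 % 100;    // day
    parts[3] = num / 10000 % 100;      // hour
    parts[4] = num / 100 % 100;        // minute
    parts[5] = num % 100;              // second
}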
- inline time_t addYears(time_t t, Int64 delta) const + inline Time addYears(Time t, Int64 delta) const { auto result_day = addYearsIndex(t, delta); const LUTIndex index = findIndex(t); const Values & values = lut[index]; - time_t time = t - values.date; + Time time = t - values.date; if (time >= values.time_at_offset_change()) time += values.amount_of_offset_change(); @@ -1203,7 +1208,7 @@ } - inline std::string timeToString(time_t t) const + inline std::string timeToString(Time t) const { DateTimeComponents components = toDateTimeComponents(t); @@ -1228,7 +1233,7 @@ return s; } - inline std::string dateToString(time_t t) const + inline std::string dateToString(Time t) const { const Values & values = getValues(t); diff --git a/base/common/FunctorToStaticMethodAdaptor.h b/base/common/FunctorToStaticMethodAdaptor.h new file mode 100644 index 00000000000..9f55b52a79a --- /dev/null +++ b/base/common/FunctorToStaticMethodAdaptor.h @@ -0,0 +1,41 @@ +#include <functional> + +/** Adapt a functor to a static method where the functor is passed as context. + * Main use case is to convert a lambda into a function that can be passed into JIT code. + */ +template <typename Functor> +class FunctorToStaticMethodAdaptor : public FunctorToStaticMethodAdaptor<decltype(&Functor::operator())> +{ +}; + +template <typename R, typename C, typename ...Args> +class FunctorToStaticMethodAdaptor<R (C::*)(Args...)> +{ +public: + static R call(C * ptr, Args &&... arguments) + { + return std::invoke(&C::operator(), ptr, std::forward<Args>(arguments)...); + } + + static R unsafeCall(char * ptr, Args &&... arguments) + { + C * ptr_typed = reinterpret_cast<C *>(ptr); + return std::invoke(&C::operator(), ptr_typed, std::forward<Args>(arguments)...); + } +}; + +template <typename R, typename C, typename ...Args> +class FunctorToStaticMethodAdaptor<R (C::*)(Args...) const> +{ +public: + static R call(C * ptr, Args &&... arguments) + { + return std::invoke(&C::operator(), ptr, std::forward<Args>(arguments)...); + } + + static R unsafeCall(char * ptr, Args &&... arguments) + { + C * ptr_typed = reinterpret_cast<C *>(ptr); + return std::invoke(&C::operator(), ptr_typed, std::forward<Args>(arguments)...); + } +}; diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp index 7893e56d751..9c65b1dfe4c 100644 --- a/base/common/ReplxxLineReader.cpp +++ b/base/common/ReplxxLineReader.cpp @@ -1,8 +1,9 @@ #include #include -#include -#include +#include +#include +#include #include #include #include @@ -24,6 +25,94 @@ void trim(String & s) s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); } +/// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx. +/// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org) +/// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com) +/// Copyright (c) 2010, Pieter Noordhuis (pcnoordhuis at gmail dot com) +std::string replxx_now_ms_str() +{ + std::chrono::milliseconds ms(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch())); + time_t t = ms.count() / 1000; + tm broken; + if (!localtime_r(&t, &broken)) + { + return std::string(); + } + + static int const BUFF_SIZE(32); + char str[BUFF_SIZE]; + strftime(str, BUFF_SIZE, "%Y-%m-%d %H:%M:%S.", &broken); + snprintf(str + sizeof("YYYY-mm-dd HH:MM:SS"), 5, "%03d", static_cast<int>(ms.count() % 1000)); + return str; +} + +/// Convert from readline to replxx format. +/// +/// replxx requires each history line to be prepended with a time line: +/// +/// ### YYYY-MM-DD HH:MM:SS.SSS +/// select 1 +/// +/// Without those service lines it will load all lines from the history file as +/// one history line for suggestions.
If the file contains many lines, this /// will take a lot of time (getline() plus tons of reallocations). /// /// NOTE: this code uses std::ifstream/std::ofstream like the original replxx code. void convertHistoryFile(const std::string & path, replxx::Replxx & rx) { + std::ifstream in(path); + if (!in) + { + rx.print("Cannot open %s for reading (for conversion): %s\n", + path.c_str(), errnoToString(errno).c_str()); + return; + } + + std::string line; + if (!getline(in, line).good()) + { + rx.print("Cannot read from %s (for conversion): %s\n", + path.c_str(), errnoToString(errno).c_str()); + return; + } + + /// This is the marker of the date, no need to convert. + static char const REPLXX_TIMESTAMP_PATTERN[] = "### dddd-dd-dd dd:dd:dd.ddd"; + if (line.starts_with("### ") && line.size() == strlen(REPLXX_TIMESTAMP_PATTERN)) + { + return; + } + + std::vector<std::string> lines; + in.seekg(0); + while (getline(in, line).good()) + { + lines.push_back(line); + } + in.close(); + + size_t lines_size = lines.size(); + std::sort(lines.begin(), lines.end()); + lines.erase(std::unique(lines.begin(), lines.end()), lines.end()); + rx.print("The history file (%s) is in the old format. %zu lines, %zu unique lines.\n", + path.c_str(), lines_size, lines.size()); + + std::ofstream out(path); + if (!out) + { + rx.print("Cannot open %s for writing (for conversion): %s\n", + path.c_str(), errnoToString(errno).c_str()); + return; + } + + const std::string & timestamp = replxx_now_ms_str(); + for (const auto & out_line : lines) + { + out << "### " << timestamp << "\n" << out_line << std::endl; + } + out.close(); +} } ReplxxLineReader::ReplxxLineReader( @@ -47,6 +136,8 @@ ReplxxLineReader::ReplxxLineReader( } else { + convertHistoryFile(history_file_path, rx); + if (flock(history_file_fd, LOCK_SH)) { rx.print("Shared lock of history file failed: %s\n", errnoToString(errno).c_str()); diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 34de50e9f8a..49cf30d2556 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,12 @@ -# This strings autochanged from release_lib.sh: -SET(VERSION_REVISION 54452) +# These variables are autochanged by release_lib.sh: + +# NOTE: VERSION_REVISION has nothing in common with DBMS_TCP_PROTOCOL_VERSION; +# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
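Stepping back to the FunctorToStaticMethodAdaptor header added earlier in this diff: its purpose is to hand a capturing lambda to code (e.g. JIT-compiled code) that can only call a plain function pointer plus an opaque context pointer. A usage sketch, assuming the template parameters reconstructed above (the callback plumbing here is invented for illustration):

#include <cstdio>

int main()
{
    int base = 10;
    auto functor = [base](int x) { return base + x; };

    using Adaptor = FunctorToStaticMethodAdaptor<decltype(functor)>;

    // A C-style callback pair: static method plus untyped context pointer.
    int (*callback)(char *, int &&) = Adaptor::unsafeCall;
    char * context = reinterpret_cast<char *>(&functor);

    std::printf("%d\n", callback(context, 32)); // prints 42
    return 0;
}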
+SET(VERSION_REVISION 54453) SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 7) +SET(VERSION_MINOR 8) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 976ccc2e908ac3bc28f763bfea8134ea0a121b40) -SET(VERSION_DESCRIBE v21.7.1.1-prestable) -SET(VERSION_STRING 21.7.1.1) +SET(VERSION_GITHASH fb895056568e26200629c7d19626e92d2dedc70d) +SET(VERSION_DESCRIBE v21.8.1.1-prestable) +SET(VERSION_STRING 21.8.1.1) # end of autochange diff --git a/cmake/embed_binary.cmake b/cmake/embed_binary.cmake index d520de1ab6f..d15962c05d4 100644 --- a/cmake/embed_binary.cmake +++ b/cmake/embed_binary.cmake @@ -33,44 +33,25 @@ macro(clickhouse_embed_binaries) message(FATAL_ERROR "The list of binary resources to embed may not be empty") endif() - # If cross-compiling, ensure we use the toolchain file and target the - # actual target architecture - if (CMAKE_CROSSCOMPILING) - set(CROSS_COMPILE_FLAGS "--target=${CMAKE_C_COMPILER_TARGET} --gcc-toolchain=${TOOLCHAIN_FILE}") - else() - set(CROSS_COMPILE_FLAGS "") - endif() + add_library("${EMBED_TARGET}" STATIC) + set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C) set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in") - set(RESOURCE_OBJS) - foreach(RESOURCE_FILE ${EMBED_RESOURCES}) - set(RESOURCE_OBJ "${RESOURCE_FILE}.o") - list(APPEND RESOURCE_OBJS "${RESOURCE_OBJ}") - # Normalize the name of the resource + foreach(RESOURCE_FILE ${EMBED_RESOURCES}) + set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S") set(BINARY_FILE_NAME "${RESOURCE_FILE}") + + # Normalize the name of the resource. string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}") - set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S") - # Put the configured assembly file in the output directory. - # This is so we can clean it up as usual, and we CD to the - # source directory before compiling, so that the assembly - # `.incbin` directive can find the file. + # Generate the configured assembly file in the output directory. configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY) - # Generate the output object file by compiling the assembly, in the directory of - # the sources so that the resource file may also be found - add_custom_command( - OUTPUT ${RESOURCE_OBJ} - COMMAND cd "${EMBED_RESOURCE_DIR}" && - ${CMAKE_C_COMPILER} "${CROSS_COMPILE_FLAGS}" -c -o - "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" - "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" - ) - set_source_files_properties("${RESOURCE_OBJ}" PROPERTIES EXTERNAL_OBJECT true GENERATED true) - endforeach() + # Set the include directory for relative paths specified for `.incbin` directive. 
+ set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}") - add_library("${EMBED_TARGET}" STATIC ${RESOURCE_OBJS}) - set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C) + target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}") + endforeach() endmacro() diff --git a/cmake/linux/toolchain-aarch64.cmake b/cmake/linux/toolchain-aarch64.cmake index 7bda3484101..e3924fdc537 100644 --- a/cmake/linux/toolchain-aarch64.cmake +++ b/cmake/linux/toolchain-aarch64.cmake @@ -4,7 +4,6 @@ set (CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu") set (CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu") set (CMAKE_ASM_COMPILER_TARGET "aarch64-linux-gnu") set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/aarch64-linux-gnu/libc") -get_filename_component (TOOLCHAIN_FILE "${CMAKE_TOOLCHAIN_FILE}" REALPATH) # We don't use compiler from toolchain because it's gcc-8, and we provide support only for gcc-9. set (CMAKE_AR "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/bin/aarch64-linux-gnu-ar" CACHE FILEPATH "" FORCE) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 710c8c7fca5..164692fb893 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -34,7 +34,6 @@ endif() set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) add_subdirectory (abseil-cpp-cmake) -add_subdirectory (antlr4-runtime-cmake) add_subdirectory (boost-cmake) add_subdirectory (cctz-cmake) add_subdirectory (consistent-hashing) diff --git a/contrib/antlr4-runtime b/contrib/antlr4-runtime deleted file mode 160000 index 672643e9a42..00000000000 --- a/contrib/antlr4-runtime +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 672643e9a427ef803abf13bc8cb4989606553d64 diff --git a/contrib/antlr4-runtime-cmake/CMakeLists.txt b/contrib/antlr4-runtime-cmake/CMakeLists.txt deleted file mode 100644 index 4f639a33ebf..00000000000 --- a/contrib/antlr4-runtime-cmake/CMakeLists.txt +++ /dev/null @@ -1,156 +0,0 @@ -set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/antlr4-runtime") - -set (SRCS - "${LIBRARY_DIR}/ANTLRErrorListener.cpp" - "${LIBRARY_DIR}/ANTLRErrorStrategy.cpp" - "${LIBRARY_DIR}/ANTLRFileStream.cpp" - "${LIBRARY_DIR}/ANTLRInputStream.cpp" - "${LIBRARY_DIR}/atn/AbstractPredicateTransition.cpp" - "${LIBRARY_DIR}/atn/ActionTransition.cpp" - "${LIBRARY_DIR}/atn/AmbiguityInfo.cpp" - "${LIBRARY_DIR}/atn/ArrayPredictionContext.cpp" - "${LIBRARY_DIR}/atn/ATN.cpp" - "${LIBRARY_DIR}/atn/ATNConfig.cpp" - "${LIBRARY_DIR}/atn/ATNConfigSet.cpp" - "${LIBRARY_DIR}/atn/ATNDeserializationOptions.cpp" - "${LIBRARY_DIR}/atn/ATNDeserializer.cpp" - "${LIBRARY_DIR}/atn/ATNSerializer.cpp" - "${LIBRARY_DIR}/atn/ATNSimulator.cpp" - "${LIBRARY_DIR}/atn/ATNState.cpp" - "${LIBRARY_DIR}/atn/AtomTransition.cpp" - "${LIBRARY_DIR}/atn/BasicBlockStartState.cpp" - "${LIBRARY_DIR}/atn/BasicState.cpp" - "${LIBRARY_DIR}/atn/BlockEndState.cpp" - "${LIBRARY_DIR}/atn/BlockStartState.cpp" - "${LIBRARY_DIR}/atn/ContextSensitivityInfo.cpp" - "${LIBRARY_DIR}/atn/DecisionEventInfo.cpp" - "${LIBRARY_DIR}/atn/DecisionInfo.cpp" - "${LIBRARY_DIR}/atn/DecisionState.cpp" - "${LIBRARY_DIR}/atn/EmptyPredictionContext.cpp" - "${LIBRARY_DIR}/atn/EpsilonTransition.cpp" - "${LIBRARY_DIR}/atn/ErrorInfo.cpp" - "${LIBRARY_DIR}/atn/LexerAction.cpp" - "${LIBRARY_DIR}/atn/LexerActionExecutor.cpp" - "${LIBRARY_DIR}/atn/LexerATNConfig.cpp" - "${LIBRARY_DIR}/atn/LexerATNSimulator.cpp" - "${LIBRARY_DIR}/atn/LexerChannelAction.cpp" - 
"${LIBRARY_DIR}/atn/LexerCustomAction.cpp" - "${LIBRARY_DIR}/atn/LexerIndexedCustomAction.cpp" - "${LIBRARY_DIR}/atn/LexerModeAction.cpp" - "${LIBRARY_DIR}/atn/LexerMoreAction.cpp" - "${LIBRARY_DIR}/atn/LexerPopModeAction.cpp" - "${LIBRARY_DIR}/atn/LexerPushModeAction.cpp" - "${LIBRARY_DIR}/atn/LexerSkipAction.cpp" - "${LIBRARY_DIR}/atn/LexerTypeAction.cpp" - "${LIBRARY_DIR}/atn/LL1Analyzer.cpp" - "${LIBRARY_DIR}/atn/LookaheadEventInfo.cpp" - "${LIBRARY_DIR}/atn/LoopEndState.cpp" - "${LIBRARY_DIR}/atn/NotSetTransition.cpp" - "${LIBRARY_DIR}/atn/OrderedATNConfigSet.cpp" - "${LIBRARY_DIR}/atn/ParseInfo.cpp" - "${LIBRARY_DIR}/atn/ParserATNSimulator.cpp" - "${LIBRARY_DIR}/atn/PlusBlockStartState.cpp" - "${LIBRARY_DIR}/atn/PlusLoopbackState.cpp" - "${LIBRARY_DIR}/atn/PrecedencePredicateTransition.cpp" - "${LIBRARY_DIR}/atn/PredicateEvalInfo.cpp" - "${LIBRARY_DIR}/atn/PredicateTransition.cpp" - "${LIBRARY_DIR}/atn/PredictionContext.cpp" - "${LIBRARY_DIR}/atn/PredictionMode.cpp" - "${LIBRARY_DIR}/atn/ProfilingATNSimulator.cpp" - "${LIBRARY_DIR}/atn/RangeTransition.cpp" - "${LIBRARY_DIR}/atn/RuleStartState.cpp" - "${LIBRARY_DIR}/atn/RuleStopState.cpp" - "${LIBRARY_DIR}/atn/RuleTransition.cpp" - "${LIBRARY_DIR}/atn/SemanticContext.cpp" - "${LIBRARY_DIR}/atn/SetTransition.cpp" - "${LIBRARY_DIR}/atn/SingletonPredictionContext.cpp" - "${LIBRARY_DIR}/atn/StarBlockStartState.cpp" - "${LIBRARY_DIR}/atn/StarLoopbackState.cpp" - "${LIBRARY_DIR}/atn/StarLoopEntryState.cpp" - "${LIBRARY_DIR}/atn/TokensStartState.cpp" - "${LIBRARY_DIR}/atn/Transition.cpp" - "${LIBRARY_DIR}/atn/WildcardTransition.cpp" - "${LIBRARY_DIR}/BailErrorStrategy.cpp" - "${LIBRARY_DIR}/BaseErrorListener.cpp" - "${LIBRARY_DIR}/BufferedTokenStream.cpp" - "${LIBRARY_DIR}/CharStream.cpp" - "${LIBRARY_DIR}/CommonToken.cpp" - "${LIBRARY_DIR}/CommonTokenFactory.cpp" - "${LIBRARY_DIR}/CommonTokenStream.cpp" - "${LIBRARY_DIR}/ConsoleErrorListener.cpp" - "${LIBRARY_DIR}/DefaultErrorStrategy.cpp" - "${LIBRARY_DIR}/dfa/DFA.cpp" - "${LIBRARY_DIR}/dfa/DFASerializer.cpp" - "${LIBRARY_DIR}/dfa/DFAState.cpp" - "${LIBRARY_DIR}/dfa/LexerDFASerializer.cpp" - "${LIBRARY_DIR}/DiagnosticErrorListener.cpp" - "${LIBRARY_DIR}/Exceptions.cpp" - "${LIBRARY_DIR}/FailedPredicateException.cpp" - "${LIBRARY_DIR}/InputMismatchException.cpp" - "${LIBRARY_DIR}/InterpreterRuleContext.cpp" - "${LIBRARY_DIR}/IntStream.cpp" - "${LIBRARY_DIR}/Lexer.cpp" - "${LIBRARY_DIR}/LexerInterpreter.cpp" - "${LIBRARY_DIR}/LexerNoViableAltException.cpp" - "${LIBRARY_DIR}/ListTokenSource.cpp" - "${LIBRARY_DIR}/misc/InterpreterDataReader.cpp" - "${LIBRARY_DIR}/misc/Interval.cpp" - "${LIBRARY_DIR}/misc/IntervalSet.cpp" - "${LIBRARY_DIR}/misc/MurmurHash.cpp" - "${LIBRARY_DIR}/misc/Predicate.cpp" - "${LIBRARY_DIR}/NoViableAltException.cpp" - "${LIBRARY_DIR}/Parser.cpp" - "${LIBRARY_DIR}/ParserInterpreter.cpp" - "${LIBRARY_DIR}/ParserRuleContext.cpp" - "${LIBRARY_DIR}/ProxyErrorListener.cpp" - "${LIBRARY_DIR}/RecognitionException.cpp" - "${LIBRARY_DIR}/Recognizer.cpp" - "${LIBRARY_DIR}/RuleContext.cpp" - "${LIBRARY_DIR}/RuleContextWithAltNum.cpp" - "${LIBRARY_DIR}/RuntimeMetaData.cpp" - "${LIBRARY_DIR}/support/Any.cpp" - "${LIBRARY_DIR}/support/Arrays.cpp" - "${LIBRARY_DIR}/support/CPPUtils.cpp" - "${LIBRARY_DIR}/support/guid.cpp" - "${LIBRARY_DIR}/support/StringUtils.cpp" - "${LIBRARY_DIR}/Token.cpp" - "${LIBRARY_DIR}/TokenSource.cpp" - "${LIBRARY_DIR}/TokenStream.cpp" - "${LIBRARY_DIR}/TokenStreamRewriter.cpp" - "${LIBRARY_DIR}/tree/ErrorNode.cpp" - "${LIBRARY_DIR}/tree/ErrorNodeImpl.cpp" - 
"${LIBRARY_DIR}/tree/IterativeParseTreeWalker.cpp" - "${LIBRARY_DIR}/tree/ParseTree.cpp" - "${LIBRARY_DIR}/tree/ParseTreeListener.cpp" - "${LIBRARY_DIR}/tree/ParseTreeVisitor.cpp" - "${LIBRARY_DIR}/tree/ParseTreeWalker.cpp" - "${LIBRARY_DIR}/tree/pattern/Chunk.cpp" - "${LIBRARY_DIR}/tree/pattern/ParseTreeMatch.cpp" - "${LIBRARY_DIR}/tree/pattern/ParseTreePattern.cpp" - "${LIBRARY_DIR}/tree/pattern/ParseTreePatternMatcher.cpp" - "${LIBRARY_DIR}/tree/pattern/RuleTagToken.cpp" - "${LIBRARY_DIR}/tree/pattern/TagChunk.cpp" - "${LIBRARY_DIR}/tree/pattern/TextChunk.cpp" - "${LIBRARY_DIR}/tree/pattern/TokenTagToken.cpp" - "${LIBRARY_DIR}/tree/TerminalNode.cpp" - "${LIBRARY_DIR}/tree/TerminalNodeImpl.cpp" - "${LIBRARY_DIR}/tree/Trees.cpp" - "${LIBRARY_DIR}/tree/xpath/XPath.cpp" - "${LIBRARY_DIR}/tree/xpath/XPathElement.cpp" - "${LIBRARY_DIR}/tree/xpath/XPathLexer.cpp" - "${LIBRARY_DIR}/tree/xpath/XPathLexerErrorListener.cpp" - "${LIBRARY_DIR}/tree/xpath/XPathRuleAnywhereElement.cpp" - "${LIBRARY_DIR}/tree/xpath/XPathRuleElement.cpp" - "${LIBRARY_DIR}/tree/xpath/XPathTokenAnywhereElement.cpp" - "${LIBRARY_DIR}/tree/xpath/XPathTokenElement.cpp" - "${LIBRARY_DIR}/tree/xpath/XPathWildcardAnywhereElement.cpp" - "${LIBRARY_DIR}/tree/xpath/XPathWildcardElement.cpp" - "${LIBRARY_DIR}/UnbufferedCharStream.cpp" - "${LIBRARY_DIR}/UnbufferedTokenStream.cpp" - "${LIBRARY_DIR}/Vocabulary.cpp" - "${LIBRARY_DIR}/WritableToken.cpp" -) - -add_library (antlr4-runtime ${SRCS}) - -target_include_directories (antlr4-runtime SYSTEM PUBLIC ${LIBRARY_DIR}) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 96e2af5fb03..d6697fd5d78 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -26,7 +26,7 @@ if (NOT USE_INTERNAL_CCTZ_LIBRARY) set_property (TARGET cctz PROPERTY IMPORTED_LOCATION ${LIBRARY_CCTZ}) set_property (TARGET cctz PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_CCTZ}) endif() - + set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") diff --git a/contrib/libpq b/contrib/libpq index c7624588ddd..e071ea570f8 160000 --- a/contrib/libpq +++ b/contrib/libpq @@ -1 +1 @@ -Subproject commit c7624588ddd84f153dd5990e81b886e4568bddde +Subproject commit e071ea570f8985aa00e34f5b9d50a3cfe666327e diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 028fabe52b8..4f6a1554d10 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ b/contrib/libpq-cmake/CMakeLists.txt @@ -8,7 +8,7 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/fe-lobj.c" "${LIBPQ_SOURCE_DIR}/fe-misc.c" "${LIBPQ_SOURCE_DIR}/fe-print.c" - "${LIBPQ_SOURCE_DIR}/fe-protocol2.c" + "${LIBPQ_SOURCE_DIR}/fe-trace.c" "${LIBPQ_SOURCE_DIR}/fe-protocol3.c" "${LIBPQ_SOURCE_DIR}/fe-secure.c" "${LIBPQ_SOURCE_DIR}/fe-secure-common.c" @@ -18,8 +18,12 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/pqexpbuffer.c" "${LIBPQ_SOURCE_DIR}/common/scram-common.c" - "${LIBPQ_SOURCE_DIR}/common/sha2_openssl.c" + "${LIBPQ_SOURCE_DIR}/common/sha2.c" + "${LIBPQ_SOURCE_DIR}/common/sha1.c" "${LIBPQ_SOURCE_DIR}/common/md5.c" + "${LIBPQ_SOURCE_DIR}/common/md5_common.c" + "${LIBPQ_SOURCE_DIR}/common/hmac_openssl.c" + "${LIBPQ_SOURCE_DIR}/common/cryptohash.c" "${LIBPQ_SOURCE_DIR}/common/saslprep.c" "${LIBPQ_SOURCE_DIR}/common/unicode_norm.c" "${LIBPQ_SOURCE_DIR}/common/ip.c" diff --git a/contrib/libunwind b/contrib/libunwind 
index a491c27b331..6b816d2fba3 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit a491c27b33109a842d577c0f7ac5f5f218859181 +Subproject commit 6b816d2fba3991f8fd6aaec17d92f68947eab667 diff --git a/contrib/murmurhash/CMakeLists.txt b/contrib/murmurhash/CMakeLists.txt index c5e467a2d6d..2d9cb3e6382 100644 --- a/contrib/murmurhash/CMakeLists.txt +++ b/contrib/murmurhash/CMakeLists.txt @@ -1,7 +1,7 @@ add_library(murmurhash - src/murmurhash2.cpp - src/murmurhash3.cpp - include/murmurhash2.h - include/murmurhash3.h) + src/MurmurHash2.cpp + src/MurmurHash3.cpp + include/MurmurHash2.h + include/MurmurHash3.h) target_include_directories (murmurhash PUBLIC include) diff --git a/contrib/murmurhash/include/MurmurHash2.h b/contrib/murmurhash/include/MurmurHash2.h new file mode 100644 index 00000000000..22e47f5c8e9 --- /dev/null +++ b/contrib/murmurhash/include/MurmurHash2.h @@ -0,0 +1,49 @@ +//----------------------------------------------------------------------------- +// MurmurHash2 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#ifndef MURMURHASH2_H +#define MURMURHASH2_H + +#include <stddef.h> + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) && (_MSC_VER < 1600) + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include <stdint.h> + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +#ifdef __cplusplus +extern "C" { +#endif + +uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed ); +uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed ); +uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed ); +uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed ); +uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed ); +uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed ); + +#ifdef __cplusplus +} +#endif + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH2_H_ + diff --git a/contrib/murmurhash/include/murmurhash3.h b/contrib/murmurhash/include/MurmurHash3.h similarity index 70% rename from contrib/murmurhash/include/murmurhash3.h rename to contrib/murmurhash/include/MurmurHash3.h index eb16425576a..e9db8f1e878 100644 --- a/contrib/murmurhash/include/murmurhash3.h +++ b/contrib/murmurhash/include/MurmurHash3.h @@ -2,7 +2,10 @@ // MurmurHash3 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code.
-#pragma once +#ifndef MURMURHASH3_H +#define MURMURHASH3_H + +#include <stddef.h> //----------------------------------------------------------------------------- // Platform-specific functions and macros @@ -23,20 +26,22 @@ typedef unsigned __int64 uint64_t; #endif // !defined(_MSC_VER) +//----------------------------------------------------------------------------- + #ifdef __cplusplus extern "C" { #endif -//----------------------------------------------------------------------------- +void MurmurHash3_x86_32 ( const void * key, size_t len, uint32_t seed, void * out ); -void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); +void MurmurHash3_x86_128 ( const void * key, size_t len, uint32_t seed, void * out ); -void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); - -void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); - -//----------------------------------------------------------------------------- +void MurmurHash3_x64_128 ( const void * key, size_t len, uint32_t seed, void * out ); #ifdef __cplusplus } #endif + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH3_H_ diff --git a/contrib/murmurhash/include/murmurhash2.h b/contrib/murmurhash/include/murmurhash2.h deleted file mode 100644 index 0fc95ef1c42..00000000000 --- a/contrib/murmurhash/include/murmurhash2.h +++ /dev/null @@ -1,31 +0,0 @@ -//----------------------------------------------------------------------------- -// MurmurHash2 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. - -#pragma once - -//----------------------------------------------------------------------------- -// Platform-specific functions and macros - -// Microsoft Visual Studio - -#if defined(_MSC_VER) && (_MSC_VER < 1600) - -typedef unsigned char uint8_t; -typedef unsigned int uint32_t; -typedef unsigned __int64 uint64_t; - -// Other compilers - -#else // defined(_MSC_VER) - -#include <stdint.h> - -#endif // !defined(_MSC_VER) - -uint32_t MurmurHash2 (const void * key, int len, uint32_t seed); -uint64_t MurmurHash64A (const void * key, int len, uint64_t seed); -uint64_t MurmurHash64B (const void * key, int len, uint64_t seed); -uint32_t MurmurHash2A (const void * key, int len, uint32_t seed); -uint32_t MurmurHashNeutral2 (const void * key, int len, uint32_t seed); -uint32_t MurmurHashAligned2 (const void * key, int len, uint32_t seed); diff --git a/contrib/murmurhash/src/MurmurHash2.cpp b/contrib/murmurhash/src/MurmurHash2.cpp new file mode 100644 index 00000000000..1c4469b0a02 --- /dev/null +++ b/contrib/murmurhash/src/MurmurHash2.cpp @@ -0,0 +1,523 @@ +//----------------------------------------------------------------------------- +// MurmurHash2 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - This code makes a few assumptions about how your machine behaves - + +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 + +// And it has a few limitations - + +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines.
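One practical consequence of the vendored rewrite above: besides the header renames, every prototype now takes size_t rather than int for the length. A small caller sketch of why that matters:

#include <cstdint>
#include <string_view>
#include "MurmurHash2.h"

// key.size() is a size_t; with the old `int len` prototypes this call involved
// a narrowing conversion and silently truncated inputs larger than 2 GiB.
uint64_t hashKey(std::string_view key, uint64_t seed)
{
    return MurmurHash64A(key.data(), key.size(), seed);
}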
+ +#include "MurmurHash2.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const uint32_t m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + uint32_t k = *(uint32_t*)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +//----------------------------------------------------------------------------- +// MurmurHash2, 64-bit versions, by Austin Appleby + +// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment +// and endian-ness issues if used across multiple platforms. + +// 64-bit hash for 64-bit platforms + +uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed ) +{ + const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t * data = (const uint64_t *)key; + const uint64_t * end = data + (len/8); + + while(data != end) + { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char * data2 = (const unsigned char*)data; + + switch(len & 7) + { + case 7: h ^= uint64_t(data2[6]) << 48; + case 6: h ^= uint64_t(data2[5]) << 40; + case 5: h ^= uint64_t(data2[4]) << 32; + case 4: h ^= uint64_t(data2[3]) << 24; + case 3: h ^= uint64_t(data2[2]) << 16; + case 2: h ^= uint64_t(data2[1]) << 8; + case 1: h ^= uint64_t(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + + +// 64-bit hash for 32-bit platforms + +uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + + uint32_t h1 = uint32_t(seed) ^ len; + uint32_t h2 = uint32_t(seed >> 32); + + const uint32_t * data = (const uint32_t *)key; + + while(len >= 8) + { + uint32_t k1 = *data++; + k1 *= m; k1 ^= k1 >> r; k1 *= m; + h1 *= m; h1 ^= k1; + len -= 4; + + uint32_t k2 = *data++; + k2 *= m; k2 ^= k2 >> r; k2 *= m; + h2 *= m; h2 ^= k2; + len -= 4; + } + + if(len >= 4) + { + uint32_t k1 = *data++; + k1 *= m; k1 ^= k1 >> r; k1 *= m; + h1 *= m; h1 ^= k1; + len -= 4; + } + + switch(len) + { + case 3: h2 ^= ((unsigned char*)data)[2] << 16; + case 2: h2 ^= ((unsigned char*)data)[1] << 8; + case 1: h2 ^= ((unsigned char*)data)[0]; + h2 *= m; + }; + + h1 ^= h2 >> 18; h1 *= m; + h2 ^= h1 >> 22; h2 *= m; + h1 ^= h2 >> 17; h1 *= m; + h2 ^= h1 >> 19; h2 *= m; + + uint64_t h = h1; + + h = (h << 32) | h2; + + return h; +} + 
+//----------------------------------------------------------------------------- +// MurmurHash2A, by Austin Appleby + +// This is a variant of MurmurHash2 modified to use the Merkle-Damgard +// construction. Bulk speed should be identical to Murmur2, small-key speed +// will be 10%-20% slower due to the added overhead at the end of the hash. + +// This variant fixes a minor issue where null keys were more likely to +// collide with each other than expected, and also makes the function +// more amenable to incremental implementations. + +#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + +uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + uint32_t l = len; + + const unsigned char * data = (const unsigned char *)key; + + uint32_t h = seed; + + while(len >= 4) + { + uint32_t k = *(uint32_t*)data; + + mmix(h,k); + + data += 4; + len -= 4; + } + + uint32_t t = 0; + + switch(len) + { + case 3: t ^= data[2] << 16; + case 2: t ^= data[1] << 8; + case 1: t ^= data[0]; + }; + + mmix(h,t); + mmix(h,l); + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +//----------------------------------------------------------------------------- +// CMurmurHash2A, by Austin Appleby + +// This is a sample implementation of MurmurHash2A designed to work +// incrementally. + +// Usage - + +// CMurmurHash2A hasher +// hasher.Begin(seed); +// hasher.Add(data1,size1); +// hasher.Add(data2,size2); +// ... +// hasher.Add(dataN,sizeN); +// uint32_t hash = hasher.End() + +class CMurmurHash2A +{ +public: + + void Begin ( uint32_t seed = 0 ) + { + m_hash = seed; + m_tail = 0; + m_count = 0; + m_size = 0; + } + + void Add ( const unsigned char * data, size_t len ) + { + m_size += len; + + MixTail(data,len); + + while(len >= 4) + { + uint32_t k = *(uint32_t*)data; + + mmix(m_hash,k); + + data += 4; + len -= 4; + } + + MixTail(data,len); + } + + uint32_t End ( void ) + { + mmix(m_hash,m_tail); + mmix(m_hash,m_size); + + m_hash ^= m_hash >> 13; + m_hash *= m; + m_hash ^= m_hash >> 15; + + return m_hash; + } + +private: + + static const uint32_t m = 0x5bd1e995; + static const int r = 24; + + void MixTail ( const unsigned char * & data, size_t & len ) + { + while( len && ((len<4) || m_count) ) + { + m_tail |= (*data++) << (m_count * 8); + + m_count++; + len--; + + if(m_count == 4) + { + mmix(m_hash,m_tail); + m_tail = 0; + m_count = 0; + } + } + } + + uint32_t m_hash; + uint32_t m_tail; + uint32_t m_count; + uint32_t m_size; +}; + +//----------------------------------------------------------------------------- +// MurmurHashNeutral2, by Austin Appleby + +// Same as MurmurHash2, but endian- and alignment-neutral. +// Half the speed though, alas. 
+ +uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + + uint32_t h = seed ^ len; + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + uint32_t k; + + k = data[0]; + k |= data[1] << 8; + k |= data[2] << 16; + k |= data[3] << 24; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +//----------------------------------------------------------------------------- +// MurmurHashAligned2, by Austin Appleby + +// Same algorithm as MurmurHash2, but only does aligned reads - should be safer +// on certain platforms. + +// Performance will be lower than MurmurHash2 + +#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + + +uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + + const unsigned char * data = (const unsigned char *)key; + + uint32_t h = seed ^ len; + + size_t align = (uint64_t)data & 3; + + if(align && (len >= 4)) + { + // Pre-load the temp registers + + uint32_t t = 0, d = 0; + + switch(align) + { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + int sl = 8 * (4-align); + int sr = 8 * align; + + // Mix + + while(len >= 4) + { + d = *(uint32_t *)data; + t = (t >> sr) | (d << sl); + + uint32_t k = t; + + MIX(h,k,m); + + t = d; + + data += 4; + len -= 4; + } + + // Handle leftover data in temp registers + + d = 0; + + if(len >= align) + { + switch(align) + { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + } + + uint32_t k = (t >> sr) | (d << sl); + MIX(h,k,m); + + data += align; + len -= align; + + //---------- + // Handle tail bytes + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + } + else + { + switch(len) + { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: h ^= (t >> sr) | (d << sl); + h *= m; + } + } + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } + else + { + while(len >= 4) + { + uint32_t k = *(uint32_t *)data; + + MIX(h,k,m); + + data += 4; + len -= 4; + } + + //---------- + // Handle tail bytes + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } +} + +//----------------------------------------------------------------------------- + diff --git a/contrib/murmurhash/src/murmurhash3.cpp b/contrib/murmurhash/src/MurmurHash3.cpp similarity index 95% rename from contrib/murmurhash/src/murmurhash3.cpp rename to contrib/murmurhash/src/MurmurHash3.cpp index d6062340d03..cf5158e97ad 100644 --- a/contrib/murmurhash/src/murmurhash3.cpp +++ b/contrib/murmurhash/src/MurmurHash3.cpp @@ -1,3 +1,4 @@ +//----------------------------------------------------------------------------- // MurmurHash3 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code. @@ -6,8 +7,8 @@ // compile and run any of them on any platform, but your performance with the // non-native version will be less than optimal. 
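The MurmurHash3 sources that follow get the same int-to-size_t treatment. The 128-bit variants deliver their result through the out parameter; a minimal caller sketch:

#include <cstddef>
#include <cstdint>
#include "MurmurHash3.h"

// MurmurHash3_x64_128 writes 16 bytes behind `out`; two uint64_t halves are a
// convenient way to receive them.
void hash128(const void * data, size_t size, uint32_t seed, uint64_t out[2])
{
    MurmurHash3_x64_128(data, size, seed, out);
}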
-#include "murmurhash3.h" -#include +#include "MurmurHash3.h" +#include //----------------------------------------------------------------------------- // Platform-specific functions and macros @@ -93,7 +94,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k ) //----------------------------------------------------------------------------- -void MurmurHash3_x86_32 ( const void * key, int len, +void MurmurHash3_x86_32 ( const void * key, size_t len, uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; @@ -149,7 +150,7 @@ void MurmurHash3_x86_32 ( const void * key, int len, //----------------------------------------------------------------------------- -void MurmurHash3_x86_128 ( const void * key, const int len, +void MurmurHash3_x86_128 ( const void * key, const size_t len, uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; @@ -254,7 +255,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len, //----------------------------------------------------------------------------- -void MurmurHash3_x64_128 ( const void * key, const int len, +void MurmurHash3_x64_128 ( const void * key, const size_t len, const uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; @@ -332,3 +333,6 @@ void MurmurHash3_x64_128 ( const void * key, const int len, ((uint64_t*)out)[0] = h1; ((uint64_t*)out)[1] = h2; } + +//----------------------------------------------------------------------------- + diff --git a/contrib/murmurhash/src/murmurhash2.cpp b/contrib/murmurhash/src/murmurhash2.cpp deleted file mode 100644 index 7b659f50b4c..00000000000 --- a/contrib/murmurhash/src/murmurhash2.cpp +++ /dev/null @@ -1,423 +0,0 @@ -// MurmurHash2 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. - -// Note - This code makes a few assumptions about how your machine behaves - - -// 1. We can read a 4-byte value from any address without crashing -// 2. sizeof(int) == 4 - -// And it has a few limitations - - -// 1. It will not work incrementally. -// 2. It will not produce the same results on little-endian and big-endian -// machines. - -#include "murmurhash2.h" -#include - -// Platform-specific functions and macros -// Microsoft Visual Studio - -#if defined(_MSC_VER) - -#define BIG_CONSTANT(x) (x) - -// Other compilers - -#else // defined(_MSC_VER) - -#define BIG_CONSTANT(x) (x##LLU) - -#endif // !defined(_MSC_VER) - - -uint32_t MurmurHash2(const void * key, int len, uint32_t seed) -{ - // 'm' and 'r' are mixing constants generated offline. - // They're not really 'magic', they just happen to work well. - - const uint32_t m = 0x5bd1e995; - const int r = 24; - - // Initialize the hash to a 'random' value - - uint32_t h = seed ^ len; - - // Mix 4 bytes at a time into the hash - - const unsigned char * data = reinterpret_cast(key); - - while (len >= 4) - { - uint32_t k; - memcpy(&k, data, sizeof(k)); - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - // Handle the last few bytes of the input array - - switch (len) - { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; - h *= m; - }; - - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. 
- - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; -} - -// MurmurHash2, 64-bit versions, by Austin Appleby - -// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment -// and endian-ness issues if used across multiple platforms. - -// 64-bit hash for 64-bit platforms - -uint64_t MurmurHash64A(const void * key, int len, uint64_t seed) -{ - const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); - const int r = 47; - - uint64_t h = seed ^ (len * m); - - const uint64_t * data = reinterpret_cast(key); - const uint64_t * end = data + (len/8); - - while (data != end) - { - uint64_t k = *data++; - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - } - - const unsigned char * data2 = reinterpret_cast(data); - - switch (len & 7) - { - case 7: h ^= static_cast(data2[6]) << 48; - case 6: h ^= static_cast(data2[5]) << 40; - case 5: h ^= static_cast(data2[4]) << 32; - case 4: h ^= static_cast(data2[3]) << 24; - case 3: h ^= static_cast(data2[2]) << 16; - case 2: h ^= static_cast(data2[1]) << 8; - case 1: h ^= static_cast(data2[0]); - h *= m; - }; - - h ^= h >> r; - h *= m; - h ^= h >> r; - - return h; -} - - -// 64-bit hash for 32-bit platforms - -uint64_t MurmurHash64B(const void * key, int len, uint64_t seed) -{ - const uint32_t m = 0x5bd1e995; - const int r = 24; - - uint32_t h1 = static_cast(seed) ^ len; - uint32_t h2 = static_cast(seed >> 32); - - const uint32_t * data = reinterpret_cast(key); - - while (len >= 8) - { - uint32_t k1 = *data++; - k1 *= m; k1 ^= k1 >> r; k1 *= m; - h1 *= m; h1 ^= k1; - len -= 4; - - uint32_t k2 = *data++; - k2 *= m; k2 ^= k2 >> r; k2 *= m; - h2 *= m; h2 ^= k2; - len -= 4; - } - - if (len >= 4) - { - uint32_t k1 = *data++; - k1 *= m; k1 ^= k1 >> r; k1 *= m; - h1 *= m; h1 ^= k1; - len -= 4; - } - - switch (len) - { - case 3: h2 ^= reinterpret_cast(data)[2] << 16; - case 2: h2 ^= reinterpret_cast(data)[1] << 8; - case 1: h2 ^= reinterpret_cast(data)[0]; - h2 *= m; - }; - - h1 ^= h2 >> 18; h1 *= m; - h2 ^= h1 >> 22; h2 *= m; - h1 ^= h2 >> 17; h1 *= m; - h2 ^= h1 >> 19; h2 *= m; - - uint64_t h = h1; - - h = (h << 32) | h2; - - return h; -} - -// MurmurHash2A, by Austin Appleby - -// This is a variant of MurmurHash2 modified to use the Merkle-Damgard -// construction. Bulk speed should be identical to Murmur2, small-key speed -// will be 10%-20% slower due to the added overhead at the end of the hash. - -// This variant fixes a minor issue where null keys were more likely to -// collide with each other than expected, and also makes the function -// more amenable to incremental implementations. - -#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } - -uint32_t MurmurHash2A(const void * key, int len, uint32_t seed) -{ - const uint32_t m = 0x5bd1e995; - const int r = 24; - uint32_t l = len; - - const unsigned char * data = reinterpret_cast(key); - - uint32_t h = seed; - - while (len >= 4) - { - uint32_t k = *reinterpret_cast(data); - mmix(h,k); - data += 4; - len -= 4; - } - - uint32_t t = 0; - - switch (len) - { - case 3: t ^= data[2] << 16; - case 2: t ^= data[1] << 8; - case 1: t ^= data[0]; - }; - - mmix(h,t); - mmix(h,l); - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; -} - -// MurmurHashNeutral2, by Austin Appleby - -// Same as MurmurHash2, but endian- and alignment-neutral. -// Half the speed though, alas. 
- -uint32_t MurmurHashNeutral2(const void * key, int len, uint32_t seed) -{ - const uint32_t m = 0x5bd1e995; - const int r = 24; - - uint32_t h = seed ^ len; - - const unsigned char * data = reinterpret_cast<const unsigned char *>(key); - - while (len >= 4) - { - uint32_t k; - - k = data[0]; - k |= data[1] << 8; - k |= data[2] << 16; - k |= data[3] << 24; - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - switch (len) - { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; - h *= m; - }; - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; -} - -//----------------------------------------------------------------------------- -// MurmurHashAligned2, by Austin Appleby - -// Same algorithm as MurmurHash2, but only does aligned reads - should be safer -// on certain platforms. - -// Performance will be lower than MurmurHash2 - -#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } - - -uint32_t MurmurHashAligned2(const void * key, int len, uint32_t seed) -{ - const uint32_t m = 0x5bd1e995; - const int r = 24; - - const unsigned char * data = reinterpret_cast<const unsigned char *>(key); - - uint32_t h = seed ^ len; - - int align = reinterpret_cast<uintptr_t>(data) & 3; - - if (align && (len >= 4)) - { - // Pre-load the temp registers - - uint32_t t = 0, d = 0; - - switch (align) - { - case 1: t |= data[2] << 16; - case 2: t |= data[1] << 8; - case 3: t |= data[0]; - } - - t <<= (8 * align); - - data += 4-align; - len -= 4-align; - - int sl = 8 * (4-align); - int sr = 8 * align; - - // Mix - - while (len >= 4) - { - d = *(reinterpret_cast<const uint32_t *>(data)); - t = (t >> sr) | (d << sl); - - uint32_t k = t; - - MIX(h,k,m); - - t = d; - - data += 4; - len -= 4; - } - - // Handle leftover data in temp registers - - d = 0; - - if (len >= align) - { - switch (align) - { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - } - - uint32_t k = (t >> sr) | (d << sl); - MIX(h,k,m); - - data += align; - len -= align; - - //---------- - // Handle tail bytes - - switch (len) - { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; - h *= m; - }; - } - else - { - switch (len) - { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - case 0: h ^= (t >> sr) | (d << sl); - h *= m; - } - } - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } - else - { - while (len >= 4) - { - uint32_t k = *reinterpret_cast<const uint32_t *>(data); - - MIX(h,k,m); - - data += 4; - len -= 4; - } - - // Handle tail bytes - - switch (len) - { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; - h *= m; - }; - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } -} diff --git a/debian/changelog b/debian/changelog index e1c46dae3a8..36c29fce1d0 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.7.1.1) unstable; urgency=low +clickhouse (21.8.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 20 May 2021 22:23:29 +0300 + -- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 28 Jun 2021 00:50:15 +0300 diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index d7d87c6d53c..4e70d382b36 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -43,29 +43,6 @@ command -v flock >/dev/null && FLOCK=flock # Override defaults from optional config file test -f /etc/default/clickhouse && . /etc/default/clickhouse -# On x86_64, check for required instruction set. -if uname -mpi | grep -q 'x86_64'; then - if ! 
grep -q 'sse4_2' /proc/cpuinfo; then - # On KVM, cpuinfo could falsely not report SSE 4.2 support, so skip the check. - if ! grep -q 'Common KVM processor' /proc/cpuinfo; then - - # Some other VMs also report wrong flags in cpuinfo. - # Tricky way to test for instruction set: - # create temporary binary and run it; - # if it get caught illegal instruction signal, - # then required instruction set is not supported really. - # - # Generated this way: - # gcc -xc -Os -static -nostdlib - <<< 'void _start() { __asm__("pcmpgtq %%xmm0, %%xmm1; mov $0x3c, %%rax; xor %%rdi, %%rdi; syscall":::"memory"); }' && strip -R .note.gnu.build-id -R .comment -R .eh_frame -s ./a.out && gzip -c -9 ./a.out | base64 -w0; echo - - if ! (echo -n 'H4sICAwAW1cCA2Eub3V0AKt39XFjYmRkgAEmBjsGEI+H0QHMd4CKGyCUAMUsGJiBJDNQNUiYlQEZOKDQclB9cnD9CmCSBYqJBRxQOvBpSQobGfqIAWn8FuYnPI4fsAGyPQz/87MeZtArziguKSpJTGLQK0mtKGGgGHADMSgoYH6AhTMPNHyE0NQzYuEzYzEXFr6CBPQDANAsXKTwAQAA' | base64 -d | gzip -d > /tmp/clickhouse_test_sse42 && chmod a+x /tmp/clickhouse_test_sse42 && /tmp/clickhouse_test_sse42); then - echo 'Warning! SSE 4.2 instruction set is not supported' - #exit 3 - fi - fi - fi -fi - die() { @@ -116,7 +93,7 @@ forcestop() service_or_func() { if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then - service $PROGRAM $1 + systemctl $1 $PROGRAM else $1 fi diff --git a/docker/bare/prepare b/docker/bare/prepare index 10d791cac73..912b16634c7 100755 --- a/docker/bare/prepare +++ b/docker/bare/prepare @@ -12,7 +12,6 @@ mkdir root pushd root mkdir lib lib64 etc tmp root cp ${BUILD_DIR}/programs/clickhouse . -cp ${SRC_DIR}/programs/server/{config,users}.xml . cp /lib/x86_64-linux-gnu/{libc.so.6,libdl.so.2,libm.so.6,libpthread.so.0,librt.so.1,libnss_dns.so.2,libresolv.so.2} lib cp /lib64/ld-linux-x86-64.so.2 lib64 cp /etc/resolv.conf ./etc diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 79ac92f2277..19cadccb926 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.7.1.* +ARG version=21.8.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 56b2af5cf84..29225bbfeb8 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -72,7 +72,7 @@ RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \ && cd .. \ && rm -rf apple-libtapi -# Build and install tools for cross-linking to Darwin +# Build and install tools for cross-linking to Darwin (x86-64) RUN git clone https://github.com/tpoechtrager/cctools-port.git \ && cd cctools-port/cctools \ && ./configure --prefix=/cctools --with-libtapi=/cctools \ @@ -81,8 +81,17 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \ && cd ../.. \ && rm -rf cctools-port -# Download toolchain for Darwin -RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz +# Build and install tools for cross-linking to Darwin (aarch64) +RUN git clone https://github.com/tpoechtrager/cctools-port.git \ + && cd cctools-port/cctools \ + && ./configure --prefix=/cctools --with-libtapi=/cctools \ + --target=aarch64-apple-darwin \ + && make install \ + && cd ../.. 
\ + && rm -rf cctools-port + +# Download toolchain and SDK for Darwin +RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz # Download toolchain for ARM # It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling. diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index cf74105fbbb..d6614bbb9e2 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -3,7 +3,9 @@ set -x -e mkdir -p build/cmake/toolchain/darwin-x86_64 -tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 +tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 + +ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64 mkdir -p build/cmake/toolchain/linux-aarch64 tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1 diff --git a/docker/packager/packager b/docker/packager/packager index 81474166cc9..c05c85d3e28 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -58,6 +58,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries): CLANG_PREFIX = "clang" DARWIN_SUFFIX = "-darwin" + DARWIN_ARM_SUFFIX = "-darwin-aarch64" ARM_SUFFIX = "-aarch64" FREEBSD_SUFFIX = "-freebsd" @@ -66,9 +67,10 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ is_clang = compiler.startswith(CLANG_PREFIX) is_cross_darwin = compiler.endswith(DARWIN_SUFFIX) + is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX) is_cross_arm = compiler.endswith(ARM_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) - is_cross_compile = is_cross_darwin or is_cross_arm or is_cross_freebsd + is_cross_compile = is_cross_darwin or is_cross_darwin_arm or is_cross_arm or is_cross_freebsd # Explicitly use LLD with Clang by default. # Don't force linker for cross-compilation. 
@@ -82,6 +84,13 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib") cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld") cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake") + elif is_cross_darwin_arm: + cc = compiler[:-len(DARWIN_ARM_SUFFIX)] + cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") + cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool") + cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib") + cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld") + cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake") elif is_cross_arm: cc = compiler[:-len(ARM_SUFFIX)] cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake") @@ -185,8 +194,8 @@ if __name__ == "__main__": parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)) parser.add_argument("--output-dir", required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") - parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd", - "gcc-10"), default="clang-11") + parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64", + "clang-11-freebsd", "gcc-10"), default="clang-11") parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") parser.add_argument("--unbundled", action="store_true") parser.add_argument("--split-binary", action="store_true") diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 52dcb6caae5..65d90bf52ce 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.7.1.* +ARG version=21.8.1.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 9809a36395d..687393025f0 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.7.1.* +ARG version=21.8.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 44b9d42d6a1..a722132c3a5 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -46,6 +46,7 @@ RUN apt-get update \ pigz \ pkg-config \ tzdata \ + pv \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index cc6aeff357f..bba20f64e5a 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -113,6 +113,7 @@ function start_server echo "ClickHouse server pid '$server_pid' started and responded" echo " +set follow-fork-mode child handle all noprint handle SIGSEGV stop print handle SIGBUS stop print @@ -159,7 +160,6 @@ function clone_submodules SUBMODULES_TO_UPDATE=( contrib/abseil-cpp - contrib/antlr4-runtime contrib/boost contrib/zlib-ng contrib/libxml2 @@ -373,14 +373,11 @@ function run_tests # Depends on AWS 
01801_s3_cluster - # Depends on LLVM JIT - 01072_nullable_jit - 01852_jit_if - 01865_jit_comparison_constant_result - 01871_merge_tree_compile_expressions - # needs psql 01889_postgresql_protocol_null_fields + + # needs pv + 01923_network_receive_time_metric_insert ) time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 45709e5c501..3ca67a58278 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -103,6 +103,7 @@ function fuzz kill -0 $server_pid echo " +set follow-fork-mode child handle all noprint handle SIGSEGV stop print handle SIGBUS stop print diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 1c962f1bf8f..e15697da029 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -1,6 +1,8 @@ # docker build -t yandex/clickhouse-integration-test . FROM yandex/clickhouse-test-base +SHELL ["/bin/bash", "-c"] + RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get -y install \ tzdata \ @@ -20,7 +22,9 @@ RUN apt-get update \ krb5-user \ iproute2 \ lsof \ - g++ + g++ \ + default-jre + RUN rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ @@ -30,6 +34,19 @@ RUN apt-get clean # Install MySQL ODBC driver RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so +# Unfortunately this is required for a single test that converts data from zookeeper to clickhouse-keeper. +# ZooKeeper is not started by default, but consumes some space in containers.
+# 777 perms used to allow anybody to start/stop ZooKeeper +ENV ZOOKEEPER_VERSION='3.6.3' +RUN curl -O "https://mirrors.estointernet.in/apache/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" +RUN tar -zxvf apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz && mv apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && chmod -R 777 /opt/zookeeper && rm apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz +RUN echo $'tickTime=2500 \n\ dataDir=/zookeeper \n\ clientPort=2181 \n\ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg +RUN mkdir /zookeeper && chmod -R 777 /zookeeper + ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml index c4a506ad356..4b83ed21410 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgres.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml @@ -11,6 +11,7 @@ services: interval: 10s timeout: 5s retries: 5 + command: [ "postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=2"] networks: default: aliases: @@ -22,4 +23,4 @@ services: volumes: - type: ${POSTGRES_LOGS_FS:-tmpfs} source: ${POSTGRES_DIR:-} - target: /postgres/ \ No newline at end of file + target: /postgres/ diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 2621a894dd7..9a8ffff7cd9 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -319,14 +319,14 @@ function get_profiles wait - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > left-query-log.tsv ||: & + clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type = 'QueryFinish' format TSVWithNamesAndTypes" > left-query-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: & - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: & + clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type = 'QueryFinish' format TSVWithNamesAndTypes" > right-query-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & clickhouse-client --port 
$RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & @@ -409,10 +409,10 @@ create view right_query_log as select * '$(cat "right-query-log.tsv.columns")'); create view query_logs as - select 0 version, query_id, ProfileEvents.Names, ProfileEvents.Values, + select 0 version, query_id, ProfileEvents, query_duration_ms, memory_usage from left_query_log union all - select 1 version, query_id, ProfileEvents.Names, ProfileEvents.Values, + select 1 version, query_id, ProfileEvents, query_duration_ms, memory_usage from right_query_log ; @@ -424,7 +424,7 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric- with ( -- sumMapState with the list of all keys with '-0.' values. Negative zero is because -- sumMap removes keys with positive zeros. - with (select groupUniqArrayArray(ProfileEvents.Names) from query_logs) as all_names + with (select groupUniqArrayArray(mapKeys(ProfileEvents)) from query_logs) as all_names select arrayReduce('sumMapState', [(all_names, arrayMap(x->-0., all_names))]) ) as all_metrics select test, query_index, version, query_id, @@ -433,8 +433,8 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric- [ all_metrics, arrayReduce('sumMapState', - [(ProfileEvents.Names, - arrayMap(x->toFloat64(x), ProfileEvents.Values))] + [(mapKeys(ProfileEvents), + arrayMap(x->toFloat64(x), mapValues(ProfileEvents)))] ), arrayReduce('sumMapState', [( ['client_time', 'server_time', 'memory_usage'], @@ -1003,10 +1003,11 @@ create view query_log as select * create table unstable_run_metrics engine File(TSVWithNamesAndTypes, 'unstable-run-metrics.$version.rep') as - select - test, query_index, query_id, - ProfileEvents.Values value, ProfileEvents.Names metric - from query_log array join ProfileEvents + select test, query_index, query_id, value, metric + from query_log + array join + mapValues(ProfileEvents) as value, + mapKeys(ProfileEvents) as metric join unstable_query_runs using (query_id) ; @@ -1177,11 +1178,11 @@ create view right_async_metric_log as -- Use the right log as time reference because it may have higher precision. create table metrics engine File(TSV, 'metrics/metrics.tsv') as with (select min(event_time) from right_async_metric_log) as min_time - select name metric, r.event_time - min_time event_time, l.value as left, r.value as right + select metric, r.event_time - min_time event_time, l.value as left, r.value as right from right_async_metric_log r asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes, '$(cat left-async-metric-log.tsv.columns)') l - on l.name = r.name and r.event_time <= l.event_time + on l.metric = r.metric and r.event_time <= l.event_time order by metric, event_time ; @@ -1280,7 +1281,7 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') then echo Database for test results is not specified, will not upload them. 
return 0 - fi + fi set +x # Don't show password in the log client=(clickhouse-client diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index 2c06be9bb91..1f5218c2d10 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -23,6 +23,7 @@ 0 + 0 diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 13b18cda326..35e1008e0d7 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -561,7 +561,7 @@ if args.report == 'main': # Don't show mildly unstable queries, only the very unstable ones we # treat as errors. if very_unstable_queries: - if very_unstable_queries > 3: + if very_unstable_queries > 5: error_tests += very_unstable_queries status = 'failure' message_array.append(str(very_unstable_queries) + ' unstable') diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 58b1d18a681..a7fb956bf94 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -35,7 +35,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then # simpliest way to forward env variables to server sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon else - service clickhouse-server start + sudo clickhouse start fi if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 8016b2c59f3..428fdb9fdb7 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -1,4 +1,6 @@ #!/bin/bash +# shellcheck disable=SC2094 +# shellcheck disable=SC2086 set -x @@ -37,6 +39,17 @@ function stop() function start() { + # Rename existing log file - it will be more convenient to read separate files for separate server runs. 
+ if [ -f '/var/log/clickhouse-server/clickhouse-server.log' ] + then + log_file_counter=1 + while [ -f "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}" ] + do + log_file_counter=$((log_file_counter + 1)) + done + mv '/var/log/clickhouse-server/clickhouse-server.log' "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}" + fi + counter=0 until clickhouse-client --query "SELECT 1" do @@ -55,6 +68,7 @@ function start() done echo " +set follow-fork-mode child handle all noprint handle SIGSEGV stop print handle SIGBUS stop print @@ -140,7 +154,11 @@ zgrep -Fa "########################################" /test_output/* > /dev/null && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv # Put logs into /test_output/ -pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz +for log_file in /var/log/clickhouse-server/clickhouse-server.log* +do + pigz < "${log_file}" > /test_output/"$(basename ${log_file})".gz +done + tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: mv /var/log/clickhouse-server/stderr.log /test_output/ tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||: diff --git a/docs/_includes/cmake_in_clickhouse_header.md b/docs/_includes/cmake_in_clickhouse_header.md index 7dfda35e34a..db9138fbbb7 100644 --- a/docs/_includes/cmake_in_clickhouse_header.md +++ b/docs/_includes/cmake_in_clickhouse_header.md @@ -2,18 +2,16 @@ ## TL; DR How to make ClickHouse compile and link faster? -Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`. +Minimal ClickHouse build example: -```cmake +```bash cmake .. \ - -DCMAKE_C_COMPILER=/bin/clang-10 \ - -DCMAKE_CXX_COMPILER=/bin/clang++-10 \ + -DCMAKE_C_COMPILER=$(which clang-11) \ + -DCMAKE_CXX_COMPILER=$(which clang++-11) \ -DCMAKE_BUILD_TYPE=Debug \ -DENABLE_CLICKHOUSE_ALL=OFF \ -DENABLE_CLICKHOUSE_SERVER=ON \ -DENABLE_CLICKHOUSE_CLIENT=ON \ - -DUSE_STATIC_LIBRARIES=OFF \ - -DSPLIT_SHARED_LIBRARIES=ON \ -DENABLE_LIBRARIES=OFF \ -DUSE_UNWIND=ON \ -DENABLE_UTILS=OFF \ diff --git a/docs/_includes/install/arm.sh b/docs/_includes/install/arm.sh new file mode 100644 index 00000000000..d6f6020a1a1 --- /dev/null +++ b/docs/_includes/install/arm.sh @@ -0,0 +1,6 @@ +# ARM (AArch64) build works on Amazon Graviton, Oracle Cloud, Huawei Cloud ARM machines. +# The support for AArch64 is pre-production ready. 
+ +wget 'https://builds.clickhouse.tech/master/aarch64/clickhouse' +chmod a+x ./clickhouse +sudo ./clickhouse install diff --git a/docs/_includes/install/freebsd.sh b/docs/_includes/install/freebsd.sh new file mode 100644 index 00000000000..d664ea19a18 --- /dev/null +++ b/docs/_includes/install/freebsd.sh @@ -0,0 +1,3 @@ +wget 'https://builds.clickhouse.tech/master/freebsd/clickhouse' +chmod a+x ./clickhouse +sudo ./clickhouse install diff --git a/docs/_includes/install/mac-arm.sh b/docs/_includes/install/mac-arm.sh new file mode 100644 index 00000000000..9fc5c0cef22 --- /dev/null +++ b/docs/_includes/install/mac-arm.sh @@ -0,0 +1,3 @@ +wget 'https://builds.clickhouse.tech/master/macos-aarch64/clickhouse' +chmod a+x ./clickhouse +./clickhouse diff --git a/docs/_includes/install/mac-x86.sh b/docs/_includes/install/mac-x86.sh new file mode 100644 index 00000000000..1423769b6d5 --- /dev/null +++ b/docs/_includes/install/mac-x86.sh @@ -0,0 +1,3 @@ +wget 'https://builds.clickhouse.tech/master/macos/clickhouse' +chmod a+x ./clickhouse +./clickhouse diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index a862bdeb299..687e0179e07 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -33,7 +33,7 @@ Reboot. ``` bash brew update -brew install cmake ninja libtool gettext llvm gcc +brew install cmake ninja libtool gettext llvm gcc binutils ``` ## Checkout ClickHouse Sources {#checkout-clickhouse-sources} diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 64ca2387029..ac39c496c72 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -7,13 +7,13 @@ toc_title: Third-Party Libraries Used The list of third-party libraries can be obtained by the following query: -``` +``` sql SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en' ``` [Example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) -| library_name | license_type | license_path | +| library_name | license_type | license_path | |:-|:-|:-| | abseil-cpp | Apache | /contrib/abseil-cpp/LICENSE | | AMQP-CPP | Apache | /contrib/AMQP-CPP/LICENSE | @@ -89,3 +89,15 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li | xz | Public Domain | /contrib/xz/COPYING | | zlib-ng | zLib | /contrib/zlib-ng/LICENSE.md | | zstd | BSD | /contrib/zstd/LICENSE | + +## Guidelines for adding new third-party libraries and maintaining custom changes in them {#adding-third-party-libraries} + +1. All external third-party code should reside in the dedicated directories under the `contrib` directory of the ClickHouse repo. Prefer Git submodules when available. +2. Fork/mirror the official repo in [Clickhouse-extras](https://github.com/ClickHouse-Extras). Prefer official GitHub repos when available. +3. Branch from the branch you want to integrate, e.g., `master` -> `clickhouse/master`, or `release/vX.Y.Z` -> `clickhouse/release/vX.Y.Z`. +4. All forks in [Clickhouse-extras](https://github.com/ClickHouse-Extras) can be automatically synchronized with upstreams. `clickhouse/...` branches will remain unaffected, since virtually nobody is going to use that naming pattern in their upstream repos. +5. Add submodules under `contrib` of the ClickHouse repo that refer to the above forks/mirrors.
Set the submodules to track the corresponding `clickhouse/...` branches. +6. Every time custom changes have to be made in the library code, a dedicated branch should be created, like `clickhouse/my-fix`. Then this branch should be merged into the branch that is tracked by the submodule, e.g., `clickhouse/master` or `clickhouse/release/vX.Y.Z`. +7. No code should be pushed to any branch of the forks in [Clickhouse-extras](https://github.com/ClickHouse-Extras) whose names do not follow the `clickhouse/...` pattern. +8. Always write the custom changes with the official repo in mind. Once the PR is merged from (a feature/fix branch in) your personal fork into the fork in [Clickhouse-extras](https://github.com/ClickHouse-Extras), and the submodule is bumped in the ClickHouse repo, consider opening another PR from (a feature/fix branch in) the fork in [Clickhouse-extras](https://github.com/ClickHouse-Extras) to the official repo of the library. This will make sure that 1) the contribution has more than a single use case and importance, 2) others will also benefit from it, and 3) the change will not remain a maintenance burden solely on ClickHouse developers. +9. When a submodule needs to start using newer code from the original branch (e.g., `master`), and since the custom changes might be merged in the branch it is tracking (e.g., `clickhouse/master`) and so it may diverge from its original counterpart (i.e., `master`), a careful merge should be carried out first, i.e., `master` -> `clickhouse/master`, and only then can the submodule be bumped in ClickHouse. diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index ac6d4a2b563..90f406f3ba8 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -237,6 +237,8 @@ The description of ClickHouse architecture can be found here: https://clickhouse The Code Style Guide: https://clickhouse.tech/docs/en/development/style/ +Adding third-party libraries: https://clickhouse.tech/docs/en/development/contrib/#adding-third-party-libraries + Writing tests: https://clickhouse.tech/docs/en/development/tests/ List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3A%22easy+task%22 diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 2151735c2f4..c495e3f0417 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -628,7 +628,7 @@ If the class is not intended for polymorphic use, you do not need to make functi **18.** Encodings. -Use UTF-8 everywhere. Use `std::string`and`char *`. Do not use `std::wstring`and`wchar_t`. +Use UTF-8 everywhere. Use `std::string` and `char *`. Do not use `std::wstring` and `wchar_t`. **19.** Logging. @@ -749,17 +749,9 @@ If your code in the `master` branch is not buildable yet, exclude it from the bu **1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks. -**2.** If necessary, you can use any well-known libraries available in the OS package. +**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in the form of source code in the `contrib` directory and built with ClickHouse. -If there is a good solution already available, then use it, even if it means you have to install another library. - -(But be prepared to remove bad libraries from code.)
- -**3.** You can install a library that isn’t in the packages, if the packages do not have what you need or have an outdated version or the wrong type of compilation. - -**4.** If the library is small and does not have its own complex build system, put the source files in the `contrib` folder. - -**5.** Preference is always given to libraries that are already in use. +**3.** Preference is always given to libraries that are already in use. ## General Recommendations {#general-recommendations-1} diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md index 69d3122c268..93e4aedfd5a 100644 --- a/docs/en/engines/database-engines/materialize-mysql.md +++ b/docs/en/engines/database-engines/materialize-mysql.md @@ -49,6 +49,7 @@ When working with the `MaterializeMySQL` database engine, [ReplacingMergeTree](. | DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | | DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | | DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | +| ENUM | [Enum](../../sql-reference/data-types/enum.md) | | STRING | [String](../../sql-reference/data-types/string.md) | | VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) | | BLOB | [String](../../sql-reference/data-types/string.md) | diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md new file mode 100644 index 00000000000..6b8c6e86952 --- /dev/null +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -0,0 +1,71 @@ +--- +toc_priority: 30 +toc_title: MaterializedPostgreSQL +--- + +# MaterializedPostgreSQL {#materialize-postgresql} + +## Creating a Database {#creating-a-database} + +``` sql +CREATE DATABASE test_database +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password'); + +SELECT * FROM test_database.postgres_table; +``` + + +## Settings {#settings} + +1. `materialized_postgresql_max_block_size` - Number of rows collected before flushing data into the table. Default: `65536`. + +2. `materialized_postgresql_tables_list` - List of tables for the MaterializedPostgreSQL database engine. Default: `whole database`. + +3. `materialized_postgresql_allow_automatic_update` - Allows reloading a table in the background when schema changes are detected. Default: `0` (`false`). + +``` sql +CREATE DATABASE test_database +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') +SETTINGS materialized_postgresql_max_block_size = 65536, + materialized_postgresql_tables_list = 'table1,table2,table3'; + +SELECT * FROM test_database.table1; +``` + + +## Requirements {#requirements} + +- Setting `wal_level` to `logical` and `max_replication_slots` to at least `2` in the PostgreSQL config file. + +- Each replicated table must have one of the following **replica identity** types: + +1. **default** (primary key) + +2. **index** + +``` bash +postgres# CREATE TABLE postgres_table (a Integer NOT NULL, b Integer, c Integer NOT NULL, d Integer, e Integer NOT NULL); +postgres# CREATE unique INDEX postgres_table_index on postgres_table(a, c, e); +postgres# ALTER TABLE postgres_table REPLICA IDENTITY USING INDEX postgres_table_index; +``` + + +The primary key is always checked first. If it is absent, then the index defined as the replica identity index is checked.
+If an index is used as the replica identity, there must be only one such index in the table. +You can check what type is used for a specific table with the following command: + +``` bash +postgres# SELECT CASE relreplident + WHEN 'd' THEN 'default' + WHEN 'n' THEN 'nothing' + WHEN 'f' THEN 'full' + WHEN 'i' THEN 'index' + END AS replica_identity +FROM pg_class +WHERE oid = 'postgres_table'::regclass; +``` + + +## Warning {#warning} + +1. **TOAST** values conversion is not supported. The default value for the data type will be used. diff --git a/docs/en/engines/table-engines/integrations/ExternalDistributed.md b/docs/en/engines/table-engines/integrations/ExternalDistributed.md new file mode 100644 index 00000000000..819abdbf9d7 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/ExternalDistributed.md @@ -0,0 +1,53 @@ +--- +toc_priority: 12 +toc_title: ExternalDistributed +--- + +# ExternalDistributed {#externaldistributed} + +The `ExternalDistributed` engine allows performing `SELECT` queries on data that is stored on remote MySQL or PostgreSQL servers. Accepts [MySQL](../../../engines/table-engines/integrations/mysql.md) or [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) engines as an argument, so sharding is possible. + +## Creating a Table {#creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], + ... +) ENGINE = ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password'); +``` + +See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query. + +The table structure can differ from the original table structure: + +- Column names should be the same as in the original table, but you can use just some of these columns and in any order. +- Column types may differ from those in the original table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. + +**Engine Parameters** + +- `engine` — The table engine `MySQL` or `PostgreSQL`. +- `host:port` — MySQL or PostgreSQL server address. +- `database` — Remote database name. +- `table` — Remote table name. +- `user` — User name. +- `password` — User password. + +## Implementation Details {#implementation-details} + +Supports multiple replicas, which must be listed by `|`; shards must be listed by `,`. For example: + +```sql +CREATE TABLE test_shards (id UInt32, name String, age UInt32, money UInt32) ENGINE = ExternalDistributed('MySQL', `mysql{1|2}:3306,mysql{3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse'); +``` + +When specifying replicas, one of the available replicas is selected for each of the shards when reading. If the connection fails, the next replica is selected, and so on for all the replicas. If the connection attempt fails for all the replicas, the attempt is repeated the same way several times. + +You can specify any number of shards and any number of replicas for each shard.
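For illustration, a minimal read over the `test_shards` table defined just above might look like this (a sketch: it assumes the hypothetical hosts, credentials, and data from the preceding example):

```sql
-- The query fans out to both shards; within each shard one replica from the
-- `|` list is used, and the next replica is tried if the connection fails.
SELECT name, sum(money) AS total
FROM test_shards
GROUP BY name
ORDER BY total DESC;
```

Nothing about the query itself changes because of sharding; the engine handles shard and replica selection transparently.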
+ +**See Also** + +- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md) +- [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md) +- [Distributed table engine](../../../engines/table-engines/special/distributed.md) diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md new file mode 100644 index 00000000000..70f61c5b550 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -0,0 +1,46 @@ +--- +toc_priority: 12 +toc_title: MaterializedPostgreSQL +--- + +# MaterializedPostgreSQL {#materialize-postgresql} + +## Creating a Table {#creating-a-table} + +``` sql +CREATE TABLE test.postgresql_replica (key UInt64, value UInt64) +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password') +PRIMARY KEY key; +``` + + +## Requirements {#requirements} + +- Setting `wal_level` to `logical` and `max_replication_slots` to at least `2` in the PostgreSQL config file. + +- A table with engine `MaterializedPostgreSQL` must have a primary key - the same as a replica identity index (default: primary key) of a PostgreSQL table (see [details on replica identity index](../../database-engines/materialized-postgresql.md#requirements)). + +- Only database `Atomic` is allowed. + + +## Virtual columns {#virtual-columns} + +- `_version` (`UInt64`) + +- `_sign` (`Int8`) + +These columns do not need to be added when the table is created. They are always accessible in a `SELECT` query. +The `_version` column equals the `LSN` position in the `WAL`, so it might be used to check how up-to-date the replication is. + +``` sql +CREATE TABLE test.postgresql_replica (key UInt64, value UInt64) +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password') +PRIMARY KEY key; + +SELECT key, value, _version FROM test.postgresql_replica; +``` + + +## Warning {#warning} + +1. **TOAST** values conversion is not supported. The default value for the data type will be used. diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index 013add6c249..a6402e00bc9 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -28,8 +28,8 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the original MySQL table structure: - Column names should be the same as in the original MySQL table, but you can use just some of these columns and in any order. -- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. -- Setting `external_table_functions_use_nulls` defines how to handle Nullable columns. Default is true, if false - table function will not make nullable columns and will insert default values instead of nulls. This is also applicable for null values inside array data types. +- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types.
+- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. **Engine Parameters** @@ -55,6 +55,12 @@ Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are executed on the MySQL s The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes. +Supports multiple replicas that must be listed by `|`. For example: + +```sql +CREATE TABLE test_replicas (id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL(`mysql{2|3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse'); +``` + ## Usage Example {#usage-example} Table in MySQL: diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md index 26bfb6aeb0d..ab39fb7a811 100644 --- a/docs/en/engines/table-engines/integrations/odbc.md +++ b/docs/en/engines/table-engines/integrations/odbc.md @@ -29,7 +29,7 @@ The table structure can differ from the source table structure: - Column names should be the same as in the source table, but you can use just some of these columns and in any order. - Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. -- Setting `external_table_functions_use_nulls` defines how to handle Nullable columns. Default is true, if false - table function will not make nullable columns and will insert default values instead of nulls. This is also applicable for null values inside array data types. +- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. **Engine Parameters** diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 4474b764d2e..1a8f2c4b758 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -23,8 +23,8 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the original PostgreSQL table structure: - Column names should be the same as in the original PostgreSQL table, but you can use just some of these columns and in any order. -- Column types may differ from those in the original PostgreSQL table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. -- Setting `external_table_functions_use_nulls` defines how to handle Nullable columns. Default is 1, if 0 - table function will not make nullable columns and will insert default values instead of nulls. This is also applicable for null values inside array data types. +- Column types may differ from those in the original PostgreSQL table. ClickHouse tries to [cast](../../../engines/database-engines/postgresql.md#data_types-support) values to the ClickHouse data types. 
+- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. **Engine Parameters** @@ -49,6 +49,12 @@ PostgreSQL `Array` types are converted into ClickHouse arrays. !!! info "Note" Be careful - in PostgreSQL an array data, created like a `type_name[]`, may contain multi-dimensional arrays of different dimensions in different table rows in same column. But in ClickHouse it is only allowed to have multidimensional arrays of the same count of dimensions in all table rows in same column. + +Supports multiple replicas that must be listed by `|`. For example: + +```sql +CREATE TABLE test_replicas (id UInt32, name String) ENGINE = PostgreSQL(`postgres{2|3|4}:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword'); +``` Replicas priority for PostgreSQL dictionary source is supported. The bigger the number in map, the less the priority. The highest priority is `0`. diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md index 0f4e81c8470..22c068af358 100644 --- a/docs/en/getting-started/example-datasets/recipes.md +++ b/docs/en/getting-started/example-datasets/recipes.md @@ -65,7 +65,7 @@ By checking the row count: Query: -``` sq; +``` sql SELECT count() FROM recipes; ``` diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 4256de49e4a..517be8704e4 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -94,11 +94,11 @@ For production environments, it’s recommended to use the latest `stable`-versi To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Those images use official `deb` packages inside. -### Single Binary +### Single Binary {#from-single-binary} -You can install ClickHouse on Linux using single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.tech/master/amd64/clickhouse]. +You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.tech/master/amd64/clickhouse]. -``` +``` bash curl -O 'https://builds.clickhouse.tech/master/amd64/clickhouse' && chmod a+x clickhouse sudo ./clickhouse install ``` @@ -107,9 +107,10 @@ sudo ./clickhouse install For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). 
-- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` -- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse` -- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` +- [MacOS x86_64](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` +- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.tech/master/macos-aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse` +- [FreeBSD x86_64](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse` +- [Linux AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 25127b0ea00..eb288721231 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1302,6 +1302,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` | | `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | +| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | Arrays can be nested and can have a value of the `Nullable` type as an argument. diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index dec3c839020..f1ed69304a6 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -498,7 +498,7 @@ Return a message. Say Hi! 
- + ``` ``` bash diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 8d72e12f01b..47927cd306a 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -110,7 +110,7 @@ toc_title: Adopters | SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | | Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | | seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | -| SGK | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | +| SGK | Government Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | | Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | | SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | | Spark New Zealand | Telecommunications | Security Operations | — | — | [Blog Post, Feb 2020](https://blog.n0p.me/2020/02/2020-02-05-dnsmonster/) | @@ -154,5 +154,7 @@ toc_title: Adopters | Hydrolix | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) | | Argedor | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) | | SigNoz | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) | +| ChelPipe Group | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) | +| Zagrava Trading | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 2bde3b03048..fc5a911cd7a 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -379,7 +379,7 @@ Default value: `1`. ## insert_null_as_default {#insert_null_as_default} -Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns with not [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) data type. +Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns with a non-[nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) data type. If the column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If the column type is nullable, then `NULL` values are inserted as is, regardless of this setting.
This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause. @@ -1182,7 +1182,7 @@ Possible values: Default value: `1`. -**Additional Info** +**Additional Info** This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases: @@ -1194,21 +1194,22 @@ This setting is useful for replicated tables with a sampling key. A query may be !!! warning "Warning" This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details. -## compile {#compile} +## compile_expressions {#compile-expressions} -Enable compilation of queries. By default, 0 (disabled). +Enables or disables compilation of frequently used simple functions and operators to native code with LLVM at runtime. -The compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY). -If this portion of the pipeline was compiled, the query may run faster due to the deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution. +Possible values: -## min_count_to_compile {#min-count-to-compile} +- 0 — Disabled. +- 1 — Enabled. -How many times to potentially use a compiled chunk of code before running compilation. By default, 3. -For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds. -If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including queries that are currently running. +Default value: `1`. -Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause. -The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they do not use very much space. Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. + +## min_count_to_compile_expression {#min-count-to-compile-expression} + +Minimum number of times the same expression must be executed before it is compiled. + +Default value: `3`. ## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers} @@ -1558,7 +1559,7 @@ Possible values: - 0 — Disabled (final query processing is done on the initiator node). - 1 - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
 
 ## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
 
@@ -1558,7 +1559,7 @@ Possible values:
 
 - 0 — Disabled (final query processing is done on the initiator node).
 - 1 - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
-- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possilbe when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
+- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` on the initiator (this is not possible when the query is processed completely on the remote node, as with `distributed_group_by_no_merge=1`); can be used for queries with `ORDER BY` and/or `LIMIT`.
 
 **Example**
 
@@ -1622,7 +1623,7 @@ Possible values:
 
 Default value: 0
 
-## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shardslrewrite-in}
+## optimize_skip_unused_shards_rewrite_in {#optimize-skip-unused-shards-rewrite-in}
 
 Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards).
 
@@ -1727,6 +1728,28 @@ Possible values:
 
 Default value: 0.
 
+## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns}
+
+Enables or disables an optimization that transforms some functions into reading subcolumns. This reduces the amount of data to read.
+
+These functions can be transformed:
+
+- [length](../../sql-reference/functions/array-functions.md#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn.
+- [empty](../../sql-reference/functions/array-functions.md#function-empty) to read the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn.
+- [notEmpty](../../sql-reference/functions/array-functions.md#function-notempty) to read the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn.
+- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
+- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
+- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
+- [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) to read the [keys](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn.
+- [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) to read the [values](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn.
+
+Possible values:
+
+- 0 — Optimization disabled.
+- 1 — Optimization enabled.
+
+Default value: `0`.
+
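A minimal sketch of the rewrite; the table `t_arr` with an `Array` column `arr` is assumed for illustration:

``` sql
SET optimize_functions_to_subcolumns = 1;

-- With the optimization enabled, length(arr) can be served from the arr.size0
-- subcolumn without reading the array data itself.
EXPLAIN SYNTAX SELECT length(arr) FROM t_arr;
-- expected rewrite: SELECT arr.size0 FROM t_arr
```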
note "Note" + This setting also affects broken batches (that may appears because of abnormal server (machine) termination and no `fsync_after_insert`/`fsync_directories` for [Distributed](../../engines/table-engines/special/distributed.md) table engine). + +!!! warning "Warning" + You should not rely on automatic batch splitting, since this may hurt performance. + ## os_thread_priority {#setting-os-thread-priority} Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core. @@ -2085,7 +2129,7 @@ Default value: 128. ## background_fetches_pool_size {#background_fetches_pool_size} -Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. For production usage with frequent small insertions or slow ZooKeeper cluster is recomended to use default value. +Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. For production usage with frequent small insertions or slow ZooKeeper cluster is recommended to use default value. Possible values: @@ -2672,7 +2716,7 @@ Default value: `0`. ## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty} Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility. -It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries. +It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries. Possible values: @@ -2856,7 +2900,7 @@ Default value: `0`. ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} -Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. +Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. Possible values: @@ -2962,7 +3006,7 @@ Enables or disables using the original column names instead of aliases in query Possible values: - 0 — The column name is substituted with the alias. -- 1 — The column name is not substituted with the alias. +- 1 — The column name is not substituted with the alias. Default value: `0`. @@ -3075,7 +3119,7 @@ SELECT sum(a), sumCount(b).1, sumCount(b).2, - (sumCount(b).1) / (sumCount(b).2) + (sumCount(b).1) / (sumCount(b).2) FROM fuse_tbl ``` @@ -3144,4 +3188,17 @@ SETTINGS index_granularity = 8192 │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) +## external_table_functions_use_nulls {#external-table-functions-use-nulls} + +Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md)] table functions use Nullable columns. 
 ## os_thread_priority {#setting-os-thread-priority}
 
 Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core.
 
@@ -2085,7 +2129,7 @@ Default value: 128.
 
 ## background_fetches_pool_size {#background_fetches_pool_size}
 
-Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. For production usage with frequent small insertions or slow ZooKeeper cluster is recomended to use default value.
+Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. For production usage with frequent small insertions or a slow ZooKeeper cluster, it is recommended to use the default value.
 
 Possible values:
 
@@ -2672,7 +2716,7 @@ Default value: `0`.
 
 ## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
 
 Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
-It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries. 
+It is implemented via query rewrite (similar to the [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
 
 Possible values:
 
@@ -2856,7 +2900,7 @@ Default value: `0`.
 
 ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
 
-Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. 
+Adds the `SYNC` modifier to all `DROP` and `DETACH` queries.
 
 Possible values:
 
@@ -2962,7 +3006,7 @@ Enables or disables using the original column names instead of aliases in query
 Possible values:
 
 - 0 — The column name is substituted with the alias.
-- 1 — The column name is not substituted with the alias. 
+- 1 — The column name is not substituted with the alias.
 
 Default value: `0`.
 
@@ -3075,7 +3119,7 @@ SELECT
     sum(a),
     sumCount(b).1,
     sumCount(b).2,
-    (sumCount(b).1) / (sumCount(b).2) 
+    (sumCount(b).1) / (sumCount(b).2)
 FROM fuse_tbl
 ```
 
@@ -3144,4 +3188,17 @@ SETTINGS index_granularity = 8192 │
 └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
 ```
 
-[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/)
+## external_table_functions_use_nulls {#external-table-functions-use-nulls}
+
+Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns.
+
+Possible values:
+
+- 0 — The table function explicitly uses Nullable columns.
+- 1 — The table function implicitly uses Nullable columns.
+
+Default value: `1`.
+
+**Usage**
+
+If the setting is set to `0`, the table function does not create Nullable columns and inserts default values instead of NULL. This also applies to NULL values inside arrays.
diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md
index 5dcfca5fbda..b0480dc256a 100644
--- a/docs/en/operations/system-tables/asynchronous_metric_log.md
+++ b/docs/en/operations/system-tables/asynchronous_metric_log.md
@@ -36,4 +36,4 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10
 - [system.asynchronous_metrics](../system-tables/asynchronous_metrics.md) — Contains metrics, calculated periodically in the background.
 - [system.metric_log](../system-tables/metric_log.md) — Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk.
 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metric_log)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/asynchronous_metric_log)
diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md
index b27434793c7..fc801aa1c80 100644
--- a/docs/en/operations/system-tables/asynchronous_metrics.md
+++ b/docs/en/operations/system-tables/asynchronous_metrics.md
@@ -33,6 +33,6 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
 
 - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
 - [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
 - [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred.
-- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
+- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
- [Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metrics) \ No newline at end of file + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/asynchronous_metrics) diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 096eca12e7d..16cf183de53 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -68,4 +68,4 @@ estimated_recovery_time: 0 - [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) - [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/clusters) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/clusters) diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index 9160dca9a1a..2a8009dddee 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -69,4 +69,21 @@ is_in_sampling_key: 0 compression_codec: ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) +The `system.columns` table contains the following columns (the column type is shown in brackets): + +- `database` (String) — Database name. +- `table` (String) — Table name. +- `name` (String) — Column name. +- `type` (String) — Column type. +- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. +- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined. +- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes. +- `marks_bytes` (UInt64) — The size of marks, in bytes. +- `comment` (String) — Comment on the column, or an empty string if it is not defined. +- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression. +- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. +- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. +- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. 
+ +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/columns) diff --git a/docs/en/operations/system-tables/contributors.md b/docs/en/operations/system-tables/contributors.md index 37d01ef6204..a718c403c11 100644 --- a/docs/en/operations/system-tables/contributors.md +++ b/docs/en/operations/system-tables/contributors.md @@ -38,4 +38,4 @@ SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova' │ Olga Khvostikova │ └──────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/contributors) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/contributors) diff --git a/docs/en/operations/system-tables/current-roles.md b/docs/en/operations/system-tables/current-roles.md index f10dbe69918..56dbb602637 100644 --- a/docs/en/operations/system-tables/current-roles.md +++ b/docs/en/operations/system-tables/current-roles.md @@ -8,4 +8,4 @@ Columns: - `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a role with `ADMIN OPTION` privilege. - `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a default role. - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/current-roles) + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/current-roles) diff --git a/docs/en/operations/system-tables/data_skipping_indices.md b/docs/en/operations/system-tables/data_skipping_indices.md new file mode 100644 index 00000000000..515f704797a --- /dev/null +++ b/docs/en/operations/system-tables/data_skipping_indices.md @@ -0,0 +1,39 @@ +# system.data_skipping_indices {#system-data-skipping-indices} + +Contains information about existing data skipping indices in all the tables. + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Index name. +- `type` ([String](../../sql-reference/data-types/string.md)) — Index type. +- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression used to calculate the index. +- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of granules in the block. + +**Example** + + +```sql +SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical; +``` + +```text +Row 1: +────── +database: default +table: user_actions +name: clicks_idx +type: minmax +expr: clicks +granularity: 1 + +Row 2: +────── +database: default +table: users +name: contacts_null_idx +type: minmax +expr: assumeNotNull(contacts_null) +granularity: 1 +``` diff --git a/docs/en/operations/system-tables/data_type_families.md b/docs/en/operations/system-tables/data_type_families.md index 4e439f13aa5..fdce9c33b37 100644 --- a/docs/en/operations/system-tables/data_type_families.md +++ b/docs/en/operations/system-tables/data_type_families.md @@ -33,4 +33,4 @@ SELECT * FROM system.data_type_families WHERE alias_to = 'String' - [Syntax](../../sql-reference/syntax.md) — Information about supported syntax. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/data_type_families) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/data_type_families) diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md index 8ef5551d9b0..2c78fd25c2b 100644 --- a/docs/en/operations/system-tables/databases.md +++ b/docs/en/operations/system-tables/databases.md @@ -35,4 +35,4 @@ SELECT * FROM system.databases └────────────────────────────────┴────────┴────────────────────────────┴─────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/databases) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/databases) diff --git a/docs/en/operations/system-tables/detached_parts.md b/docs/en/operations/system-tables/detached_parts.md index ade89bd40c4..a5748128426 100644 --- a/docs/en/operations/system-tables/detached_parts.md +++ b/docs/en/operations/system-tables/detached_parts.md @@ -8,4 +8,4 @@ For the description of other columns, see [system.parts](../../operations/system If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter/partition.md#alter_drop-detached). -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/detached_parts) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/detached_parts) diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 2bc1be51f19..a34e893599c 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -61,4 +61,4 @@ SELECT * FROM system.dictionaries └──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/dictionaries) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/dictionaries) diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index e9d324580d8..833a0b3b16b 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -10,9 +10,6 @@ Columns: - `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. - `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/disks) - - **Example** ```sql @@ -27,5 +24,4 @@ Columns: 1 rows in set. Elapsed: 0.001 sec. 
``` - - +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/disks) diff --git a/docs/en/operations/system-tables/enabled-roles.md b/docs/en/operations/system-tables/enabled-roles.md index 27875fcf984..c03129b32dd 100644 --- a/docs/en/operations/system-tables/enabled-roles.md +++ b/docs/en/operations/system-tables/enabled-roles.md @@ -9,4 +9,4 @@ Columns: - `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a current role of a current user. - `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a default role. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/enabled-roles) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/enabled-roles) diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index b4ced6e6bf6..2fcb5d8edec 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -31,4 +31,4 @@ SELECT * FROM system.events LIMIT 5 - [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/events) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/events) diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md index fbcd4b7b723..888e768fc93 100644 --- a/docs/en/operations/system-tables/functions.md +++ b/docs/en/operations/system-tables/functions.md @@ -7,8 +7,6 @@ Columns: - `name`(`String`) – The name of the function. - `is_aggregate`(`UInt8`) — Whether the function is aggregate. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/functions) - **Example** ```sql @@ -30,4 +28,6 @@ Columns: └──────────────────────────┴──────────────┴──────────────────┴──────────┘ 10 rows in set. Elapsed: 0.002 sec. -``` \ No newline at end of file +``` + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/functions) diff --git a/docs/en/operations/system-tables/grants.md b/docs/en/operations/system-tables/grants.md index fb2a91ab30a..927fa4f3227 100644 --- a/docs/en/operations/system-tables/grants.md +++ b/docs/en/operations/system-tables/grants.md @@ -21,4 +21,4 @@ Columns: - `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax). -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/grants) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/grants) diff --git a/docs/en/operations/system-tables/graphite_retentions.md b/docs/en/operations/system-tables/graphite_retentions.md index 7ae5e0e36a8..0d56242dc95 100644 --- a/docs/en/operations/system-tables/graphite_retentions.md +++ b/docs/en/operations/system-tables/graphite_retentions.md @@ -14,4 +14,4 @@ Columns: - `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter. - `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/graphite_retentions) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/graphite_retentions) diff --git a/docs/en/operations/system-tables/licenses.md b/docs/en/operations/system-tables/licenses.md index c95e4e8b9b4..a9cada507c6 100644 --- a/docs/en/operations/system-tables/licenses.md +++ b/docs/en/operations/system-tables/licenses.md @@ -36,4 +36,4 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15 ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/licenses) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/licenses) diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index c2c5703f869..309c1cbc9d1 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -51,4 +51,4 @@ type: SettingUInt64 4 rows in set. Elapsed: 0.001 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merge_tree_settings) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/merge_tree_settings) diff --git a/docs/en/operations/system-tables/merges.md b/docs/en/operations/system-tables/merges.md index 3e712e2962c..c7bdaee42e1 100644 --- a/docs/en/operations/system-tables/merges.md +++ b/docs/en/operations/system-tables/merges.md @@ -22,4 +22,4 @@ Columns: - `merge_type` — The type of current merge. Empty if it's an mutation. - `merge_algorithm` — The algorithm used in current merge. Empty if it's an mutation. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merges) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/merges) diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md index 1f72c9a7358..ab149703309 100644 --- a/docs/en/operations/system-tables/metric_log.md +++ b/docs/en/operations/system-tables/metric_log.md @@ -48,4 +48,4 @@ CurrentMetric_DistributedFilesToInsert: 0 - [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metric_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/metric_log) diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index decae8ea7fb..4afab40764b 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -38,4 +38,4 @@ SELECT * FROM system.metrics LIMIT 10 - [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metrics) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/metrics) diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index e5ea7eab457..24fa559197c 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -45,4 +45,4 @@ If there were problems with mutating some data parts, the following columns cont - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine - [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/mutations) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/mutations) diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md index d1737c9abbb..bf948d9dd5b 100644 --- a/docs/en/operations/system-tables/numbers.md +++ b/docs/en/operations/system-tables/numbers.md @@ -29,4 +29,4 @@ Reads from this table are not parallelized. 10 rows in set. Elapsed: 0.001 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/numbers) diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md index b40dc9a2d6f..d7df1bc1e0e 100644 --- a/docs/en/operations/system-tables/numbers_mt.md +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -27,4 +27,4 @@ Used for tests. 10 rows in set. Elapsed: 0.001 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers_mt) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/numbers_mt) diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md index 51316dfbc44..10b2a1757d0 100644 --- a/docs/en/operations/system-tables/one.md +++ b/docs/en/operations/system-tables/one.md @@ -20,4 +20,4 @@ This is similar to the `DUAL` table found in other DBMSs. 1 rows in set. Elapsed: 0.001 sec. 
``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/one) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/one) diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index 3f9110349dd..b815d2366bb 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -66,4 +66,4 @@ error: 0 exception: ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/part_log) diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index 5a4715a4513..b9b5aa09b64 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -155,4 +155,4 @@ move_ttl_info.max: [] - [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) - [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/parts) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/parts) diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 9ef3c648006..6090c5e4555 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -14,7 +14,6 @@ Columns: - `query` (String) – The query text. For `INSERT`, it does not include the data to insert. - `query_id` (String) – Query ID, if defined. - ```sql :) SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; ``` @@ -34,14 +33,14 @@ initial_port: 47588 interface: 1 os_user: bharatnc client_hostname: tower -client_name: ClickHouse +client_name: ClickHouse client_revision: 54437 client_version_major: 20 client_version_minor: 7 client_version_patch: 2 http_method: 0 -http_user_agent: -quota_key: +http_user_agent: +quota_key: elapsed: 0.000582537 is_cancelled: 0 read_rows: 0 @@ -53,12 +52,10 @@ memory_usage: 0 peak_memory_usage: 0 query: SELECT * from system.processes LIMIT 10 FORMAT Vertical; thread_ids: [67] -ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ContextLock','RWLockAcquiredReadLocks'] -ProfileEvents.Values: [1,1,36,1,10,1,89,16,1] -Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage'] -Settings.Values: ['0','in_order','1','10000000000'] +ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1} +Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'} -1 rows in set. Elapsed: 0.002 sec. +1 rows in set. Elapsed: 0.002 sec. 
 ```
 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/processes)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/processes)
diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md
index 85f0679fe37..d58e549616f 100644
--- a/docs/en/operations/system-tables/query_log.md
+++ b/docs/en/operations/system-tables/query_log.md
@@ -84,12 +84,10 @@ Columns:
 - `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP header `X-Forwarded-For` passed in the HTTP query.
 - `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
 - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
+- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics. Their descriptions can be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events).
+- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
 - `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
 - `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution.
-- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
-- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
-- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
-- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column.
 - `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
 - `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution.
 - `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.
@@ -109,72 +107,53 @@ SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDa ``` text Row 1: ────── -type: QueryFinish -event_date: 2021-03-18 -event_time: 2021-03-18 20:54:18 -event_time_microseconds: 2021-03-18 20:54:18.676686 -query_start_time: 2021-03-18 20:54:18 -query_start_time_microseconds: 2021-03-18 20:54:18.673934 -query_duration_ms: 2 -read_rows: 100 -read_bytes: 800 -written_rows: 0 -written_bytes: 0 -result_rows: 2 -result_bytes: 4858 -memory_usage: 0 -current_database: default -query: SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]), week(toDate('2000-12-05')), CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)), avgOrDefaultIf(number, number % 2), sumOrNull(number), toTypeName(sumOrNull(number)), countIf(toDate('2000-12-05') + number as d, toDayOfYear(d) % 2) FROM numbers(100) -normalized_query_hash: 17858008518552525706 -query_kind: Select -databases: ['_table_function'] -tables: ['_table_function.numbers'] -columns: ['_table_function.numbers.number'] -exception_code: 0 +type: QueryStart +event_date: 2020-09-11 +event_time: 2020-09-11 10:08:17 +event_time_microseconds: 2020-09-11 10:08:17.063321 +query_start_time: 2020-09-11 10:08:17 +query_start_time_microseconds: 2020-09-11 10:08:17.063321 +query_duration_ms: 0 +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +memory_usage: 0 +current_database: default +query: INSERT INTO test1 VALUES +exception_code: 0 exception: stack_trace: -is_initial_query: 1 -user: default -query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c -address: ::ffff:127.0.0.1 -port: 37486 -initial_user: default -initial_query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c -initial_address: ::ffff:127.0.0.1 -initial_port: 37486 -interface: 1 -os_user: sevirov -client_hostname: clickhouse.ru-central1.internal -client_name: ClickHouse -client_revision: 54447 -client_version_major: 21 -client_version_minor: 4 -client_version_patch: 1 -http_method: 0 +is_initial_query: 1 +user: default +query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +address: ::ffff:127.0.0.1 +port: 33452 +initial_user: default +initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +initial_address: ::ffff:127.0.0.1 +initial_port: 33452 +interface: 1 +os_user: bharatnc +client_hostname: tower +client_name: ClickHouse +client_revision: 54437 +client_version_major: 20 +client_version_minor: 7 +client_version_patch: 2 +http_method: 0 http_user_agent: -http_referer: -forwarded_for: quota_key: -revision: 54449 -log_comment: -thread_ids: [587,11939] -ProfileEvents.Names: ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','TableFunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes'] -ProfileEvents.Values: [1,1,36,1,10,2,1048680,1,4096,36,1,110,100,800,77,1,3137,1476,1101,8,2577,8192] -Settings.Names: ['load_balancing','max_memory_usage'] -Settings.Values: ['random','10000000000'] -used_aggregate_functions: ['groupBitAnd','avg','sum','count','uniq'] -used_aggregate_function_combinators: ['OrDefault','If','OrNull','Array'] -used_database_engines: [] -used_data_type_families: ['String','Array','Int32','Nullable'] 
-used_dictionaries: []
-used_formats: []
-used_functions: ['toWeek','CAST','arrayFlatten','toTypeName','toDayOfYear','addDays','array','toDate','modulo','substring','plus']
-used_storages: []
-used_table_functions: ['numbers']
+revision: 54440
+thread_ids: []
+ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
+Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'}
 ```
 
 **See Also**
 
 - [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_log)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/query_log)
diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md
index 296a33259b3..7ecea2971b4 100644
--- a/docs/en/operations/system-tables/query_thread_log.md
+++ b/docs/en/operations/system-tables/query_thread_log.md
@@ -58,8 +58,7 @@ Columns:
 - `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
 - `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
 - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
-- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events).
-- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column.
+- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics for this thread. Their descriptions can be found in the table [system.events](#system_tables-events).
**Example** @@ -98,21 +97,20 @@ initial_port: 33452 interface: 1 os_user: bharatnc client_hostname: tower -client_name: ClickHouse +client_name: ClickHouse client_revision: 54437 client_version_major: 20 client_version_minor: 7 client_version_patch: 2 http_method: 0 -http_user_agent: -quota_key: +http_user_agent: +quota_key: revision: 54440 -ProfileEvents.Names: ['Query','InsertQuery','FileOpen','WriteBufferFromFileDescriptorWrite','WriteBufferFromFileDescriptorWriteBytes','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','FunctionExecute','CreatedWriteBufferOrdinary','DiskWriteElapsedMicroseconds','NetworkReceiveElapsedMicroseconds','NetworkSendElapsedMicroseconds','InsertedRows','InsertedBytes','SelectedRows','SelectedBytes','MergeTreeDataWriterRows','MergeTreeDataWriterUncompressedBytes','MergeTreeDataWriterCompressedBytes','MergeTreeDataWriterBlocks','MergeTreeDataWriterBlocksAlreadySorted','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSReadChars','OSWriteChars'] -ProfileEvents.Values: [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47,1,12,1,12,1,12,189,1,1,10,2,70853,2748,49,2747,45056,422,1520] +ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1} ``` **See Also** - [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_thread_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/query_thread_log) diff --git a/docs/en/operations/system-tables/quota_limits.md b/docs/en/operations/system-tables/quota_limits.md index 11616990206..0088b086e8c 100644 --- a/docs/en/operations/system-tables/quota_limits.md +++ b/docs/en/operations/system-tables/quota_limits.md @@ -17,3 +17,5 @@ Columns: - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of bytes read from all tables and table functions participated in queries. - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of the query execution time, in seconds. 
+ +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quota_limits) diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 89fdfe70069..2f35b6b7dae 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -28,3 +28,5 @@ Columns: ## See Also {#see-also} - [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quota_usage) diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index 3e797c9bdc6..6acc349a54f 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -24,5 +24,5 @@ Columns: - [SHOW QUOTAS](../../sql-reference/statements/show.md#show-quotas-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quotas) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quotas) diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index 04cf91cb990..6ba88cb935a 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -30,4 +30,6 @@ Columns: ## See Also {#see-also} -- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) \ No newline at end of file +- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quotas_usage) diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md index 63a2141e399..5a6ec54723b 100644 --- a/docs/en/operations/system-tables/replicas.md +++ b/docs/en/operations/system-tables/replicas.md @@ -120,5 +120,5 @@ WHERE If this query does not return anything, it means that everything is fine. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicas) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/replicas) diff --git a/docs/en/operations/system-tables/role-grants.md b/docs/en/operations/system-tables/role-grants.md index 5eb18b0dca7..d90bc1f77be 100644 --- a/docs/en/operations/system-tables/role-grants.md +++ b/docs/en/operations/system-tables/role-grants.md @@ -18,4 +18,4 @@ Columns: - 1 — The role has `ADMIN OPTION` privilege. - 0 — The role without `ADMIN OPTION` privilege. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/role-grants) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/role-grants) diff --git a/docs/en/operations/system-tables/roles.md b/docs/en/operations/system-tables/roles.md index 4ab5102dfc8..e68d5ed290a 100644 --- a/docs/en/operations/system-tables/roles.md +++ b/docs/en/operations/system-tables/roles.md @@ -12,4 +12,4 @@ Columns: - [SHOW ROLES](../../sql-reference/statements/show.md#show-roles-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/roles) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/roles) diff --git a/docs/en/operations/system-tables/row_policies.md b/docs/en/operations/system-tables/row_policies.md index 97474d1b3ee..767270d64ae 100644 --- a/docs/en/operations/system-tables/row_policies.md +++ b/docs/en/operations/system-tables/row_policies.md @@ -31,4 +31,4 @@ Columns: - [SHOW POLICIES](../../sql-reference/statements/show.md#show-policies-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/row_policies) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/row_policies) diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md index 7034fe1204f..cfd9f43655a 100644 --- a/docs/en/operations/system-tables/settings.md +++ b/docs/en/operations/system-tables/settings.md @@ -50,4 +50,4 @@ SELECT * FROM system.settings WHERE changed AND name='load_balancing' - [Constraints on Settings](../../operations/settings/constraints-on-settings.md) - [SHOW SETTINGS](../../sql-reference/statements/show.md#show-settings) statement -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings) diff --git a/docs/en/operations/system-tables/settings_profile_elements.md b/docs/en/operations/system-tables/settings_profile_elements.md index d0f2c3c4527..3c8c728e645 100644 --- a/docs/en/operations/system-tables/settings_profile_elements.md +++ b/docs/en/operations/system-tables/settings_profile_elements.md @@ -27,4 +27,4 @@ Columns: - `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings_profile_elements) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings_profile_elements) diff --git a/docs/en/operations/system-tables/settings_profiles.md b/docs/en/operations/system-tables/settings_profiles.md index a06b26b9cb6..80dc5172f4e 100644 --- a/docs/en/operations/system-tables/settings_profiles.md +++ b/docs/en/operations/system-tables/settings_profiles.md @@ -21,4 +21,4 @@ Columns: - [SHOW PROFILES](../../sql-reference/statements/show.md#show-profiles-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings_profiles) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings_profiles) diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md index 5adab1cb2aa..4b07b593926 100644 --- a/docs/en/operations/system-tables/storage_policies.md +++ b/docs/en/operations/system-tables/storage_policies.md @@ -14,4 +14,4 @@ Columns: If the storage policy contains more then one volume, then information for each volume is stored in the individual row of the table. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/storage_policies) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/storage_policies) diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 30122cb133e..45ff6f1ac19 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -35,4 +35,4 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') - Kafka [settings](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table) - Join [settings](../../engines/table-engines/special/join.md#join-limitations-and-settings) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/table_engines) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/table_engines) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 480db3087f6..4d7b20be311 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -117,4 +117,4 @@ lifetime_bytes: ᴺᵁᴸᴸ comment: ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/tables) diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index f5f53c95653..ad95e91f0d2 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -50,4 +50,4 @@ source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void source_line: 45 ``` - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/text_log) \ No newline at end of file + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/text_log) diff --git a/docs/en/operations/system-tables/time_zones.md b/docs/en/operations/system-tables/time_zones.md index 1b84ae7fe37..fa467124884 100644 --- a/docs/en/operations/system-tables/time_zones.md +++ b/docs/en/operations/system-tables/time_zones.md @@ -27,4 +27,4 @@ SELECT * FROM system.time_zones LIMIT 10 └────────────────────┘ ``` -[Original 
article](https://clickhouse.tech/docs/en/operations/system_tables/time_zones)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/time_zones)
diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
index e4c01a65d9d..5de597a0a51 100644
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -55,4 +55,3 @@ size: 5244400
 ```
 
 [Original article](https://clickhouse.tech/docs/en/operations/system-tables/trace_log)
-
diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md
index 2227816aff3..11fdeb1e9ae 100644
--- a/docs/en/operations/system-tables/users.md
+++ b/docs/en/operations/system-tables/users.md
@@ -31,4 +31,4 @@ Columns:
 
 - [SHOW USERS](../../sql-reference/statements/show.md#show-users-statement)
 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/users)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/users)
diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md
index 3b8db14934e..52d1c686e52 100644
--- a/docs/en/operations/system-tables/zookeeper.md
+++ b/docs/en/operations/system-tables/zookeeper.md
@@ -72,4 +72,4 @@ numChildren: 7
 pzxid: 987021252247
 path: /clickhouse/tables/01-08/visits/replicas
 ```
 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/zookeeper)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/zookeeper)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md
index 48c6f3f8c05..51ebf2fc0b0 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/count.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/count.md
@@ -31,6 +31,8 @@ ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this const
 
 The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it.
 
+However, the `SELECT count(nullable_column) FROM table` query can be optimized by enabling the [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [null](../../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole column data. The query `SELECT count(n) FROM table` transforms to `SELECT sum(NOT n.null) FROM table`.
+
 **Examples**
 
 Example 1:
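A quick way to observe this rewrite, assuming a hypothetical table `t` with a `Nullable` column `n`:

``` sql
SET optimize_functions_to_subcolumns = 1;

EXPLAIN SYNTAX SELECT count(n) FROM t;
-- expected rewrite: SELECT sum(NOT n.null) FROM t
```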
diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md
index b4f38a9b562..d487a187945 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/median.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/median.md
@@ -1,17 +1,22 @@
+---
+toc_priority: 212
+---
+
 # median {#median}
 
 The `median*` functions are the aliases for the corresponding `quantile*` functions. They calculate median of a numeric data sample.
 
 Functions:
 
-- `median` — Alias for [quantile](#quantile).
-- `medianDeterministic` — Alias for [quantileDeterministic](#quantiledeterministic).
-- `medianExact` — Alias for [quantileExact](#quantileexact).
-- `medianExactWeighted` — Alias for [quantileExactWeighted](#quantileexactweighted).
-- `medianTiming` — Alias for [quantileTiming](#quantiletiming).
-- `medianTimingWeighted` — Alias for [quantileTimingWeighted](#quantiletimingweighted).
-- `medianTDigest` — Alias for [quantileTDigest](#quantiletdigest).
-- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](#quantiletdigestweighted).
+- `median` — Alias for [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
+- `medianDeterministic` — Alias for [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic).
+- `medianExact` — Alias for [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact).
+- `medianExactWeighted` — Alias for [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md#quantileexactweighted).
+- `medianTiming` — Alias for [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming).
+- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted).
+- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
+- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
+- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
 
 **Example**
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md
new file mode 100644
index 00000000000..b914e1feedf
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md
@@ -0,0 +1,64 @@
+---
+toc_priority: 209
+---
+
+# quantileBFloat16 {#quantilebfloat16}
+
+Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample consisting of [bfloat16](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) numbers. `bfloat16` is a floating-point data type with 1 sign bit, 8 exponent bits and 7 fraction bits.
+The function converts input values to 32-bit floats and takes the most significant 16 bits. Then it calculates the `bfloat16` quantile value and converts the result to a 64-bit float by appending zero bits.
+The function is a fast quantile estimator with a relative error of no more than 0.390625%.
+
+**Syntax**
+
+``` sql
+quantileBFloat16[(level)](expr)
+```
+
+Alias: `medianBFloat16`
+
+**Arguments**
+
+- `expr` — Column with numeric data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md).
+
+**Parameters**
+
+- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md).
+
+**Returned value**
+
+- Approximate quantile of the specified level.
+
+Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
+
+**Example**
+
+The input table has an integer and a float column:
+
+``` text
+┌─a─┬─────b─┐
+│ 1 │ 1.001 │
+│ 2 │ 1.002 │
+│ 3 │ 1.003 │
+│ 4 │ 1.004 │
+└───┴───────┘
+```
+
+Query to calculate the 0.75-quantile (third quartile):
+
+``` sql
+SELECT quantileBFloat16(0.75)(a), quantileBFloat16(0.75)(b) FROM example_table;
+```
+
+Result:
+
+``` text
+┌─quantileBFloat16(0.75)(a)─┬─quantileBFloat16(0.75)(b)─┐
+│                         3 │                         1 │
+└───────────────────────────┴───────────────────────────┘
+```
+Note that all floating-point values in the example are truncated to 1.0 when converting to `bfloat16`.
+
+**See Also**
+
+- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
+- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
index 06ef7ccfbd3..069aadc225b 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
@@ -2,7 +2,9 @@
 toc_priority: 202
 ---
 
-# quantileExact {#quantileexact}
+# quantileExact Functions {#quantileexact-functions}
+
+## quantileExact {#quantileexact}
 
 Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
 
@@ -49,7 +51,7 @@ Result:
 └───────────────────────┘
 ```
 
-# quantileExactLow {#quantileexactlow}
+## quantileExactLow {#quantileexactlow}
 
 Similar to `quantileExact`, this computes the exact [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
 
@@ -66,13 +68,13 @@ SELECT quantileExactLow(0.1)(number) FROM numbers(10)
 │                             1 │
 └───────────────────────────────┘
 ```
- 
+
 When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
 
 **Syntax**
 
 ``` sql
-quantileExact(level)(expr)
+quantileExactLow(level)(expr)
 ```
 
 Alias: `medianExactLow`.
@@ -107,12 +109,11 @@ Result:
 │                        4 │
 └──────────────────────────┘
 ```
-# quantileExactHigh {#quantileexacthigh}
+## quantileExactHigh {#quantileexacthigh}
 
 Similar to `quantileExact`, this computes the exact [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
 
-All the passed values are combined into an array, which is then fully sorted,
-to get the exact value. The sorting [algorithm's](https://en.cppreference.com/w/cpp/algorithm/sort) complexity is `O(N·log(N))`, where `N = std::distance(first, last)` comparisons.
+All the passed values are combined into an array, which is then fully sorted, to get the exact value. The sorting [algorithm's](https://en.cppreference.com/w/cpp/algorithm/sort) complexity is `O(N·log(N))`, where `N = std::distance(first, last)` comparisons.
 
 The return value depends on the quantile level and the number of elements in the selection, i.e. if the level is 0.5, then the function returns the higher median value for an even number of elements and the middle median value for an odd number of elements. Median is calculated similarly to the [median_high](https://docs.python.org/3/library/statistics.html#statistics.median_high) implementation which is used in python. For all other levels, the element at the index corresponding to the value of `level * size_of_array` is returned.
@@ -158,6 +159,111 @@ Result:
 
 │                         5 │
 └───────────────────────────┘
 ```
+
+## quantileExactExclusive {#quantileexactexclusive}
+
+Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
+
+To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
+
+This function is equivalent to the [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba) Excel function ([type R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
+
+When using multiple `quantileExactExclusive` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive) function.
+
+**Syntax**
+
+``` sql
+quantileExactExclusive(level)(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Parameters**
+
+- `level` — Level of quantile. Optional. Possible values: (0, 1) — bounds not included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md).
+
+**Returned value**
+
+- Quantile of the specified level.
+
+Type:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantileExactExclusive(0.6)(x) FROM (SELECT number AS x FROM num);
+```
+
+Result:
+
+``` text
+┌─quantileExactExclusive(0.6)(x)─┐
+│                          599.6 │
+└────────────────────────────────┘
+```
+
+## quantileExactInclusive {#quantileexactinclusive}
+
+Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
+
+To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
+
+This function is equivalent to the [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed) Excel function ([type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
+
+When using multiple `quantileExactInclusive` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactinclusive) function.
+
+**Syntax**
+
+``` sql
+quantileExactInclusive(level)(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Parameters**
+
+- `level` — Level of quantile. Optional. Possible values: [0, 1] — bounds included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md).
+
+**Returned value**
+
+- Quantile of the specified level.
+
+Type:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantileExactInclusive(0.6)(x) FROM (SELECT number AS x FROM num);
+```
+
+Result:
+
+``` text
+┌─quantileExactInclusive(0.6)(x)─┐
+│                          599.4 │
+└────────────────────────────────┘
+```
+
 **See Also**
 
 - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
index abce6a9e7f0..67d1c1ca7e5 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
@@ -2,8 +2,114 @@
 toc_priority: 201
 ---
 
-# quantiles {#quantiles}
+# quantiles Functions {#quantiles-functions}
+
+## quantiles {#quantiles}
 
 Syntax: `quantiles(level1, level2, …)(x)`
 
-All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
+All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
+
+## quantilesExactExclusive {#quantilesexactexclusive}
+
+Exactly computes the [quantiles](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
+
+To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
+
+This function is equivalent to the [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba) Excel function ([type R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
+
+Works more efficiently with sets of levels than [quantileExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
+
+**Syntax**
+
+``` sql
+quantilesExactExclusive(level1, level2, ...)(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Parameters**
+
+- `level` — Levels of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md).
+
+**Returned value**
+
+- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
+
+Type of array values:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num);
+```
+
+Result:
+
+``` text
+┌─quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐
+│ [249.25,499.5,749.75,899.9,949.9499999999999,989.99,998.999]        │
+└─────────────────────────────────────────────────────────────────────┘
+```
+
+## quantilesExactInclusive {#quantilesexactinclusive}
+
+Exactly computes the [quantiles](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
+
+To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
+
+This function is equivalent to the [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed) Excel function ([type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
+
+Works more efficiently with sets of levels than [quantileExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactinclusive).
+
+**Syntax**
+
+``` sql
+quantilesExactInclusive(level1, level2, ...)(expr)
+```
+
+**Arguments**
+
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Parameters**
+
+- `level` — Levels of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md).
+
+**Returned value**
+
+- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
+
+Type of array values:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num);
+```
+
+Result:
+
+``` text
+┌─quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐
+│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001]                   │
+└─────────────────────────────────────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index 58634e5b669..86ea55004fd 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -8,6 +8,7 @@ toc_title: Map(key, value)
 `Map(key, value)` data type stores `key:value` pairs.
 
 **Parameters**
+
 - `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md).
 - `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md).
 
@@ -75,6 +76,36 @@ SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map
 └───────────────────────────────┘
 ```
 
+## Map.keys and Map.values Subcolumns {#map-subcolumns}
+
+To optimize `Map` column processing, in some cases you can use the `keys` and `values` subcolumns instead of reading the whole column.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory;
+
+INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3));
+
+SELECT a.keys FROM t_map;
+
+SELECT a.values FROM t_map;
+```
+
+Result:
+
+``` text
+┌─a.keys─────────────────┐
+│ ['key1','key2','key3'] │
+└────────────────────────┘
+
+┌─a.values─┐
+│ [1,2,3]  │
+└──────────┘
+```
+
 **See Also**
 
 - [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index e339461e428..afef6ae249d 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -57,7 +57,7 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con
 When updating the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md):
 
 - For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
-- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`.
+- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`).
 - Dictionaries from other sources are updated every time by default.
 
 For other sources (ODBC, PostgreSQL, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
 
@@ -88,13 +88,13 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher
 
 For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronious updates are supported.
 
-It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after previous update. If `update_field` is specified in as part of dictionary source configuration value of previous update time in seconds will be added to data request. Depends of source type Executable, HTTP, MySQL, PostgreSQL, ClickHouse, ODBC different logic will be applied to `update_field` before request data from external source.
+It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, the value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic will be applied to `update_field` before requesting data from an external source.
 
-- If source is HTTP then `update_field` will be added as query parameter with last update time as parameter value.
-- If source is Executable then `update_field` will be added as executable script argument with last update time as argument value.
-- If source is ClickHouse, MySQL, PostgreSQL, ODBC there will be additional part of WHERE, where `update_field` is compared as greater or equal with last update time.
+- If the source is HTTP, then `update_field` will be added as a query parameter with the last update time as the parameter value.
+- If the source is Executable, then `update_field` will be added as an executable script argument with the last update time as the argument value.
+- If the source is ClickHouse, MySQL, PostgreSQL, or ODBC, an additional `WHERE` condition is added that compares `update_field` as greater than or equal to the last update time.
 
-If `update_field` option is set. Additional option `update_lag` can be set. Value of `update_lag` option is subtracted from previous update time before request updated data.
+If the `update_field` option is set, the additional option `update_lag` can be set. The value of the `update_lag` option is subtracted from the previous update time before requesting the updated data.
 
 Example of settings:
 
@@ -116,4 +116,4 @@ or
 ...
 SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
 ...
-```
\ No newline at end of file
+```
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md
index a7ab23da7cb..bee77a382d7 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md
@@ -159,7 +159,7 @@ Configuration fields:
 | Tag | Description | Required |
 |------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
 | `name` | Column name. | Yes |
-| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes |
+| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast the value from the dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes |
 | `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
 | `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
 | `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).<br/><br/>Default value: `false`. | No |
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 6495a26a426..b56d403edf6 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -11,18 +11,24 @@ Returns 1 for an empty array, or 0 for a non-empty array.
 The result type is UInt8.
 The function also works for strings.
 
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM table` transforms to `SELECT arr.size0 = 0 FROM table`.
+
 ## notEmpty {#function-notempty}
 
 Returns 0 for an empty array, or 1 for a non-empty array.
 The result type is UInt8.
 The function also works for strings.
 
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM table`.
+
 ## length {#array_functions-length}
 
 Returns the number of items in the array.
 The result type is UInt64.
 The function also works for strings.
 
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM table`.
+
 ## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64}
 
 ## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 {#emptyarrayint8-emptyarrayint16-emptyarrayint32-emptyarrayint64}
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 5d72bb099fe..db36a5b5637 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -80,6 +80,7 @@ SELECT
     toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc,
     toInt32(time_samoa) AS int32samoa
 FORMAT Vertical;
 ```
+
 Result:
 
 ```text
@@ -1014,7 +1015,7 @@ Result:
 
 ## dateName {#dataname}
 
-Returns part of date with specified date part.
+Returns the specified part of a date.
 
 **Syntax**
 
@@ -1024,13 +1025,13 @@ dateName(date_part, date)
 
 **Arguments**
 
-- `date_part` - Date part. Possible values .
-- `date` — Date [Date](../../sql-reference/data-types/date.md) or DateTime [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md).
-
+- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md).
+- `date` — Date. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
[Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). **Returned value** -- Specified date part of date. +- The specified part of date. Type: [String](../../sql-reference/data-types/string.md#string) diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index b464e070acc..b6393e7b4e5 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -224,7 +224,7 @@ Accepts an integer. Returns an array of UInt64 numbers containing the list of po ## bitPositionsToArray(num) {#bitpositionstoarraynum} -Accepts an integer, argument will be converted to unsigned integer type. Returns an array of UInt64 numbers containing the list of positions of bits that equals 1. Numbers in the array are in ascending order. +Accepts an integer and converts it to an unsigned integer. Returns an array of `UInt64` numbers containing the list of positions of bits of `arg` that equal `1`, in ascending order. **Syntax** @@ -234,11 +234,13 @@ bitPositionsToArray(arg) **Arguments** -- `arg` — Integer value.Types: [Int/UInt](../../sql-reference/data-types/int-uint.md) +- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md). **Returned value** -An array of UInt64 numbers containing the list of positions of bits that equals 1. Numbers in the array are in ascending order. +- An array containing a list of positions of bits that equal `1`, in ascending order. + +Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). **Example** diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 8b0710c0182..dcfa18e04bf 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -70,23 +70,23 @@ Result: Collect all the keys and sum corresponding values. -**Syntax** +**Syntax** ``` sql -mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...]) +mapAdd(arg1, arg2 [, ...]) ``` -**Arguments** +**Arguments** -Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. +Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. 
 
 **Returned value**
 
-- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
+- Depending on the arguments, returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
 
 **Example**
 
-Query:
+Query with a tuple map:
 
 ``` sql
 SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
@@ -100,6 +100,11 @@ Result:
 └───────────────┴────────────────────────────────────┘
 ```
 
+Query with `Map` type (a minimal sketch of the behavior described above; the expected result is noted in the comments):
+
+``` sql
+SELECT mapAdd(map(1, 1), map(1, 1)) AS res, toTypeName(res) AS type;
+-- Expected to return res = {1:2} with type Map(UInt8, UInt64):
+-- the keys of both maps are merged and the corresponding values are summed.
+```
+
 ## mapSubtract {#function-mapsubtract}
 
 Collect all the keys and subtract corresponding values.
 
@@ -220,6 +225,8 @@ Result:
 
 Returns all keys from the `map` parameter.
 
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [keys](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`.
+
 **Syntax**
 
 ```sql
@@ -261,6 +268,8 @@ Result:
 
 Returns all values from the `map` parameter.
 
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [values](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`.
+
 **Syntax**
 
 ```sql
diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md
index 268e56a5034..f5158cf9ffb 100644
--- a/docs/en/sql-reference/operators/index.md
+++ b/docs/en/sql-reference/operators/index.md
@@ -283,6 +283,8 @@ ClickHouse supports the `IS NULL` and `IS NOT NULL` operators.
     - `0` otherwise.
 - For other values, the `IS NULL` operator always returns `0`.
 
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole column data. The query `SELECT n IS NULL FROM table` transforms to `SELECT n.null FROM table`.
+
 ``` sql
 
@@ -313,3 +315,5 @@ SELECT * FROM t_null WHERE y IS NOT NULL
 │ 2 │ 3 │
 └───┴───┘
 ```
+
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole column data. The query `SELECT n IS NOT NULL FROM table` transforms to `SELECT NOT n.null FROM table`.
diff --git a/docs/en/sql-reference/statements/alter/index/index.md b/docs/en/sql-reference/statements/alter/index/index.md
index 56d81aaf52f..fd5657c3666 100644
--- a/docs/en/sql-reference/statements/alter/index/index.md
+++ b/docs/en/sql-reference/statements/alter/index/index.md
@@ -8,7 +8,7 @@ toc_title: INDEX
 
 The following operations are available:
 
-- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - Adds index description to tables metadata.
+- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds the index description to the table metadata.
 
 - `ALTER TABLE [db].name DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk.
 
diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md
index 86381d3c6a4..090cbe93c54 100644
--- a/docs/en/sql-reference/statements/alter/partition.md
+++ b/docs/en/sql-reference/statements/alter/partition.md
@@ -19,6 +19,8 @@ The following operations with [partitions](../../../engines/table-engines/merget
 - [UNFREEZE PARTITION](#alter_unfreeze-partition) — Removes a backup of a partition.
 - [FETCH PARTITION\|PART](#alter_fetch-partition) — Downloads a part or partition from another server.
 - [MOVE PARTITION\|PART](#alter_move-partition) — Move partition/data part to another disk or volume.
+- [UPDATE IN PARTITION](#update-in-partition) — Update data inside the partition by condition.
+- [DELETE IN PARTITION](#delete-in-partition) — Delete data inside the partition by condition.
 
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index 1708d594641..d1526c10203 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -119,7 +119,7 @@ For manage uncompressed data cache parameters use following server level setting
 ## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
 
 Reset the compiled expression cache. Used in development of ClickHouse and performance tests.
-Complied expression cache used when query/user/profile enable option [compile](../../operations/settings/settings.md#compile)
+The compiled expression cache is used when the option [compile-expressions](../../operations/settings/settings.md#compile-expressions) is enabled for the query/user/profile.
 
 ## FLUSH LOGS {#query_language-system-flush_logs}
 
diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md
index 7b4e2a301b3..a174786d4b7 100644
--- a/docs/en/sql-reference/table-functions/mysql.md
+++ b/docs/en/sql-reference/table-functions/mysql.md
@@ -39,6 +39,18 @@ Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are currently executed on t
 
 The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
 
+Supports multiple replicas, which must be separated by `|`. For example:
+
+```sql
+SELECT name FROM mysql(`mysql{1|2|3}:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
+```
+
+or
+
+```sql
+SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 'mysql_table', 'user', 'password');
+```
+
 **Returned Value**
 
 A table object with the same columns as the original MySQL table.
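The `optimize_functions_to_subcolumns` rewrites documented in the hunks above (for `count`, `empty`, `length`, `mapKeys`, `mapValues`, and the `IS [NOT] NULL` operators) can be inspected with `EXPLAIN SYNTAX`. A minimal sketch, assuming a hypothetical table `t` with an `Array` column `arr` and a `Nullable` column `n`:

``` sql
-- Enable the subcolumn rewrites described in the documentation above.
SET optimize_functions_to_subcolumns = 1;

-- Expected to be rewritten to read only the arr.size0 subcolumn:
EXPLAIN SYNTAX SELECT length(arr) FROM t;

-- Expected to be rewritten to read only the n.null subcolumn:
EXPLAIN SYNTAX SELECT n IS NULL FROM t;
```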
diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md
index 3eab572ac12..7ef664de269 100644
--- a/docs/en/sql-reference/table-functions/postgresql.md
+++ b/docs/en/sql-reference/table-functions/postgresql.md
@@ -43,8 +43,20 @@ PostgreSQL Array types converts into ClickHouse arrays.
 
 !!! info "Note"
     Be careful, in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows.
+
+Supports multiple replicas, which must be separated by `|`. For example:
 
-Supports replicas priority for PostgreSQL dictionary source. The bigger the number in map, the less the priority. The highest priority is `0`.
+```sql
+SELECT name FROM postgresql(`postgres{1|2|3}:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
+```
+
+or
+
+```sql
+SELECT name FROM postgresql(`postgres1:5431|postgres2:5432`, 'postgres_database', 'postgres_table', 'user', 'password');
+```
+
+Supports replica priority for the PostgreSQL dictionary source. The bigger the number in the map, the lower the priority. The highest priority is `0`.
 
 **Examples**
 
diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md
new file mode 100644
index 00000000000..65565aa92cb
--- /dev/null
+++ b/docs/en/sql-reference/table-functions/s3Cluster.md
@@ -0,0 +1,48 @@
+---
+toc_priority: 55
+toc_title: s3Cluster
+---
+
+# s3Cluster Table Function {#s3Cluster-table-function}
+
+Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, expands the asterisks in the S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
+
+**Syntax**
+
+``` sql
+s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
+```
+
+**Arguments**
+
+- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
+- `source` — URL to a file or a set of files. Supports the following wildcards in read-only mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
+- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with the given endpoint. Optional.
+- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
+
+**Returned value**
+
+A table with the specified structure for reading or writing data in the specified file.
+
+**Examples**
+
+Select the data from all files in the cluster `cluster_simple`:
+
+``` sql
+SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon);
+```
+
+Count the total number of rows in all files in the cluster `cluster_simple`:
+
+``` sql
+SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))');
+```
+
+!!! warning "Warning"
+    If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
+
+**See Also**
+
+- [S3 engine](../../engines/table-engines/integrations/s3.md)
+- [s3 table function](../../sql-reference/table-functions/s3.md)
diff --git a/docs/en/whats-new/security-changelog.md b/docs/en/whats-new/security-changelog.md
index aecd7a26508..bebc9a6035f 100644
--- a/docs/en/whats-new/security-changelog.md
+++ b/docs/en/whats-new/security-changelog.md
@@ -7,7 +7,7 @@ toc_title: Security Changelog
 
 ### CVE-2019-15024 {#cve-2019-15024}
 
-Аn attacker that has write access to ZooKeeper and who ican run a custom server available from the network where ClickHouse runs, can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica will fetch data part from the malicious replica, it can force clickhouse-server to write to arbitrary path on filesystem.
+An attacker who has write access to ZooKeeper and who can run a custom server available from the network where ClickHouse runs can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem.
 
 Credits: Eldar Zaitov of Yandex Information Security Team
 
diff --git a/docs/ja/development/style.md b/docs/ja/development/style.md
index f4b3f9c77dd..596e29f4414 100644
--- a/docs/ja/development/style.md
+++ b/docs/ja/development/style.md
@@ -749,19 +749,11 @@ CPU命令セットは、サーバー間でサポートされる最小のセッ
 
 ## 図書館 {#libraries}
 
-**1.** C++20標準ライブラリが使用されています(実験的な拡張が許可されています)。 `boost` と `Poco` フレームワーク
+**1.** The C++20 standard library is used (experimental extensions are allowed), as well as the `boost` and `Poco` frameworks.
 
-**2.** 必要に応じて、OSパッケージで利用可能な既知のライブラリを使用できます。
+**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in the form of source code in the `contrib` directory and built with ClickHouse.
 
-すでに利用可能な良い解決策がある場合は、別のライブラリをインストールする必要がある場合でも、それを使用してください。
-
-(が準備をしておいてくださ去の悪い図書館からのコードです。)
-
-**3.** パッケージに必要なものがない場合や、古いバージョンや間違った種類のコンパイルがある場合は、パッケージにないライブラリをインストールできます。
-
-**4.** ライブラリが小さく、独自の複雑なビルドシステムがない場合は、ソースファイルを `contrib` フォルダ。
-
-**5.** すでに使用されているライブラリが優先されます。
+**3.** Preference is always given to libraries that are already in use.
 
 ## 一般的な推奨事項 {#general-recommendations-1}
 
diff --git a/docs/ja/interfaces/http.md b/docs/ja/interfaces/http.md
index 79c4ba372ee..84850c159a4 100644
--- a/docs/ja/interfaces/http.md
+++ b/docs/ja/interfaces/http.md
@@ -478,7 +478,7 @@ max_alter_threads 2
 Say Hi!
-
+
 ```
 
 ``` bash
diff --git a/docs/ja/operations/settings/settings.md b/docs/ja/operations/settings/settings.md
index 530edf780f0..8ba30f318ab 100644
--- a/docs/ja/operations/settings/settings.md
+++ b/docs/ja/operations/settings/settings.md
@@ -817,22 +817,6 @@ load_balancing = first_or_random
 のための一貫性を異なる部分に同じデータを分割)、このオプションにしているときだけサンプリングキーを設定します。
 
 レプリカラグは制御されません。
 
-## コンパイル {#compile}
-
-を編集ます。 既定では、0(無効)です。
-
-コンパイルは、クエリ処理パイプラインの一部にのみ使用されます。
-この部分のパイプラインのためのクエリを実行するアによる展開の短サイクルinlining集計機能。 複数の単純な集計関数を使用するクエリでは、最大のパフォーマンスの向上が見られます。 通常、性能は軽微であります。 非常に珍しい例で遅くなクエリを実行します。
-
-## min_count_to_compile {#min-count-to-compile}
-
-り方を潜在的に利用コチャンクのコードの実行前に作成する。 デフォルトでは3.
-For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values starting with 1. Compilation normally takes about 5-10 seconds.
-値が1以上の場合、コンパイルは別のスレッドで非同期に実行されます。 結果は、現在実行中のクエリを含め、準備が整うとすぐに使用されます。
-
-コンパイルされたコードは、クエリで使用される集計関数とGROUP BY句内のキーの種類のそれぞれの異なる組み合わせに必要です。
-The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted.
-
 ## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
 
 値がtrueの場合、json\*Int64およびUInt64形式(ほとんどのJavaScript実装との互換性のため)を使用するときに整数が引用符で表示されます。
diff --git a/docs/ja/operations/system-tables.md b/docs/ja/operations/system-tables.md
index 095038b2b72..be0c3356247 100644
--- a/docs/ja/operations/system-tables.md
+++ b/docs/ja/operations/system-tables.md
@@ -625,10 +625,8 @@ ClickHouseはこのテーブルを作成します。 [query_log](server-configur
 
 - `quota_key` (String) — The “quota key” で指定される。 [クォータ](quotas.md) 設定(参照 `keyed`).
 - `revision` (UInt32) — ClickHouse revision.
 - `thread_numbers` (Array(UInt32)) — Number of threads that are participating in query execution.
-- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. The description of them could be found in the table [システムイベント](#system_tables-events)
-- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` 列。
-- `Settings.Names` (Array(String)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` パラメータは1。
-- `Settings.Values` (Array(String)) — Values of settings that are listed in the `Settings.Names` 列。
+- `ProfileEvents` (Map(String, UInt64)) — ProfileEvents that measure different metrics. The description of them could be found in the table [システムイベント](#system_tables-events)
+- `Settings` (Map(String, String)) — Settings 列。
 
 それぞれのクエリでは、一つまたは二つの行が `query_log` クエリのステータスに応じて、テーブル:
 
@@ -698,8 +696,7 @@ ClickHouseはこのテーブルを作成します。 [query_thread_log](server-c
 
 - `http_user_agent` (String) — The `UserAgent` HTTP要求で渡されるヘッダー。
 - `quota_key` (String) — The “quota key” で指定される。 [クォータ](quotas.md) 設定(参照 `keyed`).
 - `revision` (UInt32) — ClickHouse revision.
-- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics for this thread. The description of them could be found in the table [システムイベント](#system_tables-events)
-- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` 列。
+- `ProfileEvents` (Map(String, UInt64)) — ProfileEvents that measure different metrics for this thread. The description of them could be found in the table [システムイベント](#system_tables-events)
 
 既定では、ログは7.5秒間隔でテーブルに追加されます。 この間隔は [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) サーバ設定(参照 `flush_interval_milliseconds` 変数)。 ログをメモリバッファからテーブルに強制的にフラッシュするには、 `SYSTEM FLUSH LOGS` クエリ。
 
diff --git a/docs/ja/sql-reference/statements/alter.md b/docs/ja/sql-reference/statements/alter.md
index 226565dd226..0967f60e06a 100644
--- a/docs/ja/sql-reference/statements/alter.md
+++ b/docs/ja/sql-reference/statements/alter.md
@@ -175,7 +175,7 @@ MODIFY ORDER BY new_expression
 [複製](../../engines/table-engines/mergetree-family/replication.md) テーブル)。 次の操作
 利用できます:
 
-- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` -付加価指数の説明をテーブルメタデータを指すものとします。
+- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` -付加価指数の説明をテーブルメタデータを指すものとします。
 
 - `ALTER TABLE [db].name DROP INDEX name` -除去す指標の説明からテーブルメタデータを削除を行指数のファイルからディスク。
 
diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md
index de29e629ceb..6e1230b4831 100644
--- a/docs/ru/development/style.md
+++ b/docs/ru/development/style.md
@@ -824,17 +824,9 @@ The dictionary is configured incorrectly.
 
 **1.** Используются стандартная библиотека C++20 (допустимо использовать экспериментальные расширения) а также фреймворки `boost`, `Poco`.
 
-**2.** При необходимости, можно использовать любые известные библиотеки, доступные в ОС из пакетов.
+**2.** Библиотеки должны быть расположены в виде исходников в директории `contrib` и собираться вместе с ClickHouse. Не разрешено использовать библиотеки, доступные в пакетах ОС или любые другие способы установки библиотек в систему.
 
-Если есть хорошее готовое решение, то оно используется, даже если для этого придётся установить ещё одну библиотеку.
-
-(Но будьте готовы к тому, что иногда вам придётся выкидывать плохие библиотеки из кода.)
-
-**3.** Если в пакетах нет нужной библиотеки, или её версия достаточно старая, или если она собрана не так, как нужно, то можно использовать библиотеку, устанавливаемую не из пакетов.
-
-**4.** Если библиотека достаточно маленькая и у неё нет своей системы сборки, то следует включить её файлы в проект, в директорию `contrib`.
-
-**5.** Предпочтение всегда отдаётся уже использующимся библиотекам.
+**3.** Предпочтение отдаётся уже использующимся библиотекам.
 
 ## Общее {#obshchee-1}
 
diff --git a/docs/ru/engines/table-engines/integrations/ExternalDistributed.md b/docs/ru/engines/table-engines/integrations/ExternalDistributed.md
new file mode 100644
index 00000000000..5b4386ff8b9
--- /dev/null
+++ b/docs/ru/engines/table-engines/integrations/ExternalDistributed.md
@@ -0,0 +1,53 @@
+---
+toc_priority: 12
+toc_title: ExternalDistributed
+---
+
+# ExternalDistributed {#externaldistributed}
+
+Движок `ExternalDistributed` позволяет выполнять запросы `SELECT` для таблиц на удаленном сервере MySQL или PostgreSQL. Принимает в качестве аргумента табличные движки [MySQL](../../../engines/table-engines/integrations/mysql.md) или [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md), поэтому возможно шардирование.
+
+## Создание таблицы {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
+    ...
+) ENGINE = ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password');
+```
+
+Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query).
+
+Структура таблицы может отличаться от структуры исходной таблицы:
+
+- Имена столбцов должны быть такими же, как в исходной таблице, но можно использовать только некоторые из этих столбцов и в любом порядке.
+- Типы столбцов могут отличаться от типов в исходной таблице. ClickHouse пытается [привести](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) значения к типам данных ClickHouse.
+
+**Параметры движка**
+
+- `engine` — табличный движок `MySQL` или `PostgreSQL`.
+- `host:port` — адрес сервера MySQL или PostgreSQL.
+- `database` — имя базы данных на сервере.
+- `table` — имя таблицы.
+- `user` — имя пользователя.
+- `password` — пароль пользователя.
+
+## Особенности реализации {#implementation-details}
+
+Поддерживает несколько реплик, которые должны быть перечислены через `|`, а шарды — через `,`. Например:
+
+```sql
+CREATE TABLE test_shards (id UInt32, name String, age UInt32, money UInt32) ENGINE = ExternalDistributed('MySQL', `mysql{1|2}:3306,mysql{3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
+```
+
+При указании реплик для каждого из шардов при чтении выбирается одна из доступных реплик. Если соединиться не удалось, то выбирается следующая реплика, и так для всех реплик. Если попытка соединения не удалась для всех реплик, то сервер ClickHouse снова пытается соединиться с одной из реплик, перебирая их по кругу, и так несколько раз.
+
+Вы можете указать любое количество шардов и любое количество реплик для каждого шарда.
+
+**Смотрите также**
+
+- [Табличный движок MySQL](../../../engines/table-engines/integrations/mysql.md)
+- [Табличный движок PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
+- [Табличный движок Distributed](../../../engines/table-engines/special/distributed.md)
diff --git a/docs/ru/engines/table-engines/integrations/mysql.md b/docs/ru/engines/table-engines/integrations/mysql.md
index 784c941c173..486a432d86c 100644
--- a/docs/ru/engines/table-engines/integrations/mysql.md
+++ b/docs/ru/engines/table-engines/integrations/mysql.md
@@ -20,11 +20,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 
 Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query).
 
-Структура таблицы может отличаться от исходной структуры таблицы MySQL:
+Структура таблицы может отличаться от структуры исходной таблицы MySQL:
 
-- Имена столбцов должны быть такими же, как в исходной таблице MySQL, но вы можете использовать только некоторые из этих столбцов и в любом порядке.
-- Типы столбцов могут отличаться от типов в исходной таблице MySQL. ClickHouse пытается [приводить](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) значения к типам данных ClickHouse.
-- Настройка `external_table_functions_use_nulls` определяет как обрабатывать Nullable столбцы. По умолчанию 1, если 0 - табличная функция не будет делать nullable столбцы и будет вместо null выставлять значения по умолчанию для скалярного типа. Это также применимо для null значений внутри массивов.
+- Имена столбцов должны быть такими же, как в исходной таблице MySQL, но можно использовать только некоторые из этих столбцов и в любом порядке.
+- Типы столбцов могут отличаться от типов в исходной таблице MySQL. ClickHouse пытается [привести](../../../engines/database-engines/mysql.md#data_types-support) значения к типам данных ClickHouse.
+- Настройка [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) определяет как обрабатывать Nullable столбцы. Значение по умолчанию: 1. Если значение 0, то табличная функция не делает Nullable столбцы, а вместо NULL выставляет значения по умолчанию для скалярного типа. Это также применимо для значений NULL внутри массивов.
 
 **Параметры движка**
 
@@ -50,6 +50,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 
 Остальные условия и ограничение выборки `LIMIT` будут выполнены в ClickHouse только после выполнения запроса к MySQL.
 
+Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например:
+
+```sql
+CREATE TABLE test_replicas (id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL(`mysql{2|3|4}:3306`, 'clickhouse', 'test_replicas', 'root', 'clickhouse');
+```
+
 ## Пример использования {#primer-ispolzovaniia}
 
 Таблица в MySQL:
 
diff --git a/docs/ru/engines/table-engines/integrations/odbc.md b/docs/ru/engines/table-engines/integrations/odbc.md
index 669977ff531..27b8578a22c 100644
--- a/docs/ru/engines/table-engines/integrations/odbc.md
+++ b/docs/ru/engines/table-engines/integrations/odbc.md
@@ -29,7 +29,7 @@ ENGINE = ODBC(connection_settings, external_database, external_table)
 
 - Имена столбцов должны быть такими же, как в исходной таблице, но вы можете использовать только некоторые из этих столбцов и в любом порядке.
 - Типы столбцов могут отличаться от типов аналогичных столбцов в исходной таблице. ClickHouse пытается [приводить](../../../engines/table-engines/integrations/odbc.md#type_conversion_function-cast) значения к типам данных ClickHouse.
-- Настройка `external_table_functions_use_nulls` определяет как обрабатывать Nullable столбцы. По умолчанию 1, если 0 - табличная функция не будет делать nullable столбцы и будет вместо null выставлять значения по умолчанию для скалярного типа. Это также применимо для null значений внутри массивов.
+- Настройка [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) определяет как обрабатывать Nullable столбцы. Значение по умолчанию: 1. Если значение 0, то табличная функция не делает Nullable столбцы, а вместо NULL выставляет значения по умолчанию для скалярного типа. Это также применимо для значений NULL внутри массивов.
 
 **Параметры движка**
 
diff --git a/docs/ru/engines/table-engines/integrations/postgresql.md b/docs/ru/engines/table-engines/integrations/postgresql.md
index cb8e38ae5c9..caf3bb8c69a 100644
--- a/docs/ru/engines/table-engines/integrations/postgresql.md
+++ b/docs/ru/engines/table-engines/integrations/postgresql.md
@@ -20,19 +20,19 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 
 Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query).
-Структура таблицы может отличаться от исходной структуры таблицы PostgreSQL: +Структура таблицы может отличаться от структуры исходной таблицы PostgreSQL: -- Имена столбцов должны быть такими же, как в исходной таблице PostgreSQL, но вы можете использовать только некоторые из этих столбцов и в любом порядке. -- Типы столбцов могут отличаться от типов в исходной таблице PostgreSQL. ClickHouse пытается [приводить](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. -- Настройка `external_table_functions_use_nulls` определяет как обрабатывать Nullable столбцы. По умолчанию 1, если 0 - табличная функция не будет делать nullable столбцы и будет вместо null выставлять значения по умолчанию для скалярного типа. Это также применимо для null значений внутри массивов. +- Имена столбцов должны быть такими же, как в исходной таблице PostgreSQL, но можно использовать только некоторые из этих столбцов и в любом порядке. +- Типы столбцов могут отличаться от типов в исходной таблице PostgreSQL. ClickHouse пытается [привести](../../../engines/database-engines/postgresql.md#data_types-support) значения к типам данных ClickHouse. +- Настройка [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) определяет как обрабатывать Nullable столбцы. Значение по умолчанию: 1. Если значение 0, то табличная функция не делает Nullable столбцы, а вместо NULL выставляет значения по умолчанию для скалярного типа. Это также применимо для значений NULL внутри массивов. **Параметры движка** - `host:port` — адрес сервера PostgreSQL. -- `database` — Имя базы данных на сервере PostgreSQL. -- `table` — Имя таблицы. -- `user` — Имя пользователя PostgreSQL. -- `password` — Пароль пользователя PostgreSQL. +- `database` — имя базы данных на сервере PostgreSQL. +- `table` — имя таблицы. +- `user` — имя пользователя PostgreSQL. +- `password` — пароль пользователя PostgreSQL. - `schema` — имя схемы, если не используется схема по умолчанию. Необязательный аргумент. ## Особенности реализации {#implementation-details} @@ -49,6 +49,12 @@ PostgreSQL массивы конвертируются в массивы ClickHo !!! info "Внимание" Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы. + +Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например: + +```sql +CREATE TABLE test_replicas (id UInt32, name String) ENGINE = PostgreSQL(`postgres{2|3|4}:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword'); +``` При использовании словаря PostgreSQL поддерживается приоритет реплик. Чем больше номер реплики, тем ниже ее приоритет. Наивысший приоритет у реплики с номером `0`. diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index d0a54d9043a..66a94bcfbca 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -87,9 +87,18 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh Для запуска ClickHouse в Docker нужно следовать инструкции на [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Внутри образов используются официальные `deb` пакеты. 
+### Из единого бинарного файла {#from-single-binary} + +Для установки ClickHouse под Linux можно использовать единый переносимый бинарный файл из последнего коммита ветки `master`: [https://builds.clickhouse.tech/master/amd64/clickhouse]. + +``` bash +curl -O 'https://builds.clickhouse.tech/master/amd64/clickhouse' && chmod a+x clickhouse +sudo ./clickhouse install +``` + ### Из исполняемых файлов для нестандартных окружений {#from-binaries-non-linux} -Для других операционных систем и архитектуры AArch64, сборки ClickHouse предоставляются в виде кросс-компилированного бинарника с последнего коммита ветки master (с задержкой в несколько часов). +Для других операционных систем и архитектуры AArch64 сборки ClickHouse предоставляются в виде кросс-компилированного бинарного файла из последнего коммита ветки `master` (с задержкой в несколько часов). - [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` - [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` @@ -97,9 +106,9 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh После скачивания можно воспользоваться `clickhouse client` для подключения к серверу или `clickhouse local` для обработки локальных данных. -Чтобы установить ClickHouse в рамках всей системы (с необходимыми конфигурационными файлами, настройками пользователей и т.д.), выполните `sudo ./clickhouse install`. Затем выполните команды `clickhouse start` (чтобы запустить сервер) и `clickhouse-client` (чтобы подключиться к нему). +Чтобы установить ClickHouse в рамках всей системы (с необходимыми конфигурационными файлами, настройками пользователей и т.д.), выполните `sudo ./clickhouse install`. Затем выполните команды `clickhouse start` (чтобы запустить сервер) и `clickhouse-client` (чтобы подключиться к нему). -Данные сборки не рекомендуются для использования в продакшене, так как они недостаточно тщательно протестированны. Также, в них присутствуют не все возможности ClickHouse. +Данные сборки не рекомендуются для использования в рабочей среде, так как они недостаточно тщательно протестированы. Также в них присутствуют не все возможности ClickHouse. ### Из исходного кода {#from-sources} diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index 9e553c12dc0..ee406fc3145 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -499,7 +499,7 @@ max_alter_threads 2 Say Hi! - + ``` ``` bash diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index fd5c9dba43a..625453c94c6 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -348,7 +348,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; ## input_format_null_as_default {#settings-input-format-null-as-default} Включает или отключает инициализацию [значениями по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) ячеек с [NULL](../../sql-reference/syntax.md#null-literal), если тип данных столбца не позволяет [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable). -Если столбец не позволяет хранить `NULL` и эта настройка отключена, то вставка `NULL` приведет к возникновению исключения. Если столбец позволяет хранить `NULL`, то значения `NULL` вставляются независимо от этой настройки. 
+Если столбец не позволяет хранить `NULL` и эта настройка отключена, то вставка `NULL` приведет к возникновению исключения. Если столбец позволяет хранить `NULL`, то значения `NULL` вставляются независимо от этой настройки. Эта настройка используется для запросов [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) для текстовых входных форматов. @@ -361,7 +361,7 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; ## insert_null_as_default {#insert_null_as_default} -Включает или отключает вставку [значений по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) вместо [NULL](../../sql-reference/syntax.md#null-literal) в столбцы, которые не позволяют [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable). +Включает или отключает вставку [значений по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) вместо [NULL](../../sql-reference/syntax.md#null-literal) в столбцы, которые не позволяют [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable). Если столбец не позволяет хранить `NULL` и эта настройка отключена, то вставка `NULL` приведет к возникновению исключения. Если столбец позволяет хранить `NULL`, то значения `NULL` вставляются независимо от этой настройки. Эта настройка используется для запросов [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select). При этом подзапросы `SELECT` могут объединяться с помощью `UNION ALL`. @@ -1181,22 +1181,22 @@ load_balancing = round_robin !!! warning "Предупреждение" Параллельное выполнение запроса может привести к неверному результату, если в запросе есть объединение или подзапросы и при этом таблицы не удовлетворяют определенным требованиям. Подробности смотрите в разделе [Распределенные подзапросы и max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries). +## compile_expressions {#compile-expressions} -## compile {#compile} +Включает или выключает компиляцию часто используемых функций и операторов. Компиляция производится в нативный код платформы с помощью LLVM во время выполнения. -Включить компиляцию запросов. По умолчанию - 0 (выключено). +Возможные значения: -Компиляция предусмотрена только для части конвейера обработки запроса - для первой стадии агрегации (GROUP BY). -В случае, если эта часть конвейера была скомпилирована, запрос может работать быстрее, за счёт разворачивания коротких циклов и инлайнинга вызовов агрегатных функций. Максимальный прирост производительности (до четырёх раз в редких случаях) достигается на запросах с несколькими простыми агрегатными функциями. Как правило, прирост производительности незначителен. В очень редких случаях возможно замедление выполнения запроса. +- 0 — компиляция выключена. +- 1 — компиляция включена. -## min_count_to_compile {#min-count-to-compile} +Значение по умолчанию: `1`. -После скольких раз, когда скомпилированный кусок кода мог пригодиться, выполнить его компиляцию. По умолчанию - 3. -Для тестирования можно установить значение 0: компиляция выполняется синхронно, и запрос ожидает окончания процесса компиляции перед продолжением выполнения. Во всех остальных случаях используйте значения, начинающиеся с 1. Как правило, компиляция занимает по времени около 5-10 секунд. -В случае, если значение равно 1 или больше, компиляция выполняется асинхронно, в отдельном потоке. При готовности результата, он сразу же будет использован, в том числе, уже выполняющимися в данный момент запросами. 
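+
+Например, включить компиляцию для текущей сессии и проверить её влияние можно так (условный набросок: запрос здесь иллюстративный, конкретный прирост производительности не гарантируется):
+
+``` sql
+SET compile_expressions = 1;
+
+-- выражение hex(number) вычисляется для каждой строки и является кандидатом на компиляцию
+SELECT count() FROM numbers(1000000) WHERE hex(number) LIKE '%FF%';
+```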
+## min_count_to_compile_expression {#min-count-to-compile-expression}
 
-Скомпилированный код требуется для каждого разного сочетания используемых в запросе агрегатных функций и вида ключей в GROUP BY.
-Результаты компиляции сохраняются в директории build в виде .so файлов. Количество результатов компиляции не ограничено, так как они не занимают много места. При перезапуске сервера, старые результаты будут использованы, за исключением случая обновления сервера - тогда старые результаты удаляются.
+Минимальное количество выполнений одного и того же выражения до его компиляции.
+
+Значение по умолчанию: `3`.
 
 ## input_format_skip_unknown_fields {#input-format-skip-unknown-fields}
 
@@ -1606,6 +1606,28 @@ ClickHouse генерирует исключение
 
 Значение по умолчанию: 0.
 
+## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns}
+
+Включает или отключает оптимизацию путем преобразования некоторых функций к чтению подстолбцов, таким образом уменьшая объем данных для чтения.
+
+Могут быть преобразованы следующие функции:
+
+- [length](../../sql-reference/functions/array-functions.md#array_functions-length) к чтению подстолбца [size0](../../sql-reference/data-types/array.md#array-size).
+- [empty](../../sql-reference/functions/array-functions.md#function-empty) к чтению подстолбца [size0](../../sql-reference/data-types/array.md#array-size).
+- [notEmpty](../../sql-reference/functions/array-functions.md#function-notempty) к чтению подстолбца [size0](../../sql-reference/data-types/array.md#array-size).
+- [isNull](../../sql-reference/operators/index.md#operator-is-null) к чтению подстолбца [null](../../sql-reference/data-types/nullable.md#finding-null).
+- [isNotNull](../../sql-reference/operators/index.md#is-not-null) к чтению подстолбца [null](../../sql-reference/data-types/nullable.md#finding-null).
+- [count](../../sql-reference/aggregate-functions/reference/count.md) к чтению подстолбца [null](../../sql-reference/data-types/nullable.md#finding-null).
+- [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) к чтению подстолбца [keys](../../sql-reference/data-types/map.md#map-subcolumns).
+- [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) к чтению подстолбца [values](../../sql-reference/data-types/map.md#map-subcolumns).
+
+Возможные значения:
+
+- 0 — оптимизация отключена.
+- 1 — оптимизация включена.
+
+Значение по умолчанию: `0`.
+
 ## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
 
 - Тип: секунды
@@ -2721,7 +2743,7 @@ SELECT * FROM test2;
 
 - 0 — запрос `INSERT` добавляет данные в конец файла после существующих.
 - 1 — `INSERT` удаляет имеющиеся в файле данные и замещает их новыми.
 
-Значение по умолчанию: `0`. 
+Значение по умолчанию: `0`.
 
 ## allow_experimental_geo_types {#allow-experimental-geo-types}
 
@@ -2735,7 +2757,7 @@ SELECT * FROM test2;
 
 ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
 
-Добавляет модификатор `SYNC` ко всем запросам `DROP` и `DETACH`. 
+Добавляет модификатор `SYNC` ко всем запросам `DROP` и `DETACH`.
 
 Возможные значения:
 
@@ -2813,7 +2835,7 @@ SELECT * FROM test2;
 
 **Пример**
 
-Какие изменения привносит включение и выключение настройки: 
+Какие изменения привносит включение и выключение настройки:
 
 Запрос:
 
@@ -3023,4 +3045,17 @@ SETTINGS index_granularity = 8192 │
 └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
 ```
 
-[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/)
+## external_table_functions_use_nulls {#external-table-functions-use-nulls}
+
+Определяет, как табличные функции [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) и [odbc](../../sql-reference/table-functions/odbc.md) используют Nullable столбцы.
+
+Возможные значения:
+
+- 0 — табличная функция явно использует Nullable столбцы.
+- 1 — табличная функция неявно использует Nullable столбцы.
+
+Значение по умолчанию: `1`.
+
+**Использование**
+
+Если установлено значение `0`, то табличная функция не делает Nullable столбцы, а вместо NULL выставляет значения по умолчанию для скалярного типа. Это также применимо для значений NULL внутри массивов.
diff --git a/docs/ru/operations/system-tables/query_log.md b/docs/ru/operations/system-tables/query_log.md
index d3872e1ef18..8cdddba462c 100644
--- a/docs/ru/operations/system-tables/query_log.md
+++ b/docs/ru/operations/system-tables/query_log.md
@@ -84,12 +84,10 @@ ClickHouse не удаляет данные из таблица автомати
 - `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP заголовок `X-Forwarded-For`.
 - `quota_key` ([String](../../sql-reference/data-types/string.md)) — `ключ квоты` из настроек [квот](quotas.md) (см. `keyed`).
 - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия ClickHouse.
+- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events).
+- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1.
 - `log_comment` ([String](../../sql-reference/data-types/string.md)) — комментарий к записи в логе. Представляет собой произвольную строку, длина которой должна быть не больше, чем [max_query_size](../../operations/settings/settings.md#settings-max_query_size). Если нет комментария, то пустая строка.
 - `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — идентификаторы потоков, участвующих в обработке запросов.
-- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events)(#system_tables-events
-- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — метрики, перечисленные в столбце `ProfileEvents.Names`.
-- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1.
-- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — значения настроек, которые перечислены в столбце `Settings.Names`.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `агрегатных функций`, использованных при выполнении запроса. - `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `комбинаторов агрегатных функций`, использованных при выполнении запроса. - `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `движков баз данных`, использованных при выполнении запроса. @@ -109,68 +107,49 @@ SELECT * FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDa ``` text Row 1: ────── -type: QueryFinish -event_date: 2021-03-18 -event_time: 2021-03-18 20:54:18 -event_time_microseconds: 2021-03-18 20:54:18.676686 -query_start_time: 2021-03-18 20:54:18 -query_start_time_microseconds: 2021-03-18 20:54:18.673934 -query_duration_ms: 2 -read_rows: 100 -read_bytes: 800 -written_rows: 0 -written_bytes: 0 -result_rows: 2 -result_bytes: 4858 -memory_usage: 0 -current_database: default -query: SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]), week(toDate('2000-12-05')), CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)), avgOrDefaultIf(number, number % 2), sumOrNull(number), toTypeName(sumOrNull(number)), countIf(toDate('2000-12-05') + number as d, toDayOfYear(d) % 2) FROM numbers(100) -normalized_query_hash: 17858008518552525706 -query_kind: Select -databases: ['_table_function'] -tables: ['_table_function.numbers'] -columns: ['_table_function.numbers.number'] -exception_code: 0 +type: QueryStart +event_date: 2020-09-11 +event_time: 2020-09-11 10:08:17 +event_time_microseconds: 2020-09-11 10:08:17.063321 +query_start_time: 2020-09-11 10:08:17 +query_start_time_microseconds: 2020-09-11 10:08:17.063321 +query_duration_ms: 0 +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +memory_usage: 0 +current_database: default +query: INSERT INTO test1 VALUES +exception_code: 0 exception: stack_trace: -is_initial_query: 1 -user: default -query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c -address: ::ffff:127.0.0.1 -port: 37486 -initial_user: default -initial_query_id: 58f3d392-0fa0-4663-ae1d-29917a1a9c9c -initial_address: ::ffff:127.0.0.1 -initial_port: 37486 -interface: 1 -os_user: sevirov -client_hostname: clickhouse.ru-central1.internal -client_name: ClickHouse -client_revision: 54447 -client_version_major: 21 -client_version_minor: 4 -client_version_patch: 1 -http_method: 0 +is_initial_query: 1 +user: default +query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +address: ::ffff:127.0.0.1 +port: 33452 +initial_user: default +initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +initial_address: ::ffff:127.0.0.1 +initial_port: 33452 +interface: 1 +os_user: bharatnc +client_hostname: tower +client_name: ClickHouse +client_revision: 54437 +client_version_major: 20 +client_version_minor: 7 +client_version_patch: 2 +http_method: 0 http_user_agent: -http_referer: -forwarded_for: quota_key: -revision: 54449 -log_comment: -thread_ids: [587,11939] -ProfileEvents.Names: 
['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','TableFunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes'] -ProfileEvents.Values: [1,1,36,1,10,2,1048680,1,4096,36,1,110,100,800,77,1,3137,1476,1101,8,2577,8192] -Settings.Names: ['load_balancing','max_memory_usage'] -Settings.Values: ['random','10000000000'] -used_aggregate_functions: ['groupBitAnd','avg','sum','count','uniq'] -used_aggregate_function_combinators: ['OrDefault','If','OrNull','Array'] -used_database_engines: [] -used_data_type_families: ['String','Array','Int32','Nullable'] -used_dictionaries: [] -used_formats: [] -used_functions: ['toWeek','CAST','arrayFlatten','toTypeName','toDayOfYear','addDays','array','toDate','modulo','substring','plus'] -used_storages: [] -used_table_functions: ['numbers'] +revision: 54440 +thread_ids: [] +ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1} +Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'} ``` **Смотрите также** diff --git a/docs/ru/operations/system-tables/query_thread_log.md b/docs/ru/operations/system-tables/query_thread_log.md index 0292a321524..6eade8fc53f 100644 --- a/docs/ru/operations/system-tables/query_thread_log.md +++ b/docs/ru/operations/system-tables/query_thread_log.md @@ -57,8 +57,7 @@ ClickHouse не удаляет данные из таблицы автомати - `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — HTTP заголовок `UserAgent`. - `quota_key` ([String](../../sql-reference/data-types/string.md)) — «ключ квоты» из настроек [квот](quotas.md) (см. `keyed`). - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия ClickHouse. -- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — счетчики для изменения различных метрик для данного потока. Описание метрик можно получить из таблицы [system.events](#system_tables-events). -- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — метрики для данного потока, перечисленные в столбце `ProfileEvents.Names`. +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — счетчики для изменения различных метрик для данного потока. Описание метрик можно получить из таблицы [system.events](#system_tables-events). 
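+
+Отдельные счетчики из столбца `ProfileEvents` доступны по ключу `Map` (набросок; набор событий в конкретной системе может отличаться):
+
+``` sql
+SELECT
+    thread_id,
+    ProfileEvents['RealTimeMicroseconds'] AS real_time_us
+FROM system.query_thread_log
+LIMIT 10;
+```
+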
**Пример**

@@ -97,17 +96,16 @@ initial_port:          33452
 interface:             1
 os_user:               bharatnc
 client_hostname:       tower
-client_name:           ClickHouse 
+client_name:           ClickHouse
 client_revision:       54437
 client_version_major:  20
 client_version_minor:  7
 client_version_patch:  2
 http_method:           0
-http_user_agent: 
-quota_key: 
+http_user_agent:
+quota_key:
 revision:              54440
-ProfileEvents.Names:   ['Query','InsertQuery','FileOpen','WriteBufferFromFileDescriptorWrite','WriteBufferFromFileDescriptorWriteBytes','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','FunctionExecute','CreatedWriteBufferOrdinary','DiskWriteElapsedMicroseconds','NetworkReceiveElapsedMicroseconds','NetworkSendElapsedMicroseconds','InsertedRows','InsertedBytes','SelectedRows','SelectedBytes','MergeTreeDataWriterRows','MergeTreeDataWriterUncompressedBytes','MergeTreeDataWriterCompressedBytes','MergeTreeDataWriterBlocks','MergeTreeDataWriterBlocksAlreadySorted','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSReadChars','OSWriteChars']
-ProfileEvents.Values:  [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47,1,12,1,12,1,12,189,1,1,10,2,70853,2748,49,2747,45056,422,1520]
+ProfileEvents:         {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1}
 ```
 
 **Смотрите также**
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/count.md b/docs/ru/sql-reference/aggregate-functions/reference/count.md
index 06cf66bd8bd..7018f51763a 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/count.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/count.md
@@ -31,6 +31,8 @@ ClickHouse поддерживает синтаксис `COUNT(DISTINCT ...)`. П
 
 Запрос `SELECT count() FROM table` не оптимизирован, поскольку количество записей в таблице не хранится отдельно. Он выбирает небольшой столбец из таблицы и подсчитывает количество значений в нём.
 
+При этом запрос `SELECT count(nullable_column) FROM table` может быть оптимизирован включением настройки [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [null](../../../sql-reference/data-types/nullable.md#finding-null) вместо чтения всех данных столбца. Запрос `SELECT count(n) FROM table` преобразуется к запросу `SELECT sum(NOT n.null) FROM table`.
+
 **Примеры**
 
 Пример 1:
@@ -68,4 +70,3 @@ SELECT count(DISTINCT num) FROM t
 ```
 
 Этот пример показывает, что `count(DISTINCT num)` выполняется с помощью функции `uniqExact` в соответствии со значением настройки `count_distinct_implementation`.
-
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/median.md b/docs/ru/sql-reference/aggregate-functions/reference/median.md
index a208c21dd21..1472809e2e3 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/median.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/median.md
@@ -1,17 +1,19 @@
 # median {#median}
 
-Функции `median*` — алиасы для соответствущих функций `quantile*`. Они вычисляют медиану числовой последовательности.
+Функции `median*` — синонимы для соответствующих функций `quantile*`. Они вычисляют медиану числовой последовательности.
 
-Functions:
+Функции:
 
-- `median` — алиас [quantile](#quantile).
-- `medianDeterministic` — алиас [quantileDeterministic](#quantiledeterministic). -- `medianExact` — алиас [quantileExact](#quantileexact). -- `medianExactWeighted` — алиас [quantileExactWeighted](#quantileexactweighted). -- `medianTiming` — алиас [quantileTiming](#quantiletiming). -- `medianTimingWeighted` — алиас [quantileTimingWeighted](#quantiletimingweighted). -- `medianTDigest` — алиас [quantileTDigest](#quantiletdigest). -- `medianTDigestWeighted` — алиас [quantileTDigestWeighted](#quantiletdigestweighted). + +- `median` — синоним для [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). +- `medianDeterministic` — синоним для [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic). +- `medianExact` — синоним для [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact). +- `medianExactWeighted` — синоним для [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md#quantileexactweighted). +- `medianTiming` — синоним для [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). +- `medianTimingWeighted` — синоним для [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted). +- `medianTDigest` — синоним для [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). +- `medianTDigestWeighted` — синоним для [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). +- `medianBFloat16` — синоним для [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16). **Пример** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantilebfloat16.md b/docs/ru/sql-reference/aggregate-functions/reference/quantilebfloat16.md new file mode 100644 index 00000000000..ba4a762dff7 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantilebfloat16.md @@ -0,0 +1,64 @@ +--- +toc_priority: 209 +--- + +# quantileBFloat16 {#quantilebfloat16} + +Приближенно вычисляет [квантиль](https://ru.wikipedia.org/wiki/Квантиль) выборки чисел в формате [bfloat16](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format). `bfloat16` — это формат с плавающей точкой, в котором для представления числа используется 1 знаковый бит, 8 бит для порядка и 7 бит для мантиссы. +Функция преобразует входное число в 32-битное с плавающей точкой и обрабатывает его старшие 16 бит. Она вычисляет квантиль в формате `bfloat16` и преобразует его в 64-битное число с плавающей точкой, добавляя нулевые биты. +Эта функция выполняет быстрые приближенные вычисления с относительной ошибкой не более 0.390625%. + +**Синтаксис** + +``` sql +quantileBFloat16[(level)](expr) +``` + +Синоним: `medianBFloat16` + +**Аргументы** + +- `expr` — столбец с числовыми данными. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md). + +**Параметры** + +- `level` — уровень квантиля. Необязательный параметр. Допустимый диапазон значений от 0 до 1. Значение по умолчанию: 0.5. [Float](../../../sql-reference/data-types/float.md). + +**Возвращаемое значение** + +- Приближенное значение квантиля. + +Тип: [Float64](../../../sql-reference/data-types/float.md#float32-float64). 
+ +**Пример** + +В таблице есть столбцы с целыми числами и с числами с плавающей точкой: + +``` text +┌─a─┬─────b─┐ +│ 1 │ 1.001 │ +│ 2 │ 1.002 │ +│ 3 │ 1.003 │ +│ 4 │ 1.004 │ +└───┴───────┘ +``` + +Запрос для вычисления 0.75-квантиля (верхнего квартиля): + +``` sql +SELECT quantileBFloat16(0.75)(a), quantileBFloat16(0.75)(b) FROM example_table; +``` + +Результат: + +``` text +┌─quantileBFloat16(0.75)(a)─┬─quantileBFloat16(0.75)(b)─┐ +│ 3 │ 1 │ +└───────────────────────────┴───────────────────────────┘ +``` +Обратите внимание, что все числа с плавающей точкой в примере были округлены до 1.0 при преобразовании к `bfloat16`. + +**См. также** + +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md index 82ebae1c14e..2f1e879eaa1 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md @@ -2,7 +2,9 @@ toc_priority: 202 --- -# quantileExact {#quantileexact} +# Функции quantileExact {#quantileexact-functions} + +## quantileExact {#quantileexact} Точно вычисляет [квантиль](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности. @@ -23,7 +25,6 @@ quantileExact(level)(expr) - `level` — уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types) или типов [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md). - **Возвращаемое значение** - Квантиль заданного уровня. @@ -50,7 +51,7 @@ SELECT quantileExact(number) FROM numbers(10) └───────────────────────┘ ``` -# quantileExactLow {#quantileexactlow} +## quantileExactLow {#quantileexactlow} Как и `quantileExact`, эта функция вычисляет точный [квантиль](https://en.wikipedia.org/wiki/Quantile) числовой последовательности данных. @@ -67,7 +68,7 @@ SELECT quantileExactLow(0.1)(number) FROM numbers(10) │ 1 │ └───────────────────────────────┘ ``` - + При использовании в запросе нескольких функций `quantile*` с разными уровнями, внутренние состояния не объединяются (то есть запрос работает менее эффективно). В этом случае используйте функцию [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles). **Синтаксис** @@ -83,7 +84,6 @@ quantileExact(level)(expr) - `level` — уровень квантили. Опциональный параметр. Константное занчение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://en.wikipedia.org/wiki/Median). - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). - **Возвращаемое значение** - Квантиль заданного уровня. 
@@ -109,7 +109,7 @@ SELECT quantileExactLow(number) FROM numbers(10) │ 4 │ └──────────────────────────┘ ``` -# quantileExactHigh {#quantileexacthigh} +## quantileExactHigh {#quantileexacthigh} Как и `quantileExact`, эта функция вычисляет точный [квантиль](https://en.wikipedia.org/wiki/Quantile) числовой последовательности данных. @@ -134,7 +134,6 @@ quantileExactHigh(level)(expr) - `level` — уровень квантили. Опциональный параметр. Константное занчение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://en.wikipedia.org/wiki/Median). - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). - **Возвращаемое значение** - Квантиль заданного уровня. @@ -161,8 +160,111 @@ SELECT quantileExactHigh(number) FROM numbers(10) └───────────────────────────┘ ``` +## quantileExactExclusive {#quantileexactexclusive} + +Точно вычисляет [квантиль](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности. + +Чтобы получить точный результат, все переданные значения собираются в массив, который затем частично сортируется. Таким образом, функция потребляет объем памяти `O(n)`, где `n` — количество переданных значений. Для небольшого числа значений эта функция эффективна. + +Эта функция эквивалентна Excel функции [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba), [тип R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample). + +Если в одном запросе вызывается несколько функций `quantileExactExclusive` с разными значениями `level`, эти функции вычисляются независимо друг от друга. В таких случаях используйте функцию [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive), запрос будет выполняться эффективнее. + +**Синтаксис** + +``` sql +quantileExactExclusive(level)(expr) +``` + +**Аргументы** + +- `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). + +**Параметры** + +- `level` — уровень квантиля. Необязательный параметр. Возможные значения: (0, 1) — граничные значения не учитываются. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md). + +**Возвращаемое значение** + +- Квантиль заданного уровня. + +Тип: + +- [Float64](../../../sql-reference/data-types/float.md) для входных данных числового типа. +- [Date](../../../sql-reference/data-types/date.md), если входные значения имеют тип `Date`. +- [DateTime](../../../sql-reference/data-types/datetime.md), если входные значения имеют тип `DateTime`. 
+ +**Пример** + +Запрос: + +``` sql +CREATE TABLE num AS numbers(1000); + +SELECT quantileExactExclusive(0.6)(x) FROM (SELECT number AS x FROM num); +``` + +Результат: + +``` text +┌─quantileExactExclusive(0.6)(x)─┐ +│ 599.6 │ +└────────────────────────────────┘ +``` + +## quantileExactInclusive {#quantileexactinclusive} + +Точно вычисляет [квантиль](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности. + +Чтобы получить точный результат, все переданные значения собираются в массив, который затем частично сортируется. Таким образом, функция потребляет объем памяти `O(n)`, где `n` — количество переданных значений. Для небольшого числа значений эта функция эффективна. + +Эта функция эквивалентна Excel функции [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed), [тип R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample). + +Если в одном запросе вызывается несколько функций `quantileExactInclusive` с разными значениями `level`, эти функции вычисляются независимо друг от друга. В таких случаях используйте функцию [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactinclusive), запрос будет выполняться эффективнее. + +**Синтаксис** + +``` sql +quantileExactInclusive(level)(expr) +``` + +**Аргументы** + +- `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). + +**Параметры** + +- `level` — уровень квантиля. Необязательный параметр. Возможные значения: [0, 1] — граничные значения учитываются. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md). + +**Возвращаемое значение** + +- Квантиль заданного уровня. + +Тип: + +- [Float64](../../../sql-reference/data-types/float.md) для входных данных числового типа. +- [Date](../../../sql-reference/data-types/date.md), если входные значения имеют тип `Date`. +- [DateTime](../../../sql-reference/data-types/datetime.md), если входные значения имеют тип `DateTime`. + +**Пример** + +Запрос: + +``` sql +CREATE TABLE num AS numbers(1000); + +SELECT quantileExactInclusive(0.6)(x) FROM (SELECT number AS x FROM num); +``` + +Результат: + +``` text +┌─quantileExactInclusive(0.6)(x)─┐ +│ 599.4 │ +└────────────────────────────────┘ +``` + **Смотрите также** - [median](../../../sql-reference/aggregate-functions/reference/median.md#median) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) - diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md index 671cbc1fc4d..2417d6de139 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md @@ -2,9 +2,114 @@ toc_priority: 201 --- -# quantiles {#quantiles} +# Функции для нескольких квантилей {#quantiles-functions} -Syntax: `quantiles(level1, level2, …)(x)` +## quantiles {#quantiles} -All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. 
These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. +Синтаксис: `quantiles(level1, level2, …)(x)` +Все функции для вычисления квантилей имеют соответствующие функции для вычисления нескольких квантилей: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. Эти функции вычисляют все квантили указанных уровней в один проход и возвращают массив с вычисленными значениями. + +## quantilesExactExclusive {#quantilesexactexclusive} + +Точно вычисляет [квантили](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности. + +Чтобы получить точный результат, все переданные значения собираются в массив, который затем частично сортируется. Таким образом, функция потребляет объем памяти `O(n)`, где `n` — количество переданных значений. Для небольшого числа значений эта функция эффективна. + +Эта функция эквивалентна Excel функции [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba), [тип R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample). + +С наборами уровней работает эффективнее, чем [quantileExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive). + +**Синтаксис** + +``` sql +quantilesExactExclusive(level1, level2, ...)(expr) +``` + +**Аргументы** + +- `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). + +**Параметры** + +- `level` — уровень квантилей. Возможные значения: (0, 1) — граничные значения не учитываются. [Float](../../../sql-reference/data-types/float.md). + +**Возвращаемые значения** + +- [Массив](../../../sql-reference/data-types/array.md) квантилей указанных уровней. + +Тип значений массива: + +- [Float64](../../../sql-reference/data-types/float.md) для входных данных числового типа. +- [Date](../../../sql-reference/data-types/date.md), если входные значения имеют тип `Date`. +- [DateTime](../../../sql-reference/data-types/datetime.md), если входные значения имеют тип `DateTime`. + +**Пример** + +Запрос: + +``` sql +CREATE TABLE num AS numbers(1000); + +SELECT quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num); +``` + +Результат: + +``` text +┌─quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐ +│ [249.25,499.5,749.75,899.9,949.9499999999999,989.99,998.999] │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## quantilesExactInclusive {#quantilesexactinclusive} + +Точно вычисляет [квантили](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности. + +Чтобы получить точный результат, все переданные значения собираются в массив, который затем частично сортируется. Таким образом, функция потребляет объем памяти `O(n)`, где `n` — количество переданных значений. Для небольшого числа значений эта функция эффективна. + +Эта функция эквивалентна Excel функции [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed), [тип R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample). 
+ +С наборами уровней работает эффективнее, чем [quantileExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactinclusive). + +**Синтаксис** + +``` sql +quantilesExactInclusive(level1, level2, ...)(expr) +``` + +**Аргументы** + +- `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). + +**Параметры** + +- `level` — уровень квантилей. Возможные значения: [0, 1] — граничные значения учитываются. [Float](../../../sql-reference/data-types/float.md). + +**Возвращаемые значения** + +- [Массив](../../../sql-reference/data-types/array.md) квантилей указанных уровней. + +Тип значений массива: + +- [Float64](../../../sql-reference/data-types/float.md) для входных данных числового типа. +- [Date](../../../sql-reference/data-types/date.md), если входные значения имеют тип `Date`. +- [DateTime](../../../sql-reference/data-types/datetime.md), если входные значения имеют тип `DateTime`. + +**Пример** + +Запрос: + +``` sql +CREATE TABLE num AS numbers(1000); + +SELECT quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num); +``` + +Результат: + +``` text +┌─quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐ +│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │ +└─────────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md index 6cb8ccf1143..a703eb1b0ac 100644 --- a/docs/ru/sql-reference/data-types/map.md +++ b/docs/ru/sql-reference/data-types/map.md @@ -8,6 +8,7 @@ toc_title: Map(key, value) Тип данных `Map(key, value)` хранит пары `ключ:значение`. **Параметры** + - `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). - `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). @@ -61,6 +62,36 @@ SELECT a['key3'] FROM table_map; └─────────────────────────┘ ``` +## Подстолбцы Map.keys и Map.values {#map-subcolumns} + +Для оптимизации обработки столбцов `Map` в некоторых случаях можно использовать подстолбцы `keys` и `values` вместо чтения всего столбца. + +**Пример** + +Запрос: + +``` sql +CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory; + +INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3)); + +SELECT a.keys FROM t_map; + +SELECT a.values FROM t_map; +``` + +Результат: + +``` text +┌─a.keys─────────────────┐ +│ ['key1','key2','key3'] │ +└────────────────────────┘ + +┌─a.values─┐ +│ [1,2,3] │ +└──────────┘ +``` + **См. также** - функция [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 1298f05eca0..ea1b62c6cef 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -51,13 +51,13 @@ LIFETIME(300) LIFETIME(MIN 300 MAX 360) ``` -Если `0` и `0`, ClickHouse не перегружает словарь по истечению времени. 
-В этм случае, ClickHouse может перезагрузить данные словаря если изменился XML файл с конфигурацией словаря или если была выполнена команда `SYSTEM RELOAD DICTIONARY`. +Если `0` и `0`, ClickHouse не перезагружает словарь по истечении времени. +В этом случае ClickHouse может перезагрузить данные словаря, если изменился XML файл с конфигурацией словаря или если была выполнена команда `SYSTEM RELOAD DICTIONARY`. При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md): - У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется. -- Для MySQL источника, время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`. +- Для MySQL источника время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`). - Словари из других источников по умолчанию обновляются каждый раз. Для других источников (ODBC, PostgreSQL, ClickHouse и т.д.) можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать необходимо выполнить следующие условия/действия: @@ -86,4 +86,34 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher ... ``` -Для словарей `Cache`, `ComplexKeyCache`, `SSDCache` и `SSDComplexKeyCache` поддерживается как синхронное, так и асинхронное обновление. +Для словарей `Cache`, `ComplexKeyCache`, `SSDCache` и `SSDComplexKeyCache` поддерживается как синхронное, так и асинхронное обновление. + +Словари `Flat`, `Hashed` и `ComplexKeyHashed` могут запрашивать только те данные, которые были изменены после предыдущего обновления. Если `update_field` указано как часть конфигурации источника словаря, к запросу данных будет добавлено время предыдущего обновления в секундах. В зависимости от типа источника (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, ODBC) к `update_field` будет применена соответствующая логика перед запросом данных из внешнего источника. + +- Если источник HTTP, то `update_field` будет добавлено в качестве параметра запроса, а время последнего обновления — в качестве значения параметра. +- Если источник Executable, то `update_field` будет добавлено в качестве аргумента исполняемого скрипта, время последнего обновления — в качестве значения аргумента. +- Если источник ClickHouse, MySQL, PostgreSQL или ODBC, то будет дополнительная часть запроса `WHERE`, где `update_field` будет больше или равно времени последнего обновления. + +Если установлена опция `update_field`, то может быть установлена дополнительная опция `update_lag`. Значение `update_lag` вычитается из времени предыдущего обновления перед запросом обновленных данных. + +Пример настройки: + +``` xml + + ... + + ... + added_time + 15 + + ... + +``` + +или + +``` sql +... +SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15)) +... 
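+-- примечание: при update_field 'added_time' и update_lag 15 к запросу к источнику
+-- добавляется условие вида WHERE added_time >= (время предыдущего обновления - 15 секунд)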
+``` diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 2feb088b4d9..197fde71279 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,7 +159,7 @@ CREATE DICTIONARY somename ( | Тег | Описание | Обязательный | |------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| | `name` | Имя столбца. | Да | -| `type` | Тип данных ClickHouse: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md).
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache). Для словарей [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | +| `type` | Тип данных ClickHouse: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache). Для словарей [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | | `null_value` | Значение по умолчанию для несуществующего элемента.
В примере это пустая строка. Значение [NULL](../../syntax.md#null-literal) можно указывать только для типов `Nullable` (см. предыдущую строку с описанием типов). | Да | | `expression` | [Выражение](../../syntax.md#syntax-expressions), которое ClickHouse выполняет со значением.
Выражением может быть имя столбца в удаленной SQL базе. Таким образом, вы можете использовать его для создания псевдонима удаленного столбца.

Значение по умолчанию: нет выражения. | Нет | | `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external-dicts-dict-hierarchical.md).

Значение по умолчанию: `false`. | Нет |
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index 0dfad45605a..67c4f6c0136 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -11,18 +11,24 @@ toc_title: "Массивы"
 
 Тип результата - UInt8.
 Функция также работает для строк.
 
+Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT empty(arr) FROM table` преобразуется к запросу `SELECT arr.size0 = 0 FROM table`.
+
 ## notEmpty {#function-notempty}
 
 Возвращает 0 для пустого массива, и 1 для непустого массива.
 Тип результата - UInt8.
 Функция также работает для строк.
 
+Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT notEmpty(arr) FROM table` преобразуется к запросу `SELECT arr.size0 != 0 FROM table`.
+
 ## length {#array_functions-length}
 
 Возвращает количество элементов в массиве.
 Тип результата - UInt64.
 Функция также работает для строк.
 
+Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT length(arr) FROM table` преобразуется к запросу `SELECT arr.size0 FROM table`.
+
 ## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64}
 
 ## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 {#emptyarrayint8-emptyarrayint16-emptyarrayint32-emptyarrayint64}
diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md
index 69f9a89f4cb..e7bd33bac45 100644
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@@ -27,40 +27,40 @@ SELECT
 
 Возвращает часовой пояс сервера.
 
-**Синтаксис** 
+**Синтаксис**
 
 ``` sql
 timeZone()
 ```
 
-Псевдоним: `timezone`.
+Синоним: `timezone`.
 
 **Возвращаемое значение**
 
-- Часовой пояс. 
+- Часовой пояс.
 
 Тип: [String](../../sql-reference/data-types/string.md).
 
 ## toTimeZone {#totimezone}
 
-Переводит дату или дату с временем в указанный часовой пояс. Часовой пояс - это атрибут типов `Date` и `DateTime`. Внутреннее значение (количество секунд) поля таблицы или результирующего столбца не изменяется, изменяется тип поля и, соответственно, его текстовое отображение. 
+Переводит дату или дату с временем в указанный часовой пояс. Часовой пояс - это атрибут типов `Date` и `DateTime`. Внутреннее значение (количество секунд) поля таблицы или результирующего столбца не изменяется, изменяется тип поля и, соответственно, его текстовое отображение.
-**Синтаксис** +**Синтаксис** ``` sql toTimezone(value, timezone) ``` -Псевдоним: `toTimezone`. +Синоним: `toTimezone`. -**Аргументы** +**Аргументы** - `value` — время или дата с временем. [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md). **Возвращаемое значение** -- Дата с временем. +- Дата с временем. Тип: [DateTime](../../sql-reference/data-types/datetime.md). @@ -80,6 +80,7 @@ SELECT toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc, toInt32(time_samoa) AS int32samoa FORMAT Vertical; ``` + Результат: ```text @@ -102,21 +103,21 @@ int32samoa: 1546300800 Возвращает название часового пояса для значений типа [DateTime](../../sql-reference/data-types/datetime.md) и [DateTime64](../../sql-reference/data-types/datetime64.md). -**Синтаксис** +**Синтаксис** ``` sql timeZoneOf(value) ``` -Псевдоним: `timezoneOf`. +Синоним: `timezoneOf`. **Аргументы** -- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). **Возвращаемое значение** -- Название часового пояса. +- Название часового пояса. Тип: [String](../../sql-reference/data-types/string.md). @@ -145,15 +146,15 @@ SELECT timezoneOf(now()); timeZoneOffset(value) ``` -Псевдоним: `timezoneOffset`. +Синоним: `timezoneOffset`. **Аргументы** -- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Возвращаемое значение** -- Смещение в секундах от UTC. +- Смещение в секундах от UTC. Тип: [Int32](../../sql-reference/data-types/int-uint.md). @@ -626,7 +627,7 @@ SELECT now(), date_trunc('hour', now(), 'Europe/Moscow'); Добавляет интервал времени или даты к указанной дате или дате со временем. -**Синтаксис** +**Синтаксис** ``` sql date_add(unit, value, date) @@ -1025,6 +1026,45 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g'); └────────────────────────────────────────────┘ ``` +## dateName {#dataname} + +Возвращает указанную часть даты. + +**Синтаксис** + +``` sql +dateName(date_part, date) +``` + +**Аргументы** + +- `date_part` — часть даты. Возможные значения: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). +- `date` — дата. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — часовой пояс. Необязательный аргумент. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Указанная часть даты. + +Тип: [String](../../sql-reference/data-types/string.md#string). 
+ +**Пример** + +Запрос: + +```sql +WITH toDateTime('2021-04-14 11:22:33') AS date_value +SELECT dateName('year', date_value), dateName('month', date_value), dateName('day', date_value); +``` + +Результат: + +```text +┌─dateName('year', date_value)─┬─dateName('month', date_value)─┬─dateName('day', date_value)─┐ +│ 2021 │ April │ 14 │ +└──────────────────────────────┴───────────────────────────────┴───────────────────────────── +``` + ## FROM\_UNIXTIME {#fromunixtime} Функция преобразует Unix timestamp в календарную дату и время. diff --git a/docs/ru/sql-reference/functions/encoding-functions.md b/docs/ru/sql-reference/functions/encoding-functions.md index 23e840a7898..161c1304b7c 100644 --- a/docs/ru/sql-reference/functions/encoding-functions.md +++ b/docs/ru/sql-reference/functions/encoding-functions.md @@ -223,3 +223,53 @@ SELECT reinterpretAsUInt64(reverse(unhex('FFF'))) AS num; ## bitmaskToArray(num) {#bitmasktoarraynum} Принимает целое число. Возвращает массив чисел типа UInt64, содержащий степени двойки, в сумме дающих исходное число; числа в массиве идут по возрастанию. + +## bitPositionsToArray(num) {#bitpositionstoarraynum} + +Принимает целое число и приводит его к беззнаковому виду. Возвращает массив `UInt64` чисел, который содержит список позиций битов `arg`, равных `1`, в порядке возрастания. + +**Синтаксис** + +```sql +bitPositionsToArray(arg) +``` + +**Аргументы** + +- `arg` — целое значение. [Int/UInt](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Массив, содержащий список позиций битов, равных `1`, в порядке возрастания. + +Тип: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). + +**Примеры** + +Запрос: + +``` sql +SELECT bitPositionsToArray(toInt8(1)) AS bit_positions; +``` + +Результат: + +``` text +┌─bit_positions─┐ +│ [0] │ +└───────────────┘ +``` + +Запрос: + +``` sql +select bitPositionsToArray(toInt8(-1)) as bit_positions; +``` + +Результат: + +``` text +┌─bit_positions─────┐ +│ [0,1,2,3,4,5,6,7] │ +└───────────────────┘ +``` diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index c385dbd8f87..94dccb58622 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -224,6 +224,8 @@ SELECT mapContains(a, 'name') FROM test; Возвращает все ключи контейнера `map`. +Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [keys](../../sql-reference/data-types/map.md#map-subcolumns) вместо чтения и обработки данных всего столбца. Запрос `SELECT mapKeys(m) FROM table` преобразуется к запросу `SELECT m.keys FROM table`. + **Синтаксис** ```sql @@ -265,6 +267,8 @@ SELECT mapKeys(a) FROM test; Возвращает все значения контейнера `map`. +Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [values](../../sql-reference/data-types/map.md#map-subcolumns) вместо чтения и обработки данных всего столбца. Запрос `SELECT mapValues(m) FROM table` преобразуется к запросу `SELECT m.values FROM table`. 
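The `mapKeys`/`mapValues` subcolumn optimization added above can be sketched the same way; `t_map` and `m` are assumed names:

``` sql
-- Hypothetical table with a Map column.
CREATE TABLE t_map (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id;

SET optimize_functions_to_subcolumns = 1;

-- Reads only the m.keys and m.values subcolumns instead of the whole Map column:
-- mapKeys(m) -> m.keys, mapValues(m) -> m.values.
SELECT mapKeys(m), mapValues(m) FROM t_map;
```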
+ **Синтаксис** ```sql diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md index 5cf21b64079..cfdb42f85a7 100644 --- a/docs/ru/sql-reference/operators/index.md +++ b/docs/ru/sql-reference/operators/index.md @@ -283,6 +283,8 @@ ClickHouse поддерживает операторы `IS NULL` и `IS NOT NULL - `0` в обратном случае. - Для прочих значений оператор `IS NULL` всегда возвращает `0`. +Оператор можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` читается только подстолбец [null](../../sql-reference/data-types/nullable.md#finding-null) вместо чтения и обработки данных всего столбца. Запрос `SELECT n IS NULL FROM table` преобразуется к запросу `SELECT n.null FROM TABLE`. + ``` sql @@ -302,6 +304,8 @@ SELECT x+100 FROM t_null WHERE y IS NULL - `1`, в обратном случае. - Для прочих значений оператор `IS NOT NULL` всегда возвращает `1`. +Оператор можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` читается только подстолбец [null](../../sql-reference/data-types/nullable.md#finding-null) вместо чтения и обработки данных всего столбца. Запрос `SELECT n IS NOT NULL FROM table` преобразуется к запросу `SELECT NOT n.null FROM TABLE`. + ``` sql diff --git a/docs/ru/sql-reference/statements/alter/index/index.md b/docs/ru/sql-reference/statements/alter/index/index.md index 632f11ed906..1f6bbea5c4b 100644 --- a/docs/ru/sql-reference/statements/alter/index/index.md +++ b/docs/ru/sql-reference/statements/alter/index/index.md @@ -9,7 +9,7 @@ toc_title: "Манипуляции с индексами" Добавить или удалить индекс можно с помощью операций ``` sql -ALTER TABLE [db.]name ADD INDEX name expression TYPE type GRANULARITY value [AFTER name] +ALTER TABLE [db.]name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name] ALTER TABLE [db.]name DROP INDEX name ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name ``` diff --git a/docs/ru/sql-reference/statements/alter/partition.md b/docs/ru/sql-reference/statements/alter/partition.md index 79242e7bbf3..0a485c7b591 100644 --- a/docs/ru/sql-reference/statements/alter/partition.md +++ b/docs/ru/sql-reference/statements/alter/partition.md @@ -19,6 +19,8 @@ toc_title: PARTITION - [UNFREEZE PARTITION](#alter_unfreeze-partition) — удалить резервную копию партиции; - [FETCH PARTITION](#alter_fetch-partition) — скачать партицию с другого сервера; - [MOVE PARTITION\|PART](#alter_move-partition) — переместить партицию/кускок на другой диск или том. +- [UPDATE IN PARTITION](#update-in-partition) — обновить данные внутри партиции по условию. +- [DELETE IN PARTITION](#delete-in-partition) — удалить данные внутри партиции по условию. 
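For the `IS NULL`/`IS NOT NULL` optimization added in the operators page above, a hedged sketch along the same lines (the table and column names are assumptions):

``` sql
-- Hypothetical table with a Nullable column.
CREATE TABLE t_nullable (id UInt64, n Nullable(UInt32)) ENGINE = MergeTree ORDER BY id;

SET optimize_functions_to_subcolumns = 1;

-- Both predicates read only the n.null subcolumn:
-- n IS NULL -> n.null, n IS NOT NULL -> NOT n.null.
SELECT n IS NULL, n IS NOT NULL FROM t_nullable;
```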
## DETACH PARTITION\|PART {#alter_detach-partition} diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md index 2589408b8fa..634343d112f 100644 --- a/docs/ru/sql-reference/statements/system.md +++ b/docs/ru/sql-reference/statements/system.md @@ -5,14 +5,14 @@ toc_title: SYSTEM # Запросы SYSTEM {#query-language-system} -- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries) +- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries) - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries) - [RELOAD DICTIONARY](#query_language-system-reload-dictionary) - [RELOAD MODELS](#query_language-system-reload-models) - [RELOAD MODEL](#query_language-system-reload-model) - [DROP DNS CACHE](#query_language-system-drop-dns-cache) - [DROP MARK CACHE](#query_language-system-drop-mark-cache) -- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache) +- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache) - [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache) - [DROP REPLICA](#query_language-system-drop-replica) - [FLUSH LOGS](#query_language-system-flush_logs) @@ -24,10 +24,10 @@ toc_title: SYSTEM - [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) - [STOP MERGES](#query_language-system-stop-merges) - [START MERGES](#query_language-system-start-merges) -- [STOP TTL MERGES](#query_language-stop-ttl-merges) -- [START TTL MERGES](#query_language-start-ttl-merges) -- [STOP MOVES](#query_language-stop-moves) -- [START MOVES](#query_language-start-moves) +- [STOP TTL MERGES](#query_language-stop-ttl-merges) +- [START TTL MERGES](#query_language-start-ttl-merges) +- [STOP MOVES](#query_language-stop-moves) +- [START MOVES](#query_language-start-moves) - [STOP FETCHES](#query_language-system-stop-fetches) - [START FETCHES](#query_language-system-start-fetches) - [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends) @@ -36,13 +36,13 @@ toc_title: SYSTEM - [START REPLICATION QUEUES](#query_language-system-start-replication-queues) - [SYNC REPLICA](#query_language-system-sync-replica) - [RESTART REPLICA](#query_language-system-restart-replica) -- [RESTART REPLICAS](#query_language-system-restart-replicas) +- [RESTART REPLICAS](#query_language-system-restart-replicas) -## RELOAD EMBEDDED DICTIONARIES] {#query_language-system-reload-emdedded-dictionaries} +## RELOAD EMBEDDED DICTIONARIES] {#query_language-system-reload-emdedded-dictionaries} Перегружает все [Встроенные словари](../dictionaries/internal-dicts.md). -По умолчанию встроенные словари выключены. +По умолчанию встроенные словари выключены. Всегда возвращает `Ok.`, вне зависимости от результата обновления встроенных словарей. - + ## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} Перегружает все словари, которые были успешно загружены до этого. @@ -115,7 +115,7 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk'; ## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache} Сбрасывает кеш скомпилированных выражений. Используется при разработке ClickHouse и тестах производительности. 
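The DROP COMPILED EXPRESSION CACHE section above has no statement example, unlike the neighbouring sections; the query itself, as named by the heading, is simply:

``` sql
SYSTEM DROP COMPILED EXPRESSION CACHE
```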
-Компилированные выражения используются когда включена настройка уровня запрос/пользователь/профиль [compile](../../operations/settings/settings.md#compile) +Cкомпилированные выражения используются когда включена настройка уровня запрос/пользователь/профиль [compile-expressions](../../operations/settings/settings.md#compile-expressions) ## FLUSH LOGS {#query_language-system-flush_logs} @@ -194,7 +194,7 @@ SYSTEM START MERGES [ON VOLUME | [db.]merge_tree_family_table_name SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name] ``` -### START TTL MERGES {#query_language-start-ttl-merges} +### START TTL MERGES {#query_language-start-ttl-merges} Запускает фоновые процессы удаления старых данных основанные на [выражениях TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) для таблиц семейства MergeTree: Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных: @@ -203,7 +203,7 @@ SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name] SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name] ``` -### STOP MOVES {#query_language-stop-moves} +### STOP MOVES {#query_language-stop-moves} Позволяет остановить фоновые процессы переноса данных основанные [табличных выражениях TTL с использованием TO VOLUME или TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family: Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных: @@ -212,7 +212,7 @@ SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name] SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] ``` -### START MOVES {#query_language-start-moves} +### START MOVES {#query_language-start-moves} Запускает фоновые процессы переноса данных основанные [табличных выражениях TTL с использованием TO VOLUME или TO DISK](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family: Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных: @@ -261,7 +261,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name] ### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues} -Останавливает фоновые процессы разбора заданий из очереди репликации которая хранится в Zookeeper для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER: +Останавливает фоновые процессы разбора заданий из очереди репликации которая хранится в Zookeeper для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER: ``` sql SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] @@ -269,7 +269,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] ### START REPLICATION QUEUES {#query_language-system-start-replication-queues} -Запускает фоновые процессы разбора заданий из очереди репликации которая хранится в Zookeeper для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER: +Запускает фоновые процессы разбора заданий из очереди репликации которая хранится в Zookeeper для таблиц семейства `ReplicatedMergeTree`. 
Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER: ``` sql SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] @@ -277,7 +277,7 @@ SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] ### SYNC REPLICA {#query_language-system-sync-replica} -Ждет когда таблица семейства `ReplicatedMergeTree` будет синхронизирована с другими репликами в кластере, будет работать до достижения `receive_timeout`, если синхронизация для таблицы отключена в настоящий момент времени: +Ждет когда таблица семейства `ReplicatedMergeTree` будет синхронизирована с другими репликами в кластере, будет работать до достижения `receive_timeout`, если синхронизация для таблицы отключена в настоящий момент времени: ``` sql SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name diff --git a/docs/ru/sql-reference/table-functions/mysql.md b/docs/ru/sql-reference/table-functions/mysql.md index 665f1058ba2..e21d1a7fa06 100644 --- a/docs/ru/sql-reference/table-functions/mysql.md +++ b/docs/ru/sql-reference/table-functions/mysql.md @@ -38,6 +38,18 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_ Остальные условия и ограничение выборки `LIMIT` будут выполнены в ClickHouse только после выполнения запроса к MySQL. +Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например: + +```sql +SELECT name FROM mysql(`mysql{1|2|3}:3306`, 'mysql_database', 'mysql_table', 'user', 'password'); +``` + +или + +```sql +SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 'mysql_table', 'user', 'password'); +``` + **Возвращаемое значение** Объект таблицы с теми же столбцами, что и в исходной таблице MySQL. diff --git a/docs/ru/sql-reference/table-functions/postgresql.md b/docs/ru/sql-reference/table-functions/postgresql.md index 2d8afe28f1e..50f651527c5 100644 --- a/docs/ru/sql-reference/table-functions/postgresql.md +++ b/docs/ru/sql-reference/table-functions/postgresql.md @@ -43,6 +43,18 @@ PostgreSQL массивы конвертируются в массивы ClickHo !!! info "Примечание" Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустипы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы. + +Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например: + +```sql +SELECT name FROM postgresql(`postgres{1|2|3}:5432`, 'postgres_database', 'postgres_table', 'user', 'password'); +``` + +или + +```sql +SELECT name FROM postgresql(`postgres1:5431|postgres2:5432`, 'postgres_database', 'postgres_table', 'user', 'password'); +``` При использовании словаря PostgreSQL поддерживается приоритет реплик. Чем больше номер реплики, тем ниже ее приоритет. Наивысший приоритет у реплики с номером `0`. diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md new file mode 100644 index 00000000000..826f1a5b25b --- /dev/null +++ b/docs/ru/sql-reference/table-functions/s3Cluster.md @@ -0,0 +1,48 @@ +--- +toc_priority: 55 +toc_title: s3Cluster +--- + +# Табличная функция s3Cluster {#s3Cluster-table-function} + +Позволяет обрабатывать файлы из [Amazon S3](https://aws.amazon.com/s3/) параллельно из многих узлов в указанном кластере. 
На узле-инициаторе функция создает соединение со всеми узлами в кластере, заменяет символы '*' в пути к файлу S3 и динамически отправляет каждый файл. На рабочем узле функция запрашивает у инициатора следующую задачу и обрабатывает ее. Это повторяется до тех пор, пока все задачи не будут завершены. + +**Синтаксис** + +``` sql +s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) +``` + +**Аргументы** + +- `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам. +- `source` — URL файла или нескольких файлов. Поддерживает следующие символы подстановки: `*`, `?`, `{'abc','def'}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `access_key_id` и `secret_access_key` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. +- `format` — [формат](../../interfaces/formats.md#formats) файла. +- `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. + +**Возвращаемое значение** + +Таблица с указанной структурой для чтения или записи данных в указанный файл. + +**Примеры** + +Вывод данных из всех файлов кластера `cluster_simple`: + +``` sql +SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon); +``` + +Подсчет общего количества строк во всех файлах кластера `cluster_simple`: + +``` sql +SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'); +``` + +!!! warning "Внимание" + Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. + +**Смотрите также** + +- [Движок таблиц S3](../../engines/table-engines/integrations/s3.md) +- [Табличная функция s3](../../sql-reference/table-functions/s3.md) diff --git a/docs/tools/README.md b/docs/tools/README.md index 4340561fa57..61a2e9a04f2 100644 --- a/docs/tools/README.md +++ b/docs/tools/README.md @@ -47,6 +47,13 @@ When all prerequisites are installed, running `build.py` without args (there are The easiest way to see the result is to use `--livereload=8888` argument of build.py. Alternatively, you can manually launch a HTTP server to serve the docs, for example by running `cd ClickHouse/docs/build && python3 -m http.server 8888`. Then go to http://localhost:8888 in browser. Feel free to use any other port instead of 8888. +## How to change code highlighting? {#how-to-change-code-hl} + +ClickHouse does not use mkdocs `highlightjs` feature. It uses modified pygments styles instead. +If you want to change code highlighting, edit the `website/css/highlight.css` file. +Currently, an [eighties](https://github.com/idleberg/base16-pygments/blob/master/css/base16-eighties.dark.css) theme +is used. + ## How to subscribe on documentation changes? 
{#how-to-subscribe-on-documentation-changes} At the moment there’s no easy way to do just that, but you can consider: diff --git a/docs/tools/build.py b/docs/tools/build.py index 61112d5a4f5..dae61eec87e 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -87,6 +87,7 @@ def build_for_lang(lang, args): website_url = 'https://clickhouse.tech' site_name = site_names.get(lang, site_names['en']) % '' site_name = site_name.replace(' ', ' ') + raw_config = dict( site_name=site_name, site_url=f'{website_url}/docs/{lang}/', @@ -153,9 +154,6 @@ def build(args): if not args.skip_website: website.build_website(args) - if not args.skip_test_templates: - test.test_templates(args.website_dir) - if not args.skip_docs: generate_cmake_flags_files() @@ -196,7 +194,6 @@ if __name__ == '__main__': arg_parser.add_argument('--skip-blog', action='store_true') arg_parser.add_argument('--skip-git-log', action='store_true') arg_parser.add_argument('--skip-docs', action='store_true') - arg_parser.add_argument('--skip-test-templates', action='store_true') arg_parser.add_argument('--test-only', action='store_true') arg_parser.add_argument('--minify', action='store_true') arg_parser.add_argument('--htmlproofer', action='store_true') diff --git a/docs/tools/test.py b/docs/tools/test.py index ada4df29644..526294dbe21 100755 --- a/docs/tools/test.py +++ b/docs/tools/test.py @@ -7,36 +7,6 @@ import bs4 import subprocess -def test_template(template_path): - if template_path.endswith('amp.html'): - # Inline CSS/JS is ok for AMP pages - return - - logging.debug(f'Running tests for {template_path} template') - with open(template_path, 'r') as f: - soup = bs4.BeautifulSoup( - f, - features='html.parser' - ) - for tag in soup.find_all(): - style_attr = tag.attrs.get('style') - assert not style_attr, f'Inline CSS is prohibited, found {style_attr} in {template_path}' - - if tag.name == 'script': - if tag.attrs.get('type') == 'application/ld+json': - continue - for content in tag.contents: - assert not content, f'Inline JavaScript is prohibited, found "{content}" in {template_path}' - - -def test_templates(base_dir): - logging.info('Running tests for templates') - for root, _, filenames in os.walk(base_dir): - for filename in filenames: - if filename.endswith('.html'): - test_template(os.path.join(root, filename)) - - def test_single_page(input_path, lang): with open(input_path) as f: soup = bs4.BeautifulSoup( diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index bb9bfde7b9b..dcbfbc79e33 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -742,19 +742,11 @@ CPU指令集是我们服务器中支持的最小集合。 目前,它是SSE 4.2 ## 库 {#ku} -**1.** 使用C++20标准库(允许实验性功能),以及 `boost` 和 `Poco` 框架。 +**1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks. -**2.** 如有必要,您可以使用 OS 包中提供的任何已知库。 +**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in form of source code in `contrib` directory and built with ClickHouse. -如果有一个好的解决方案已经可用,那就使用它,即使这意味着你必须安装另一个库。 - -(但要准备从代码中删除不好的库) - -**3.** 如果软件包没有您需要的软件包或者有过时的版本或错误的编译类型,则可以安装不在软件包中的库。 - -**4.** 如果库很小并且没有自己的复杂构建系统,请将源文件放在 `contrib` 文件夹中。 - -**5.** 始终优先考虑已经使用的库。 +**3.** Preference is always given to libraries that are already in use. 
## 一般建议 {#yi-ban-jian-yi-1} diff --git a/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md new file mode 100644 index 00000000000..7c04600894e --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md @@ -0,0 +1,42 @@ +--- +toc_priority: 9 +toc_title: EmbeddedRocksDB +--- + +# EmbeddedRocksDB 引擎 {#EmbeddedRocksDB-engine} + +这个引擎允许 ClickHouse 与 [rocksdb](http://rocksdb.org/) 进行集成。 + +## 创建一张表 {#table_engine-EmbeddedRocksDB-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name) +``` + +必要参数: + +- `primary_key_name` – any column name in the column list. +- 必须指定 `primary key`, 仅支持主键中的一个列. 主键将被序列化为二进制的`rocksdb key`. +- 主键以外的列将以相应的顺序在二进制中序列化为`rocksdb`值. +- 带有键 `equals` 或 `in` 过滤的查询将被优化为从 `rocksdb` 进行多键查询. + +示例: + +``` sql +CREATE TABLE test +( + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32, +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY key +``` + +[原始文章](https://clickhouse.tech/docs/en/engines/table-engines/integrations/embedded-rocksdb/) diff --git a/docs/zh/engines/table-engines/integrations/index.md b/docs/zh/engines/table-engines/integrations/index.md index 17e9d204aa6..0c34ae078a0 100644 --- a/docs/zh/engines/table-engines/integrations/index.md +++ b/docs/zh/engines/table-engines/integrations/index.md @@ -1,8 +1,21 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_folder_title: "\u96C6\u6210" toc_priority: 30 --- +# 集成的表引擎 {#table-engines-for-integrations} +ClickHouse 提供了多种方式来与外部系统集成,包括表引擎。像所有其他的表引擎一样,使用`CREATE TABLE`或`ALTER TABLE`查询语句来完成配置。然后从用户的角度来看,配置的集成看起来像查询一个正常的表,但对它的查询是代理给外部系统的。这种透明的查询是这种方法相对于其他集成方法的主要优势之一,比如外部字典或表函数,它们需要在每次使用时使用自定义查询方法。 + +以下是支持的集成方式: + +- [ODBC](../../../engines/table-engines/integrations/odbc.md) +- [JDBC](../../../engines/table-engines/integrations/jdbc.md) +- [MySQL](../../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) +- [HDFS](../../../engines/table-engines/integrations/hdfs.md) +- [S3](../../../engines/table-engines/integrations/s3.md) +- [Kafka](../../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) +- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) diff --git a/docs/zh/engines/table-engines/integrations/rabbitmq.md b/docs/zh/engines/table-engines/integrations/rabbitmq.md new file mode 100644 index 00000000000..a4a5be5f685 --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/rabbitmq.md @@ -0,0 +1,167 @@ +--- +toc_priority: 10 +toc_title: RabbitMQ +--- + +# RabbitMQ 引擎 {#rabbitmq-engine} + +该引擎允许 ClickHouse 与 [RabbitMQ](https://www.rabbitmq.com) 进行集成. + +`RabbitMQ` 可以让你: + +- 发布或订阅数据流。 +- 在数据流可用时进行处理。 + +## 创建一张表 {#table_engine-rabbitmq-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... 
+) ENGINE = RabbitMQ SETTINGS + rabbitmq_host_port = 'host:port', + rabbitmq_exchange_name = 'exchange_name', + rabbitmq_format = 'data_format'[,] + [rabbitmq_exchange_type = 'exchange_type',] + [rabbitmq_routing_key_list = 'key1,key2,...',] + [rabbitmq_row_delimiter = 'delimiter_symbol',] + [rabbitmq_schema = '',] + [rabbitmq_num_consumers = N,] + [rabbitmq_num_queues = N,] + [rabbitmq_queue_base = 'queue',] + [rabbitmq_deadletter_exchange = 'dl-exchange',] + [rabbitmq_persistent = 0,] + [rabbitmq_skip_broken_messages = N,] + [rabbitmq_max_block_size = N,] + [rabbitmq_flush_interval_ms = N] +``` + +必要参数: + +- `rabbitmq_host_port` – 主机名:端口号 (比如, `localhost:5672`). +- `rabbitmq_exchange_name` – RabbitMQ exchange 名称. +- `rabbitmq_format` – 消息格式. 使用与SQL`FORMAT`函数相同的标记,如`JSONEachRow`。 更多信息,请参阅 [Formats](../../../interfaces/formats.md) 部分. + +可选参数: + +- `rabbitmq_exchange_type` – RabbitMQ exchange 的类型: `direct`, `fanout`, `topic`, `headers`, `consistent_hash`. 默认是: `fanout`. +- `rabbitmq_routing_key_list` – 一个以逗号分隔的路由键列表. +- `rabbitmq_row_delimiter` – 用于消息结束的分隔符. +- `rabbitmq_schema` – 如果格式需要模式定义,必须使用该参数。比如, [Cap’n Proto](https://capnproto.org/) 需要模式文件的路径以及根 `schema.capnp:Message` 对象的名称. +- `rabbitmq_num_consumers` – 每个表的消费者数量。默认:`1`。如果一个消费者的吞吐量不够,可以指定更多的消费者. +- `rabbitmq_num_queues` – 队列的总数。默认值: `1`. 增加这个数字可以显著提高性能. +- `rabbitmq_queue_base` - 指定一个队列名称的提示。这个设置的使用情况如下. +- `rabbitmq_deadletter_exchange` - 为[dead letter exchange](https://www.rabbitmq.com/dlx.html)指定名称。你可以用这个 exchange 的名称创建另一个表,并在消息被重新发布到 dead letter exchange 的情况下收集它们。默认情况下,没有指定 dead letter exchange。Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). +- `rabbitmq_persistent` - 如果设置为 1 (true), 在插入查询中交付模式将被设置为 2 (将消息标记为 'persistent'). 默认是: `0`. +- `rabbitmq_skip_broken_messages` – RabbitMQ 消息解析器对每块模式不兼容消息的容忍度。默认值:`0`. 如果 `rabbitmq_skip_broken_messages = N`,那么引擎将跳过 *N* 个无法解析的 RabbitMQ 消息(一条消息等于一行数据)。 +- `rabbitmq_max_block_size` +- `rabbitmq_flush_interval_ms` + +同时,格式的设置也可以与 rabbitmq 相关的设置一起添加。 + +示例: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64, + date DateTime + ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', + rabbitmq_exchange_name = 'exchange1', + rabbitmq_format = 'JSONEachRow', + rabbitmq_num_consumers = 5, + date_time_input_format = 'best_effort'; +``` + +RabbitMQ 服务器配置应使用 ClickHouse 配置文件添加。 + +必要配置: + +``` xml + + root + clickhouse + +``` + +可选配置: + +``` xml + + clickhouse + +``` + +## 描述 {#description} + +`SELECT`对于读取消息不是特别有用(除了调试),因为每个消息只能读取一次。使用[物化视图](../../../sql-reference/statements/create.md#create-view)创建实时线程更为实用。要做到这一点: + +1. 使用引擎创建一个 RabbitMQ 消费者,并将其视为一个数据流。 +2. 创建一个具有所需结构的表。 +3. 创建一个物化视图,转换来自引擎的数据并将其放入先前创建的表中。 + +当`物化视图`加入引擎时,它开始在后台收集数据。这允许您持续接收来自 RabbitMQ 的消息,并使用 `SELECT` 将它们转换为所需格式。 +一个 RabbitMQ 表可以有多个你需要的物化视图。 + +数据可以根据`rabbitmq_exchange_type`和指定的`rabbitmq_routing_key_list`进行通道。 +每个表不能有多于一个 exchange。一个 exchange 可以在多个表之间共享 - 因为可以使用路由让数据同时进入多个表。 + +Exchange 类型的选项: + +- `direct` - 路由是基于精确匹配的键。例如表的键列表: `key1,key2,key3,key4,key5`, 消息键可以是等同他们中的任意一个. +- `fanout` - 路由到所有的表 (exchange 名称相同的情况) 无论是什么键都是这样. +- `topic` - 路由是基于带有点分隔键的模式. 比如: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`. +- `headers` - 路由是基于`key=value`的匹配,设置为`x-match=all`或`x-match=any`. 例如表的键列表: `x-match=all,format=logs,type=report,year=2020`. +- `consistent_hash` - 数据在所有绑定的表之间均匀分布 (exchange 名称相同的情况). 请注意,这种 exchange 类型必须启用 RabbitMQ 插件: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. 
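As an illustration of the exchange types listed above, a minimal sketch of a table bound to a `topic` exchange; the exchange name is hypothetical and the routing-key patterns reuse those quoted in the list:

``` sql
CREATE TABLE queue_topic (
    key UInt64,
    value UInt64
) ENGINE = RabbitMQ SETTINGS
    rabbitmq_host_port = 'localhost:5672',
    rabbitmq_exchange_name = 'logs_exchange', -- hypothetical exchange name
    rabbitmq_exchange_type = 'topic',
    rabbitmq_routing_key_list = '*.logs,records.*.*.2020',
    rabbitmq_format = 'JSONEachRow';
```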
+ +设置`rabbitmq_queue_base`可用于以下情况: + +- 来让不同的表共享队列, 这样就可以为同一个队列注册多个消费者,这使得性能更好。如果使用`rabbitmq_num_consumers`和/或`rabbitmq_num_queues`设置,在这些参数相同的情况下,实现队列的精确匹配。 +- 以便在不是所有消息都被成功消费时,能够恢复从某些持久队列的阅读。要从一个特定的队列恢复消耗 - 在`rabbitmq_queue_base`设置中设置其名称,不要指定`rabbitmq_num_consumers`和`rabbitmq_num_queues`(默认为1)。要恢复所有队列的消费,这些队列是为一个特定的表所声明的 - 只要指定相同的设置。`rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`。默认情况下,队列名称对表来说是唯一的。 +- 以重复使用队列,因为它们被声明为持久的,并且不会自动删除。可以通过任何 RabbitMQ CLI 工具删除) + +为了提高性能,收到的消息被分组为大小为 [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size) 的块。如果在[stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md)毫秒内没有形成数据块,无论数据块是否完整,数据都会被刷到表中。 + +如果`rabbitmq_num_consumers`和/或`rabbitmq_num_queues`设置与`rabbitmq_exchange_type`一起被指定,那么: + +- 必须启用`rabbitmq-consistent-hash-exchange` 插件. +- 必须指定已发布信息的 `message_id`属性(对于每个信息/批次都是唯一的)。 + +对于插入查询时有消息元数据,消息元数据被添加到每个发布的消息中:`messageID`和`republished`标志(如果值为true,则表示消息发布不止一次) - 可以通过消息头访问。 + +不要在插入和物化视图中使用同一个表。 + +示例: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64 + ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', + rabbitmq_exchange_name = 'exchange1', + rabbitmq_exchange_type = 'headers', + rabbitmq_routing_key_list = 'format=logs,type=report,year=2020', + rabbitmq_format = 'JSONEachRow', + rabbitmq_num_consumers = 5; + + CREATE TABLE daily (key UInt64, value UInt64) + ENGINE = MergeTree() ORDER BY key; + + CREATE MATERIALIZED VIEW consumer TO daily + AS SELECT key, value FROM queue; + + SELECT key, value FROM daily ORDER BY key; +``` + +## 虚拟列 {#virtual-columns} + +- `_exchange_name` - RabbitMQ exchange 名称. +- `_channel_id` - 接收消息的消费者所声明的频道ID. +- `_delivery_tag` - 收到消息的DeliveryTag. 以每个频道为范围. +- `_redelivered` - 消息的`redelivered`标志. +- `_message_id` - 收到的消息的ID;如果在消息发布时被设置,则为非空. +- `_timestamp` - 收到的消息的时间戳;如果在消息发布时被设置,则为非空. + +[原始文章](https://clickhouse.tech/docs/en/engines/table-engines/integrations/rabbitmq/) diff --git a/docs/zh/engines/table-engines/integrations/s3.md b/docs/zh/engines/table-engines/integrations/s3.md new file mode 100644 index 00000000000..5b934dae2c4 --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/s3.md @@ -0,0 +1,213 @@ +--- +toc_priority: 7 +toc_title: S3 +--- + +# S3 表引擎 {#table-engine-s3} + +这个引擎提供与[Amazon S3](https://aws.amazon.com/s3/)生态系统的集成。这个引擎类似于[HDFS](../../../engines/table-engines/integrations/hdfs.md)引擎,但提供了 S3 特有的功能。 + +## 创建表 {#creating-a-table} + +``` sql +CREATE TABLE s3_engine_table (name String, value UInt32) +ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, [compression]) +``` + +**引擎参数** + +- `path` — 带有文件路径的 Bucket url。在只读模式下支持以下通配符: `*`, `?`, `{abc,def}` 和 `{N..M}` 其中 `N`, `M` 是数字, `'abc'`, `'def'` 是字符串. 更多信息见[下文](#wildcards-in-path). +- `format` — 文件的[格式](../../../interfaces/formats.md#formats). +- `aws_access_key_id`, `aws_secret_access_key` - [AWS](https://aws.amazon.com/) 账号的长期凭证. 你可以使用凭证来对你的请求进行认证.参数是可选的. 如果没有指定凭据, 将从配置文件中读取凭据. 更多信息参见 [使用 S3 来存储数据](../mergetree-family/mergetree.md#table_engine-mergetree-s3). +- `compression` — 压缩类型. 支持的值: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. 参数是可选的. 默认情况下,通过文件扩展名自动检测压缩类型. + +**示例** + +1. 创建 `s3_engine_table` 表: + +``` sql +CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'gzip'); +``` + +2. 填充文件: + +``` sql +INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); +``` + +3. 
查询数据: + +``` sql +SELECT * FROM s3_engine_table LIMIT 2; +``` + +```text +┌─name─┬─value─┐ +│ one │ 1 │ +│ two │ 2 │ +└──────┴───────┘ +``` +## 虚拟列 {#virtual-columns} + +- `_path` — 文件路径. +- `_file` — 文件名. + +有关虚拟列的更多信息,见 [这里](../../../engines/table-engines/index.md#table_engines-virtual_columns). + +## 实施细节 {#implementation-details} + +- 读取和写入可以是并行的 +- 以下是不支持的: + - `ALTER` 和 `SELECT...SAMPLE` 操作. + - 索引. + - 复制. + +## 路径中的通配符 {#wildcards-in-path} + +`path` 参数可以使用类 bash 的通配符来指定多个文件。对于正在处理的文件应该存在并匹配到整个路径模式。 文件列表的确定是在 `SELECT` 的时候进行(而不是在 `CREATE` 的时候)。 + +- `*` — 替代任何数量的任何字符,除了 `/` 以及空字符串。 +- `?` — 代替任何单个字符. +- `{some_string,another_string,yet_another_one}` — 替代 `'some_string', 'another_string', 'yet_another_one'`字符串. +- `{N..M}` — 替换 N 到 M 范围内的任何数字,包括两个边界的值. N 和 M 可以以 0 开头,比如 `000..078` + +带 `{}` 的结构类似于 [远程](../../../sql-reference/table-functions/remote.md) 表函数。 + +**示例** + +1. 假设我们在 S3 上有几个 CSV 格式的文件,URI如下: + +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ + +有几种方法来创建由所有六个文件组成的数据表: + +第一种方式: + +``` sql +CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); +``` + +另一种方式: + +``` sql +CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); +``` + +表由两个目录中的所有文件组成(所有文件应满足查询中描述的格式和模式)。 + +``` sql +CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); +``` + +如果文件列表中包含有从零开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`。 + +**示例** + +使用文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`来创建表: + +``` sql +CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); +``` + +## 虚拟列 {#virtual-columns} + +- `_path` — 文件路径. +- `_file` — 文件名. 
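A hedged usage sketch for the `_path`/`_file` virtual columns listed above, assuming the `s3_engine_table` table created earlier on this page:

``` sql
-- Count rows per source file; _file is a virtual column and is not stored in the table.
SELECT _file, count() AS rows
FROM s3_engine_table
GROUP BY _file
ORDER BY _file;
```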
+ +**另请参阅** + +- [虚拟列](../../../engines/table-engines/index.md#table_engines-virtual_columns) + +## S3 相关的设置 {#settings} + +以下设置可以在查询执行前设置,也可以放在配置文件中。 + +- `s3_max_single_part_upload_size` - 使用单文件上传至 S3 的对象的最大文件大小。默认值是`64Mb`。 +- `s3_min_upload_part_size` - 使用[S3多文件块上传](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html)时,文件块的最小文件大小。默认值是`512Mb`。 +- `s3_max_redirects` - 允许的最大S3重定向跳数。默认值是`10`。 +- `s3_single_read_retries` - 单次读取时的最大尝试次数。默认值是`4`。 + +安全考虑:如果恶意用户可以指定任意的 S3 网址,`s3_max_redirects`参数必须设置为零,以避免[SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery)攻击;或者,必须在服务器配置中指定`remote_host_filter`。 + +## 基于 Endpoint 的设置 {#endpoint-settings} + +在配置文件中可以为给定的端点指定以下设置(将通过URL的准确前缀来匹配)。 + +- `endpoint` - 指定一个端点的前缀。必要参数。 +- `access_key_id`和`secret_access_key` - 用于指定端点的登陆凭据。可选参数。 +- `use_environment_credentials` - 如果设置为`true`,S3客户端将尝试从环境变量和[Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud)元数据中为指定的端点获取证书。可选参数,默认值是`false`。 +- `region` - 指定S3的区域名称。可选参数。 +- `use_insecure_imds_request` - 如果设置为`true`,S3客户端将使用不安全的 IMDS 请求,同时从Amazon EC2 元数据获取证书。可选参数,默认值是`false`。 +- `header` - 添加指定的HTTP头到给定端点的请求中。可选参数,可以使用多次此参数来添加多个值。 +- `server_side_encryption_customer_key_base64` - 如果指定,需要指定访问 SSE-C 加密的 S3 对象所需的头信息。可选参数。 +- `max_single_read_retries` - 单次读取时的最大尝试次数。默认值是`4`。可选参数。 + +**示例:** + +``` xml + + + https://storage.yandexcloud.net/my-test-bucket-768/ + + + + + + + + + + +``` + +## 用法 {#usage-examples} + +假设我们在 S3 上有几个 CSV 格式的文件,URI 如下: + +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' + + +1. 有几种方式来制作由所有六个文件组成的表格,其中一种方式如下: + +``` sql +CREATE TABLE table_with_range (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); +``` + +2. 另一种方式: + +``` sql +CREATE TABLE table_with_question_mark (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); +``` + +3. 表由两个目录中的所有文件组成(所有文件应满足查询中描述的格式和模式): + +``` sql +CREATE TABLE table_with_asterisk (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); +``` + +!!! warning "Warning" + 如果文件列表中包含有从0开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`. + +4. 
从文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`创建表: + +``` sql +CREATE TABLE big_table (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); +``` + +## 另请参阅 + +- [S3 表函数](../../../sql-reference/table-functions/s3.md) diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 353dd5f5bc8..45e080fd640 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -6,21 +6,21 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及 主要特点: -- 存储的数据按主键排序。 +- 存储的数据按主键排序。 - 这使得你能够创建一个小型的稀疏索引来加快数据检索。 + 这使得您能够创建一个小型的稀疏索引来加快数据检索。 -- 支持数据分区,如果指定了 [分区键](custom-partitioning-key.md) 的话。 +- 如果指定了 [分区键](custom-partitioning-key.md) 的话,可以使用分区。 在相同数据集和相同结果集的情况下 ClickHouse 中某些带分区的操作会比普通操作更快。查询中指定了分区键时 ClickHouse 会自动截取分区数据。这也有效增加了查询性能。 -- 支持数据副本。 +- 支持数据副本。 `ReplicatedMergeTree` 系列的表提供了数据副本功能。更多信息,请参阅 [数据副本](replication.md) 一节。 -- 支持数据采样。 +- 支持数据采样。 - 需要的话,你可以给表设置一个采样方法。 + 需要的话,您可以给表设置一个采样方法。 !!! note "注意" [合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。 @@ -50,54 +50,58 @@ ORDER BY expr **子句** -- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 +- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 - -- `ORDER BY` — 排序键。 +- `ORDER BY` — 排序键。 可以是一组列的元组或任意的表达式。 例如: `ORDER BY (CounterID, EventDate)` 。 - - 如果没有使用 `PRIMARY KEY` 显式的指定主键,ClickHouse 会使用排序键作为主键。 - + + 如果没有使用 `PRIMARY KEY` 显式指定的主键,ClickHouse 会使用排序键作为主键。 + 如果不需要排序,可以使用 `ORDER BY tuple()`. 参考 [选择主键](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#selecting-the-primary-key) -- `PARTITION BY` — [分区键](custom-partitioning-key.md) 。 +- `PARTITION BY` — [分区键](custom-partitioning-key.md) ,可选项。 要按月分区,可以使用表达式 `toYYYYMM(date_column)` ,这里的 `date_column` 是一个 [Date](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的列。分区名的格式会是 `"YYYYMM"` 。 -- `PRIMARY KEY` - 主键,如果要 [选择与排序键不同的主键](#choosing-a-primary-key-that-differs-from-the-sorting-key),可选。 +- `PRIMARY KEY` - 如果要 [选择与排序键不同的主键](#choosing-a-primary-key-that-differs-from-the-sorting-key),在这里指定,可选项。 默认情况下主键跟排序键(由 `ORDER BY` 子句指定)相同。 因此,大部分情况下不需要再专门指定一个 `PRIMARY KEY` 子句。 -- `SAMPLE BY` — 用于抽样的表达式。 +- `SAMPLE BY` - 用于抽样的表达式,可选项。 如果要用抽样表达式,主键中必须包含这个表达式。例如: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))` 。 - -- TTL 指定行存储的持续时间并定义数据片段在硬盘和卷上的移动逻辑的规则列表,可选。 + +- `TTL` - 指定行存储的持续时间并定义数据片段在硬盘和卷上的移动逻辑的规则列表,可选项。 表达式中必须存在至少一个 `Date` 或 `DateTime` 类型的列,比如: - + `TTL date + INTERVAl 1 DAY` - + 规则的类型 `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'`指定了当满足条件(到达指定时间)时所要执行的动作:移除过期的行,还是将数据片段(如果数据片段中的所有行都满足表达式的话)移动到指定的磁盘(`TO DISK 'xxx'`) 或 卷(`TO VOLUME 'xxx'`)。默认的规则是移除(`DELETE`)。可以在列表中指定多个规则,但最多只能有一个`DELETE`的规则。 - + 更多细节,请查看 [表和列的 TTL](#table_engine-mergetree-ttl) -- `SETTINGS` — 控制 `MergeTree` 行为的额外参数: +- `SETTINGS` — 控制 `MergeTree` 行为的额外参数,可选项: - - `index_granularity` — 索引粒度。索引中相邻的『标记』间的数据行数。默认值,8192 。参考[数据存储](#mergetree-data-storage)。 - - `index_granularity_bytes` — 索引粒度,以字节为单位,默认值: 10Mb。如果想要仅按数据行数限制索引粒度, 请设置为0(不建议)。 - - `enable_mixed_granularity_parts` — 是否启用通过 `index_granularity_bytes` 控制索引粒度的大小。在19.11版本之前, 只有 `index_granularity` 配置能够用于限制索引粒度的大小。当从具有很大的行(几十上百兆字节)的表中查询数据时候,`index_granularity_bytes` 配置能够提升ClickHouse的性能。如果你的表里有很大的行,可以开启这项配置来提升`SELECT` 查询的性能。 - - `use_minimalistic_part_header_in_zookeeper` — 是否在 ZooKeeper 中启用最小的数据片段头 。如果设置了 `use_minimalistic_part_header_in_zookeeper=1` ,ZooKeeper 
会存储更少的数据。更多信息参考『服务配置参数』这章中的 [设置描述](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。 - - `min_merge_bytes_to_use_direct_io` — 使用直接 I/O 来操作磁盘的合并操作时要求的最小数据量。合并数据片段时,ClickHouse 会计算要被合并的所有数据的总存储空间。如果大小超过了 `min_merge_bytes_to_use_direct_io` 设置的字节数,则 ClickHouse 将使用直接 I/O 接口(`O_DIRECT` 选项)对磁盘读写。如果设置 `min_merge_bytes_to_use_direct_io = 0` ,则会禁用直接 I/O。默认值:`10 * 1024 * 1024 * 1024` 字节。 + - `index_granularity` — 索引粒度。索引中相邻的『标记』间的数据行数。默认值8192 。参考[数据存储](#mergetree-data-storage)。 + - `index_granularity_bytes` — 索引粒度,以字节为单位,默认值: 10Mb。如果想要仅按数据行数限制索引粒度, 请设置为0(不建议)。 + - `min_index_granularity_bytes` - 允许的最小数据粒度,默认值:1024b。该选项用于防止误操作,添加了一个非常低索引粒度的表。参考[数据存储](#mergetree-data-storage) + - `enable_mixed_granularity_parts` — 是否启用通过 `index_granularity_bytes` 控制索引粒度的大小。在19.11版本之前, 只有 `index_granularity` 配置能够用于限制索引粒度的大小。当从具有很大的行(几十上百兆字节)的表中查询数据时候,`index_granularity_bytes` 配置能够提升ClickHouse的性能。如果您的表里有很大的行,可以开启这项配置来提升`SELECT` 查询的性能。 + - `use_minimalistic_part_header_in_zookeeper` — ZooKeeper中数据片段存储方式 。如果`use_minimalistic_part_header_in_zookeeper=1` ,ZooKeeper 会存储更少的数据。更多信息参考[服务配置参数]([Server Settings | ClickHouse Documentation](https://clickhouse.tech/docs/zh/operations/server-configuration-parameters/settings/))这章中的 [设置描述](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。 + - `min_merge_bytes_to_use_direct_io` — 使用直接 I/O 来操作磁盘的合并操作时要求的最小数据量。合并数据片段时,ClickHouse 会计算要被合并的所有数据的总存储空间。如果大小超过了 `min_merge_bytes_to_use_direct_io` 设置的字节数,则 ClickHouse 将使用直接 I/O 接口(`O_DIRECT` 选项)对磁盘读写。如果设置 `min_merge_bytes_to_use_direct_io = 0` ,则会禁用直接 I/O。默认值:`10 * 1024 * 1024 * 1024` 字节。 - - `merge_with_ttl_timeout` — TTL合并频率的最小间隔时间,单位:秒。默认值: 86400 (1 天)。 - - `write_final_mark` — 是否启用在数据片段尾部写入最终索引标记。默认值: 1(不建议更改)。 - - `merge_max_block_size` — 在块中进行合并操作时的最大行数限制。默认值:8192 - - `storage_policy` — 存储策略。 参见 [使用具有多个块的设备进行数据存储](#table_engine-mergetree-multiple-volumes). - - `min_bytes_for_wide_part`,`min_rows_for_wide_part` 在数据片段中可以使用`Wide`格式进行存储的最小字节数/行数。你可以不设置、只设置一个,或全都设置。参考:[数据存储](#mergetree-data-storage) + - `merge_with_ttl_timeout` — TTL合并频率的最小间隔时间,单位:秒。默认值: 86400 (1 天)。 + - `write_final_mark` — 是否启用在数据片段尾部写入最终索引标记。默认值: 1(不要关闭)。 + - `merge_max_block_size` — 在块中进行合并操作时的最大行数限制。默认值:8192 + - `storage_policy` — 存储策略。 参见 [使用具有多个块的设备进行数据存储](#table_engine-mergetree-multiple-volumes). 
+ - `min_bytes_for_wide_part`,`min_rows_for_wide_part` 在数据片段中可以使用`Wide`格式进行存储的最小字节数/行数。您可以不设置、只设置一个,或全都设置。参考:[数据存储](#mergetree-data-storage) + - `max_parts_in_total` - 所有分区中最大块的数量(意义不明) + - `max_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最大大小。您可以在全局设置中设置该值(参见[max_compress_block_size](https://clickhouse.tech/docs/zh/operations/settings/settings/#max-compress-block-size))。建表时指定该值会覆盖全局设置。 + - `min_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最小大小。您可以在全局设置中设置该值(参见[min_compress_block_size](https://clickhouse.tech/docs/zh/operations/settings/settings/#min-compress-block-size))。建表时指定该值会覆盖全局设置。 + - `max_partitions_to_read` - 一次查询中可访问的分区最大数。您可以在全局设置中设置该值(参见[max_partitions_to_read](https://clickhouse.tech/docs/zh/operations/settings/settings/#max_partitions_to_read))。 **示例配置** @@ -107,12 +111,11 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa 在这个例子中,我们设置了按月进行分区。 -同时我们设置了一个按用户 ID 哈希的抽样表达式。这使得你可以对该表中每个 `CounterID` 和 `EventDate` 的数据伪随机分布。如果你在查询时指定了 [SAMPLE](../../../engines/table-engines/mergetree-family/mergetree.md#select-sample-clause) 子句。 ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。 +同时我们设置了一个按用户 ID 哈希的抽样表达式。这使得您可以对该表中每个 `CounterID` 和 `EventDate` 的数据伪随机分布。如果您在查询时指定了 [SAMPLE](../../../engines/table-engines/mergetree-family/mergetree.md#select-sample-clause) 子句。 ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。 `index_granularity` 可省略因为 8192 是默认设置 。
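To show how the per-table `SETTINGS` described above fit into DDL, a minimal hedged sketch; the table name and the setting values are examples only:

``` sql
CREATE TABLE example_mt
(
    EventDate Date,
    CounterID UInt32
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate)
SETTINGS
    index_granularity = 8192,           -- rows per index mark (the default)
    min_bytes_for_wide_part = 10485760, -- smaller parts are stored in the Compact format
    max_partitions_to_read = 100;       -- cap on partitions one query may touch
```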
- 已弃用的建表方法 !!! attention "注意" @@ -127,10 +130,10 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa **MergeTree() 参数** -- `date-column` — 类型为 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。 -- `sampling_expression` — 采样表达式。 -- `(primary, key)` — 主键。类型 — [元组()](../../../engines/table-engines/mergetree-family/mergetree.md) -- `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。 +- `date-column` — 类型为 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。 +- `sampling_expression` — 采样表达式。 +- `(primary, key)` — 主键。类型 — [元组()](../../../engines/table-engines/mergetree-family/mergetree.md) +- `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。 **示例** @@ -152,51 +155,55 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa 数据存储格式由 `min_bytes_for_wide_part` 和 `min_rows_for_wide_part` 表引擎参数控制。如果数据片段中的字节数或行数少于相应的设置值,数据片段会以 `Compact` 格式存储,否则会以 `Wide` 格式存储。 每个数据片段被逻辑的分割成颗粒(granules)。颗粒是 ClickHouse 中进行数据查询时的最小不可分割数据集。ClickHouse 不会对行或值进行拆分,所以每个颗粒总是包含整数个行。每个颗粒的第一行通过该行的主键值进行标记, -ClickHouse 会为每个数据片段创建一个索引文件来存储这些标记。对于每列,无论它是否包含在主键当中,ClickHouse 都会存储类似标记。这些标记让你可以在列文件中直接找到数据。 +ClickHouse 会为每个数据片段创建一个索引文件来存储这些标记。对于每列,无论它是否包含在主键当中,ClickHouse 都会存储类似标记。这些标记让您可以在列文件中直接找到数据。 -颗粒的大小通过表引擎参数 `index_granularity` 和 `index_granularity_bytes` 控制。取决于行的大小,颗粒的行数的在 `[1, index_granularity]` 范围中。如果单行的大小超过了 `index_granularity_bytes` 设置的值,那么一个颗粒的大小会超过 `index_granularity_bytes`。在这种情况下,颗粒的大小等于该行的大小。 +颗粒的大小通过表引擎参数 `index_granularity` 和 `index_granularity_bytes` 控制。颗粒的行数的在 `[1, index_granularity]` 范围中,这取决于行的大小。如果单行的大小超过了 `index_granularity_bytes` 设置的值,那么一个颗粒的大小会超过 `index_granularity_bytes`。在这种情况下,颗粒的大小等于该行的大小。 ## 主键和索引在查询中的表现 {#primary-keys-and-indexes-in-queries} 我们以 `(CounterID, Date)` 以主键。排序好的索引的图示会是下面这样: +``` text 全部数据 : [-------------------------------------------------------------------------] CounterID: [aaaaaaaaaaaaaaaaaabbbbcdeeeeeeeeeeeeefgggggggghhhhhhhhhiiiiiiiiikllllllll] Date: [1111111222222233331233211111222222333211111112122222223111112223311122333] 标记: | | | | | | | | | | | a,1 a,2 a,3 b,3 e,2 e,3 g,1 h,2 i,1 i,3 l,3 标记号: 0 1 2 3 4 5 6 7 8 9 10 +``` 如果指定查询如下: -- `CounterID in ('a', 'h')`,服务器会读取标记号在 `[0, 3)` 和 `[6, 8)` 区间中的数据。 -- `CounterID IN ('a', 'h') AND Date = 3`,服务器会读取标记号在 `[1, 3)` 和 `[7, 8)` 区间中的数据。 -- `Date = 3`,服务器会读取标记号在 `[1, 10]` 区间中的数据。 +- `CounterID in ('a', 'h')`,服务器会读取标记号在 `[0, 3)` 和 `[6, 8)` 区间中的数据。 +- `CounterID IN ('a', 'h') AND Date = 3`,服务器会读取标记号在 `[1, 3)` 和 `[7, 8)` 区间中的数据。 +- `Date = 3`,服务器会读取标记号在 `[1, 10]` 区间中的数据。 上面例子可以看出使用索引通常会比全表描述要高效。 稀疏索引会引起额外的数据读取。当读取主键单个区间范围的数据时,每个数据块中最多会多读 `index_granularity * 2` 行额外的数据。 -稀疏索引使得你可以处理极大量的行,因为大多数情况下,这些索引常驻与内存(RAM)中。 +稀疏索引使得您可以处理极大量的行,因为大多数情况下,这些索引常驻于内存。 -ClickHouse 不要求主键惟一,所以你可以插入多条具有相同主键的行。 +ClickHouse 不要求主键唯一,所以您可以插入多条具有相同主键的行。 + +您可以在`PRIMARY KEY`与`ORDER BY`条件中使用`可为空的`类型的表达式,但强烈建议不要这么做。为了启用这项功能,请打开[allow_nullable_key](https://clickhouse.tech/docs/zh/operations/settings/settings/#allow-nullable-key),[NULLS_LAST](https://clickhouse.tech/docs/zh/sql-reference/statements/select/order-by/#sorting-of-special-values)规则也适用于`ORDER BY`条件中有NULL值的情况下。 ### 主键的选择 {#zhu-jian-de-xuan-ze} -主键中列的数量并没有明确的限制。依据数据结构,你可以在主键包含多些或少些列。这样可以: +主键中列的数量并没有明确的限制。依据数据结构,您可以在主键包含多些或少些列。这样可以: -- 改善索引的性能。 +- 改善索引的性能。 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能: - - - 查询会使用 `c` 列作为条件 - - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让你的查询略过很长的数据范围。 -- 
改善数据压缩。 + - 查询会使用 `c` 列作为条件 + - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。 + +- 改善数据压缩。 ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。 -- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 +- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 在这种情况下,指定与主键不同的 *排序键* 也是有意义的。 @@ -206,9 +213,9 @@ ClickHouse 不要求主键惟一,所以你可以插入多条具有相同主键 想要根据初始顺序进行数据查询,使用 [单线程查询](../../../operations/settings/settings.md#settings-max_threads) -### 选择与排序键不同主键 {#choosing-a-primary-key-that-differs-from-the-sorting-key} +### 选择与排序键不同的主键 {#choosing-a-primary-key-that-differs-from-the-sorting-key} -指定一个跟排序键不一样的主键是可以的,此时排序键用于在数据片段中进行排序,主键用于在索引文件中进行标记的写入。这种情况下,主键表达式元组必须是排序键表达式元组的前缀。 +Clickhouse可以做到指定一个跟排序键不一样的主键,此时排序键用于在数据片段中进行排序,主键用于在索引文件中进行标记的写入。这种情况下,主键表达式元组必须是排序键表达式元组的前缀(即主键为(a,b),排序列必须为(a,b,******))。 当使用 [SummingMergeTree](summingmergetree.md) 和 [AggregatingMergeTree](aggregatingmergetree.md) 引擎时,这个特性非常有用。通常在使用这类引擎时,表里的列分两种:*维度* 和 *度量* 。典型的查询会通过任意的 `GROUP BY` 对度量列进行聚合并通过维度列进行过滤。由于 SummingMergeTree 和 AggregatingMergeTree 会对排序键相同的行进行聚合,所以把所有的维度放进排序键是很自然的做法。但这将导致排序键中包含大量的列,并且排序键会伴随着新添加的维度不断的更新。 @@ -218,14 +225,20 @@ ClickHouse 不要求主键惟一,所以你可以插入多条具有相同主键 ### 索引和分区在查询中的应用 {#use-of-indexes-and-partitions-in-queries} -对于 `SELECT` 查询,ClickHouse 分析是否可以使用索引。如果 `WHERE/PREWHERE` 子句具有下面这些表达式(作为谓词链接一子项或整个)则可以使用索引:包含一个表示与主键/分区键中的部分字段或全部字段相等/不等的比较表达式;基于主键/分区键的字段上的 `IN` 或 固定前缀的`LIKE` 表达式;基于主键/分区键的字段上的某些函数;基于主键/分区键的表达式的逻辑表达式。 +对于 `SELECT` 查询,ClickHouse 分析是否可以使用索引。如果 `WHERE/PREWHERE` 子句具有下面这些表达式(作为完整WHERE条件的一部分或全部)则可以使用索引:进行相等/不相等的比较;对主键列或分区列进行`IN`运算、有固定前缀的`LIKE`运算(如name like 'test%')、函数运算(部分函数适用),还有对上述表达式进行逻辑运算。 + + -因此,在索引键的一个或多个区间上快速地执行查询都是可能的。下面例子中,指定标签;指定标签和日期范围;指定标签和日期;指定多个标签和日期范围等执行查询,都会非常快。 + + +因此,在索引键的一个或多个区间上快速地执行查询是可能的。下面例子中,指定标签;指定标签和日期范围;指定标签和日期;指定多个标签和日期范围等执行查询,都会非常快。 当引擎配置如下时: +``` sql ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192 +``` 这种情况下,这些查询: @@ -237,7 +250,7 @@ SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDat ClickHouse 会依据主键索引剪掉不符合的数据,依据按月分区的分区键剪掉那些不包含符合数据的分区。 -上文的查询显示,即使索引用于复杂表达式。因为读表操作是组织好的,所以,使用索引不会比完整扫描慢。 +上文的查询显示,即使索引用于复杂表达式,因为读表操作经过优化,所以使用索引不会比完整扫描慢。 下面这个例子中,不会使用索引。 @@ -247,17 +260,16 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' 要检查 ClickHouse 执行一个查询时能否使用索引,可设置 [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) 和 [force_primary_key](../../../operations/settings/settings.md) 。 -按月分区的分区键是只能读取包含适当范围日期的数据块。这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有带主键前几个字段作为条件的查询将会导致需要读取超过这个指定日期以外的数据。 +使用按月分区的分区列允许只读取包含适当日期区间的数据块,这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有用主键字段作为条件的查询将会导致需要读取超过这个指定日期以外的数据。 ### 部分单调主键的使用 -考虑这样的场景,比如一个月中的几天。它们在一个月的范围内形成一个[单调序列](https://zh.wikipedia.org/wiki/单调函数) ,但如果扩展到更大的时间范围它们就不再单调了。这就是一个部分单调序列。如果用户使用部分单调的主键创建表,ClickHouse同样会创建一个稀疏索引。当用户从这类表中查询数据时,ClickHouse 会对查询条件进行分析。如果用户希望获取两个索引标记之间的数据并且这两个标记在一个月以内,ClickHouse 可以在这种特殊情况下使用到索引,因为它可以计算出查询参数与索引标记之间的距离。 +考虑这样的场景,比如一个月中的天数。它们在一个月的范围内形成一个[单调序列](https://zh.wikipedia.org/wiki/单调函数) ,但如果扩展到更大的时间范围它们就不再单调了。这就是一个部分单调序列。如果用户使用部分单调的主键创建表,ClickHouse同样会创建一个稀疏索引。当用户从这类表中查询数据时,ClickHouse 会对查询条件进行分析。如果用户希望获取两个索引标记之间的数据并且这两个标记在一个月以内,ClickHouse 可以在这种特殊情况下使用到索引,因为它可以计算出查询参数与索引标记之间的距离。 如果查询参数范围内的主键不是单调序列,那么 ClickHouse 
无法使用索引。在这种情况下,ClickHouse 会进行全表扫描。 ClickHouse 在任何主键代表一个部分单调序列的情况下都会使用这个逻辑。 - ### 跳数索引 {#tiao-shu-suo-yin-fen-duan-hui-zong-suo-yin-shi-yan-xing-de} 此索引在 `CREATE` 语句的列部分里定义。 @@ -267,11 +279,7 @@ INDEX index_name expr TYPE type(...) GRANULARITY granularity_value ``` `*MergeTree` 系列的表可以指定跳数索引。 - -这些索引是由数据块按粒度分割后的每部分在指定表达式上汇总信息 `granularity_value` 组成(粒度大小用表引擎里 `index_granularity` 的指定)。 -这些汇总信息有助于用 `where` 语句跳过大片不满足的数据,从而减少 `SELECT` 查询从磁盘读取的数据量, - -这些索引会在数据块上聚合指定表达式的信息,这些信息以 granularity_value 指定的粒度组成 (粒度的大小通过在表引擎中定义 index_granularity 定义)。这些汇总信息有助于跳过大片不满足 `where` 条件的数据,从而减少 `SELECT` 查询从磁盘读取的数据量。 +跳数索引是指数据片段按照粒度(建表时指定的`index_granularity`)分割成小块后,将上述SQL的granularity_value数量的小块组合成一个大的块,对这些大块写入索引信息,这样有助于使用`where`筛选时跳过大量不必要的数据,减少`SELECT`需要读取的数据量。 **示例** @@ -295,34 +303,32 @@ SELECT count() FROM table WHERE s < 'z' SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 ``` -#### 索引的可用类型 {#table_engine-mergetree-data_skipping-indexes} +#### 可用的索引类型 {#table_engine-mergetree-data_skipping-indexes} -- `minmax` +- `minmax` 存储指定表达式的极值(如果表达式是 `tuple` ,则存储 `tuple` 中每个元素的极值),这些信息用于跳过数据块,类似主键。 -- `set(max_rows)` - 存储指定表达式的不重复值(不超过 `max_rows` 个,`max_rows=0` 则表示『无限制』)。这些信息可用于检查 数据块是否满足 `WHERE` 条件。 +- `set(max_rows)` + 存储指定表达式的不重复值(不超过 `max_rows` 个,`max_rows=0` 则表示『无限制』)。这些信息可用于检查数据块是否满足 `WHERE` 条件。 -- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` +- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` 存储一个包含数据块中所有 n元短语(ngram) 的 [布隆过滤器](https://en.wikipedia.org/wiki/Bloom_filter) 。只可用在字符串上。 可用于优化 `equals` , `like` 和 `in` 表达式的性能。 - `n` – 短语长度。 - `size_of_bloom_filter_in_bytes` – 布隆过滤器大小,单位字节。(因为压缩得好,可以指定比较大的值,如 256 或 512)。 - `number_of_hash_functions` – 布隆过滤器中使用的哈希函数的个数。 - `random_seed` – 哈希函数的随机种子。 + - `n` – 短语长度。 + - `size_of_bloom_filter_in_bytes` – 布隆过滤器大小,字节为单位。(因为压缩得好,可以指定比较大的值,如 256 或 512)。 + - `number_of_hash_functions` – 布隆过滤器中使用的哈希函数的个数。 + - `random_seed` – 哈希函数的随机种子。 -- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` - 跟 `ngrambf_v1` 类似,不同于 ngrams 存储字符串指定长度的所有片段。它只存储被非字母数字字符分割的片段。 +- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` + 跟 `ngrambf_v1` 类似,但是存储的是token而不是ngrams。Token是由非字母数字的符号分割的序列。 -- `bloom_filter(bloom_filter([false_positive])` – 为指定的列存储布隆过滤器 +- `bloom_filter(bloom_filter([false_positive])` – 为指定的列存储布隆过滤器 + + 可选参数`false_positive`用来指定从布隆过滤器收到错误响应的几率。取值范围是 (0,1),默认值:0.025 - 可选的参数 false_positive 用来指定从布隆过滤器收到错误响应的几率。取值范围是 (0,1),默认值:0.025 - 支持的数据类型:`Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`。 - + 以下函数会用到这个索引: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md) - - ``` sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 @@ -332,56 +338,56 @@ INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY #### 函数支持 {#functions-support} -WHERE 子句中的条件包含对列的函数调用,如果列是索引的一部分,ClickHouse 会在执行函数时尝试使用索引。不同的函数对索引的支持是不同的。 +WHERE 子句中的条件可以包含对某列数据进行运算的函数表达式,如果列是索引的一部分,ClickHouse会在执行函数时尝试使用索引。不同的函数对索引的支持是不同的。 `set` 索引会对所有函数生效,其他索引对函数的生效情况见下表 -| 函数 (操作符) / 索引 | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | 
-|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| -| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | -| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | -| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | +| 函数 (操作符) / 索引 | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | +| ------------------------------------------------------------ | ----------- | ------ | ---------- | ---------- | ------------ | +| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | +| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | +| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | +| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | 
+| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | 常量参数小于 ngram 大小的函数不能使用 `ngrambf_v1` 进行查询优化。 !!! note "注意" -布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于负向的函数,例如: +布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于优化预期函数结果为假的查询,例如: -- 可以用来优化的场景 - - `s LIKE '%test%'` - - `NOT s NOT LIKE '%test%'` - - `s = 1` - - `NOT s != 1` - - `startsWith(s, 'test')` -- 不能用来优化的场景 - - `NOT s LIKE '%test%'` - - `s NOT LIKE '%test%'` - - `NOT s = 1` - - `s != 1` - - `NOT startsWith(s, 'test')` +- 可以用来优化的场景 + - `s LIKE '%test%'` + - `NOT s NOT LIKE '%test%'` + - `s = 1` + - `NOT s != 1` + - `startsWith(s, 'test')` +- 不能用来优化的场景 + - `NOT s LIKE '%test%'` + - `s NOT LIKE '%test%'` + - `NOT s = 1` + - `s != 1` + - `NOT startsWith(s, 'test')` ## 并发数据访问 {#concurrent-data-access} -应对表的并发访问,我们使用多版本机制。换言之,当同时读和更新表时,数据从当前查询到的一组片段中读取。没有冗长的的锁。插入不会阻碍读取。 +对于表的并发访问,我们使用多版本机制。换言之,当一张表同时被读和更新时,数据从当前查询到的一组片段中读取。没有冗长的锁。插入不会阻碍读取。 对表的读操作是自动并行的。 ## 列和表的 TTL {#table_engine-mergetree-ttl} -TTL 可以设置值的生命周期,它既可以为整张表设置,也可以为每个列字段单独设置。表级别的 TTL 还会指定数据在磁盘和卷上自动转移的逻辑。 +TTL用于设置值的生命周期,它既可以为整张表设置,也可以为每个列字段单独设置。表级别的 TTL 还会指定数据在磁盘和卷上自动转移的逻辑。 TTL 表达式的计算结果必须是 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 或 [日期时间](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的字段。 @@ -405,7 +411,7 @@ TTL date_time + INTERVAL 15 HOUR `TTL`子句不能被用于主键字段。 -示例: +**示例:** 创建表时指定 `TTL` @@ -443,16 +449,23 @@ ALTER TABLE example_table 表可以设置一个用于移除过期行的表达式,以及多个用于在磁盘或卷上自动转移数据片段的表达式。当表中的行过期时,ClickHouse 会删除所有对应的行。对于数据片段的转移特性,必须所有的行都满足转移条件。 ``` sql -TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ... +TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] + ``` TTL 规则的类型紧跟在每个 TTL 表达式后面,它会影响满足表达式时(到达指定时间时)应当执行的操作: -- `DELETE` - 删除过期的行(默认操作); -- `TO DISK 'aaa'` - 将数据片段移动到磁盘 `aaa`; -- `TO VOLUME 'bbb'` - 将数据片段移动到卷 `bbb`. +- `DELETE` - 删除过期的行(默认操作); +- `TO DISK 'aaa'` - 将数据片段移动到磁盘 `aaa`; +- `TO VOLUME 'bbb'` - 将数据片段移动到卷 `bbb`. +- `GROUP BY` - 聚合过期的行(多种规则的组合用法见下方示意)
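+下面是把多种 TTL 规则组合在一起的一个最小示意(非官方示例;表名 `ttl_demo_table` 为假设名称,卷 `bbb` 沿用上文语法说明中的名称,需在存储策略中真实存在):
+
+``` sql
+CREATE TABLE ttl_demo_table
+(
+    d DateTime,
+    a Int
+)
+ENGINE = MergeTree
+ORDER BY d
+-- 一周后移动到卷 bbb,一个月后删除
+TTL d + INTERVAL 1 WEEK TO VOLUME 'bbb',
+    d + INTERVAL 1 MONTH DELETE;
+```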
-示例: +使用`WHERE`子句,您可以指定哪些过期的行会被删除或聚合(不适用于移动)。`GROUP BY`表达式必须是表主键的前缀。如果某列不是`GROUP BY`表达式的一部分,也没有在`SET`子句中显式引用,结果行中相应列的值是随机的(就好像使用了`any`函数)。 + +**示例**: 创建时指定 TTL @@ -477,19 +490,49 @@ ALTER TABLE example_table MODIFY TTL d + INTERVAL 1 DAY; ``` +创建一张表,设置一个月后数据过期,这些过期的行中日期为星期一的删除: + +``` sql +CREATE TABLE table_with_where +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; +``` + +创建一张表,设置过期的列会被聚合。列`x`包含每组行中的最大值,`y`为最小值,`d`为组内任意值。 + +``` sql +CREATE TABLE table_for_aggregation +( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int +) +ENGINE = MergeTree +ORDER BY (k1, k2) +TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); +``` + **删除数据** ClickHouse 在数据片段合并时会删除掉过期的数据。 -当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 你可以设置 `merge_with_ttl_timeout`。如果该值被设置的太低, 它将引发大量计划外的合并,这可能会消耗大量资源。 +当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 您可以设置 `merge_with_ttl_timeout`。如果该值被设置的太低, 它将引发大量计划外的合并,这可能会消耗大量资源。 -如果在合并的过程中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 查询。 +如果在合并的过程中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 。 -## 使用具有多个块的设备进行数据存储 {#table_engine-mergetree-multiple-volumes} +## 使用多个块设备进行数据存储 {#table_engine-mergetree-multiple-volumes} ### 介绍 {#introduction} -MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些可以潜在被划分为“冷”“热”的表来说是很有用的。近期数据被定期的查询但只需要很小的空间。相反,详尽的历史数据很少被用到。如果有多块磁盘可用,那么“热”的数据可以放置在快速的磁盘上(比如 NVMe 固态硬盘或内存),“冷”的数据可以放在相对较慢的磁盘上(比如机械硬盘)。 +MergeTree 系列表引擎可以将数据存储在多个块设备上。这对某些可以潜在被划分为“冷”“热”的表来说是很有用的。最新数据被定期的查询但只需要很小的空间。相反,详尽的历史数据很少被用到。如果有多块磁盘可用,那么“热”的数据可以放置在快速的磁盘上(比如 NVMe 固态硬盘或内存),“冷”的数据可以放在相对较慢的磁盘上(比如机械硬盘)。 数据片段是 `MergeTree` 引擎表的最小可移动单元。属于同一个数据片段的数据被存储在同一块磁盘上。数据片段会在后台自动的在磁盘间移动,也可以通过 [ALTER](../../../sql-reference/statements/alter.md#alter_move-partition) 查询来移动。 @@ -497,12 +540,14 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 - 磁盘 — 挂载到文件系统的块设备 - 默认磁盘 — 在服务器设置中通过 [path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-path) 参数指定的数据存储 -- 卷 — 磁盘的等效有序集合 (类似于 [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)) +- 卷 — 相同磁盘的顺序列表 (类似于 [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)) - 存储策略 — 卷的集合及他们之间的数据移动规则 + 以上名称的信息可以在ClickHouse的系统表[system.storage_policies](https://clickhouse.tech/docs/zh/operations/system-tables/storage_policies/#system_tables-storage_policies)和[system.disks](https://clickhouse.tech/docs/zh/operations/system-tables/disks/#system_tables-disks)中查看。为了应用存储策略,可以在建表时使用`storage_policy`设置。 + ### 配置 {#table_engine-mergetree-multiple-volumes_configure} -磁盘、卷和存储策略应当在主文件 `config.xml` 或 `config.d` 目录中的独立文件中的 `<storage_configuration>` 标签内定义。 +磁盘、卷和存储策略应当在主配置文件 `config.xml` 或 `config.d` 目录中的独立文件中的 `<storage_configuration>` 标签内定义。 配置结构: @@ -530,9 +575,9 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 标签: -- `<disk_name_N>` — 磁盘名,名称必须与其他磁盘不同. -- `path` — 服务器将用来存储数据 (`data` 和 `shadow` 目录) 的路径, 应当以 ‘/’ 结尾. -- `keep_free_space_bytes` — 需要保留的剩余磁盘空间. +- `<disk_name_N>` — 磁盘名,名称必须与其他磁盘不同. +- `path` — 服务器将用来存储数据 (`data` 和 `shadow` 目录) 的路径, 应当以 ‘/’ 结尾. +- `keep_free_space_bytes` — 需要保留的剩余磁盘空间.
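+配置生效后,可以用类似下面的查询(示意)在系统表中核对磁盘是否按预期注册:
+
+``` sql
+SELECT
+    name,
+    path,
+    formatReadableSize(free_space) AS free,
+    formatReadableSize(total_space) AS total
+FROM system.disks;
+```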
磁盘定义的顺序无关紧要。 @@ -567,11 +612,12 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 标签: -- `policy_name_N` — 策略名称,不能重复。 -- `volume_name_N` — 卷名称,不能重复。 -- `disk` — 卷中的磁盘。 -- `max_data_part_size_bytes` — 任意卷上的磁盘可以存储的数据片段的最大大小。 -- `move_factor` — 当可用空间少于这个因子时,数据将自动的向下一个卷(如果有的话)移动 (默认值为 0.1)。 +- `policy_name_N` — 策略名称,不能重复。 +- `volume_name_N` — 卷名称,不能重复。 +- `disk` — 卷中的磁盘。 +- `max_data_part_size_bytes` — 卷中的磁盘可以存储的数据片段的最大大小。 +- `move_factor` — 当可用空间少于这个因子时,数据将自动地向下一个卷(如果有的话)移动 (默认值为 0.1)。 +- `prefer_not_to_merge` - 禁止在这个卷中进行数据合并:该选项启用时,将无法对该卷上的数据进行合并,主要用于慢速磁盘。 配置示例: @@ -600,19 +646,31 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 0.2
+        <small_jbod_with_external_no_merges>
+            <volumes>
+                <main>
+                    <disk>jbod1</disk>
+                </main>
+                <external>
+                    <disk>external</disk>
+                    <prefer_not_to_merge>true</prefer_not_to_merge>
+                </external>
+            </volumes>
+        </small_jbod_with_external_no_merges>
...
``` -在给出的例子中, `hdd_in_order` 策略实现了 [循环制](https://zh.wikipedia.org/wiki/循环制) 方法。因此这个策略只定义了一个卷(`single`),数据片段会以循环的顺序全部存储到它的磁盘上。当有多个类似的磁盘挂载到系统上,但没有配置 RAID 时,这种策略非常有用。请注意一个每个独立的磁盘驱动都并不可靠,你可能需要用 3 或更大的复制因此来补偿它。 +在给出的例子中, `hdd_in_order` 策略实现了 [循环制](https://zh.wikipedia.org/wiki/循环制) 方法。因此这个策略只定义了一个卷(`single`),数据片段会以循环的顺序全部存储到它的磁盘上。当有多个类似的磁盘挂载到系统上,但没有配置 RAID 时,这种策略非常有用。请注意,每个独立的磁盘驱动器都并不可靠,您可能需要用3份或更多的副本来补偿。 如果在系统中有不同类型的磁盘可用,可以使用 `moving_from_ssd_to_hdd`。`hot` 卷由 SSD 磁盘(`fast_ssd`)组成,这个卷上可以存储的数据片段的最大大小为 1GB。所有大于 1GB 的数据片段都会被直接存储到 `cold` 卷上,`cold` 卷包含一个名为 `disk1` 的 HDD 磁盘。 同样,一旦 `fast_ssd` 被填充超过 80%,数据会通过后台进程向 `disk1` 进行转移。 存储策略中卷的枚举顺序是很重要的。因为当一个卷被充满时,数据会向下一个卷转移。磁盘的枚举顺序同样重要,因为数据是依次存储在磁盘上的。 -在创建表时,可以将一个配置好的策略应用到表: +在创建表时,可以应用存储策略: ``` sql CREATE TABLE table_with_non_default_policy ( EventDate Date, OrderID UInt64, BannerID UInt64, SearchPhrase String ) ENGINE = MergeTree ORDER BY (OrderID, BannerID) PARTITION BY toYYYYMM(EventDate) SETTINGS storage_policy = 'moving_from_ssd_to_hdd' ``` -`default` 存储策略意味着只使用一个卷,这个卷只包含一个在 `<path>` 中定义的磁盘。表创建后,它的存储策略就不能改变了。 +`default` 存储策略意味着只使用一个卷,这个卷只包含一个在 `<path>` 中定义的磁盘。您可以使用[ALTER TABLE ... MODIFY SETTING]来修改存储策略,新的存储策略应该包含所有以前的磁盘和卷,并使用相同的名称。 可以通过 [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) 设置调整执行后台任务的线程数。 @@ -634,24 +692,121 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' 对于 `MergeTree` 表,数据通过以下不同的方式写入到磁盘当中: -- 作为插入(`INSERT`查询)的结果 -- 在后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations)期间 -- 当从另一个副本下载时 -- 作为 [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区的结果 +- 插入(`INSERT`查询) +- 后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations) +- 从另一个副本下载 +- [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 除了数据变异和冻结分区以外的情况下,数据按照以下逻辑存储到卷或磁盘上: -1. 首个卷(按定义顺序)拥有足够的磁盘空间存储数据片段(`unreserved_space > current_part_size`)并且允许存储给定数据片段的大小(`max_data_part_size_bytes > current_part_size`) -2. 在这个数据卷内,紧挨着先前存储数据的那块磁盘之后的磁盘,拥有比数据片段大的剩余空间。(`unreserved_space - keep_free_space_bytes > current_part_size`) +1. 首个卷(按定义顺序)拥有足够的磁盘空间存储数据片段(`unreserved_space > current_part_size`)并且允许存储给定数据片段的大小(`max_data_part_size_bytes > current_part_size`) +2. 在这个数据卷内,紧挨着先前存储数据的那块磁盘之后的磁盘,拥有比数据片段大的剩余空间。(`unreserved_space - keep_free_space_bytes > current_part_size`)
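+数据写入后,可以用类似下面的查询(示意,沿用上文的 `table_with_non_default_policy` 表)观察各活跃数据片段实际落在哪块磁盘上:
+
+``` sql
+SELECT
+    name,
+    disk_name,
+    path
+FROM system.parts
+WHERE table = 'table_with_non_default_policy' AND active;
+```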
-更进一步,数据变异和分区冻结使用的是 [硬链接](https://en.wikipedia.org/wiki/Hard_link)。不同磁盘之间的硬链接是不支持的,所以在这种情况下数据片段都会被存储到初始化的那一块磁盘上。 +更进一步,数据变异和分区冻结使用的是 [硬链接](https://en.wikipedia.org/wiki/Hard_link)。不同磁盘之间的硬链接是不支持的,所以在这种情况下数据片段都会被存储到原来的那一块磁盘上。 -在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。同时,具体细节可以通过服务器日志查看。 +在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。具体细节可以通过服务器日志查看。 用户可以通过 [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 数据移动不会妨碍到数据复制。也就是说,同一张表的不同副本可以指定不同的存储策略。 -在后台合并和数据变异之后,就的数据片段会在一定时间后被移除 (`old_parts_lifetime`)。在这期间,他们不能被移动到其他的卷或磁盘。也就是说,直到数据片段被完全移除,它们仍然会被磁盘占用空间计算在内。 +在后台合并和数据变异之后,旧的数据片段会在一定时间后被移除 (`old_parts_lifetime`)。在这期间,他们不能被移动到其他的卷或磁盘。也就是说,直到数据片段被完全移除,它们仍然会被磁盘占用空间计算在内。 + +## 使用S3进行数据存储 {#using-s3-data-storage} + +`MergeTree`系列表引擎允许使用[S3](https://aws.amazon.com/s3/)存储数据,需要修改磁盘类型为`S3`。 + +示例配置(标签名依据下方参数列表复原):
+
+``` xml
+<storage_configuration>
+    ...
+    <disks>
+        <s3>
+            <type>s3</type>
+            <endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
+            <access_key_id>your_access_key_id</access_key_id>
+            <secret_access_key>your_secret_access_key</secret_access_key>
+            <region></region>
+            <server_side_encryption_customer_key_base64>your_base64_encoded_customer_key</server_side_encryption_customer_key_base64>
+            <proxy>
+                <uri>http://proxy1</uri>
+                <uri>http://proxy2</uri>
+            </proxy>
+            <connect_timeout_ms>10000</connect_timeout_ms>
+            <request_timeout_ms>5000</request_timeout_ms>
+            <retry_attempts>10</retry_attempts>
+            <single_read_retries>4</single_read_retries>
+            <min_bytes_for_seek>1000</min_bytes_for_seek>
+            <metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
+            <cache_enabled>true</cache_enabled>
+            <cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
+            <skip_access_check>false</skip_access_check>
+        </s3>
+    </disks>
+    ...
+</storage_configuration>
+```
+ +必须的参数: + +- `endpoint` - S3的结点URL,以`path`或`virtual hosted`[格式](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html)书写。 +- `access_key_id` - S3的Access Key ID。 +- `secret_access_key` - S3的Secret Access Key。 + +可选参数: + +- `region` - S3的区域名称 +- `use_environment_credentials` - 从环境变量AWS_ACCESS_KEY_ID、AWS_SECRET_ACCESS_KEY和AWS_SESSION_TOKEN中读取认证参数。默认值为`false`。 +- `use_insecure_imds_request` - 如果设置为`true`,S3客户端在认证时会使用不安全的IMDS请求。默认值为`false`。 +- `proxy` - 访问S3结点URL时代理设置。每一个`uri`项的值都应该是合法的代理URL。 +- `connect_timeout_ms` - Socket连接超时时间,默认值为`10000`,即10秒。 +- `request_timeout_ms` - 请求超时时间,默认值为`5000`,即5秒。 +- `retry_attempts` - 请求失败后的重试次数,默认值为10。 +- `single_read_retries` - 读过程中连接丢失后重试次数,默认值为4。 +- `min_bytes_for_seek` - 使用查找操作,而不是顺序读操作的最小字节数,默认值为1000。 +- `metadata_path` - 本地存放S3元数据文件的路径,默认值为`/var/lib/clickhouse/disks/<disk_name>/` +- `cache_enabled` - 是否允许缓存标记和索引文件。默认值为`true`。 +- `cache_path` - 本地缓存标记和索引文件的路径。默认值为`/var/lib/clickhouse/disks/<disk_name>/cache/`。 +- `skip_access_check` - 如果为`true`,ClickHouse启动时不检查磁盘是否可用。默认为`false`。 +- `server_side_encryption_customer_key_base64` - 如果指定该项的值,请求时会加上为了访问SSE-C加密数据而必须的头信息。 + +S3磁盘也可以设置冷热存储(标签名依据上方参数列表及残留值复原):
+```xml
+<storage_configuration>
+    ...
+    <disks>
+        <s3>
+            <type>s3</type>
+            <endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
+            <access_key_id>your_access_key_id</access_key_id>
+            <secret_access_key>your_secret_access_key</secret_access_key>
+        </s3>
+    </disks>
+    <policies>
+        <s3>
+            <volumes>
+                <main>
+                    <disk>s3</disk>
+                </main>
+            </volumes>
+        </s3>
+        <s3_cold>
+            <volumes>
+                <main>
+                    <disk>default</disk>
+                </main>
+                <external>
+                    <disk>s3</disk>
+                </external>
+            </volumes>
+            <move_factor>0.2</move_factor>
+        </s3_cold>
+    </policies>
+    ...
+</storage_configuration>
+```
+ +指定了`cold`选项后,本地磁盘剩余空间如果小于`move_factor * disk_size`,或有TTL设置时,数据就会定时迁移至S3了。 [原始文章](https://clickhouse.tech/docs/en/operations/table_engines/mergetree/) diff --git a/docs/zh/engines/table-engines/special/file.md b/docs/zh/engines/table-engines/special/file.md index 08f0a58070f..4464dcf198c 100644 --- a/docs/zh/engines/table-engines/special/file.md +++ b/docs/zh/engines/table-engines/special/file.md @@ -54,7 +54,7 @@ SELECT * FROM file_engine_table ## 在 Clickhouse-local 中的使用 {#zai-clickhouse-local-zhong-de-shi-yong} -使用 [ツ环板-ョツ嘉ッツ偲](../../../engines/table-engines/special/file.md) 时,File 引擎除了 `Format` 之外,还可以接受文件路径参数。可以使用数字或人类可读的名称来指定标准输入/输出流,例如 `0` 或 `stdin`,`1` 或 `stdout`。 +使用 [clickhouse-local](../../../operations/utilities/clickhouse-local.md) 时,File 引擎除了 `Format` 之外,还可以接收文件路径参数。可以使用数字或名称来指定标准输入/输出流,例如 `0` 或 `stdin`,`1` 或 `stdout`。 **例如:** ``` bash diff --git a/docs/zh/engines/table-engines/special/set.md b/docs/zh/engines/table-engines/special/set.md index 71271b0d7ca..a4fd0d85bd1 100644 --- a/docs/zh/engines/table-engines/special/set.md +++ b/docs/zh/engines/table-engines/special/set.md @@ -1,4 +1,4 @@ -# 设置 {#set} +# 集合 {#set} 始终存在于 RAM 中的数据集。它适用于IN运算符的右侧(请参见 «IN运算符» 部分)。 diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index 59deecbb166..1bae0ad1df3 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -488,7 +488,7 @@ max_alter_threads 2 Say Hi! - + ``` ``` bash diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md index a400696fc11..615f5ef933d 100644 --- a/docs/zh/operations/server-configuration-parameters/settings.md +++ b/docs/zh/operations/server-configuration-parameters/settings.md @@ -796,7 +796,7 @@ ClickHouse使用ZooKeeper存储复制表副本的元数据。 如果未使用复 The `index` attribute specifies the node order when trying to connect to the ZooKeeper cluster. - `session_timeout` — Maximum timeout for the client session in milliseconds. -- `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) 隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄.陇.貌路.隆拢脳枚脢虏.麓脢for脱 可选。 +- `root` — 被ClickHouse服务器用作根节点的[znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes)。可选。 - `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional.
**配置示例** @@ -823,7 +823,7 @@ ClickHouse使用ZooKeeper存储复制表副本的元数据。 如果未使用复 **另请参阅** - [复制](../../engines/table-engines/mergetree-family/replication.md) -- [动物园管理员程序员指南](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) +- [zookeeper管理指南](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) ## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper} diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 720b822ce29..ef4f4f86d01 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -817,21 +817,22 @@ load_balancing = first_or_random 为了保持一致性(以获取相同数据拆分的不同部分),此选项仅在设置了采样键时有效。 副本滞后不受控制。 -## 编译 {#compile} +## compile_expressions {#compile-expressions} -启用查询的编译。 默认情况下,0(禁用)。 +启用或禁用在运行时使用 LLVM 将常用的简单函数和运算符编译为本机代码。 -编译仅用于查询处理管道的一部分:用于聚合的第一阶段(GROUP BY)。 -如果编译了管道的这一部分,则由于部署周期较短和内联聚合函数调用,查询可能运行得更快。 对于具有多个简单聚合函数的查询,可以看到最大的性能改进(在极少数情况下可快四倍)。 通常,性能增益是微不足道的。 在极少数情况下,它可能会减慢查询执行速度。 +可能的值: -## min_count_to_compile {#min-count-to-compile} +- 0 — 禁用。 +- 1 — 启用。 -在运行编译之前可能使用已编译代码块的次数。 默认情况下,3。 -For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values ​​starting with 1. Compilation normally takes about 5-10 seconds. -如果该值为1或更大,则编译在单独的线程中异步进行。 结果将在准备就绪后立即使用,包括当前正在运行的查询。 +默认值:`1`。 -对于查询中使用的聚合函数的每个不同组合以及GROUP BY子句中的键类型,都需要编译代码。 -The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. +## min_count_to_compile_expression {#min-count-to-compile-expression} + +同一表达式在被编译前需要执行的最少次数。 + +默认值:`3`。 ## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers} diff --git a/docs/zh/operations/system-tables/asynchronous_metric_log.md b/docs/zh/operations/system-tables/asynchronous_metric_log.md index 9fbe15b8507..ff7593768d3 100644 --- a/docs/zh/operations/system-tables/asynchronous_metric_log.md +++ b/docs/zh/operations/system-tables/asynchronous_metric_log.md @@ -3,6 +3,6 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -## 系统。asynchronous_metric_log {#system-tables-async-log} +## system.asynchronous_metric_log {#system-tables-async-log} 包含 `system.asynchronous_metrics` 的历史值 (见 [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)) diff --git a/docs/zh/operations/system-tables/asynchronous_metrics.md b/docs/zh/operations/system-tables/asynchronous_metrics.md index 805477c9f47..5a302f6da7b 100644 --- a/docs/zh/operations/system-tables/asynchronous_metrics.md +++ b/docs/zh/operations/system-tables/asynchronous_metrics.md @@ -3,14 +3,14 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -# 系统。asynchronous_metrics {#system_tables-asynchronous_metrics} +# system.asynchronous_metrics {#system_tables-asynchronous_metrics} 包含在后台定期计算的指标。例如,正在使用的内存量。 列: -- `metric` ([字符串](../../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
+- `metric` ([字符串](../../sql-reference/data-types/string.md)) — 指标名。 +- `value` ([Float64](../../sql-reference/data-types/float.md)) — 指标值。 **示例** @@ -34,8 +34,7 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 ``` **另请参阅** - -- [监测](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. -- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. -- [系统。metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [监测](../../operations/monitoring.md) — ClickHouse监控的基本概念。 +- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — 包含即时计算的指标。 +- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — 包含出现的事件的次数。 +- [系统。metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — 包含`system.metrics` 和 `system.events`表中的指标的历史值。 diff --git a/docs/zh/operations/system-tables/clusters.md b/docs/zh/operations/system-tables/clusters.md index 1e5935c276e..bcafff4970a 100644 --- a/docs/zh/operations/system-tables/clusters.md +++ b/docs/zh/operations/system-tables/clusters.md @@ -1,24 +1,20 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- - -# 系统。集群 {#system-clusters} +# system.clusters{#system-clusters} 包含有关配置文件中可用的集群及其中的服务器的信息。 列: -- `cluster` (String) — The cluster name. -- `shard_num` (UInt32) — The shard number in the cluster, starting from 1. -- `shard_weight` (UInt32) — The relative weight of the shard when writing data. -- `replica_num` (UInt32) — The replica number in the shard, starting from 1. -- `host_name` (String) — The host name, as specified in the config. -- `host_address` (String) — The host IP address obtained from DNS. -- `port` (UInt16) — The port to use for connecting to the server. -- `user` (String) — The name of the user for connecting to the server. 
-- `errors_count` (UInt32)-此主机无法到达副本的次数。 -- `estimated_recovery_time` (UInt32)-剩下的秒数,直到副本错误计数归零,它被认为是恢复正常。 +- `cluster` (String) — 集群名。 +- `shard_num` (UInt32) — 集群中的分片序号,从1开始。 +- `shard_weight` (UInt32) — 写数据时该分片的相对权重。 +- `replica_num` (UInt32) — 分片中的副本序号,从1开始。 +- `host_name` (String) — 配置中指定的主机名。 +- `host_address` (String) — 从DNS获取的主机IP地址。 +- `port` (UInt16) — 连接到服务器的端口。 +- `user` (String) — 连接到服务器的用户名。 +- `errors_count` (UInt32) - 此主机无法访问副本的次数。 +- `slowdowns_count` (UInt32) - 与对冲请求建立连接时导致更改副本的减速次数。 +- `estimated_recovery_time` (UInt32) - 剩下的秒数,直到副本错误计数归零并被视为恢复正常。 请注意,`errors_count` 在对集群的每次查询时更新一次,而 `estimated_recovery_time` 则按需重新计算。因此可能出现 `errors_count` 非零而 `estimated_recovery_time` 为零的情况;此时,下一次查询会将 `errors_count` 清零,并如同副本没有错误一样尝试使用它。 @@ -27,3 +23,5 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - [表引擎分布式](../../engines/table-engines/special/distributed.md) - [distributed_replica_error_cap设置](../../operations/settings/settings.md#settings-distributed_replica_error_cap) - [distributed_replica_error_half_life设置](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) + +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/clusters) diff --git a/docs/zh/operations/system-tables/columns.md b/docs/zh/operations/system-tables/columns.md index 24296dc715c..b21be98c0dc 100644 @@ -25,3 +25,5 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. - `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. - `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. + +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/columns) diff --git a/docs/zh/operations/system-tables/data_type_families.md b/docs/zh/operations/system-tables/data_type_families.md index db08ff0371b..000abfce65d 100644 --- a/docs/zh/operations/system-tables/data_type_families.md +++ b/docs/zh/operations/system-tables/data_type_families.md @@ -16,7 +16,7 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 **示例** ``` sql -SELECT * FROM system.data_type_families WHERE alias_to = 'String' +SELECT * FROM system.data_type_families WHERE alias_to = 'String'; ``` ``` text diff --git a/docs/zh/operations/system-tables/query_log.md b/docs/zh/operations/system-tables/query_log.md index aa954fc4845..b7661b73a50 100644 --- a/docs/zh/operations/system-tables/query_log.md +++ b/docs/zh/operations/system-tables/query_log.md @@ -76,14 +76,14 @@ ClickHouse不会自动从表中删除数据。更多详情请看 [introduction]( - `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或其他TCP客户端的Patch component。 - `http_method` (UInt8) — 发起查询的HTTP方法. 可能值: - 0 — TCP接口的查询. - - 1 — `GET` + - 1 — `GET` - 2 — `POST` - `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — HTTP请求中传递的 `UserAgent` 头。 - `quota_key` ([String](../../sql-reference/data-types/string.md)) — 在[quotas](../../operations/quotas.md) 配置里设置的“quota key” (见 `keyed`). - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. -- `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — 参与查询的线程数.
-- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — 衡量不同指标的计数器。 可以在[system.events](../../operations/system-tables/events.md#system_tables-events)中找到它们的描述。 -- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — `ProfileEvents.Names` 列中列出的指标的值。 +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — 衡量不同指标的计数器。可以在 [系统。活动](../../operations/system-tables/events.md#system_tables-events) 表中找到它们的描述。 +- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — 客户端运行查询时更改的设置的名称和值。要启用对设置更改的日志记录,请将 `log_query_settings` 参数设置为1。 +- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — 参与查询的线程ID。 - `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — 客户端运行查询时更改的设置的名称。 要启用对设置的日志记录更改,请将log_query_settings参数设置为1。 - `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — `Settings.Names` 列中列出的设置的值。 **示例** @@ -133,10 +133,8 @@ http_user_agent: quota_key: revision: 54434 thread_ids: [] -ProfileEvents.Names: [] -ProfileEvents.Values: [] -Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage'] -Settings.Values: ['0','random','1','10000000000'] +ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1} +Settings: {'background_pool_size':'32','load_balancing':'random','allow_suspicious_low_cardinality_types':'1','distributed_aggregation_memory_efficient':'1','skip_unavailable_shards':'1','log_queries':'1','max_bytes_before_external_group_by':'20000000000','max_bytes_before_external_sort':'20000000000','allow_introspection_functions':'1'} ``` **另请参阅** diff --git a/docs/zh/operations/system-tables/query_thread_log.md b/docs/zh/operations/system-tables/query_thread_log.md index 8043be8bd75..33583f3b730 100644 --- a/docs/zh/operations/system-tables/query_thread_log.md +++ b/docs/zh/operations/system-tables/query_thread_log.md @@ -61,8 +61,7 @@ ClickHouse不会自动从表中删除数据。 看 [导言](../../operations/sys - `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — The `UserAgent` http请求中传递的标头。 - `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — The “quota key” 在指定 [配额](../../operations/quotas.md) 设置(见 `keyed`). - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. -- `ProfileEvents.Names` ([数组(字符串)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [系统。活动](#system_tables-events). -- `ProfileEvents.Values` ([数组(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` 列。 +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — 衡量该线程不同指标的计数器。可以在 [系统。活动](#system_tables-events) 表中找到它们的描述。
**示例** @@ -108,8 +107,7 @@ http_method: 0 http_user_agent: quota_key: revision: 54434 -ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds'] -ProfileEvents.Values: [1,97,81,5,81] +ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'CompressedReadBufferBlocks':1,'CompressedReadBufferBytes':10,'IOBufferAllocs':1,'IOBufferAllocBytes':89,'ContextLock':15,'RWLockAcquiredReadLocks':1} ... ``` diff --git a/docs/zh/operations/system-tables/tables.md b/docs/zh/operations/system-tables/tables.md index a690e938a3a..0c3e913b9bb 100644 --- a/docs/zh/operations/system-tables/tables.md +++ b/docs/zh/operations/system-tables/tables.md @@ -5,15 +5,15 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 # 系统。表 {#system-tables} -包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`. +包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`。 此表包含以下列(列类型显示在括号中): - `database` (String) — The name of the database the table is in. +- `database` (String) — 表所在的数据库名。 -- `name` (String) — Table name. +- `name` (String) — 表名。 -- `engine` (String) — Table engine name (without parameters). +- `engine` (String) — 表引擎名 (不包含参数)。 - `is_temporary` (UInt8)-指示表是否是临时的标志。 @@ -23,11 +23,11 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `metadata_modification_time` (DateTime)-表元数据的最新修改时间。 -- `dependencies_database` (数组(字符串))-数据库依赖关系. +- `dependencies_database` (数组(字符串))-数据库依赖关系。 - `dependencies_table` (数组(字符串))-表依赖关系 ([MaterializedView](../../engines/table-engines/special/materializedview.md) 基于当前表的表)。 -- `create_table_query` (String)-用于创建表的查询。 +- `create_table_query` (String)-用于创建表的SQL语句。 - `engine_full` (String)-表引擎的参数。 @@ -44,11 +44,15 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) - [分布](../../engines/table-engines/special/distributed.md#distributed) -- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则 `Null` (包括内衣 `Buffer` 表)。 +- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则行数为`Null`(包括底层 `Buffer` 表)。 -- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则 `Null` (**不** 包括任何底层存储)。 +- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则字节数为`Null` (即**不** 包括任何底层存储)。 - - If the table stores data on disk, returns used space on disk (i.e. compressed). - - 如果表在内存中存储数据,返回在内存中使用的近似字节数. + - 如果表将数据存在磁盘上,返回实际使用的磁盘空间(压缩后)。 + - 如果表在内存中存储数据,返回在内存中使用的近似字节数。 -该 `system.tables` 表中使用 `SHOW TABLES` 查询实现。 +- `lifetime_rows` (Nullable(UInt64))-服务启动后插入的总行数(只针对`Buffer`表)。 + +`system.tables` 表被用于 `SHOW TABLES` 的查询实现中。 + +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/tables) diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md index f7e816ccee6..ca767fba7aa 100644 --- a/docs/zh/operations/system-tables/zookeeper.md +++ b/docs/zh/operations/system-tables/zookeeper.md @@ -3,13 +3,13 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -# 系统。动物园管理员 {#system-zookeeper} +# system.zookeeper {#system-zookeeper} -如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。 -查询必须具有 ‘path’ WHERE子句中的相等条件或者在某个集合中的条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 +如果未配置ZooKeeper,则该表不存在。 允许从配置中定义的ZooKeeper集群读取数据。 +查询必须在 WHERE 子句中包含对 ‘path’ 的相等条件,这是您想要获取数据的 ZooKeeper 子节点的路径。 -查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。 -要输出所有根节点的数据,write path= ‘/’.
+查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出 `/clickhouse` 节点上所有子节点的数据。 +要输出所有根节点的数据,使用path= ‘/’. 如果在指定的路径 ‘path’ 不存在,将引发异常。 查询`SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` 输出`/` 和 `/clickhouse`节点上所有子节点的数据。 @@ -18,20 +18,20 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 列: -- `name` (String) — The name of the node. -- `path` (String) — The path to the node. -- `value` (String) — Node value. -- `dataLength` (Int32) — Size of the value. -- `numChildren` (Int32) — Number of descendants. -- `czxid` (Int64) — ID of the transaction that created the node. -- `mzxid` (Int64) — ID of the transaction that last changed the node. -- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants. -- `ctime` (DateTime) — Time of node creation. -- `mtime` (DateTime) — Time of the last modification of the node. -- `version` (Int32) — Node version: the number of times the node was changed. -- `cversion` (Int32) — Number of added or removed descendants. -- `aversion` (Int32) — Number of changes to the ACL. -- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node. +- `name` (String) — 节点的名字。 +- `path` (String) — 节点的路径。 +- `value` (String) — 节点的值。 +- `dataLength` (Int32) — 节点的值长度。 +- `numChildren` (Int32) — 子节点的个数。 +- `czxid` (Int64) — 创建该节点的事务ID。 +- `mzxid` (Int64) — 最后修改该节点的事务ID。 +- `pzxid` (Int64) — 最后删除或者增加子节点的事务ID。 +- `ctime` (DateTime) — 节点的创建时间。 +- `mtime` (DateTime) — 节点的最后修改时间。 +- `version` (Int32) — 节点版本:节点被修改的次数。 +- `cversion` (Int32) — 增加或删除子节点的个数。 +- `aversion` (Int32) — ACL的修改次数。 +- `ephemeralOwner` (Int64) — 针对临时节点,拥有该节点的会话ID。 示例: @@ -77,3 +77,4 @@ numChildren: 7 pzxid: 987021252247 path: /clickhouse/tables/01-08/visits/replicas ``` +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/zookeeper) diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 446feac96ce..4d1cdca71e5 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -174,7 +174,7 @@ MODIFY ORDER BY new_expression 该操作仅支持 [`MergeTree`](../../engines/table-engines/mergetree-family/mergetree.md) 系列表 (含 [replicated](../../engines/table-engines/mergetree-family/replication.md) 表)。 下列操作是允许的: -- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - 在表的元数据中增加索引说明 +- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - 在表的元数据中增加索引说明 - `ALTER TABLE [db].name DROP INDEX name` - 从表的元数据中删除索引描述,并从磁盘上删除索引文件 diff --git a/docs/zh/sql-reference/statements/system.md b/docs/zh/sql-reference/statements/system.md index 9952f383236..7f2b7ae1082 100644 --- a/docs/zh/sql-reference/statements/system.md +++ b/docs/zh/sql-reference/statements/system.md @@ -95,7 +95,7 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk'; ## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache} 重置已编译的表达式缓存。用于ClickHouse开发和性能测试。 -当 `query/user/profile` 启用配置项 [compile](../../operations/settings/settings.md#compile)时,编译的表达式缓存开启。 +当 `query/user/profile` 启用配置项 [compile-expressions](../../operations/settings/settings.md#compile-expressions)时,编译的表达式缓存开启。 ## FLUSH LOGS {#query_language-system-flush_logs} @@ -209,7 +209,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] ### STOP FETCHES {#query_language-system-stop-fetches} -停止后台获取 `ReplicatedMergeTree`系列引擎表中插入的数据块。 +停止后台获取 `ReplicatedMergeTree`系列引擎表中插入的数据块。
不管表引擎类型如何或表/数据库是否存在,都返回 `OK.`。 ``` sql SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name] ``` ### START FETCHES {#query_language-system-start-fetches} -启动后台获取 `ReplicatedMergeTree`系列引擎表中插入的数据块。 +启动后台获取 `ReplicatedMergeTree`系列引擎表中插入的数据块。 不管表引擎类型如何或表/数据库是否存在,都返回 `OK.`。 ``` sql SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name] ``` ### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends} -停止通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。 +停止通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。 ``` sql SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name] ``` ### START REPLICATED SENDS {#query_language-system-start-replicated-sends} -启动通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。 +启动通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。 ``` sql SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name] ``` diff --git a/docs/zh/sql-reference/syntax.md b/docs/zh/sql-reference/syntax.md index c05c5a1a7bf..644dc646726 100644 --- a/docs/zh/sql-reference/syntax.md +++ b/docs/zh/sql-reference/syntax.md @@ -1,39 +1,42 @@ --- toc_priority: 31 toc_title: SQL语法 + --- # SQL语法 {#syntax} -CH有2类解析器:完整SQL解析器(递归式解析器),以及数据格式解析器(快速流式解析器) +ClickHouse有2类解析器:完整SQL解析器(递归式解析器),以及数据格式解析器(快速流式解析器) 除了 `INSERT` 查询,其它情况下仅使用完整SQL解析器。 `INSERT`查询会同时使用2种解析器: + ``` sql INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ``` -含`INSERT INTO t VALUES` 的部分由完整SQL解析器处理,包含数据的部分 `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` 交给快速流式解析器解析。通过设置参数 [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions),你也可以对数据部分开启完整SQL解析器。当 `input_format_values_interpret_expressions = 1` 时,CH优先采用快速流式解析器来解析数据。如果失败,CH再尝试用完整SQL解析器来处理,就像处理SQL [expression](#syntax-expressions) 一样。 +含`INSERT INTO t VALUES` 的部分由完整SQL解析器处理,包含数据的部分 `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` 交给快速流式解析器解析。通过设置参数 [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions),你也可以对数据部分开启完整SQL解析器。当 `input_format_values_interpret_expressions = 1` 时,ClickHouse优先采用快速流式解析器来解析数据。如果失败,ClickHouse再尝试用完整SQL解析器来处理,就像处理SQL [expression](#syntax-expressions) 一样。 数据可以采用任何格式。当ClickHouse接收到请求时,服务端先在内存中计算不超过 [max_query_size](../operations/settings/settings.md#settings-max_query_size) 字节的请求数据(默认1 mb),然后剩下部分交给快速流式解析器。 -这将避免在处理大型的 `INSERT`语句时出现问题。 +当 `INSERT` 语句中使用 `Values` 格式时,看起来数据部分的解析和解析`SELECT` 中的表达式相同,但并不是这样的。 `Values` 格式有非常多的限制。 -当 `INSERT` 语句中使用 `Values` 形式时,看起来 数据部分的解析和解析`SELECT` 中的表达式相同,但并不是这样的。 `Values` 形式非常有限。 -该篇的剩余部分涵盖了完整SQL解析器。关于格式解析的更多信息,参见 [Formats](../interfaces/formats.md) 章节。 +本文的剩余部分涵盖了完整SQL解析器。关于格式解析的更多信息,参见 [Formats](../interfaces/formats.md) 章节。 ## 空白 {#spaces} -sql语句中(包含sql的起始和结束)可以有任意的空字符,这些空字符类型包括:空格字符,tab制表符,换行符,CR符,换页符等。 +sql语句的语法结构部分之间(标识符之间、部分符号之间、包括sql的起始和结束)可以有任意的空白字符,这些空白字符类型包括:空格字符,tab制表符,换行符,CR符,换页符等。 ## 注释 {#comments} -CH支持SQL风格或C语言风格的注释: +ClickHouse支持SQL风格或C语言风格的注释: + - SQL风格的注释以 `--` 开始,直到行末,`--` 后紧跟的空格可以忽略 -- C语言风格的注释以 `/*` 开始,以 `*/` 结束,支持多行形式,同样可以省略 `/*` 后的空格 +- C语言风格的注释以 `/*` 开始,以 `*/` 结束,可以跨行,同样可以省略 `/*` 后的空格 ## 关键字 {#syntax-keywords} 以下场景的关键字是大小写不敏感的: + - 标准SQL。例如,`SELECT`, `select` 和 `SeLeCt` 都是允许的 - 在某些流行的RDBMS中被实现的关键字,例如,`DateTime` 和 `datetime`是一样的 你可以在系统表 [system.data_type_families](../operations/system-tables/data_type_families.md#system_tables-data_type_families) 中检查某个数据类型的名称是否是大小写敏感型。
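+例如,下面的查询(示意)列出几个数据类型名称的大小写敏感属性:
+
+``` sql
+SELECT name, case_insensitive, alias_to
+FROM system.data_type_families
+WHERE name IN ('DateTime', 'String', 'Decimal');
+```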
和标准SQL相反,所有其它的关键字都是 **大小写敏感的**,包括函数名称。 -In contrast to standard SQL, all other keywords (including functions names) are **case-sensitive**. -关键字不是保留的;它们仅在相应的上下文中才会被处理。如果你使用和关键字同名的 [变量名](#syntax-identifiers) ,需要使用双引号或转移符将它们包含起来。例如:如果表 `table_name` 包含列 `"FROM"`,那么 `SELECT "FROM" FROM table_name` 是合法的 +关键字不是保留的;它们仅在相应的上下文中才会被认为是关键字。如果你使用和关键字同名的 [标识符](#syntax-identifiers) ,需要使用双引号或反引号将它们包含起来。例如:如果表 `table_name` 包含列 `"FROM"`,那么 `SELECT "FROM" FROM table_name` 是合法的 -## 变量名 {#syntax-identifiers} +## 标识符 {#syntax-identifiers} -变量包括: -Identifiers are: +标识符包括: -- 集群,数据库,表,分区,列名称 +- 集群、数据库、表、分区、列的名称 - 函数 - 数据类型 -- 表达式别名 +- [表达式别名](https://clickhouse.tech/docs/zh/sql-reference/syntax/#syntax-expression_aliases) -变量名可以使用反引号包含起来 +标识符可以被括起或不括起,后者是推荐做法。 -没有使用反引号包含的变量名,必须匹配正则表达式 `^[a-zA-Z_][0-9a-zA-Z_]*$`,并且不能和 [关键字]相同 +没有括起的标识符,必须匹配正则表达式 `^[a-zA-Z_][0-9a-zA-Z_]*$`,并且不能和 [关键字](#syntax-keywords)相同。合法的标识符名称例如:`x`,`_1`,`X_y__Z123_`等。 -如果想使用和关键字同名的变量名称,或者在变量名称中包含其它符号,你需要通过双引号或转义符号,例如: `"id"`, `` `id` `` +如果想使用和关键字同名的标识符,或者在标识符中包含其它符号,你需要用双引号或反引号将其括起,例如: `"id"`, `` `id` `` ## 字符 {#literals} -CH包含数字,字母,括号,NULL值等字符 +字符包含数字,字符串,括号,NULL值等。 ### 数字 {#numeric} 数字类型字符会被做如下解析: + +- 首先,当做64位的有符号整数,使用函数 [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) - 如果失败,解析成64位无符号整数,同样使用函数 [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) - 如果还失败了,试图解析成浮点型数值,使用函数 [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) -Numeric literal tries to be parsed: - 最后,以上情形都不符合时,返回异常 @@ -82,13 +83,14 @@ Numeric literal tries to be parsed: 例如: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`.
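+作为补充(示意),可以用 `toTypeName` 观察不同数字字面量被解析成的类型:
+
+``` sql
+SELECT
+    toTypeName(1),                    -- UInt8
+    toTypeName(-1),                   -- Int8
+    toTypeName(18446744073709551615), -- UInt64
+    toTypeName(0.1);                  -- Float64
+```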
-### 字母 {#syntax-string-literal} -CH只支持用单引号包含的字母。特殊字符可通过反斜杠进行转义。下列转义字符都有相应的实际值: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`。其它情况下,以 `\c`形式出现的转义字符,当`c`表示任意字符时,转义字符会转换成`c`。这意味着你可以使用 `\'`和`\\`。该值将拥有[String](../sql-reference/data-types/string.md)类型。 +### 字符串 {#syntax-string-literal} + +ClickHouse只支持用单引号包含的字符串。特殊字符可通过反斜杠进行转义。下列转义字符都有相应的实际值: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`。其它情况下,以 `\c`形式出现的转义字符,当`c`表示任意字符时,转义字符会转换成`c`。这意味着你可以使用 `\'`和`\\`。该值将拥有[String](../sql-reference/data-types/string.md)类型。 在字符串中,你至少需要对 `'` 和 `\` 进行转义。单引号可以使用单引号转义,例如 `'It\'s'` 和 `'It''s'` 是相同的。 -### 括号 {#compound} +### 复合字符串 {#compound} 数组都是使用方括号进行构造 `[1, 2, 3]`,元组则使用圆括号 `(1, 'Hello, world!', 2)` 从技术上来讲,这些都不是字符串,而是包含创建数组和元组运算符的表达式。 @@ -97,17 +99,18 @@ CH只支持用单引号包含的字母。特殊字符可通过反斜杠进行转 ### NULL值 {#null-literal} -代表不存在的值 +代表不存在的值。 -为了能在表字段中存储NULL值,该字段必须声明为 [空值](../sql-reference/data-types/nullable.md) 类型 +为了能在表字段中存储NULL值,该字段必须声明为 [空值](../sql-reference/data-types/nullable.md) 类型。 根据数据的格式(输入或输出),NULL值有不同的表现形式。更多信息参见文档 [数据格式](../interfaces/formats.md#formats) -在处理 `NULL`时存在很多细微差别。例如,比较运算的至少一个参数为 `NULL` ,该结果也是 `NULL` 。与之类似的还有乘法运算, 加法运算,以及其它运算。更多信息,请参阅每种运算的文档部分。 +在处理 `NULL`时存在很多细微差别。例如,比较运算的至少一个参数为 `NULL` ,则该结果也是 `NULL` 。与之类似的还有乘法运算, 加法运算,以及其它运算。更多信息,请参阅每种运算的文档部分。 -在语句中,可以通过 [是否为NULL](operators/index.md#operator-is-null) 以及 [是否不为NULL](operators/index.md) 运算符,以及 `isNull` 、 `isNotNull` 函数来检查 `NULL` 值 +在语句中,可以通过 [IS NULL](operators/index.md#operator-is-null) 以及 [IS NOT NULL](operators/index.md) 运算符,以及 `isNull` 、 `isNotNull` 函数来检查 `NULL` 值 ## 函数 {#functions} -函数调用的写法,类似于变量并带有被圆括号包含的参数列表(可能为空)。与标准SQL不同,圆括号是必须的,不管参数列表是否为空。例如: `now()`。 + +函数调用的写法,类似于一个标识符后接被圆括号包含的参数列表(可能为空)。与标准SQL不同,圆括号是必须的,不管参数列表是否为空。例如: `now()`。 函数分为常规函数和聚合函数(参见“Aggregate functions”一章)。有些聚合函数包含2个参数列表,第一个参数列表中的参数被称为“parameters”。不包含“parameters”的聚合函数语法和常规函数是一样的。 @@ -116,12 +119,12 @@ CH只支持用单引号包含的字母。特殊字符可通过反斜杠进行转 在查询解析阶段,运算符会被转换成对应的函数,使用时请注意它们的优先级。例如: 表达式 `1 + 2 * 3 + 4` 会被解析成 `plus(plus(1, multiply(2, 3)), 4)`. - + ## 数据类型及数据库/表引擎 {#data_types-and-database-table-engines} `CREATE` 语句中的数据类型和表引擎写法与变量或函数类似。 -换句话说,它们可以用括号包含参数列表。更多信息,参见“数据类型,” “数据表引擎” 和 “CREATE语句”等章节 +换句话说,它们可以包含或不包含用括号包含的参数列表。更多信息,参见“数据类型,” “数据表引擎” 和 “CREATE语句”等章节 ## 表达式别名 {#syntax-expression_aliases} @@ -131,29 +134,30 @@ CH只支持用单引号包含的字母。特殊字符可通过反斜杠进行转 expr AS alias ``` -- `AS` — 用于定义别名的关键字。可以对表或select语句中的列定义别名(`AS` 可以省略) - 例如, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. +- `AS` — 用于定义别名的关键字。可以对表或select语句中的列定义别名(`AS` 可以省略) + 例如, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. - 在 [CAST函数](sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast) 中,`AS`有其它含义。请参见该函数的说明部分。 + 在 [CAST函数](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) 中,`AS`有其它含义。请参见该函数的说明部分。 -- `expr` — 任意CH支持的表达式. +- `expr` — 任意CH支持的表达式. - 例如, `SELECT column_name * 2 AS double FROM some_table`. + 例如, `SELECT column_name * 2 AS double FROM some_table`. -- `alias` — `expr` 的名称。别名必须符合 [变量名]](#syntax-identifiers) 语法. +- `alias` — `expr` 的名称。别名必须符合 [标识符](#syntax-identifiers) 语法. - 例如, `SELECT "table t".column_name FROM table_name AS "table t"`. + 例如, `SELECT "table t".column_name FROM table_name AS "table t"`. ### 用法注意 {#notes-on-usage} 别名在当前查询或子查询中是全局可见的,你可以在查询语句的任何位置对表达式定义别名 -别名在当前查询的子查询及不同子查询中是不可见的。例如,执行如下查询SQL: `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a` ,CH会提示异常 `Unknown identifier: num`. 
+别名在当前查询的子查询及不同子查询中是不可见的。例如,执行如下查询SQL: `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a` ,ClickHouse会提示异常 `Unknown identifier: num`. 如果给select子查询语句的结果列定义其别名,那么在外层可以使用该别名。例如, `SELECT n + m FROM (SELECT 1 AS n, 2 AS m)`. 注意列的别名和表的别名相同时的情形,考虑如下示例: + ``` sql CREATE TABLE t ( a Int, b Int ) ENGINE = TinyLog() ``` ``` sql SELECT argMax(a, b), sum(b) AS b FROM t ``` ``` text Received exception from server (version 18.14.17): Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query. ``` -在这个示例中,先声明了表 `t` 以及列 `b`。然后,在查询数据时,又定义了别名 `sum(b) AS b`。由于别名是全局的,CH使用表达式 `sum(b)` 来替换表达式 `argMax(a, b)` 中的变量 `b`。这种替换导致出现异常。 +在这个示例中,先声明了表 `t` 以及列 `b`。然后,在查询数据时,又定义了别名 `sum(b) AS b`。由于别名是全局的,ClickHouse使用表达式 `sum(b)` 来替换表达式 `argMax(a, b)` 中的变量 `b`。这种替换导致出现异常。 ## 星号 {#asterisk} select查询中,星号可以代替表达式使用。详情请参见“select”部分 ## 表达式 {#syntax-expressions} -表达式是函数、标识符、字符、运算符的应用程序、括号中的表达式、子查询或星号。它也可以包含别名。 +表达式是函数、标识符、字符、使用运算符的语句、括号中的表达式、子查询或星号。它也可以包含别名。 表达式列表是用逗号分隔的一个或多个表达式。 反过来,函数和运算符可以将表达式作为参数。 diff --git a/docs/zh/sql-reference/table-functions/postgresql.md b/docs/zh/sql-reference/table-functions/postgresql.md new file mode 100644 index 00000000000..df29c2c2891 --- /dev/null +++ b/docs/zh/sql-reference/table-functions/postgresql.md @@ -0,0 +1,120 @@ +--- +toc_priority: 42 +toc_title: postgresql +--- + +# postgresql {#postgresql} + +允许对存储在远程 PostgreSQL 服务器上的数据进行 `SELECT` 和 `INSERT` 查询. + +**语法** + +``` sql +postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) +``` + +**参数** + +- `host:port` — PostgreSQL 服务器地址. +- `database` — 远程数据库名称. +- `table` — 远程表名称. +- `user` — PostgreSQL 用户. +- `password` — 用户密码. +- `schema` — 非默认的 schema(模式)名称. 可选. + +**返回值** + +一个表对象,其列数与原 PostgreSQL 表的列数相同。 + +!!! info "Note" + 在`INSERT`查询中,为了区分表函数`postgresql(..)`和表名以及表的列名列表,你必须使用关键字`FUNCTION`或`TABLE FUNCTION`。请看下面的例子。 + +## 实现细节 {#implementation-details} + +`SELECT`查询在 PostgreSQL 上以 `COPY (SELECT ...) TO STDOUT` 的方式在只读的 PostgreSQL 事务中运行,每次在`SELECT`查询后提交。 + +简单的`WHERE`子句,如`=`、`!=`、`>`、`>=`、`<`、`<=`和`IN`,在PostgreSQL服务器上执行。 + +所有的连接、聚合、排序,`IN [ 数组 ]`条件和`LIMIT`采样约束只有在对PostgreSQL的查询结束后才会在ClickHouse中执行。 + +PostgreSQL 上的`INSERT`查询以`COPY "table_name" (field1, field2, ... fieldN) FROM STDIN`的方式在 PostgreSQL 事务中运行,每次`INSERT`语句后自动提交。 + +PostgreSQL 数组类型将转换为 ClickHouse 数组。 + +!!!
info "Note" + 要小心,在 PostgreSQL 中,像 Integer[] 这样的数组数据类型列可以在不同的行中包含不同维度的数组,但在 ClickHouse 中,只允许在所有的行中有相同维度的多维数组。 + +支持设置 PostgreSQL 字典源中 Replicas 的优先级。地图中的数字越大,优先级就越低。`0`代表最高的优先级。 + +**示例** + +PostgreSQL 中的表: + +``` text +postgres=# CREATE TABLE "public"."test" ( +"int_id" SERIAL, +"int_nullable" INT NULL DEFAULT NULL, +"float" FLOAT NOT NULL, +"str" VARCHAR(100) NOT NULL DEFAULT '', +"float_nullable" FLOAT NULL DEFAULT NULL, +PRIMARY KEY (int_id)); + +CREATE TABLE + +postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2); +INSERT 0 1 + +postgresql> SELECT * FROM test; + int_id | int_nullable | float | str | float_nullable + --------+--------------+-------+------+---------------- + 1 | | 2 | test | +(1 row) +``` + +从 ClickHouse 检索数据: + +```sql +SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'password') WHERE str IN ('test'); +``` + +``` text +┌─int_id─┬─int_nullable─┬─float─┬─str──┬─float_nullable─┐ +│ 1 │ ᴺᵁᴸᴸ │ 2 │ test │ ᴺᵁᴸᴸ │ +└────────┴──────────────┴───────┴──────┴────────────────┘ +``` + +插入数据: + +```sql +INSERT INTO TABLE FUNCTION postgresql('localhost:5432', 'test', 'test', 'postgrsql_user', 'password') (int_id, float) VALUES (2, 3); +SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'password'); +``` + +``` text +┌─int_id─┬─int_nullable─┬─float─┬─str──┬─float_nullable─┐ +│ 1 │ ᴺᵁᴸᴸ │ 2 │ test │ ᴺᵁᴸᴸ │ +│ 2 │ ᴺᵁᴸᴸ │ 3 │ │ ᴺᵁᴸᴸ │ +└────────┴──────────────┴───────┴──────┴────────────────┘ +``` + +使用非默认的表结构: + +```text +postgres=# CREATE SCHEMA "nice.schema"; + +postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer); + +postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i) +``` + +```sql +CREATE TABLE pg_table_schema_with_dots (a UInt32) + ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgrsql_user', 'password', 'nice.schema'); +``` + +**另请参阅** + +- [PostgreSQL 表引擎](../../engines/table-engines/integrations/postgresql.md) +- [使用 PostgreSQL 作为外部字典的来源](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) + +[原始文章](https://clickhouse.tech/docs/en/sql-reference/table-functions/postgresql/) diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md new file mode 100644 index 00000000000..c55412f4ddd --- /dev/null +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -0,0 +1,132 @@ +--- +toc_priority: 45 +toc_title: s3 +--- + +# S3 表函数 {#s3-table-function} + +提供类似于表的接口来 select/insert [Amazon S3](https://aws.amazon.com/s3/)中的文件。这个表函数类似于[hdfs](../../sql-reference/table-functions/hdfs.md),但提供了 S3 特有的功能。 + +**语法** + +``` sql +s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +``` + +**参数** + +- `path` — 带有文件路径的 Bucket url。在只读模式下支持以下通配符: `*`, `?`, `{abc,def}` 和 `{N..M}` 其中 `N`, `M` 是数字, `'abc'`, `'def'` 是字符串. 更多信息见[下文](#wildcards-in-path). +- `format` — 文件的[格式](../../interfaces/formats.md#formats). +- `structure` — 表的结构. 格式像这样 `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — 压缩类型. 支持的值: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. 参数是可选的. 默认情况下,通过文件扩展名自动检测压缩类型. 
+ +**返回值** + +一个具有指定结构的表,用于读取或写入指定文件中的数据。 + +**示例** + +从 S3 文件`https://storage.yandexcloud.net/my-test-bucket-768/data.csv`中选择表格的前两行: + +``` sql +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +类似的情况,但来源是`gzip`压缩的文件: + +``` sql +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +## 用法 {#usage-examples} + +假设我们在S3上有几个文件,URI如下: + +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv' + +计算以数字1至3结尾的文件的总行数: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 18 │ +└─────────┘ +``` + +计算这两个目录中所有文件的行的总量: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 24 │ +└─────────┘ +``` + +!!! 
warning "Warning" + 如果文件列表中包含有从零开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`。 + +计算名为 `file-000.csv`, `file-001.csv`, … , `file-999.csv` 文件的总行数: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +``` + +``` text +┌─count()─┐ +│ 12 │ +└─────────┘ +``` + +插入数据到 `test-data.csv.gz` 文件: + +``` sql +INSERT INTO FUNCTION s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2); +``` + +从已有的表插入数据到 `test-data.csv.gz` 文件: + +``` sql +INSERT INTO FUNCTION s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table; +``` + +**另请参阅** + +- [S3 引擎](../../engines/table-engines/integrations/s3.md) + +[原始文章](https://clickhouse.tech/docs/en/sql-reference/table-functions/s3/) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 7f85a3fc3d7..6929bd861ed 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -50,11 +50,15 @@ option (ENABLE_CLICKHOUSE_GIT_IMPORT "A tool to analyze Git repositories" option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_CLICKHOUSE_ALL}) + +option (ENABLE_CLICKHOUSE_KEEPER_CONVERTER "Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot" ${ENABLE_CLICKHOUSE_ALL}) + if (NOT USE_NURAFT) # RECONFIGURE_MESSAGE_LEVEL should not be used here, # since USE_NURAFT is set to OFF for FreeBSD and Darwin. - message (STATUS "clickhouse-keeper will not be built (lack of NuRaft)") + message (STATUS "clickhouse-keeper and clickhouse-keeper-converter will not be built (lack of NuRaft)") set(ENABLE_CLICKHOUSE_KEEPER OFF) + set(ENABLE_CLICKHOUSE_KEEPER_CONVERTER OFF) endif() if (CLICKHOUSE_SPLIT_BINARY) @@ -150,6 +154,12 @@ else() message(STATUS "ClickHouse keeper mode: OFF") endif() +if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) + message(STATUS "ClickHouse keeper-converter mode: ON") +else() + message(STATUS "ClickHouse keeper-converter mode: OFF") +endif() + if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES)) set(CLICKHOUSE_ONE_SHARED ON) endif() @@ -222,6 +232,10 @@ if (ENABLE_CLICKHOUSE_KEEPER) add_subdirectory (keeper) endif() +if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) + add_subdirectory (keeper-converter) +endif() + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) endif () @@ -231,9 +245,51 @@ if (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE) endif () if (CLICKHOUSE_ONE_SHARED) - add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${CLICKHOUSE_KEEPER_SOURCES}) - target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK} ${CLICKHOUSE_KEEPER_LINK}) - target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} 
${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_GIT_IMPORT_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE} ${CLICKHOUSE_KEEPER_INCLUDE}) + add_library(clickhouse-lib SHARED + ${CLICKHOUSE_SERVER_SOURCES} + ${CLICKHOUSE_CLIENT_SOURCES} + ${CLICKHOUSE_LOCAL_SOURCES} + ${CLICKHOUSE_BENCHMARK_SOURCES} + ${CLICKHOUSE_COPIER_SOURCES} + ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} + ${CLICKHOUSE_COMPRESSOR_SOURCES} + ${CLICKHOUSE_FORMAT_SOURCES} + ${CLICKHOUSE_OBFUSCATOR_SOURCES} + ${CLICKHOUSE_GIT_IMPORT_SOURCES} + ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} + ${CLICKHOUSE_KEEPER_SOURCES} + ${CLICKHOUSE_KEEPER_CONVERTER_SOURCES}) + + target_link_libraries(clickhouse-lib + ${CLICKHOUSE_SERVER_LINK} + ${CLICKHOUSE_CLIENT_LINK} + ${CLICKHOUSE_LOCAL_LINK} + ${CLICKHOUSE_BENCHMARK_LINK} + ${CLICKHOUSE_COPIER_LINK} + ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} + ${CLICKHOUSE_COMPRESSOR_LINK} + ${CLICKHOUSE_FORMAT_LINK} + ${CLICKHOUSE_OBFUSCATOR_LINK} + ${CLICKHOUSE_GIT_IMPORT_LINK} + ${CLICKHOUSE_ODBC_BRIDGE_LINK} + ${CLICKHOUSE_KEEPER_LINK} + ${CLICKHOUSE_KEEPER_CONVERTER_LINK}) + + target_include_directories(clickhouse-lib + ${CLICKHOUSE_SERVER_INCLUDE} + ${CLICKHOUSE_CLIENT_INCLUDE} + ${CLICKHOUSE_LOCAL_INCLUDE} + ${CLICKHOUSE_BENCHMARK_INCLUDE} + ${CLICKHOUSE_COPIER_INCLUDE} + ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} + ${CLICKHOUSE_COMPRESSOR_INCLUDE} + ${CLICKHOUSE_FORMAT_INCLUDE} + ${CLICKHOUSE_OBFUSCATOR_INCLUDE} + ${CLICKHOUSE_GIT_IMPORT_INCLUDE} + ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE} + ${CLICKHOUSE_KEEPER_INCLUDE} + ${CLICKHOUSE_KEEPER_CONVERTER_INCLUDE}) + set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "") install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) endif() @@ -264,6 +320,10 @@ if (CLICKHOUSE_SPLIT_BINARY) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-keeper) endif () + if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) + list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-keeper-converter) + endif () + set_target_properties(${CLICKHOUSE_ALL_TARGETS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_ALL_TARGETS}) @@ -314,6 +374,9 @@ else () if (ENABLE_CLICKHOUSE_KEEPER) clickhouse_target_link_split_lib(clickhouse keeper) endif() + if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) + clickhouse_target_link_split_lib(clickhouse keeper-converter) + endif() if (ENABLE_CLICKHOUSE_INSTALL) clickhouse_target_link_split_lib(clickhouse install) endif () @@ -374,6 +437,11 @@ else () install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper) endif () + if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) + add_custom_target (clickhouse-keeper-converter ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-converter DEPENDS clickhouse) + install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-converter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter) + endif () install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index c8f1a4eef47..859222c236e 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -37,6 +37,7 @@ #include #include + namespace fs = std::filesystem; /** A tool for evaluating ClickHouse performance. diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index a3419003e2b..c4aef014971 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1380,9 +1380,19 @@ private: have_error = true; } + const auto * exception = server_exception ? server_exception.get() : client_exception.get(); + // Sometimes you may get TOO_DEEP_RECURSION from the server, + // and TOO_DEEP_RECURSION should not fail the fuzzer check. + if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION) + { + have_error = false; + server_exception.reset(); + client_exception.reset(); + return true; + } + if (have_error) { - const auto * exception = server_exception ? 
server_exception.get() : client_exception.get(); fmt::print(stderr, "Error on processing query '{}': {}\n", ast_to_process->formatForErrorMessage(), exception->message()); // Try to reconnect after errors, for two reasons: @@ -2446,6 +2456,8 @@ public: { /// param_name value ++arg_num; + if (arg_num >= argc) + throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS); arg = argv[arg_num]; query_parameters.emplace(String(param_continuation), String(arg)); } diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index 50ba0c16a83..62fc076861c 100644 --- a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -17,3 +17,4 @@ #cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE #cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER +#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CONVERTER diff --git a/programs/keeper-converter/CMakeLists.txt b/programs/keeper-converter/CMakeLists.txt new file mode 100644 index 00000000000..d529f94d388 --- /dev/null +++ b/programs/keeper-converter/CMakeLists.txt @@ -0,0 +1,9 @@ +set (CLICKHOUSE_KEEPER_CONVERTER_SOURCES KeeperConverter.cpp) + +set (CLICKHOUSE_KEEPER_CONVERTER_LINK + PRIVATE + boost::program_options + dbms +) + +clickhouse_program_add(keeper-converter) diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp new file mode 100644 index 00000000000..15dbc8bd220 --- /dev/null +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -0,0 +1,61 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +int mainEntryClickHouseKeeperConverter(int argc, char ** argv) +{ + using namespace DB; + namespace po = boost::program_options; + + po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); + desc.add_options() + ("help,h", "produce help message") + ("zookeeper-logs-dir", po::value(), "Path to directory with ZooKeeper logs") + ("zookeeper-snapshots-dir", po::value(), "Path to directory with ZooKeeper snapshots") + ("output-dir", po::value(), "Directory to place output clickhouse-keeper snapshot") + ; + po::variables_map options; + po::store(po::command_line_parser(argc, argv).options(desc).run(), options); + Poco::AutoPtr console_channel(new Poco::ConsoleChannel); + + Poco::Logger * logger = &Poco::Logger::get("KeeperConverter"); + logger->setChannel(console_channel); + + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " --zookeeper-logs-dir /var/lib/zookeeper/data/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/data/version-2 --output-dir /var/lib/clickhouse/coordination/snapshots" << std::endl; + std::cout << desc << std::endl; + return 0; + } + + try + { + DB::KeeperStorage storage(500, ""); + + DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as(), logger); + DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as(), logger); + DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(storage.getZXID(), 1, std::make_shared()); + DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); + + DB::KeeperSnapshotManager manager(options["output-dir"].as(), 1); + auto snp = manager.serializeSnapshotToBuffer(snapshot); + auto path = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); + std::cout << "Snapshot serialized to path:" << path << std::endl; + } + catch (...) 
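+    // Blanket catch for the whole conversion pipeline: getCurrentExceptionMessage(true)
+    // renders the in-flight exception together with a stack trace, and
+    // getCurrentExceptionCode() becomes the process exit code, so e.g. an unreadable
+    // --zookeeper-logs-dir fails loudly with a non-zero status instead of crashing.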
+ { + std::cerr << getCurrentExceptionMessage(true) << '\n'; + return getCurrentExceptionCode(); + } + + return 0; +} diff --git a/programs/keeper-converter/clickhouse-keeper-converter.cpp b/programs/keeper-converter/clickhouse-keeper-converter.cpp new file mode 100644 index 00000000000..3cb6f99f837 --- /dev/null +++ b/programs/keeper-converter/clickhouse-keeper-converter.cpp @@ -0,0 +1,2 @@ +int mainEntryClickHouseKeeperConverter(int argc, char ** argv); +int main(int argc_, char ** argv_) { return mainEntryClickHouseKeeperConverter(argc_, argv_); } diff --git a/programs/main.cpp b/programs/main.cpp index 0e3d71c2c7b..225c1ac84de 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -59,6 +59,9 @@ int mainEntryClickHouseGitImport(int argc, char ** argv); #if ENABLE_CLICKHOUSE_KEEPER int mainEntryClickHouseKeeper(int argc, char ** argv); #endif +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER +int mainEntryClickHouseKeeperConverter(int argc, char ** argv); +#endif #if ENABLE_CLICKHOUSE_INSTALL int mainEntryClickHouseInstall(int argc, char ** argv); int mainEntryClickHouseStart(int argc, char ** argv); @@ -119,6 +122,9 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_KEEPER {"keeper", mainEntryClickHouseKeeper}, #endif +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER + {"keeper-converter", mainEntryClickHouseKeeperConverter}, +#endif #if ENABLE_CLICKHOUSE_INSTALL {"install", mainEntryClickHouseInstall}, {"start", mainEntryClickHouseStart}, diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 25c953c0b71..e1935c62725 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -115,6 +115,8 @@ void ODBCBlockInputStream::insertValue( assert_cast(column).insertValue(row.get(idx)); break; case ValueType::vtFixedString:[[fallthrough]]; + case ValueType::vtEnum8: + case ValueType::vtEnum16: case ValueType::vtString: assert_cast(column).insert(row.get(idx)); break; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 2ffef474ad0..313523d19dc 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include @@ -324,6 +324,13 @@ Poco::Net::SocketAddress Server::socketBindListen(Poco::Net::ServerSocket & sock socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config().getBool("listen_reuse_port", false)); #endif + /// If caller requests any available port from the OS, discover it after binding. + if (port == 0) + { + address = socket.address(); + LOG_DEBUG(&logger(), "Requested any available port (port == 0), actual port is {:d}", address.port()); + } + socket.listen(/* backlog = */ config().getUInt("listen_backlog", 64)); return address; @@ -1152,7 +1159,7 @@ int Server::main(const std::vector & /*args*/) { /// This object will periodically calculate some metrics.
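    /// Note: the default for asynchronous_metrics_update_period_s drops from 60 to 1 below,
    /// so system.asynchronous_metrics refreshes roughly once per second; the assumption is
    /// that collecting these metrics is cheap enough to run at a 1-second cadence.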
AsynchronousMetrics async_metrics( - global_context, config().getUInt("asynchronous_metrics_update_period_s", 60), servers_to_start_before_tables, servers); + global_context, config().getUInt("asynchronous_metrics_update_period_s", 1), servers_to_start_before_tables, servers); attachSystemTablesAsync(*DatabaseCatalog::instance().getSystemDatabase(), async_metrics); for (const auto & listen_host : listen_hosts) diff --git a/programs/server/config.xml b/programs/server/config.xml index dd50a693403..6f0b228dda7 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -583,7 +583,7 @@ 9019 --> - + @@ -917,7 +917,7 @@ Asynchronous metrics are updated once a minute, so there is no need to flush more often. --> - 60000 + 7000 + 60 + 60 + + + diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 0af76fe2648..97d076f698e 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -3,6 +3,7 @@ import logging import subprocess import os +import glob import time import shutil from collections import defaultdict @@ -17,7 +18,6 @@ SLEEP_BETWEEN_RETRIES = 5 PARALLEL_GROUP_SIZE = 100 CLICKHOUSE_BINARY_PATH = "/usr/bin/clickhouse" CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH = "/usr/bin/clickhouse-odbc-bridge" -DOCKERD_LOGS_PATH = "/ClickHouse/tests/integration/dockerd.log" CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH = "/usr/bin/clickhouse-library-bridge" TRIES_COUNT = 10 @@ -256,8 +256,8 @@ class ClickhouseIntegrationTestsRunner: shutil.copy(CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH, result_path_library_bridge) return None, None - def _compress_logs(self, path, result_path): - subprocess.check_call("tar czf {} -C {} .".format(result_path, path), shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL + def _compress_logs(self, dir, relpaths, result_path): + subprocess.check_call("tar czf {} -C {} {}".format(result_path, dir, ' '.join(relpaths)), shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL def _get_all_tests(self, repo_path): image_cmd = self._get_runner_image_cmd(repo_path) @@ -336,6 +336,27 @@ class ClickhouseIntegrationTestsRunner: logging.info("Cannot run with custom docker image version :(") return image_cmd + def _find_test_data_dirs(self, repo_path, test_names): + relpaths = {} + for test_name in test_names: + if '/' in test_name: + test_dir = test_name[:test_name.find('/')] + else: + test_dir = test_name + if os.path.isdir(os.path.join(repo_path, "tests/integration", test_dir)): + for name in os.listdir(os.path.join(repo_path, "tests/integration", test_dir)): + relpath = os.path.join(os.path.join(test_dir, name)) + mtime = os.path.getmtime(os.path.join(repo_path, "tests/integration", relpath)) + relpaths[relpath] = mtime + return relpaths + + def _get_test_data_dirs_difference(self, new_snapshot, old_snapshot): + res = set() + for path in new_snapshot: + if (not path in old_snapshot) or (old_snapshot[path] != new_snapshot[path]): + res.add(path) + return res + def run_test_group(self, repo_path, test_group, tests_in_group, num_tries, num_workers): counters = { "ERROR": [], @@ -355,18 +376,14 @@ class ClickhouseIntegrationTestsRunner: image_cmd = self._get_runner_image_cmd(repo_path) test_group_str = test_group.replace('/', '_').replace('.', '_') + log_paths = [] + test_data_dirs = {} for i in range(num_tries): logging.info("Running test group %s for the %s retry", test_group, i) clear_ip_tables_and_restart_daemons() - output_path = os.path.join(str(self.path()), "test_output_" + test_group_str + "_" + str(i) + ".log") - log_name = 
"integration_run_" + test_group_str + "_" + str(i) + ".txt" - log_path = os.path.join(str(self.path()), log_name) - log_paths.append(log_path) - logging.info("Will wait output inside %s", output_path) - test_names = set([]) for test_name in tests_in_group: if test_name not in counters["PASSED"]: @@ -375,11 +392,19 @@ class ClickhouseIntegrationTestsRunner: else: test_names.add(test_name) + if i == 0: + test_data_dirs = self._find_test_data_dirs(repo_path, test_names) + + info_basename = test_group_str + "_" + str(i) + ".nfo" + info_path = os.path.join(repo_path, "tests/integration", info_basename) + test_cmd = ' '.join([test for test in sorted(test_names)]) parallel_cmd = " --parallel {} ".format(num_workers) if num_workers > 0 else "" - cmd = "cd {}/tests/integration && ./runner --tmpfs {} -t {} {} '-ss -rfEp --run-id={} --color=no --durations=0 {}' | tee {}".format( - repo_path, image_cmd, test_cmd, parallel_cmd, i, _get_deselect_option(self.should_skip_tests()), output_path) + cmd = "cd {}/tests/integration && ./runner --tmpfs {} -t {} {} '-rfEp --run-id={} --color=no --durations=0 {}' | tee {}".format( + repo_path, image_cmd, test_cmd, parallel_cmd, i, _get_deselect_option(self.should_skip_tests()), info_path) + log_basename = test_group_str + "_" + str(i) + ".log" + log_path = os.path.join(repo_path, "tests/integration", log_basename) with open(log_path, 'w') as log: logging.info("Executing cmd: %s", cmd) retcode = subprocess.Popen(cmd, shell=True, stderr=log, stdout=log).wait() @@ -388,15 +413,41 @@ class ClickhouseIntegrationTestsRunner: else: logging.info("Some tests failed") - if os.path.exists(output_path): - lines = parse_test_results_output(output_path) + extra_logs_names = [log_basename] + log_result_path = os.path.join(str(self.path()), 'integration_run_' + log_basename) + shutil.copy(log_path, log_result_path) + log_paths.append(log_result_path) + + for pytest_log_path in glob.glob(os.path.join(repo_path, "tests/integration/pytest*.log")): + new_name = test_group_str + "_" + str(i) + "_" + os.path.basename(pytest_log_path) + os.rename(pytest_log_path, os.path.join(repo_path, "tests/integration", new_name)) + extra_logs_names.append(new_name) + + dockerd_log_path = os.path.join(repo_path, "tests/integration/dockerd.log") + if os.path.exists(dockerd_log_path): + new_name = test_group_str + "_" + str(i) + "_" + os.path.basename(dockerd_log_path) + os.rename(dockerd_log_path, os.path.join(repo_path, "tests/integration", new_name)) + extra_logs_names.append(new_name) + + if os.path.exists(info_path): + extra_logs_names.append(info_basename) + lines = parse_test_results_output(info_path) new_counters = get_counters(lines) - times_lines = parse_test_times(output_path) + times_lines = parse_test_times(info_path) new_tests_times = get_test_times(times_lines) self._update_counters(counters, new_counters) for test_name, test_time in new_tests_times.items(): tests_times[test_name] = test_time - os.remove(output_path) + + test_data_dirs_new = self._find_test_data_dirs(repo_path, test_names) + test_data_dirs_diff = self._get_test_data_dirs_difference(test_data_dirs_new, test_data_dirs) + test_data_dirs = test_data_dirs_new + + if extra_logs_names or test_data_dirs_diff: + extras_result_path = os.path.join(str(self.path()), "integration_run_" + test_group_str + "_" + str(i) + ".tar.gz") + self._compress_logs(os.path.join(repo_path, "tests/integration"), extra_logs_names + list(test_data_dirs_diff), extras_result_path) + log_paths.append(extras_result_path) + if len(counters["PASSED"]) + 
len(counters["FLAKY"]) == len(tests_in_group): logging.info("All tests from group %s passed", test_group) break @@ -459,15 +510,6 @@ class ClickhouseIntegrationTestsRunner: break time.sleep(5) - logging.info("Finally all tests done, going to compress test dir") - test_logs = os.path.join(str(self.path()), "./test_dir.tar.gz") - self._compress_logs("{}/tests/integration".format(repo_path), test_logs) - logging.info("Compression finished") - - result_path_dockerd_logs = os.path.join(str(self.path()), "dockerd.log") - if os.path.exists(result_path_dockerd_logs): - shutil.copy(DOCKERD_LOGS_PATH, result_path_dockerd_logs) - test_result = [] for state in ("ERROR", "FAILED", "PASSED", "SKIPPED", "FLAKY"): if state == "PASSED": @@ -479,7 +521,7 @@ class ClickhouseIntegrationTestsRunner: test_result += [(c + ' (✕' + str(final_retry) + ')', text_state, "{:.2f}".format(tests_times[c])) for c in counters[state]] status_text = description_prefix + ', '.join([str(n).lower().replace('failed', 'fail') + ': ' + str(len(c)) for n, c in counters.items()]) - return result_state, status_text, test_result, [test_logs] + logs + return result_state, status_text, test_result, logs def run_impl(self, repo_path, build_path): if self.flaky_check: @@ -539,15 +581,6 @@ class ClickhouseIntegrationTestsRunner: logging.info("Collected more than 20 failed/error tests, stopping") break - logging.info("Finally all tests done, going to compress test dir") - test_logs = os.path.join(str(self.path()), "./test_dir.tar.gz") - self._compress_logs("{}/tests/integration".format(repo_path), test_logs) - logging.info("Compression finished") - - result_path_dockerd_logs = os.path.join(str(self.path()), "dockerd.log") - if os.path.exists(result_path_dockerd_logs): - shutil.copy(DOCKERD_LOGS_PATH, result_path_dockerd_logs) - if counters["FAILED"] or counters["ERROR"]: logging.info("Overall status failure, because we have tests in FAILED or ERROR state") result_state = "failure" @@ -580,7 +613,7 @@ class ClickhouseIntegrationTestsRunner: if '(memory)' in self.params['context_name']: result_state = "success" - return result_state, status_text, test_result, [test_logs] + return result_state, status_text, test_result, [] def write_results(results_file, status_file, results, status): with open(results_file, 'w') as f: diff --git a/tests/integration/helpers/0_common_instance_config.xml b/tests/integration/helpers/0_common_instance_config.xml index b27ecf0c3ef..717f6db7e4b 100644 --- a/tests/integration/helpers/0_common_instance_config.xml +++ b/tests/integration/helpers/0_common_instance_config.xml @@ -7,4 +7,14 @@ users.xml + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index bd2f7d2bd8a..5f7cfd9467b 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -30,6 +30,7 @@ from kazoo.client import KazooClient from kazoo.exceptions import KazooException from minio import Minio from helpers.test_tools import assert_eq_with_retry +from helpers import pytest_xdist_logging_to_separate_files import docker @@ -56,22 +57,22 @@ def run_and_check(args, env=None, shell=False, stdout=subprocess.PIPE, stderr=su subprocess.Popen(args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, env=env, shell=shell) return + logging.debug(f"Command:{args}") res = 
subprocess.run(args, stdout=stdout, stderr=stderr, env=env, shell=shell, timeout=timeout) out = res.stdout.decode('utf-8') err = res.stderr.decode('utf-8') - if res.returncode != 0: - # check_call(...) from subprocess does not print stderr, so we do it manually - logging.debug(f"Command:{args}") - logging.debug(f"Stderr:{err}") + # check_call(...) from subprocess does not print stderr, so we do it manually + if out: logging.debug(f"Stdout:{out}") - logging.debug(f"Env: {env}") + if err: + logging.debug(f"Stderr:{err}") + if res.returncode != 0: + logging.debug(f"Exitcode:{res.returncode}") + if env: + logging.debug(f"Env:{env}") if not nothrow: raise Exception(f"Command {args} returned non-zero code {res.returncode}: {res.stderr.decode('utf-8')}") - else: - logging.debug(f"Command:{args}") - logging.debug(f"Stderr: {err}") - logging.debug(f"Stdout: {out}") - return out + return out # Based on https://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python/2838309#2838309 def get_free_port(): @@ -192,6 +193,7 @@ class ClickHouseCluster: zookeeper_keyfile=None, zookeeper_certfile=None): for param in list(os.environ.keys()): logging.debug("ENV %40s %s" % (param, os.environ[param])) + self.base_path = base_path self.base_dir = p.dirname(base_path) self.name = name if name is not None else '' @@ -392,11 +394,13 @@ class ClickHouseCluster: def cleanup(self): # Just in case kill unstopped containers from previous launch try: - result = run_and_check(f'docker container list --all --filter name={self.project_name} | wc -l', shell=True) + # We need to have "^/" and "$" in the "--filter name" option below to filter by exact name of the container, see + # https://stackoverflow.com/questions/48767760/how-to-make-docker-container-ls-f-name-filter-by-exact-name + result = run_and_check(f'docker container list --all --filter name=^/{self.project_name}$ | wc -l', shell=True) if int(result) > 1: - logging.debug(f"Trying to kill unstopped containers for project{self.project_name}...") - run_and_check(f'docker kill $(docker container list --all --quiet --filter name={self.project_name})', shell=True) - run_and_check(f'docker rm $(docker container list --all --quiet --filter name={self.project_name})', shell=True) + logging.debug(f"Trying to kill unstopped containers for project {self.project_name}...") + run_and_check(f'docker kill $(docker container list --all --quiet --filter name=^/{self.project_name}$)', shell=True) + run_and_check(f'docker rm $(docker container list --all --quiet --filter name=^/{self.project_name}$)', shell=True) logging.debug("Unstopped containers killed") run_and_check(['docker-compose', 'ps', '--services', '--all']) else: @@ -1068,7 +1072,7 @@ class ClickHouseCluster: logging.error("Can't connect to MySQL:{}".format(errors)) raise Exception("Cannot wait for MySQL container") - def wait_postgres_to_start(self, timeout=180): + def wait_postgres_to_start(self, timeout=260): self.postgres_ip = self.get_instance_ip(self.postgres_host) start = time.time() while time.time() - start < timeout: @@ -1188,15 +1192,18 @@ class ClickHouseCluster: time.sleep(1) - def wait_hdfs_to_start(self, timeout=300): + def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() while time.time() - start < timeout: try: self.hdfs_api.write_data("/somefilewithrandomname222", "1") logging.debug("Connected to HDFS and SafeMode disabled!
") + if check_marker: + self.hdfs_api.read_data("/preparations_done_marker") + return except Exception as ex: - logging.exception("Can't connect to HDFS " + str(ex)) + logging.exception("Can't connect to HDFS or preparations are not done yet " + str(ex)) time.sleep(1) raise Exception("Can't wait HDFS to start") @@ -1290,6 +1297,9 @@ class ClickHouseCluster: raise Exception("Can't wait Cassandra to start") def start(self, destroy_dirs=True): + pytest_xdist_logging_to_separate_files.setup() + logging.info("Running tests in {}".format(self.base_path)) + logging.debug("Cluster start called. is_up={}, destroy_dirs={}".format(self.is_up, destroy_dirs)) if self.is_up: return @@ -1443,7 +1453,7 @@ class ClickHouseCluster: os.chmod(self.hdfs_kerberized_logs_dir, stat.S_IRWXO) run_and_check(self.base_kerberized_hdfs_cmd + common_opts) self.make_hdfs_api(kerberized=True) - self.wait_hdfs_to_start() + self.wait_hdfs_to_start(check_marker=True) if self.with_mongo and self.base_mongo_cmd: logging.debug('Setup Mongo') @@ -1489,9 +1499,9 @@ class ClickHouseCluster: instance.docker_client = self.docker_client instance.ip_address = self.get_instance_ip(instance.name) - logging.debug("Waiting for ClickHouse start in {instance}, ip: {instance.ip_address}...") + logging.debug(f"Waiting for ClickHouse start in {instance.name}, ip: {instance.ip_address}...") instance.wait_for_start(start_timeout) - logging.debug("ClickHouse {instance} started") + logging.debug(f"ClickHouse {instance.name} started") instance.client = Client(instance.ip_address, command=self.client_bin_path) @@ -1771,12 +1781,14 @@ class ClickHouseInstance: # Connects to the instance via clickhouse-client, sends a query (1st argument) and returns the answer def query(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None, ignore_error=False): + logging.debug(f"Executing query {sql} on {self.name}") return self.client.query(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, password=password, database=database, ignore_error=ignore_error) def query_with_retry(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None, ignore_error=False, retry_count=20, sleep_time=0.5, check_callback=lambda x: True): + logging.debug(f"Executing query {sql} on {self.name}") result = None for i in range(retry_count): try: @@ -1794,23 +1806,27 @@ class ClickHouseInstance: raise Exception("Can't execute query {}".format(sql)) # As query() but doesn't wait response and returns response handler - def get_query_request(self, *args, **kwargs): - return self.client.get_query_request(*args, **kwargs) + def get_query_request(self, sql, *args, **kwargs): + logging.debug(f"Executing query {sql} on {self.name}") + return self.client.get_query_request(sql, *args, **kwargs) # Connects to the instance via clickhouse-client, sends a query (1st argument), expects an error and return its code def query_and_get_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None): + logging.debug(f"Executing query {sql} on {self.name}") return self.client.query_and_get_error(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, password=password, database=database) # The same as query_and_get_error but ignores successful query. 
def query_and_get_answer_with_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None): + logging.debug(f"Executing query {sql} on {self.name}") return self.client.query_and_get_answer_with_error(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, password=password, database=database) # Connects to the instance via HTTP interface, sends a query and returns the answer def http_query(self, sql, data=None, params=None, user=None, password=None, expect_fail_and_get_error=False): + logging.debug(f"Executing query {sql} on {self.name} via HTTP interface") if params is None: params = {} else: @@ -1845,11 +1861,13 @@ class ClickHouseInstance: # Connects to the instance via HTTP interface and sends a raw HTTP request def http_request(self, url, method='GET', params=None, data=None, headers=None): + logging.debug(f"Sending HTTP request {url} to {self.name}") url = "http://" + self.ip_address + ":8123/" + url return requests.request(method=method, url=url, params=params, data=data, headers=headers) # Connects to the instance via HTTP interface, sends a query, expects an error and returns the error message def http_query_and_get_error(self, sql, data=None, params=None, user=None, password=None): + logging.debug(f"Executing query {sql} on {self.name} via HTTP interface") return self.http_query(sql=sql, data=data, params=params, user=user, password=password, expect_fail_and_get_error=True) diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 333e38f76bc..7d9906ae663 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -127,21 +127,21 @@ class _NetworkManager: return cls._instance def add_iptables_rule(self, **kwargs): - cmd = ['iptables', '-I', 'DOCKER-USER', '1'] + cmd = ['iptables', '--wait', '-I', 'DOCKER-USER', '1'] cmd.extend(self._iptables_cmd_suffix(**kwargs)) - self._exec_run_with_retry(cmd, retry_count=3, privileged=True) + self._exec_run(cmd, privileged=True) def delete_iptables_rule(self, **kwargs): - cmd = ['iptables', '-D', 'DOCKER-USER'] + cmd = ['iptables', '--wait', '-D', 'DOCKER-USER'] cmd.extend(self._iptables_cmd_suffix(**kwargs)) - self._exec_run_with_retry(cmd, retry_count=3, privileged=True) + self._exec_run(cmd, privileged=True) @staticmethod def clean_all_user_iptables_rules(): for i in range(1000): iptables_iter = i # once the rules run out, iptables returns an error - res = subprocess.run("iptables -D DOCKER-USER 1", shell=True) + res = subprocess.run("iptables --wait -D DOCKER-USER 1", shell=True) if res.returncode != 0: logging.info("All iptables rules cleared, " + str(iptables_iter) + " iterations, last error: " + str(res.stderr)) diff --git a/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py b/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py new file mode 100644 index 00000000000..ee9a52e042c --- /dev/null +++ b/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py @@ -0,0 +1,28 @@ +import logging +import os.path + +# Makes the parallel workers of pytest-xdist log to separate files. +# Without this function all workers will log to the same log file +# and mix everything together, making troubleshooting much more difficult.
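+# Mechanism in brief: pytest-xdist exposes the worker id ('gw0', 'gw1', ...) through the
+# PYTEST_XDIST_WORKER environment variable. setup() swaps every FileHandler on the root
+# logger for one writing to a '-<worker>'-suffixed sibling file (formatter and level kept),
+# so pytest.log becomes pytest-gw0.log, pytest-gw1.log, and so on. ClickHouseCluster.start()
+# calls setup() once in each worker process.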
+def setup(): + worker_name = os.environ.get('PYTEST_XDIST_WORKER', 'master') + if worker_name == 'master': + return + logger = logging.getLogger('') + new_handlers = [] + handlers_to_remove = [] + for handler in logger.handlers: + if isinstance(handler, logging.FileHandler): + filename, ext = os.path.splitext(handler.baseFilename) + if not filename.endswith('-' + worker_name): + new_filename = filename + '-' + worker_name + new_handler = logging.FileHandler(new_filename + ext) + new_handler.setFormatter(handler.formatter) + new_handler.setLevel(handler.level) + new_handlers.append(new_handler) + handlers_to_remove.append(handler) + for new_handler in new_handlers: + logger.addHandler(new_handler) + for handler in handlers_to_remove: + handler.flush() + logger.removeHandler(handler) diff --git a/tests/integration/parallel.json b/tests/integration/parallel.json index f82e33138fc..2879f258406 100644 --- a/tests/integration/parallel.json +++ b/tests/integration/parallel.json @@ -177,6 +177,8 @@ "test_materialize_mysql_database/test.py::test_system_tables_table[clickhouse_node1]", "test_materialize_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node0]", "test_materialize_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node1]", + "test_materialize_mysql_database/test.py::test_materialize_with_enum[clickhouse_node0]", + "test_materialize_mysql_database/test.py::test_materialize_with_enum[clickhouse_node1]", "test_materialize_mysql_database/test.py::test_utf8mb4[clickhouse_node0]", "test_materialize_mysql_database/test.py::test_utf8mb4[clickhouse_node1]", "test_parts_delete_zookeeper/test.py::test_merge_doesnt_work_without_zookeeper", diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 7d124f4eac7..2c993691d78 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -181,6 +181,8 @@ "test_materialize_mysql_database/test.py::test_system_tables_table[clickhouse_node1]", "test_materialize_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node0]", "test_materialize_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node1]", + "test_materialize_mysql_database/test.py::test_materialize_with_enum[clickhouse_node0]", + "test_materialize_mysql_database/test.py::test_materialize_with_enum[clickhouse_node1]", "test_materialize_mysql_database/test.py::test_utf8mb4[clickhouse_node0]", "test_materialize_mysql_database/test.py::test_utf8mb4[clickhouse_node1]", "test_parts_delete_zookeeper/test.py::test_merge_doesnt_work_without_zookeeper", diff --git a/tests/integration/pytest.ini b/tests/integration/pytest.ini index 6d451adf7eb..4593fc8c4d8 100644 --- a/tests/integration/pytest.ini +++ b/tests/integration/pytest.ini @@ -4,10 +4,14 @@ norecursedirs = _instances* timeout = 1800 junit_duration_report = call junit_suite_name = integration -log_cli = 1 +log_level = DEBUG +log_format = %(asctime)s [ %(process)d ] %(levelname)s : %(message)s (%(filename)s:%(lineno)s, %(funcName)s) +log_date_format=%Y-%m-%d %H:%M:%S +log_cli = true log_cli_level = CRITICAL -log_cli_format = %%(asctime)s [%(levelname)8s] %(funcName)s %(message)s (%(filename)s:%(lineno)s) +log_cli_format = %(asctime)s [ %(process)d ] %(levelname)s : %(message)s (%(filename)s:%(lineno)s, %(funcName)s) +log_cli_date_format=%Y-%m-%d %H:%M:%S log_file = pytest.log log_file_level = DEBUG -log_file_format = %(asctime)s [%(levelname)8s] %(funcName)s %(message)s 
(%(filename)s:%(lineno)s) -log_file_date_format=%Y-%m-%d %H:%M:%S +log_file_format = %(asctime)s [ %(process)d ] %(levelname)s : %(message)s (%(filename)s:%(lineno)s, %(funcName)s) +log_file_date_format = %Y-%m-%d %H:%M:%S diff --git a/tests/integration/runner b/tests/integration/runner index 160c4a23652..cfd98134ea3 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -3,6 +3,7 @@ import subprocess import os import getpass +import glob import argparse import logging import signal @@ -99,7 +100,7 @@ signal.signal(signal.SIGINT, docker_kill_handler_handler) # 2) path of runner script is used to determine paths for trivial case, when we run it from repository if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') + logging.basicConfig(level=logging.INFO, format='%(asctime)s [ %(process)d ] %(levelname)s : %(message)s (%(filename)s:%(lineno)s, %(funcName)s)') parser = argparse.ArgumentParser(description="ClickHouse integration tests runner") parser.add_argument( @@ -257,6 +258,9 @@ if __name__ == "__main__": if sys.stdout.isatty() and sys.stdin.isatty(): tty = "-it" + # Remove old logs. + for old_log_path in glob.glob(args.cases_dir + "/pytest*.log"): + os.remove(old_log_path) cmd = "docker run {net} {tty} --rm --name {name} --privileged \ --volume={odbc_bridge_bin}:/clickhouse-odbc-bridge --volume={bin}:/clickhouse \ diff --git a/tests/integration/test_concurrent_ttl_merges/configs/log_conf.xml b/tests/integration/test_concurrent_ttl_merges/configs/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_concurrent_ttl_merges/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py index f89d024842c..efa93220d55 100644 --- a/tests/integration/test_concurrent_ttl_merges/test.py +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -5,8 +5,8 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry, TSV cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=['configs/fast_background_pool.xml', 'configs/log_conf.xml'], with_zookeeper=True) -node2 = cluster.add_instance('node2', main_configs=['configs/fast_background_pool.xml', 'configs/log_conf.xml'], with_zookeeper=True) +node1 = cluster.add_instance('node1', main_configs=['configs/fast_background_pool.xml'], with_zookeeper=True) +node2 = cluster.add_instance('node2', main_configs=['configs/fast_background_pool.xml'], with_zookeeper=True) @pytest.fixture(scope="module") diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/log_conf.xml b/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/log_conf.xml deleted file mode 100644 index b52d833cde8..00000000000 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py 
b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py index 65080ab40dd..78715bd17cf 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py @@ -40,7 +40,6 @@ def setup_module(module): main_configs = [] main_configs.append(os.path.join('configs', 'disable_ssl_verification.xml')) - main_configs.append(os.path.join('configs', 'log_conf.xml')) dictionaries = simple_tester.list_dictionaries() diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py index 7feba20f3a1..7cd7460b8cb 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py @@ -41,7 +41,6 @@ def setup_module(module): main_configs = [] main_configs.append(os.path.join('configs', 'disable_ssl_verification.xml')) - main_configs.append(os.path.join('configs', 'log_conf.xml')) dictionaries = simple_tester.list_dictionaries() diff --git a/tests/integration/test_dictionaries_mysql/configs/log_conf.xml b/tests/integration/test_dictionaries_mysql/configs/log_conf.xml deleted file mode 100644 index b52d833cde8..00000000000 --- a/tests/integration/test_dictionaries_mysql/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index 110bb9481b2..fe284f71e00 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -6,7 +6,7 @@ import time import logging DICTS = ['configs/dictionaries/mysql_dict1.xml', 'configs/dictionaries/mysql_dict2.xml'] -CONFIG_FILES = ['configs/log_conf.xml', 'configs/remote_servers.xml'] +CONFIG_FILES = ['configs/remote_servers.xml'] cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', main_configs=CONFIG_FILES, with_mysql=True, dictionaries=DICTS) diff --git a/tests/integration/test_dictionaries_postgresql/configs/log_conf.xml b/tests/integration/test_dictionaries_postgresql/configs/log_conf.xml deleted file mode 100644 index b52d833cde8..00000000000 --- a/tests/integration/test_dictionaries_postgresql/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_dictionaries_postgresql/test.py b/tests/integration/test_dictionaries_postgresql/test.py index f0d18909166..d35182e15a1 100644 --- a/tests/integration/test_dictionaries_postgresql/test.py +++ b/tests/integration/test_dictionaries_postgresql/test.py @@ -7,10 +7,9 @@ from helpers.cluster import ClickHouseCluster from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=[ - 'configs/config.xml', - 'configs/dictionaries/postgres_dict.xml', - 'configs/log_conf.xml'], with_postgres=True, with_postgres_cluster=True) +node1 = cluster.add_instance('node1', + main_configs=['configs/config.xml', 
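+                             # (no per-test log_conf.xml anymore: logging for every instance
+                             # now comes centrally from helpers/0_common_instance_config.xml)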
'configs/dictionaries/postgres_dict.xml'], + with_postgres=True, with_postgres_cluster=True) postgres_dict_table_template = """ CREATE TABLE IF NOT EXISTS {} ( diff --git a/tests/integration/test_distributed_backward_compatability/configs/legacy.xml b/tests/integration/test_distributed_backward_compatability/configs/legacy.xml new file mode 100644 index 00000000000..01bd56de845 --- /dev/null +++ b/tests/integration/test_distributed_backward_compatability/configs/legacy.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_distributed_backward_compatability/test.py b/tests/integration/test_distributed_backward_compatability/test.py index eb18019c8df..0d36aaa23f4 100644 --- a/tests/integration/test_distributed_backward_compatability/test.py +++ b/tests/integration/test_distributed_backward_compatability/test.py @@ -5,8 +5,8 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node_old = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], image='yandex/clickhouse-server', - tag='19.17.8.54', stay_alive=True, with_installed_binary=True) -node_new = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml']) + tag='20.8.9.6', stay_alive=True, with_installed_binary=True) +node_new = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], user_configs=['configs/legacy.xml']) @pytest.fixture(scope="module") diff --git a/tests/queries/0_stateless/01442_merge_detach_attach.reference b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/__init__.py similarity index 100% rename from tests/queries/0_stateless/01442_merge_detach_attach.reference rename to tests/integration/test_distributed_directory_monitor_split_batch_on_failure/__init__.py diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_1.xml b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_1.xml new file mode 100644 index 00000000000..4e4ccf75323 --- /dev/null +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_1.xml @@ -0,0 +1,15 @@ + + + + + 0 + + 1 + + 1 + + 86400 + 86400 + + + diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_2.xml b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_2.xml new file mode 100644 index 00000000000..d7c69c4a9ac --- /dev/null +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/overrides_2.xml @@ -0,0 +1,15 @@ + + + + + 0 + + 1 + + 0 + + 86400 + 86400 + + + diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/remote_servers.xml b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/remote_servers.xml new file mode 100644 index 00000000000..ebce4697529 --- /dev/null +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/configs/remote_servers.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py new file mode 100644 index 00000000000..9cbf8771ee5 --- /dev/null +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py @@ -0,0 +1,60 @@ +import pytest +from 
helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +# node1 -- distributed_directory_monitor_split_batch_on_failure=on +node1 = cluster.add_instance('node1', + main_configs=['configs/remote_servers.xml'], + user_configs=['configs/overrides_1.xml'], +) +# node2 -- distributed_directory_monitor_split_batch_on_failure=off +node2 = cluster.add_instance('node2', + main_configs=['configs/remote_servers.xml'], + user_configs=['configs/overrides_2.xml'], +) + +@pytest.fixture(scope='module') +def started_cluster(): + try: + cluster.start() + + for _, node in cluster.instances.items(): + node.query(""" + create table null_ (key Int, value Int) engine=Null(); + create table dist as null_ engine=Distributed(test_cluster, currentDatabase(), null_, key); + create table data (key Int, uniq_values Int) engine=Memory(); + create materialized view mv to data as select key, uniqExact(value) uniq_values from null_ group by key; + system stop distributed sends dist; + + create table dist_data as data engine=Distributed(test_cluster, currentDatabase(), data); + """) + + yield cluster + finally: + cluster.shutdown() + +def test_distributed_directory_monitor_split_batch_on_failure_OFF(started_cluster): + for i in range(0, 100): + limit = 100e3 + node2.query(f'insert into dist select number/100, number from system.numbers limit {limit} offset {limit*i}', settings={ + # max_memory_usage is the limit for the batch on the remote node + # (local query should not be affected since 30MB is enough for 100K rows) + 'max_memory_usage': '30Mi', + }) + # "Received from" is mandatory, since the exception should be thrown on the remote node. + with pytest.raises(QueryRuntimeException, match=r'DB::Exception: Received from.*Memory limit \(for query\) exceeded: .*while pushing to view default\.mv'): + node2.query('system flush distributed dist') + assert int(node2.query('select count() from dist_data')) == 0 + +def test_distributed_directory_monitor_split_batch_on_failure_ON(started_cluster): + for i in range(0, 100): + limit = 100e3 + node1.query(f'insert into dist select number/100, number from system.numbers limit {limit} offset {limit*i}', settings={ + # max_memory_usage is the limit for the batch on the remote node + # (local query should not be affected since 30MB is enough for 100K rows) + 'max_memory_usage': '30Mi', + }) + node1.query('system flush distributed dist') + assert int(node1.query('select count() from dist_data')) == 100000 diff --git a/tests/integration/test_distributed_format/configs/remote_servers.xml b/tests/integration/test_distributed_format/configs/remote_servers.xml index 5c86713bd78..87eaea50a8b 100644 --- a/tests/integration/test_distributed_format/configs/remote_servers.xml +++ b/tests/integration/test_distributed_format/configs/remote_servers.xml @@ -1,19 +1,20 @@ - + + true not_existing 9000 - + - + not_existing 9000 - + diff --git a/tests/integration/test_distributed_format/test.py b/tests/integration/test_distributed_format/test.py index 22054077544..d6e1cc03fa8 100644 --- a/tests/integration/test_distributed_format/test.py +++ b/tests/integration/test_distributed_format/test.py @@ -1,16 +1,27 @@ -import pytest +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=line-too-long +import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance('node', main_configs=['configs/remote_servers.xml']) cluster_param = 
pytest.mark.parametrize("cluster", [ - ('test_cluster'), - ('test_cluster_2'), + ('test_cluster_internal_replication'), + ('test_cluster_no_internal_replication'), ]) +def get_dist_path(cluster, table, dist_format): + if dist_format == 0: + return f'/var/lib/clickhouse/data/test/{table}/default@not_existing:9000' + if cluster == 'test_cluster_internal_replication': + return f'/var/lib/clickhouse/data/test/{table}/shard1_all_replicas' + return f'/var/lib/clickhouse/data/test/{table}/shard1_replica1' + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -29,13 +40,16 @@ def test_single_file(started_cluster, cluster): node.query("insert into test.distr_1 values (1, 'a'), (2, 'bb'), (3, 'ccc')", settings={"use_compact_format_in_distributed_parts_names": "1"}) - query = "select * from file('/var/lib/clickhouse/data/test/distr_1/shard1_replica1/1.bin', 'Distributed')" + path = get_dist_path(cluster, 'distr_1', 1) + query = f"select * from file('{path}/1.bin', 'Distributed')" out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) assert out == '1\ta\n2\tbb\n3\tccc\n' - query = "create table t (x UInt64, s String) engine = File('Distributed', '/var/lib/clickhouse/data/test/distr_1/shard1_replica1/1.bin');" \ - "select * from t" + query = f""" + create table t (x UInt64, s String) engine = File('Distributed', '{path}/1.bin'); + select * from t; + """ out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) assert out == '1\ta\n2\tbb\n3\tccc\n' @@ -54,13 +68,16 @@ def test_two_files(started_cluster, cluster): "use_compact_format_in_distributed_parts_names": "1", }) - query = "select * from file('/var/lib/clickhouse/data/test/distr_2/shard1_replica1/{1,2,3,4}.bin', 'Distributed') order by x" + path = get_dist_path(cluster, 'distr_2', 1) + query = f"select * from file('{path}/{{1,2,3,4}}.bin', 'Distributed') order by x" out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) assert out == '0\t_\n1\ta\n2\tbb\n3\tccc\n' - query = "create table t (x UInt64, s String) engine = File('Distributed', '/var/lib/clickhouse/data/test/distr_2/shard1_replica1/{1,2,3,4}.bin');" \ - "select * from t order by x" + query = f""" + create table t (x UInt64, s String) engine = File('Distributed', '{path}/{{1,2,3,4}}.bin'); + select * from t order by x; + """ out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) assert out == '0\t_\n1\ta\n2\tbb\n3\tccc\n' @@ -76,13 +93,16 @@ def test_single_file_old(started_cluster, cluster): "use_compact_format_in_distributed_parts_names": "0", }) - query = "select * from file('/var/lib/clickhouse/data/test/distr_3/default@not_existing:9000/1.bin', 'Distributed')" + path = get_dist_path(cluster, 'distr_3', 0) + query = f"select * from file('{path}/1.bin', 'Distributed')" out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) assert out == '1\ta\n2\tbb\n3\tccc\n' - query = "create table t (x UInt64, s String) engine = File('Distributed', '/var/lib/clickhouse/data/test/distr_3/default@not_existing:9000/1.bin');" \ - "select * from t" + query = f""" + create table t (x UInt64, s String) engine = File('Distributed', '{path}/1.bin'); + select * from t; + """ out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) assert out == '1\ta\n2\tbb\n3\tccc\n' diff --git a/tests/integration/test_distributed_inter_server_secret/test.py 
b/tests/integration/test_distributed_inter_server_secret/test.py index 1a0e5a3dd91..0e77fc6e162 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -83,7 +83,7 @@ def get_query_user_info(node, query_pattern): def get_query_setting_on_shard(node, query_pattern, setting): node.query("SYSTEM FLUSH LOGS") return node.query(""" - SELECT (arrayFilter(x -> ((x.1) = '{}'), arrayZip(Settings.Names, Settings.Values))[1]).2 + SELECT Settings['{}'] FROM system.query_log WHERE query LIKE '%{}%' AND diff --git a/tests/integration/test_grant_and_revoke/configs/log_conf.xml b/tests/integration/test_grant_and_revoke/configs/log_conf.xml deleted file mode 100644 index 0de2745ca4c..00000000000 --- a/tests/integration/test_grant_and_revoke/configs/log_conf.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 1124f072a06..a63d6f136af 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -3,7 +3,7 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', main_configs=['configs/log_conf.xml']) +instance = cluster.add_instance('instance') @pytest.fixture(scope="module", autouse=True) diff --git a/tests/integration/test_https_replication/configs/log_conf.xml b/tests/integration/test_https_replication/configs/log_conf.xml deleted file mode 100644 index b52d833cde8..00000000000 --- a/tests/integration/test_https_replication/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_https_replication/test.py b/tests/integration/test_https_replication/test.py index ccc206531fa..1008ce07ad3 100644 --- a/tests/integration/test_https_replication/test.py +++ b/tests/integration/test_https_replication/test.py @@ -26,10 +26,10 @@ def _fill_nodes(nodes, shard): cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml', 'configs/ssl_conf.xml', "configs/server.crt", - "configs/server.key", "configs/dhparam.pem", "configs/log_conf.xml"], with_zookeeper=True) + "configs/server.key", "configs/dhparam.pem"], with_zookeeper=True) node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml', 'configs/ssl_conf.xml', "configs/server.crt", - "configs/server.key", "configs/dhparam.pem", "configs/log_conf.xml"], with_zookeeper=True) + "configs/server.key", "configs/dhparam.pem"], with_zookeeper=True) @pytest.fixture(scope="module") @@ -84,10 +84,8 @@ def test_replication_after_partition(both_https_cluster): assert_eq_with_retry(node2, "SELECT count() FROM test_table", '100') -node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml', 'configs/no_ssl_conf.xml', "configs/log_conf.xml"], - with_zookeeper=True) -node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml', 'configs/no_ssl_conf.xml', "configs/log_conf.xml"], - with_zookeeper=True) +node3 = 
cluster.add_instance('node3', main_configs=['configs/remote_servers.xml', 'configs/no_ssl_conf.xml'], with_zookeeper=True) +node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml', 'configs/no_ssl_conf.xml'], with_zookeeper=True) @pytest.fixture(scope="module") @@ -117,8 +115,10 @@ def test_both_http(both_http_cluster): node5 = cluster.add_instance('node5', main_configs=['configs/remote_servers.xml', 'configs/ssl_conf.xml', "configs/server.crt", - "configs/server.key", "configs/dhparam.pem", "configs/log_conf.xml"], with_zookeeper=True) -node6 = cluster.add_instance('node6', main_configs=['configs/remote_servers.xml', 'configs/no_ssl_conf.xml', "configs/log_conf.xml"], + "configs/server.key", "configs/dhparam.pem"], + with_zookeeper=True) +node6 = cluster.add_instance('node6', + main_configs=['configs/remote_servers.xml', 'configs/no_ssl_conf.xml'], with_zookeeper=True) diff --git a/tests/integration/test_insert_distributed_async_send/configs/remote_servers_split.xml b/tests/integration/test_insert_distributed_async_send/configs/remote_servers_split.xml new file mode 100644 index 00000000000..e2757bbc18c --- /dev/null +++ b/tests/integration/test_insert_distributed_async_send/configs/remote_servers_split.xml @@ -0,0 +1,32 @@ + + + + + false + + n3 + 9000 + + + n4 + 9000 + + + + + + + n3 + 9000 + + + + + n4 + 9000 + + + + + + diff --git a/tests/integration/test_insert_distributed_async_send/configs/users.d/split.xml b/tests/integration/test_insert_distributed_async_send/configs/users.d/split.xml new file mode 100644 index 00000000000..bf826629685 --- /dev/null +++ b/tests/integration/test_insert_distributed_async_send/configs/users.d/split.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_insert_distributed_async_send/test.py b/tests/integration/test_insert_distributed_async_send/test.py index b469da4e2e1..a9bf9801f4c 100644 --- a/tests/integration/test_insert_distributed_async_send/test.py +++ b/tests/integration/test_insert_distributed_async_send/test.py @@ -17,11 +17,29 @@ n1 = cluster.add_instance('n1', main_configs=['configs/remote_servers.xml'], use # n2 -- distributed_directory_monitor_batch_inserts=0 n2 = cluster.add_instance('n2', main_configs=['configs/remote_servers.xml'], user_configs=['configs/users.d/no_batch.xml']) +# n3 -- distributed_directory_monitor_batch_inserts=1/distributed_directory_monitor_split_batch_on_failure=1 +n3 = cluster.add_instance('n3', main_configs=['configs/remote_servers_split.xml'], user_configs=[ + 'configs/users.d/batch.xml', + 'configs/users.d/split.xml', +]) +# n4 -- distributed_directory_monitor_batch_inserts=0/distributed_directory_monitor_split_batch_on_failure=1 +n4 = cluster.add_instance('n4', main_configs=['configs/remote_servers_split.xml'], user_configs=[ + 'configs/users.d/no_batch.xml', + 'configs/users.d/split.xml', +]) + batch_params = pytest.mark.parametrize('batch', [ (1), (0), ]) +batch_and_split_params = pytest.mark.parametrize('batch,split', [ + (1, 0), + (0, 0), + (1, 1), + (0, 1), +]) + @pytest.fixture(scope='module', autouse=True) def start_cluster(): try: @@ -62,15 +80,19 @@ def insert_data(node): assert size > 1<<16 return size -def get_node(batch): +def get_node(batch, split=None): + if split: + if batch: + return n3 + return n4 if batch: return n1 return n2 -def bootstrap(batch): +def bootstrap(batch, split=None): drop_tables() create_tables('insert_distributed_async_send_cluster_two_replicas') - return insert_data(get_node(batch)) + return insert_data(get_node(batch, split)) def 
get_path_to_dist_batch(file='2.bin'): # There are: @@ -80,8 +102,8 @@ def get_path_to_dist_batch(file='2.bin'): # @return the file for the n2 shard return f'/var/lib/clickhouse/data/default/dist/shard1_replica2/{file}' -def check_dist_after_corruption(truncate, batch): - node = get_node(batch) +def check_dist_after_corruption(truncate, batch, split=None): + node = get_node(batch, split) if batch: # In batch mode errors are ignored @@ -102,8 +124,12 @@ def check_dist_after_corruption(truncate, batch): broken = get_path_to_dist_batch('broken') node.exec_in_container(['bash', '-c', f'ls {broken}/2.bin']) - assert int(n1.query('SELECT count() FROM data')) == 10000 - assert int(n2.query('SELECT count() FROM data')) == 0 + if split: + assert int(n3.query('SELECT count() FROM data')) == 10000 + assert int(n4.query('SELECT count() FROM data')) == 0 + else: + assert int(n1.query('SELECT count() FROM data')) == 10000 + assert int(n2.query('SELECT count() FROM data')) == 0 @batch_params @@ -114,17 +140,17 @@ def test_insert_distributed_async_send_success(batch): assert int(n1.query('SELECT count() FROM data')) == 10000 assert int(n2.query('SELECT count() FROM data')) == 10000 -@batch_params -def test_insert_distributed_async_send_truncated_1(batch): - size = bootstrap(batch) +@batch_and_split_params +def test_insert_distributed_async_send_truncated_1(batch, split): + size = bootstrap(batch, split) path = get_path_to_dist_batch() - node = get_node(batch) + node = get_node(batch, split) new_size = size - 10 # we cannot use truncate, due to hardlinks node.exec_in_container(['bash', '-c', f'mv {path} /tmp/bin && head -c {new_size} /tmp/bin > {path}']) - check_dist_after_corruption(True, batch) + check_dist_after_corruption(True, batch, split) @batch_params def test_insert_distributed_async_send_truncated_2(batch): diff --git a/tests/integration/test_join_set_family_s3/configs/config.d/log_conf.xml b/tests/integration/test_join_set_family_s3/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_join_set_family_s3/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_join_set_family_s3/test.py b/tests/integration/test_join_set_family_s3/test.py index 625cac48dee..9454acf1541 100644 --- a/tests/integration/test_join_set_family_s3/test.py +++ b/tests/integration/test_join_set_family_s3/test.py @@ -10,7 +10,7 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance("node", - main_configs=["configs/minio.xml", "configs/ssl.xml", "configs/config.d/log_conf.xml"], + main_configs=["configs/minio.xml", "configs/ssl.xml"], with_minio=True, stay_alive=True) logging.info("Starting cluster...") cluster.start() diff --git a/tests/integration/test_keeper_auth/configs/keeper_config.xml b/tests/integration/test_keeper_auth/configs/keeper_config.xml index bb3c9a5d94a..bee3ccb0aba 100644 --- a/tests/integration/test_keeper_auth/configs/keeper_config.xml +++ b/tests/integration/test_keeper_auth/configs/keeper_config.xml @@ -4,7 +4,7 @@ 1 /var/lib/clickhouse/coordination/log /var/lib/clickhouse/coordination/snapshots - super:0DPiKuNIrrVmD8IUCuw1hQxNqZc= + super:xQJmxLMiHGwaqBvst5y6rkB6HQs= 5000 diff --git a/tests/integration/test_keeper_auth/configs/logs_conf.xml 
b/tests/integration/test_keeper_auth/configs/logs_conf.xml deleted file mode 100644 index 3384320992a..00000000000 --- a/tests/integration/test_keeper_auth/configs/logs_conf.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py index 5f60d5b8bdb..276fe3d8518 100644 --- a/tests/integration/test_keeper_auth/test.py +++ b/tests/integration/test_keeper_auth/test.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 + import pytest from helpers.cluster import ClickHouseCluster from kazoo.client import KazooClient, KazooState @@ -6,7 +6,7 @@ from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import AuthFailedError, InvalidACLError, NoAuthError, KazooException cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node', main_configs=['configs/keeper_config.xml', 'configs/logs_conf.xml'], with_zookeeper=True, use_keeper=False, stay_alive=True) +node = cluster.add_instance('node', main_configs=['configs/keeper_config.xml'], with_zookeeper=True, use_keeper=False, stay_alive=True) SUPERAUTH = "super:admin" @@ -300,3 +300,47 @@ def test_auth_snapshot(started_cluster): with pytest.raises(NoAuthError): connection2.get("/test_snapshot_acl1") + + +@pytest.mark.parametrize( + ('get_zk'), + [ + get_genuine_zk, + get_fake_zk + ] +) +def test_get_set_acl(started_cluster, get_zk): + auth_connection = get_zk() + auth_connection.add_auth('digest', 'username1:secret1') + auth_connection.add_auth('digest', 'username2:secret2') + + auth_connection.create("/test_set_get_acl", b"data", acl=[make_acl("auth", "", all=True)]) + + acls, stat = auth_connection.get_acls("/test_set_get_acl") + + assert stat.aversion == 0 + assert len(acls) == 2 + for acl in acls: + assert acl.acl_list == ['ALL'] + assert acl.id.scheme == 'digest' + assert acl.perms == 31 + assert acl.id.id in ('username1:eGncMdBgOfGS/TCojt51xWsWv/Y=', 'username2:qgSSumukVlhftkVycylbHNvxhFU=') + + + other_auth_connection = get_zk() + other_auth_connection.add_auth('digest', 'username1:secret1') + other_auth_connection.add_auth('digest', 'username3:secret3') + other_auth_connection.set_acls("/test_set_get_acl", acls=[make_acl("auth", "", read=True, write=False, create=True, delete=True, admin=True)]) + + acls, stat = other_auth_connection.get_acls("/test_set_get_acl") + + assert stat.aversion == 1 + assert len(acls) == 2 + for acl in acls: + assert acl.acl_list == ['READ', 'CREATE', 'DELETE', 'ADMIN'] + assert acl.id.scheme == 'digest' + assert acl.perms == 29 + assert acl.id.id in ('username1:eGncMdBgOfGS/TCojt51xWsWv/Y=', 'username3:CvWITOxxTwk+u6S5PoGlQ4hNoWI=') + + with pytest.raises(KazooException): + other_auth_connection.set_acls("/test_set_get_acl", acls=[make_acl("auth", "", all=True)], version=0) diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 4cb539e448c..41c270e05e8 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -7,7 +7,7 @@ import time from multiprocessing.dummy import Pool cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node', main_configs=['configs/enable_keeper.xml', 'configs/logs_conf.xml'], with_zookeeper=True, use_keeper=False) +node = cluster.add_instance('node', 
main_configs=['configs/enable_keeper.xml'], with_zookeeper=True, use_keeper=False) from kazoo.client import KazooClient, KazooState, KeeperState def get_genuine_zk(): diff --git a/tests/integration/test_keeper_multinode_blocade_leader/configs/log_conf.xml b/tests/integration/test_keeper_multinode_blocade_leader/configs/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_keeper_multinode_blocade_leader/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_keeper_multinode_blocade_leader/test.py b/tests/integration/test_keeper_multinode_blocade_leader/test.py index 9ae81ccdcc6..2101c2a973f 100644 --- a/tests/integration/test_keeper_multinode_blocade_leader/test.py +++ b/tests/integration/test_keeper_multinode_blocade_leader/test.py @@ -9,9 +9,9 @@ from helpers.network import PartitionManager from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/log_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) -node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/log_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) -node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/log_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/use_keeper.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/use_keeper.xml'], stay_alive=True) +node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/use_keeper.xml'], stay_alive=True) from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_multinode_simple/configs/log_conf.xml b/tests/integration/test_keeper_multinode_simple/configs/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_keeper_multinode_simple/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index 9e57567d8b1..d7cd4dd927e 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -9,9 +9,9 @@ from helpers.network import PartitionManager from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/log_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) -node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/log_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) -node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/log_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/use_keeper.xml'], 
stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/use_keeper.xml'], stay_alive=True) +node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/use_keeper.xml'], stay_alive=True) from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_persistent_log/configs/logs_conf.xml b/tests/integration/test_keeper_persistent_log/configs/logs_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_keeper_persistent_log/configs/logs_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index b0cd9155afb..35f38f3ea1b 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -10,7 +10,7 @@ from kazoo.client import KazooClient, KazooState cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node', main_configs=['configs/enable_keeper.xml', 'configs/logs_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) +node = cluster.add_instance('node', main_configs=['configs/enable_keeper.xml', 'configs/use_keeper.xml'], stay_alive=True) def random_string(length): diff --git a/tests/integration/test_keeper_persistent_log_multinode/configs/log_conf.xml b/tests/integration/test_keeper_persistent_log_multinode/configs/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_keeper_persistent_log_multinode/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_keeper_persistent_log_multinode/test.py b/tests/integration/test_keeper_persistent_log_multinode/test.py index 306139369fb..8c02f269a60 100644 --- a/tests/integration/test_keeper_persistent_log_multinode/test.py +++ b/tests/integration/test_keeper_persistent_log_multinode/test.py @@ -7,9 +7,9 @@ import os import time cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/log_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) -node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/log_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) -node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/log_conf.xml', 'configs/use_keeper.xml'], stay_alive=True) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/use_keeper.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/use_keeper.xml'], stay_alive=True) +node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/use_keeper.xml'], stay_alive=True) from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_restore_from_snapshot/configs/log_conf.xml b/tests/integration/test_keeper_restore_from_snapshot/configs/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- 
a/tests/integration/test_keeper_restore_from_snapshot/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index 5f6156800bb..7a0323d95b4 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -7,9 +7,9 @@ import os import time cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/log_conf.xml'], stay_alive=True) -node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/log_conf.xml'], stay_alive=True) -node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/log_conf.xml'], stay_alive=True) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml'], stay_alive=True) +node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml'], stay_alive=True) from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_snapshots/configs/logs_conf.xml b/tests/integration/test_keeper_snapshots/configs/logs_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_keeper_snapshots/configs/logs_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index 7d5b69bf5d1..607e461d835 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -13,7 +13,7 @@ from kazoo.client import KazooClient, KazooState cluster = ClickHouseCluster(__file__) # clickhouse itself will use external zookeeper -node = cluster.add_instance('node', main_configs=['configs/enable_keeper.xml', 'configs/logs_conf.xml'], stay_alive=True, with_zookeeper=True) +node = cluster.add_instance('node', main_configs=['configs/enable_keeper.xml'], stay_alive=True, with_zookeeper=True) def random_string(length): return ''.join(random.choices(string.ascii_lowercase + string.digits, k=length)) diff --git a/tests/integration/test_keeper_snapshots_multinode/configs/log_conf.xml b/tests/integration/test_keeper_snapshots_multinode/configs/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_keeper_snapshots_multinode/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_keeper_snapshots_multinode/test.py b/tests/integration/test_keeper_snapshots_multinode/test.py index 96d19592d29..de4ed3a1a8f 100644 --- a/tests/integration/test_keeper_snapshots_multinode/test.py +++ b/tests/integration/test_keeper_snapshots_multinode/test.py @@ -7,9 +7,9 @@ import os import time cluster = 
ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/log_conf.xml'], stay_alive=True) -node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/log_conf.xml'], stay_alive=True) -node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/log_conf.xml'], stay_alive=True) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml'], stay_alive=True) +node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml'], stay_alive=True) from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_zookeeper_converter/__init__.py b/tests/integration/test_keeper_zookeeper_converter/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_zookeeper_converter/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_zookeeper_converter/configs/keeper_config.xml b/tests/integration/test_keeper_zookeeper_converter/configs/keeper_config.xml new file mode 100644 index 00000000000..ceaca04762e --- /dev/null +++ b/tests/integration/test_keeper_zookeeper_converter/configs/keeper_config.xml @@ -0,0 +1,23 @@ +<yandex> + <keeper_server> + <tcp_port>9181</tcp_port> + <server_id>1</server_id> + <log_storage_path>/var/lib/clickhouse/coordination/logs</log_storage_path> + <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path> + + <coordination_settings> + <operation_timeout_ms>5000</operation_timeout_ms> + <session_timeout_ms>10000</session_timeout_ms> + <raft_logs_level>trace</raft_logs_level> + <snapshot_distance>75</snapshot_distance> + </coordination_settings> + + <raft_configuration> + <server> + <id>1</id> + <hostname>localhost</hostname> + <port>44444</port> + </server> + </raft_configuration> + </keeper_server> +</yandex> diff --git a/tests/integration/test_keeper_back_to_back/configs/logs_conf.xml b/tests/integration/test_keeper_zookeeper_converter/configs/logs_conf.xml similarity index 100% rename from tests/integration/test_keeper_back_to_back/configs/logs_conf.xml rename to tests/integration/test_keeper_zookeeper_converter/configs/logs_conf.xml diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py new file mode 100644 index 00000000000..eac2b4c45c5 --- /dev/null +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +from kazoo.client import KazooClient, KazooState +from kazoo.security import ACL, make_digest_acl, make_acl +from kazoo.exceptions import AuthFailedError, InvalidACLError, NoAuthError, KazooException +import os + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance('node', main_configs=['configs/keeper_config.xml', 'configs/logs_conf.xml'], stay_alive=True) + +def start_zookeeper(): + node.exec_in_container(['bash', '-c', '/opt/zookeeper/bin/zkServer.sh start']) + +def stop_zookeeper(): + node.exec_in_container(['bash', '-c', '/opt/zookeeper/bin/zkServer.sh stop']) + +def clear_zookeeper(): + node.exec_in_container(['bash', '-c', 'rm -fr /zookeeper/*']) + +def restart_and_clear_zookeeper(): + stop_zookeeper() + clear_zookeeper() + start_zookeeper() + +def clear_clickhouse_data(): + node.exec_in_container(['bash', '-c', 'rm -fr /var/lib/clickhouse/coordination/logs/* /var/lib/clickhouse/coordination/snapshots/*']) + +def convert_zookeeper_data(): + cmd = '/usr/bin/clickhouse keeper-converter --zookeeper-logs-dir /zookeeper/version-2/ --zookeeper-snapshots-dir /zookeeper/version-2/ --output-dir /var/lib/clickhouse/coordination/snapshots' + node.exec_in_container(['bash', '-c', cmd]) + +def stop_clickhouse(): + node.stop_clickhouse() + +def 
start_clickhouse(): + node.start_clickhouse() + +def copy_zookeeper_data(make_zk_snapshots): + stop_zookeeper() + + if make_zk_snapshots: # force ZooKeeper to write a snapshot + start_zookeeper() + stop_zookeeper() + + stop_clickhouse() + clear_clickhouse_data() + convert_zookeeper_data() + start_zookeeper() + start_clickhouse() + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def get_fake_zk(timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip('node') + ":9181", timeout=timeout) + _fake_zk_instance.start() + return _fake_zk_instance + +def get_genuine_zk(timeout=30.0): + _genuine_zk_instance = KazooClient(hosts=cluster.get_instance_ip('node') + ":2181", timeout=timeout) + _genuine_zk_instance.start() + return _genuine_zk_instance + +def compare_stats(stat1, stat2, path): + assert stat1.czxid == stat2.czxid, "path " + path + " czxids not equal for stats: " + str(stat1.czxid) + " != " + str(stat2.czxid) + assert stat1.mzxid == stat2.mzxid, "path " + path + " mzxids not equal for stats: " + str(stat1.mzxid) + " != " + str(stat2.mzxid) + assert stat1.version == stat2.version, "path " + path + " versions not equal for stats: " + str(stat1.version) + " != " + str(stat2.version) + assert stat1.cversion == stat2.cversion, "path " + path + " cversions not equal for stats: " + str(stat1.cversion) + " != " + str(stat2.cversion) + assert stat1.aversion == stat2.aversion, "path " + path + " aversions not equal for stats: " + str(stat1.aversion) + " != " + str(stat2.aversion) + assert stat1.ephemeralOwner == stat2.ephemeralOwner, "path " + path + " ephemeralOwners not equal for stats: " + str(stat1.ephemeralOwner) + " != " + str(stat2.ephemeralOwner) + assert stat1.dataLength == stat2.dataLength, "path " + path + " dataLengths not equal for stats: " + str(stat1.dataLength) + " != " + str(stat2.dataLength) + assert stat1.numChildren == stat2.numChildren, "path " + path + " numChildren not equal for stats: " + str(stat1.numChildren) + " != " + str(stat2.numChildren) + assert stat1.pzxid == stat2.pzxid, "path " + path + " pzxids not equal for stats: " + str(stat1.pzxid) + " != " + str(stat2.pzxid) + +def compare_states(zk1, zk2, path="/"): + data1, stat1 = zk1.get(path) + data2, stat2 = zk2.get(path) + print("Left Stat", stat1) + print("Right Stat", stat2) + assert data1 == data2, "Data not equal on path " + str(path) + # stats for the root and /zookeeper are implementation-specific, so skip comparing them + if path not in ("/", "/zookeeper"): + compare_stats(stat1, stat2, path) + + first_children = list(sorted(zk1.get_children(path))) + second_children = list(sorted(zk2.get_children(path))) + print("Got children left", first_children) + print("Got children right", second_children) + assert first_children == second_children, "Children are not equal on path " + path + + for child in first_children: + print("Checking child", os.path.join(path, child)) + compare_states(zk1, zk2, os.path.join(path, child)) + +@pytest.mark.parametrize( + ('create_snapshots'), + [ + True, False + ] +) +def test_smoke(started_cluster, create_snapshots): + restart_and_clear_zookeeper() + + genuine_connection = get_genuine_zk() + genuine_connection.create("/test", b"data") + + assert genuine_connection.get("/test")[0] == b"data" + + copy_zookeeper_data(create_snapshots) + + genuine_connection = get_genuine_zk() + fake_connection = get_fake_zk() + + compare_states(genuine_connection, fake_connection) + +def get_bytes(s): + return s.encode() + 
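The helpers above encode an ordering constraint worth spelling out: keeper-converter reads ZooKeeper's on-disk state, so ZooKeeper must be stopped (or restarted, which forces a snapshot) before conversion, and Keeper must start only after its coordination directory has been repopulated. A minimal sketch of that round trip, assuming ZooKeeper on localhost:2181 and Keeper on localhost:9181 as in the configs above (the function name and the direct subprocess call are illustrative; the tests run everything through exec_in_container):

import subprocess
from kazoo.client import KazooClient

def converter_round_trip():
    zk = KazooClient(hosts="localhost:2181")
    zk.start()
    zk.create("/smoke", b"data")  # seed some state in genuine ZooKeeper
    zk.stop()

    # ZooKeeper must be shut down so version-2/ holds a consistent
    # log/snapshot pair before the converter reads it.
    subprocess.check_call([
        "clickhouse", "keeper-converter",
        "--zookeeper-logs-dir", "/zookeeper/version-2/",
        "--zookeeper-snapshots-dir", "/zookeeper/version-2/",
        "--output-dir", "/var/lib/clickhouse/coordination/snapshots",
    ])

    # ClickHouse Keeper speaks the ZooKeeper wire protocol, so the same
    # kazoo client can verify the converted state once Keeper is up.
    keeper = KazooClient(hosts="localhost:9181")
    keeper.start()
    assert keeper.get("/smoke")[0] == b"data"
    keeper.stop()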
+@pytest.mark.parametrize( + ('create_snapshots'), + [ + True, False + ] +) +def test_simple_crud_requests(started_cluster, create_snapshots): + restart_and_clear_zookeeper() + + genuine_connection = get_genuine_zk() + for i in range(100): + genuine_connection.create("/test_create" + str(i), get_bytes("data" + str(i))) + + # some set queries + for i in range(10): + for j in range(i + 1): + genuine_connection.set("/test_create" + str(i), get_bytes("value" + str(j))) + + for i in range(10, 20): + genuine_connection.delete("/test_create" + str(i)) + + path = "/test_create_deep" + for i in range(10): + genuine_connection.create(path, get_bytes("data" + str(i))) + path = os.path.join(path, str(i)) + + + genuine_connection.create("/test_sequential", b"") + for i in range(10): + genuine_connection.create("/test_sequential/" + "a" * i + "-", get_bytes("dataX" + str(i)), sequence=True) + + genuine_connection.create("/test_ephemeral", b"") + for i in range(10): + genuine_connection.create("/test_ephemeral/" + str(i), get_bytes("dataX" + str(i)), ephemeral=True) + + copy_zookeeper_data(create_snapshots) + + genuine_connection = get_genuine_zk() + fake_connection = get_fake_zk() + + compare_states(genuine_connection, fake_connection) + + # in particular, ensure that the sequential-node counters are the same + genuine_connection.create("/test_sequential/" + "a" * 10 + "-", get_bytes("dataX" + str(i)), sequence=True) + fake_connection.create("/test_sequential/" + "a" * 10 + "-", get_bytes("dataX" + str(i)), sequence=True) + + first_children = list(sorted(genuine_connection.get_children("/test_sequential"))) + second_children = list(sorted(fake_connection.get_children("/test_sequential"))) + assert first_children == second_children, "Children are not equal on path /test_sequential" + +@pytest.mark.parametrize( + ('create_snapshots'), + [ + True, False + ] +) +def test_multi_and_failed_requests(started_cluster, create_snapshots): + restart_and_clear_zookeeper() + + genuine_connection = get_genuine_zk() + genuine_connection.create('/test_multitransactions') + for i in range(10): + t = genuine_connection.transaction() + t.create('/test_multitransactions/freddy' + str(i), get_bytes('data' + str(i))) + t.create('/test_multitransactions/fred' + str(i), get_bytes('value' + str(i)), ephemeral=True) + t.create('/test_multitransactions/smith' + str(i), get_bytes('entity' + str(i)), sequence=True) + t.set_data('/test_multitransactions', get_bytes("somedata" + str(i))) + t.commit() + + with pytest.raises(Exception): + genuine_connection.set('/test_multitransactions/freddy0', get_bytes('mustfail' + str(i)), version=1) + + t = genuine_connection.transaction() + + t.create('/test_bad_transaction', get_bytes('data' + str(1))) + t.check('/test_multitransactions', version=32) + t.create('/test_bad_transaction1', get_bytes('data' + str(2))) + # should fail + t.commit() + + assert genuine_connection.exists('/test_bad_transaction') is None + assert genuine_connection.exists('/test_bad_transaction1') is None + + t = genuine_connection.transaction() + t.create('/test_bad_transaction2', get_bytes('data' + str(1))) + t.delete('/test_multitransactions/freddy0', version=5) + + # should fail + t.commit() + assert genuine_connection.exists('/test_bad_transaction2') is None + assert genuine_connection.exists('/test_multitransactions/freddy0') is not None + + copy_zookeeper_data(create_snapshots) + + genuine_connection = get_genuine_zk() + fake_connection = get_fake_zk() + + compare_states(genuine_connection, fake_connection) + +@pytest.mark.parametrize( + 
('create_snapshots'), + [ + True, False + ] +) +def test_acls(started_cluster, create_snapshots): + restart_and_clear_zookeeper() + genuine_connection = get_genuine_zk() + genuine_connection.add_auth('digest', 'user1:password1') + genuine_connection.add_auth('digest', 'user2:password2') + genuine_connection.add_auth('digest', 'user3:password3') + + genuine_connection.create("/test_multi_all_acl", b"data", acl=[make_acl("auth", "", all=True)]) + + other_connection = get_genuine_zk() + other_connection.add_auth('digest', 'user1:password1') + other_connection.set("/test_multi_all_acl", b"X") + assert other_connection.get("/test_multi_all_acl")[0] == b"X" + + yet_other_auth_connection = get_genuine_zk() + yet_other_auth_connection.add_auth('digest', 'user2:password2') + + yet_other_auth_connection.set("/test_multi_all_acl", b"Y") + + genuine_connection.add_auth('digest', 'user3:password3') + + # just to check that we are able to deserialize it + genuine_connection.set_acls("/test_multi_all_acl", acls=[make_acl("auth", "", read=True, write=False, create=True, delete=True, admin=True)]) + + no_auth_connection = get_genuine_zk() + + with pytest.raises(Exception): + no_auth_connection.set("/test_multi_all_acl", b"Z") + + copy_zookeeper_data(create_snapshots) + + genuine_connection = get_genuine_zk() + genuine_connection.add_auth('digest', 'user1:password1') + genuine_connection.add_auth('digest', 'user2:password2') + genuine_connection.add_auth('digest', 'user3:password3') + + fake_connection = get_fake_zk() + fake_connection.add_auth('digest', 'user1:password1') + fake_connection.add_auth('digest', 'user2:password2') + fake_connection.add_auth('digest', 'user3:password3') + + compare_states(genuine_connection, fake_connection) + + for connection in [genuine_connection, fake_connection]: + acls, stat = connection.get_acls("/test_multi_all_acl") + assert stat.aversion == 1 + assert len(acls) == 3 + for acl in acls: + assert acl.acl_list == ['READ', 'CREATE', 'DELETE', 'ADMIN'] + assert acl.id.scheme == 'digest' + assert acl.perms == 29 + assert acl.id.id in ('user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=', 'user2:lo/iTtNMP+gEZlpUNaCqLYO3i5U=', 'user3:wr5Y0kEs9nFX3bKrTMKxrlcFeWo=') diff --git a/tests/integration/test_library_bridge/configs/log_conf.xml b/tests/integration/test_library_bridge/configs/log_conf.xml deleted file mode 100644 index eed7a435b68..00000000000 --- a/tests/integration/test_library_bridge/configs/log_conf.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - /var/log/clickhouse-server/clickhouse-library-bridge.log - /var/log/clickhouse-server/clickhouse-library-bridge.err.log - /var/log/clickhouse-server/clickhouse-library-bridge.stdout - /var/log/clickhouse-server/clickhouse-library-bridge.stderr - trace - - diff --git a/tests/integration/test_library_bridge/test.py b/tests/integration/test_library_bridge/test.py index 552e4033b8e..ba44918bd60 100644 --- a/tests/integration/test_library_bridge/test.py +++ b/tests/integration/test_library_bridge/test.py @@ -9,9 +9,7 @@ cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', dictionaries=['configs/dictionaries/dict1.xml'], - main_configs=[ - 'configs/config.d/config.xml', - 'configs/log_conf.xml']) + main_configs=['configs/config.d/config.xml']) @pytest.fixture(scope="module") def ch_cluster(): diff --git 
a/tests/integration/test_log_family_hdfs/configs/config.d/log_conf.xml b/tests/integration/test_log_family_hdfs/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_log_family_hdfs/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ -<yandex> - <shutdown_wait_unfinished>3</shutdown_wait_unfinished> - <logger> - <level>trace</level> - <log>/var/log/clickhouse-server/log.log</log> - <errorlog>/var/log/clickhouse-server/log.err.log</errorlog> - <size>1000M</size> - <count>10</count> - <stderr>/var/log/clickhouse-server/stderr.log</stderr> - <stdout>/var/log/clickhouse-server/stdout.log</stdout> - </logger> -</yandex> diff --git a/tests/integration/test_log_family_hdfs/test.py b/tests/integration/test_log_family_hdfs/test.py index 3d067f0febe..a38b067358e 100644 --- a/tests/integration/test_log_family_hdfs/test.py +++ b/tests/integration/test_log_family_hdfs/test.py @@ -12,7 +12,7 @@ def started_cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance("node", - main_configs=["configs/storage_conf.xml", "configs/config.d/log_conf.xml"], + main_configs=["configs/storage_conf.xml"], with_hdfs=True) logging.info("Starting cluster...") cluster.start() diff --git a/tests/integration/test_log_family_s3/configs/config.d/log_conf.xml b/tests/integration/test_log_family_s3/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_log_family_s3/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ -<yandex> - <shutdown_wait_unfinished>3</shutdown_wait_unfinished> - <logger> - <level>trace</level> - <log>/var/log/clickhouse-server/log.log</log> - <errorlog>/var/log/clickhouse-server/log.err.log</errorlog> - <size>1000M</size> - <count>10</count> - <stderr>/var/log/clickhouse-server/stderr.log</stderr> - <stdout>/var/log/clickhouse-server/stdout.log</stdout> - </logger> -</yandex> diff --git a/tests/integration/test_log_family_s3/test.py b/tests/integration/test_log_family_s3/test.py index 115ec47871b..71d47a8a2e8 100644 --- a/tests/integration/test_log_family_s3/test.py +++ b/tests/integration/test_log_family_s3/test.py @@ -10,7 +10,7 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance("node", - main_configs=["configs/minio.xml", "configs/ssl.xml", "configs/config.d/log_conf.xml"], + main_configs=["configs/minio.xml", "configs/ssl.xml"], with_minio=True) logging.info("Starting cluster...") cluster.start() diff --git a/tests/integration/test_lost_part/__init__.py b/tests/integration/test_lost_part/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_lost_part/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py new file mode 100644 index 00000000000..614df52063f --- /dev/null +++ b/tests/integration/test_lost_part/test.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 + +import pytest +import time +import ast +import random + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', with_zookeeper=True) +node2 = cluster.add_instance('node2', with_zookeeper=True) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def remove_part_from_disk(node, table, part_name): + part_path = node.query( + "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(table, part_name)).strip() + if not part_path: + raise Exception("Part " + part_name + " doesn't exist") + node.exec_in_container(['bash', '-c', 'rm -r {p}/*'.format(p=part_path)], privileged=True) + + +def test_lost_part_same_replica(start_cluster): + for node in [node1, node2]: + node.query( + "CREATE TABLE 
mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{}') ORDER BY tuple() PARTITION BY date".format(node.name)) + + node1.query("SYSTEM STOP MERGES mt0") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(5): + node1.query("INSERT INTO mt0 VALUES ({}, toDate('2020-10-01'))".format(i)) + + for i in range(20): + parts_to_merge = node1.query("SELECT parts_to_merge FROM system.replication_queue") + if parts_to_merge: + parts_list = list(sorted(ast.literal_eval(parts_to_merge))) + print("Got parts list", parts_list) + if len(parts_list) < 3: + raise Exception("Got too small parts list {}".format(parts_list)) + break + time.sleep(1) + + victim_part_from_the_middle = random.choice(parts_list[1:-1]) + print("Will corrupt part", victim_part_from_the_middle) + + remove_part_from_disk(node1, 'mt0', victim_part_from_the_middle) + + node1.query("DETACH TABLE mt0") + + node1.query("ATTACH TABLE mt0") + + node1.query("SYSTEM START MERGES mt0") + + for i in range(10): + result = node1.query("SELECT count() FROM system.replication_queue") + if int(result) == 0: + break + time.sleep(1) + else: + assert False, "Still have something in replication queue:\n" + node1.query("SELECT count() FROM system.replication_queue FORMAT Vertical") + + assert node1.contains_in_log("Created empty part"), "Seems like empty part {} is not created or log message changed".format(victim_part_from_the_middle) + + assert node1.query("SELECT COUNT() FROM mt0") == "4\n" + + node2.query("SYSTEM START REPLICATION QUEUES") + + assert_eq_with_retry(node2, "SELECT COUNT() FROM mt0", "4") + assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") + +def test_lost_part_other_replica(start_cluster): + for node in [node1, node2]: + node.query( + "CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{}') ORDER BY tuple()".format(node.name)) + + node1.query("SYSTEM STOP MERGES mt1") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(5): + node1.query("INSERT INTO mt1 VALUES ({})".format(i)) + + for i in range(20): + parts_to_merge = node1.query("SELECT parts_to_merge FROM system.replication_queue") + if parts_to_merge: + parts_list = list(sorted(ast.literal_eval(parts_to_merge))) + print("Got parts list", parts_list) + if len(parts_list) < 3: + raise Exception("Got too small parts list {}".format(parts_list)) + break + time.sleep(1) + + victim_part_from_the_middle = random.choice(parts_list[1:-1]) + print("Will corrupt part", victim_part_from_the_middle) + + remove_part_from_disk(node1, 'mt1', victim_part_from_the_middle) + + # other way to detect broken parts + node1.query("CHECK TABLE mt1") + + node2.query("SYSTEM START REPLICATION QUEUES") + + for i in range(10): + result = node2.query("SELECT count() FROM system.replication_queue") + if int(result) == 0: + break + time.sleep(1) + else: + assert False, "Still have something in replication queue:\n" + node2.query("SELECT * FROM system.replication_queue FORMAT Vertical") + + assert node1.contains_in_log("Created empty part"), "Seems like empty part {} is not created or log message changed".format(victim_part_from_the_middle) + + assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4") + assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") + + node1.query("SYSTEM START MERGES mt1") + + assert_eq_with_retry(node1, "SELECT COUNT() FROM mt1", "4") + assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") + +def 
test_lost_part_mutation(start_cluster): + for node in [node1, node2]: + node.query( + "CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{}') ORDER BY tuple()".format(node.name)) + + node1.query("SYSTEM STOP MERGES mt2") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(2): + node1.query("INSERT INTO mt2 VALUES ({})".format(i)) + + node1.query("ALTER TABLE mt2 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}) + + for i in range(20): + parts_to_mutate = node1.query("SELECT count() FROM system.replication_queue") + # two mutations for both replicas + if int(parts_to_mutate) == 4: + break + time.sleep(1) + + remove_part_from_disk(node1, 'mt2', 'all_1_1_0') + + # other way to detect broken parts + node1.query("CHECK TABLE mt2") + + node1.query("SYSTEM START MERGES mt2") + + for i in range(10): + result = node1.query("SELECT count() FROM system.replication_queue") + if int(result) == 0: + break + time.sleep(1) + else: + assert False, "Still have something in replication queue:\n" + node1.query("SELECT * FROM system.replication_queue FORMAT Vertical") + + assert_eq_with_retry(node1, "SELECT COUNT() FROM mt2", "1") + assert_eq_with_retry(node1, "SELECT SUM(id) FROM mt2", "777") + assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") + + node2.query("SYSTEM START REPLICATION QUEUES") + + assert_eq_with_retry(node2, "SELECT COUNT() FROM mt2", "1") + assert_eq_with_retry(node2, "SELECT SUM(id) FROM mt2", "777") + assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") + + +def test_lost_last_part(start_cluster): + for node in [node1, node2]: + node.query( + "CREATE TABLE mt3 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{}') ORDER BY tuple()".format(node.name)) + + node1.query("SYSTEM STOP MERGES mt3") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(1): + node1.query("INSERT INTO mt3 VALUES ({})".format(i)) + + # actually not important + node1.query("ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}) + + remove_part_from_disk(node1, 'mt3', 'all_0_0_0') + + # other way to detect broken parts + node1.query("CHECK TABLE mt3") + + node1.query("SYSTEM START MERGES mt3") + + for i in range(10): + result = node1.query("SELECT count() FROM system.replication_queue") + assert int(result) <= 1, "Have a lot of entries in queue {}".format(node1.query("SELECT * FROM system.replication_queue FORMAT Vertical")) + if node1.contains_in_log("Cannot create empty part") and node1.contains_in_log("DROP PARTITION"): + break + time.sleep(1) + else: + assert False, "Don't have required messages in node1 log" + + node1.query("ALTER TABLE mt3 DROP PARTITION ID 'all'") + + assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0") + assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py index c5db90821e2..3fd1cb0ecae 100644 --- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py @@ -850,9 +850,93 @@ def materialize_with_column_comments_test(clickhouse_node, mysql_node, service_n mysql_node.query("CREATE TABLE materialize_with_column_comments_test.test (id int NOT NULL PRIMARY KEY, value VARCHAR(255) COMMENT 'test comment') ENGINE=InnoDB") 
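    # A note on the expected strings used with check_query in this test:
    # DESCRIBE TABLE returns raw TSV with seven tab-separated fields per column
    # (name, type, default_type, default_expression, comment, codec_expression,
    # ttl_expression), so the MySQL column comment must surface in the fifth
    # field, and the trailing _sign/_version rows are the virtual columns that
    # MaterializeMySQL adds to every table.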
clickhouse_node.query("CREATE DATABASE materialize_with_column_comments_test ENGINE = MaterializeMySQL('{}:3306', 'materialize_with_column_comments_test', 'root', 'clickhouse')".format(service_name)) check_query(clickhouse_node, "DESCRIBE TABLE materialize_with_column_comments_test.test", "id\tInt32\t\t\t\t\t\nvalue\tNullable(String)\t\t\ttest comment\t\t\n_sign\tInt8\tMATERIALIZED\t1\t\t\t\n_version\tUInt64\tMATERIALIZED\t1\t\t\t\n") + mysql_node.query("ALTER TABLE materialize_with_column_comments_test.test MODIFY value VARCHAR(255) COMMENT 'comment test'") + check_query(clickhouse_node, "DESCRIBE TABLE materialize_with_column_comments_test.test", "id\tInt32\t\t\t\t\t\nvalue\tNullable(String)\t\t\tcomment test\t\t\n_sign\tInt8\tMATERIALIZED\t1\t\t\t\n_version\tUInt64\tMATERIALIZED\t1\t\t\t\n") + mysql_node.query("ALTER TABLE materialize_with_column_comments_test.test ADD value2 int COMMENT 'test comment 2'") + check_query(clickhouse_node, "DESCRIBE TABLE materialize_with_column_comments_test.test", "id\tInt32\t\t\t\t\t\nvalue\tNullable(String)\t\t\tcomment test\t\t\nvalue2\tNullable(Int32)\t\t\ttest comment 2\t\t\n_sign\tInt8\tMATERIALIZED\t1\t\t\t\n_version\tUInt64\tMATERIALIZED\t1\t\t\t\n") clickhouse_node.query("DROP DATABASE materialize_with_column_comments_test") mysql_node.query("DROP DATABASE materialize_with_column_comments_test") +def materialize_with_enum8_test(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS materialize_with_enum8_test") + clickhouse_node.query("DROP DATABASE IF EXISTS materialize_with_enum8_test") + mysql_node.query("CREATE DATABASE materialize_with_enum8_test") + enum8_values_count = 127 + enum8_values = "" + enum8_values_with_backslash = "" + for i in range(1, enum8_values_count): + enum8_values += '\'' + str(i) + "\', " + enum8_values_with_backslash += "\\\'" + str(i) +"\\\' = " + str(i) + ", " + enum8_values += '\'' + str(enum8_values_count) + '\'' + enum8_values_with_backslash += "\\\'" + str(enum8_values_count) +"\\\' = " + str(enum8_values_count) + mysql_node.query("CREATE TABLE materialize_with_enum8_test.test (id int NOT NULL PRIMARY KEY, value ENUM(" + enum8_values + ")) ENGINE=InnoDB") + mysql_node.query("INSERT INTO materialize_with_enum8_test.test (id, value) VALUES (1, '1'),(2, '2')") + clickhouse_node.query("CREATE DATABASE materialize_with_enum8_test ENGINE = MaterializeMySQL('{}:3306', 'materialize_with_enum8_test', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SELECT value FROM materialize_with_enum8_test.test ORDER BY id", "1\n2\n") + mysql_node.query("INSERT INTO materialize_with_enum8_test.test (id, value) VALUES (3, '127')") + check_query(clickhouse_node, "SELECT value FROM materialize_with_enum8_test.test ORDER BY id", "1\n2\n127\n") + check_query(clickhouse_node, "DESCRIBE TABLE materialize_with_enum8_test.test", "id\tInt32\t\t\t\t\t\nvalue\tNullable(Enum8(" + enum8_values_with_backslash + "))\t\t\t\t\t\n_sign\tInt8\tMATERIALIZED\t1\t\t\t\n_version\tUInt64\tMATERIALIZED\t1\t\t\t\n") + clickhouse_node.query("DROP DATABASE materialize_with_enum8_test") + mysql_node.query("DROP DATABASE materialize_with_enum8_test") + +def materialize_with_enum16_test(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS materialize_with_enum16_test") + clickhouse_node.query("DROP DATABASE IF EXISTS materialize_with_enum16_test") + mysql_node.query("CREATE DATABASE materialize_with_enum16_test") + enum16_values_count = 600 + enum16_values = "" + 
enum16_values_with_backslash = "" + for i in range(1, enum16_values_count): + enum16_values += '\'' + str(i) + "\', " + enum16_values_with_backslash += "\\\'" + str(i) +"\\\' = " + str(i) + ", " + enum16_values += '\'' + str(enum16_values_count) + '\'' + enum16_values_with_backslash += "\\\'" + str(enum16_values_count) +"\\\' = " + str(enum16_values_count) + mysql_node.query("CREATE TABLE materialize_with_enum16_test.test (id int NOT NULL PRIMARY KEY, value ENUM(" + enum16_values + ")) ENGINE=InnoDB") + mysql_node.query("INSERT INTO materialize_with_enum16_test.test (id, value) VALUES (1, '1'),(2, '2')") + clickhouse_node.query("CREATE DATABASE materialize_with_enum16_test ENGINE = MaterializeMySQL('{}:3306', 'materialize_with_enum16_test', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SELECT value FROM materialize_with_enum16_test.test ORDER BY id", "1\n2\n") + mysql_node.query("INSERT INTO materialize_with_enum16_test.test (id, value) VALUES (3, '500')") + check_query(clickhouse_node, "SELECT value FROM materialize_with_enum16_test.test ORDER BY id", "1\n2\n500\n") + check_query(clickhouse_node, "DESCRIBE TABLE materialize_with_enum16_test.test", "id\tInt32\t\t\t\t\t\nvalue\tNullable(Enum16(" + enum16_values_with_backslash + "))\t\t\t\t\t\n_sign\tInt8\tMATERIALIZED\t1\t\t\t\n_version\tUInt64\tMATERIALIZED\t1\t\t\t\n") + clickhouse_node.query("DROP DATABASE materialize_with_enum16_test") + mysql_node.query("DROP DATABASE materialize_with_enum16_test") + +def alter_enum8_to_enum16_test(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS alter_enum8_to_enum16_test") + clickhouse_node.query("DROP DATABASE IF EXISTS alter_enum8_to_enum16_test") + mysql_node.query("CREATE DATABASE alter_enum8_to_enum16_test") + + enum8_values_count = 100 + enum8_values = "" + enum8_values_with_backslash = "" + for i in range(1, enum8_values_count): + enum8_values += '\'' + str(i) + "\', " + enum8_values_with_backslash += "\\\'" + str(i) +"\\\' = " + str(i) + ", " + enum8_values += '\'' + str(enum8_values_count) + '\'' + enum8_values_with_backslash += "\\\'" + str(enum8_values_count) +"\\\' = " + str(enum8_values_count) + mysql_node.query("CREATE TABLE alter_enum8_to_enum16_test.test (id int NOT NULL PRIMARY KEY, value ENUM(" + enum8_values + ")) ENGINE=InnoDB") + mysql_node.query("INSERT INTO alter_enum8_to_enum16_test.test (id, value) VALUES (1, '1'),(2, '2')") + clickhouse_node.query("CREATE DATABASE alter_enum8_to_enum16_test ENGINE = MaterializeMySQL('{}:3306', 'alter_enum8_to_enum16_test', 'root', 'clickhouse')".format(service_name)) + mysql_node.query("INSERT INTO alter_enum8_to_enum16_test.test (id, value) VALUES (3, '75')") + check_query(clickhouse_node, "SELECT value FROM alter_enum8_to_enum16_test.test ORDER BY id", "1\n2\n75\n") + check_query(clickhouse_node, "DESCRIBE TABLE alter_enum8_to_enum16_test.test", "id\tInt32\t\t\t\t\t\nvalue\tNullable(Enum8(" + enum8_values_with_backslash + "))\t\t\t\t\t\n_sign\tInt8\tMATERIALIZED\t1\t\t\t\n_version\tUInt64\tMATERIALIZED\t1\t\t\t\n") + + enum16_values_count = 600 + enum16_values = "" + enum16_values_with_backslash = "" + for i in range(1, enum16_values_count): + enum16_values += '\'' + str(i) + "\', " + enum16_values_with_backslash += "\\\'" + str(i) +"\\\' = " + str(i) + ", " + enum16_values += '\'' + str(enum16_values_count) + '\'' + enum16_values_with_backslash += "\\\'" + str(enum16_values_count) +"\\\' = " + str(enum16_values_count) + mysql_node.query("ALTER TABLE 
alter_enum8_to_enum16_test.test MODIFY COLUMN value ENUM(" + enum16_values + ")") + check_query(clickhouse_node, "DESCRIBE TABLE alter_enum8_to_enum16_test.test", "id\tInt32\t\t\t\t\t\nvalue\tNullable(Enum16(" + enum16_values_with_backslash + "))\t\t\t\t\t\n_sign\tInt8\tMATERIALIZED\t1\t\t\t\n_version\tUInt64\tMATERIALIZED\t1\t\t\t\n") + mysql_node.query("INSERT INTO alter_enum8_to_enum16_test.test (id, value) VALUES (4, '500')") + check_query(clickhouse_node, "SELECT value FROM alter_enum8_to_enum16_test.test ORDER BY id", "1\n2\n75\n500\n") + + clickhouse_node.query("DROP DATABASE alter_enum8_to_enum16_test") + mysql_node.query("DROP DATABASE alter_enum8_to_enum16_test") + def move_to_prewhere_and_column_filtering(clickhouse_node, mysql_node, service_name): clickhouse_node.query("DROP DATABASE IF EXISTS cond_on_key_col") mysql_node.query("DROP DATABASE IF EXISTS cond_on_key_col") diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py index e26500f07b3..252cf551d2d 100644 --- a/tests/integration/test_materialize_mysql_database/test.py +++ b/tests/integration/test_materialize_mysql_database/test.py @@ -223,6 +223,15 @@ def test_materialize_with_column_comments(started_cluster, started_mysql_8_0, st materialize_with_ddl.materialize_with_column_comments_test(clickhouse_node, started_mysql_5_7, "mysql57") materialize_with_ddl.materialize_with_column_comments_test(clickhouse_node, started_mysql_8_0, "mysql80") +@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary]) +def test_materialize_with_enum(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.materialize_with_enum8_test(clickhouse_node, started_mysql_5_7, "mysql57") + materialize_with_ddl.materialize_with_enum16_test(clickhouse_node, started_mysql_5_7, "mysql57") + materialize_with_ddl.alter_enum8_to_enum16_test(clickhouse_node, started_mysql_5_7, "mysql57") + materialize_with_ddl.materialize_with_enum8_test(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.materialize_with_enum16_test(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.alter_enum8_to_enum16_test(clickhouse_node, started_mysql_8_0, "mysql80") + @pytest.mark.parametrize(('clickhouse_node'), [node_disable_bytes_settings, node_disable_rows_settings]) def test_mysql_settings(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): diff --git a/tests/integration/test_max_http_connections_for_replication/configs/log_conf.xml b/tests/integration/test_max_http_connections_for_replication/configs/log_conf.xml deleted file mode 100644 index 0de2745ca4c..00000000000 --- a/tests/integration/test_max_http_connections_for_replication/configs/log_conf.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_max_http_connections_for_replication/test.py b/tests/integration/test_max_http_connections_for_replication/test.py index 634697c8668..3921cbfd1ae 100644 --- a/tests/integration/test_max_http_connections_for_replication/test.py +++ b/tests/integration/test_max_http_connections_for_replication/test.py @@ -24,9 +24,9 @@ def _fill_nodes(nodes, shard, connections_count): cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', user_configs=[], - 
main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) + main_configs=['configs/remote_servers.xml'], with_zookeeper=True) node2 = cluster.add_instance('node2', user_configs=[], - main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) + main_configs=['configs/remote_servers.xml'], with_zookeeper=True) @pytest.fixture(scope="module") @@ -78,12 +78,9 @@ def test_keepalive_timeout(start_small_cluster): assert not node2.contains_in_log("No message received"), "Found 'No message received' in clickhouse-server.log" -node3 = cluster.add_instance('node3', user_configs=[], - main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) -node4 = cluster.add_instance('node4', user_configs=[], - main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) -node5 = cluster.add_instance('node5', user_configs=[], - main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True) +node3 = cluster.add_instance('node3', user_configs=[], main_configs=['configs/remote_servers.xml'], with_zookeeper=True) +node4 = cluster.add_instance('node4', user_configs=[], main_configs=['configs/remote_servers.xml'], with_zookeeper=True) +node5 = cluster.add_instance('node5', user_configs=[], main_configs=['configs/remote_servers.xml'], with_zookeeper=True) @pytest.fixture(scope="module") diff --git a/tests/integration/test_merge_tree_hdfs/configs/config.d/log_conf.xml b/tests/integration/test_merge_tree_hdfs/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_merge_tree_hdfs/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 0984e4d288a..223ad2e1af2 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -58,8 +58,7 @@ def generate_values(date_str, count, sign=1): def cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("node", main_configs=["configs/config.d/storage_conf.xml", - "configs/config.d/log_conf.xml"], with_hdfs=True) + cluster.add_instance("node", main_configs=["configs/config.d/storage_conf.xml"], with_hdfs=True) logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") diff --git a/tests/integration/test_merge_tree_s3/configs/config.d/log_conf.xml b/tests/integration/test_merge_tree_s3/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_merge_tree_s3/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 31df6dff374..41f59e0cbb4 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -47,9 +47,10 @@ def replace_config(old, new): def cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("node", 
main_configs=["configs/config.d/storage_conf.xml", - "configs/config.d/bg_processing_pool_conf.xml", - "configs/config.d/log_conf.xml"], with_minio=True) + cluster.add_instance("node", + main_configs=["configs/config.d/storage_conf.xml", + "configs/config.d/bg_processing_pool_conf.xml"], + with_minio=True) logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/log_conf.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 5af9582363a..4dec1bc713f 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -43,8 +43,7 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance("node", - main_configs=["configs/config.d/log_conf.xml", - "configs/config.d/storage_conf.xml", + main_configs=["configs/config.d/storage_conf.xml", "configs/config.d/instant_moves.xml", "configs/config.d/part_log.xml"], with_minio=True) diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py index 809fff6695c..babbea2beba 100644 --- a/tests/integration/test_merge_tree_s3_restore/test.py +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -9,7 +9,7 @@ from helpers.cluster import ClickHouseCluster, get_instances_dir SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) NOT_RESTORABLE_CONFIG_PATH = os.path.join(SCRIPT_DIR, './{}/node_not_restorable/configs/config.d/storage_conf_not_restorable.xml'.format(get_instances_dir())) -COMMON_CONFIGS = ["configs/config.d/bg_processing_pool_conf.xml", "configs/config.d/log_conf.xml", "configs/config.d/clusters.xml"] +COMMON_CONFIGS = ["configs/config.d/bg_processing_pool_conf.xml", "configs/config.d/clusters.xml"] def replace_config(old, new): diff --git a/tests/integration/test_merge_tree_s3_with_cache/configs/config.d/log_conf.xml b/tests/integration/test_merge_tree_s3_with_cache/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_merge_tree_s3_with_cache/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_merge_tree_s3_with_cache/test.py 
b/tests/integration/test_merge_tree_s3_with_cache/test.py index 7022f90a2b9..da4543ccb87 100644 --- a/tests/integration/test_merge_tree_s3_with_cache/test.py +++ b/tests/integration/test_merge_tree_s3_with_cache/test.py @@ -8,8 +8,8 @@ from helpers.cluster import ClickHouseCluster def cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("node", main_configs=["configs/config.d/log_conf.xml", "configs/config.d/storage_conf.xml", - "configs/config.d/ssl_conf.xml", "configs/config.d/query_log.xml"], + cluster.add_instance("node", main_configs=["configs/config.d/storage_conf.xml", "configs/config.d/ssl_conf.xml", + "configs/config.d/query_log.xml"], user_configs=["configs/config.d/users.xml"], with_minio=True) logging.info("Starting cluster...") cluster.start() @@ -24,7 +24,7 @@ def get_query_stat(instance, hint): result = {} instance.query("SYSTEM FLUSH LOGS") events = instance.query(''' - SELECT ProfileEvents.Names, ProfileEvents.Values + SELECT ProfileEvents.keys, ProfileEvents.values FROM system.query_log ARRAY JOIN ProfileEvents WHERE type != 1 AND query LIKE '%{}%' diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 22f790e39c3..8f305fa8463 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -167,6 +167,28 @@ def test_bad_arguments_for_mysql_database_engine(started_cluster): assert 'Database engine MySQL requested literal argument.' in str(exception.value) mysql_node.query("DROP DATABASE test_bad_arguments") +def test_column_comments_for_mysql_database_engine(started_cluster): + with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', started_cluster.mysql_ip, started_cluster.mysql_port)) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')") + assert 'test_database' in clickhouse_node.query('SHOW DATABASES') + + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`), `test` int COMMENT 'test comment') ENGINE=InnoDB;") + assert 'test comment' in clickhouse_node.query('DESCRIBE TABLE `test_database`.`test_table`') + + time.sleep(3) # MySQL records table modification time with one-second granularity, so changes made within the same second cannot be detected + mysql_node.query("ALTER TABLE `test_database`.`test_table` ADD COLUMN `add_column` int(11) COMMENT 'add_column comment'") + assert 'add_column comment' in clickhouse_node.query( + "SELECT comment FROM system.columns WHERE table = 'test_table' AND database = 'test_database'") + + clickhouse_node.query("DROP DATABASE test_database") + mysql_node.query("DROP DATABASE test_database") + def test_data_types_support_level_for_mysql_database_engine(started_cluster): with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', started_cluster.mysql_ip, started_cluster.mysql_port)) as mysql_node: diff --git a/tests/integration/test_mysql_protocol/configs/log_conf.xml b/tests/integration/test_mysql_protocol/configs/log_conf.xml deleted file mode 100644 index 0346e43c81d..00000000000 --- a/tests/integration/test_mysql_protocol/configs/log_conf.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log
- 1000M - 10 - - diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index f2d3f46b9bc..6533a6a23f9 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -16,8 +16,8 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) DOCKER_COMPOSE_PATH = get_docker_compose_path() cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node', main_configs=["configs/log_conf.xml", "configs/ssl_conf.xml", "configs/mysql.xml", - "configs/dhparam.pem", "configs/server.crt", "configs/server.key"], +node = cluster.add_instance('node', main_configs=["configs/ssl_conf.xml", "configs/mysql.xml", "configs/dhparam.pem", + "configs/server.crt", "configs/server.key"], user_configs=["configs/users.xml"], env_variables={'UBSAN_OPTIONS': 'print_stacktrace=1'}, with_mysql_client=True) server_port = 9001 @@ -200,6 +200,14 @@ def test_mysql_replacement_query(started_cluster): assert stdout.decode() == 'DATABASE()\ndefault\n' +def test_mysql_select_user(started_cluster): + code, (stdout, stderr) = started_cluster.mysql_client_container.exec_run(''' + mysql --protocol tcp -h {host} -P {port} default -u default --password=123 + -e "select user();" + '''.format(host=started_cluster.get_instance_ip('node'), port=server_port), demux=True) + assert code == 0 + assert stdout.decode() == 'currentUser()\ndefault\n' + def test_mysql_explain(started_cluster): # EXPLAIN SELECT 1 code, (stdout, stderr) = started_cluster.mysql_client_container.exec_run(''' @@ -312,6 +320,7 @@ def test_mysql_set_variables(started_cluster): assert code == 0 + def test_python_client(started_cluster): client = pymysql.connections.Connection(host=started_cluster.get_instance_ip('node'), user='user_with_double_sha1', password='abacaba', database='default', port=server_port) diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 403d6f58972..39a283448f5 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -554,9 +554,7 @@ def test_concurrent_queries(started_cluster): busy_pool = Pool(5) p = busy_pool.map_async(node_insert, range(5)) p.wait() - result = node1.query("SELECT count() FROM test_pg_table", user='default') - logging.debug(result) - assert(int(result) == 5 * 5 * 1000) + assert_eq_with_retry(node1, "SELECT count() FROM test_pg_table", str(5*5*1000)) def node_insert_select(_): for i in range(5): @@ -566,9 +564,7 @@ def test_concurrent_queries(started_cluster): busy_pool = Pool(5) p = busy_pool.map_async(node_insert_select, range(5)) p.wait() - result = node1.query("SELECT count() FROM test_pg_table", user='default') - logging.debug(result) - assert(int(result) == 5 * 5 * 1000 * 2) + assert_eq_with_retry(node1, "SELECT count() FROM test_pg_table", str(5*5*1000*2)) node1.query('DROP TABLE test_pg_table;') cursor.execute('DROP TABLE clickhouse.test_pg_table;') diff --git a/tests/integration/test_postgresql_replica_database_engine/__init__.py b/tests/integration/test_postgresql_replica_database_engine/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_postgresql/configs/log_conf.xml b/tests/integration/test_postgresql_replica_database_engine/configs/log_conf.xml similarity index 100% rename from tests/integration/test_storage_postgresql/configs/log_conf.xml rename to tests/integration/test_postgresql_replica_database_engine/configs/log_conf.xml 
diff --git a/tests/integration/test_postgresql_replica_database_engine/configs/users.xml b/tests/integration/test_postgresql_replica_database_engine/configs/users.xml new file mode 100644 index 00000000000..1cdece49459 --- /dev/null +++ b/tests/integration/test_postgresql_replica_database_engine/configs/users.xml @@ -0,0 +1,8 @@ + + + + + 1 + + + diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py new file mode 100644 index 00000000000..97fd461e640 --- /dev/null +++ b/tests/integration/test_postgresql_replica_database_engine/test.py @@ -0,0 +1,928 @@ +import pytest +import time +import psycopg2 +import os.path as p +import random + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry +from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT +from helpers.test_tools import TSV + +from random import randrange +import threading + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', + main_configs = ['configs/log_conf.xml'], + user_configs = ['configs/users.xml'], + with_postgres=True, stay_alive=True) + +postgres_table_template = """ + CREATE TABLE IF NOT EXISTS {} ( + key Integer NOT NULL, value Integer, PRIMARY KEY(key)) + """ +postgres_table_template_2 = """ + CREATE TABLE IF NOT EXISTS {} ( + key Integer NOT NULL, value1 Integer, value2 Integer, value3 Integer, PRIMARY KEY(key)) + """ +postgres_table_template_3 = """ + CREATE TABLE IF NOT EXISTS {} ( + key1 Integer NOT NULL, value1 Integer, key2 Integer NOT NULL, value2 Integer NOT NULL) + """ + +def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database'): + if database == True: + conn_string = "host={} port={} dbname='{}' user='postgres' password='mysecretpassword'".format(ip, port, database_name) + else: + conn_string = "host={} port={} user='postgres' password='mysecretpassword'".format(ip, port) + + conn = psycopg2.connect(conn_string) + if auto_commit: + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + conn.autocommit = True + return conn + + +def create_postgres_db(cursor, name='postgres_database'): + cursor.execute("CREATE DATABASE {}".format(name)) + +def drop_postgres_db(cursor, name='postgres_database'): + cursor.execute("DROP DATABASE IF EXISTS {}".format(name)) + +def create_clickhouse_postgres_db(ip, port, name='postgres_database'): + instance.query(''' + CREATE DATABASE {} + ENGINE = PostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword')'''.format(name, ip, port, name)) + +def drop_clickhouse_postgres_db(name='postgres_database'): + instance.query('DROP DATABASE {}'.format(name)) + +def create_materialized_db(ip, port, + materialized_database='test_database', + postgres_database='postgres_database', + settings=[]): + create_query = "CREATE DATABASE {} ENGINE = MaterializedPostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword')".format(materialized_database, ip, port, postgres_database) + if len(settings) > 0: + create_query += " SETTINGS " + for i in range(len(settings)): + if i != 0: + create_query += ', ' + create_query += settings[i] + instance.query(create_query) + assert materialized_database in instance.query('SHOW DATABASES') + +def drop_materialized_db(materialized_database='test_database'): + instance.query('DROP DATABASE IF EXISTS {}'.format(materialized_database)) + assert materialized_database not in instance.query('SHOW DATABASES') + +def create_postgres_table(cursor, table_name, 
replica_identity_full=False, template=postgres_table_template): + cursor.execute("DROP TABLE IF EXISTS {}".format(table_name)) + cursor.execute(template.format(table_name)) + if replica_identity_full: + cursor.execute('ALTER TABLE {} REPLICA IDENTITY FULL;'.format(table_name)) + +queries = [ + 'INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);', + 'DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;', + 'UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;', + "UPDATE postgresql_replica_{} SET key=key+20000 WHERE key%2=0", + 'INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);', + 'DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;', + 'UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;', + "UPDATE postgresql_replica_{} SET key=key+80000 WHERE key%2=1", + 'DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;', + 'UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;', + 'INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);', + 'DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;', + 'UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;', + "UPDATE postgresql_replica_{} SET key=key+500000 WHERE key%2=1", + 'INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);', + 'DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;', + "UPDATE postgresql_replica_{} SET key=key+10000000", + 'UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;', + 'DELETE FROM postgresql_replica_{} WHERE value%5 = 0;' + ] + + +def assert_nested_table_is_created(table_name, materialized_database='test_database'): + database_tables = instance.query('SHOW TABLES FROM {}'.format(materialized_database)) + while table_name not in database_tables: + time.sleep(0.2) + database_tables = instance.query('SHOW TABLES FROM {}'.format(materialized_database)) + assert(table_name in database_tables) + + +def check_tables_are_synchronized(table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database'): + assert_nested_table_is_created(table_name, materialized_database) + + expected = instance.query('select * from {}.{} order by {};'.format(postgres_database, table_name, order_by)) + result = instance.query('select * from {}.{} order by {};'.format(materialized_database, table_name, order_by)) + + while result != expected: + time.sleep(0.5) + result = instance.query('select * from {}.{} order by {};'.format(materialized_database, table_name, order_by)) + + assert(result == expected) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + conn = get_postgres_conn(ip=cluster.postgres_ip, + port=cluster.postgres_port) + cursor = conn.cursor() + create_postgres_db(cursor, 'postgres_database') + create_clickhouse_postgres_db(ip=cluster.postgres_ip, + port=cluster.postgres_port) + + instance.query("DROP DATABASE IF EXISTS test_database") + yield cluster + + finally: + cluster.shutdown() + + +def test_load_and_sync_all_database_tables(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 5 + + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + create_postgres_table(cursor, table_name); + instance.query("INSERT INTO postgres_database.{} SELECT 
number, number from numbers(50)".format(table_name)) + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + assert 'test_database' in instance.query('SHOW DATABASES') + + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + check_tables_are_synchronized(table_name); + cursor.execute('drop table {};'.format(table_name)) + + result = instance.query('''SELECT count() FROM system.tables WHERE database = 'test_database';''') + assert(int(result) == NUM_TABLES) + + drop_materialized_db() + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + +def test_replicating_dml(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 5 + + for i in range(NUM_TABLES): + create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(50)".format(i, i)) + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + for i in range(NUM_TABLES): + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 50 + number, {} from numbers(1000)".format(i, i)) + + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + check_tables_are_synchronized(table_name); + + for i in range(NUM_TABLES): + cursor.execute('UPDATE postgresql_replica_{} SET value = {} * {} WHERE key < 50;'.format(i, i, i)) + cursor.execute('UPDATE postgresql_replica_{} SET value = {} * {} * {} WHERE key >= 50;'.format(i, i, i, i)) + + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + + for i in range(NUM_TABLES): + cursor.execute('DELETE FROM postgresql_replica_{} WHERE (value*value + {}) % 2 = 0;'.format(i, i)) + cursor.execute('UPDATE postgresql_replica_{} SET value = value - (value % 7) WHERE key > 128 AND key < 512;'.format(i)) + cursor.execute('DELETE FROM postgresql_replica_{} WHERE key % 7 = 1;'.format(i, i)) + + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + drop_materialized_db() + + +def test_different_data_types(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + cursor.execute('drop table if exists test_data_types;') + cursor.execute('drop table if exists test_array_data_type;') + + cursor.execute( + '''CREATE TABLE test_data_types ( + id integer PRIMARY KEY, a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, + h timestamp, i date, j decimal(5, 5), k numeric(5, 5))''') + + cursor.execute( + '''CREATE TABLE test_array_data_type + ( + key Integer NOT NULL PRIMARY KEY, + a Date[] NOT NULL, -- Date + b Timestamp[] NOT NULL, -- DateTime + c real[][] NOT NULL, -- Float32 + d double precision[][] NOT NULL, -- Float64 + e decimal(5, 5)[][][] NOT NULL, -- Decimal32 + f integer[][][] NOT NULL, -- Int32 + g Text[][][][][] NOT NULL, -- String + h Integer[][][], -- Nullable(Int32) + i Char(2)[][][][], -- Nullable(String) + k Char(2)[] -- Nullable(String) + )''') + + create_materialized_db(ip=started_cluster.postgres_ip, + 
port=started_cluster.postgres_port) + + for i in range(10): + instance.query(''' + INSERT INTO postgres_database.test_data_types VALUES + ({}, -32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12', '2000-05-12', 0.2, 0.2)'''.format(i)) + + check_tables_are_synchronized('test_data_types', 'id'); + result = instance.query('SELECT * FROM test_database.test_data_types ORDER BY id LIMIT 1;') + assert(result == '0\t-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\t2000-05-12\t0.20000\t0.20000\n') + + for i in range(10): + col = random.choice(['a', 'b', 'c']) + cursor.execute('UPDATE test_data_types SET {} = {};'.format(col, i)) + cursor.execute('''UPDATE test_data_types SET i = '2020-12-12';'''.format(col, i)) + + check_tables_are_synchronized('test_data_types', 'id'); + + instance.query("INSERT INTO postgres_database.test_array_data_type " + "VALUES (" + "0, " + "['2000-05-12', '2000-05-12'], " + "['2000-05-12 12:12:12', '2000-05-12 12:12:12'], " + "[[1.12345], [1.12345], [1.12345]], " + "[[1.1234567891], [1.1234567891], [1.1234567891]], " + "[[[0.11111, 0.11111]], [[0.22222, 0.22222]], [[0.33333, 0.33333]]], " + "[[[1, 1], [1, 1]], [[3, 3], [3, 3]], [[4, 4], [5, 5]]], " + "[[[[['winx', 'winx', 'winx']]]]], " + "[[[1, NULL], [NULL, 1]], [[NULL, NULL], [NULL, NULL]], [[4, 4], [5, 5]]], " + "[[[[NULL]]]], " + "[]" + ")") + + expected = ( + "0\t" + + "['2000-05-12','2000-05-12']\t" + + "['2000-05-12 12:12:12','2000-05-12 12:12:12']\t" + + "[[1.12345],[1.12345],[1.12345]]\t" + + "[[1.1234567891],[1.1234567891],[1.1234567891]]\t" + + "[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]\t" + "[[[1,1],[1,1]],[[3,3],[3,3]],[[4,4],[5,5]]]\t" + "[[[[['winx','winx','winx']]]]]\t" + "[[[1,NULL],[NULL,1]],[[NULL,NULL],[NULL,NULL]],[[4,4],[5,5]]]\t" + "[[[[NULL]]]]\t" + "[]\n" + ) + + check_tables_are_synchronized('test_array_data_type'); + result = instance.query('SELECT * FROM test_database.test_array_data_type ORDER BY key;') + assert(result == expected) + + drop_materialized_db() + cursor.execute('drop table if exists test_data_types;') + cursor.execute('drop table if exists test_array_data_type;') + + +def test_load_and_sync_subset_of_database_tables(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 10 + + publication_tables = '' + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, number from numbers(50)".format(i)) + + if i < int(NUM_TABLES/2): + if publication_tables != '': + publication_tables += ', ' + publication_tables += table_name + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=["materialized_postgresql_tables_list = '{}'".format(publication_tables)]) + assert 'test_database' in instance.query('SHOW DATABASES') + + time.sleep(1) + + for i in range(int(NUM_TABLES/2)): + table_name = 'postgresql_replica_{}'.format(i) + assert_nested_table_is_created(table_name) + + result = instance.query('''SELECT count() FROM system.tables WHERE database = 'test_database';''') + assert(int(result) == int(NUM_TABLES/2)) + + database_tables = instance.query('SHOW TABLES FROM test_database') 
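+ # Only the tables listed in materialized_postgresql_tables_list (the first NUM_TABLES/2 here) should have been replicated; the rest must not appear in the materialized database.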
+ for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + if i < int(NUM_TABLES/2): + assert table_name in database_tables + else: + assert table_name not in database_tables + instance.query("INSERT INTO postgres_database.{} SELECT 50 + number, {} from numbers(100)".format(table_name, i)) + + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + if i < int(NUM_TABLES/2): + check_tables_are_synchronized(table_name); + + drop_materialized_db() + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + +def test_changing_replica_identity_value(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, number from numbers(50)") + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT 100 + number, number from numbers(50)") + check_tables_are_synchronized('postgresql_replica'); + cursor.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ") + check_tables_are_synchronized('postgresql_replica'); + + drop_materialized_db() + cursor.execute('drop table if exists postgresql_replica;') + + +def test_clickhouse_restart(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 5 + + for i in range(NUM_TABLES): + create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(50)".format(i, i)) + + instance.query("CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')") + + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + check_tables_are_synchronized(table_name); + + for i in range(NUM_TABLES): + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 50 + number, {} from numbers(50000)".format(i, i)) + + instance.restart_clickhouse() + + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + + drop_materialized_db() + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + +def test_replica_identity_index(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + + create_postgres_table(cursor, 'postgresql_replica', template=postgres_table_template_3); + cursor.execute("CREATE unique INDEX idx on postgresql_replica(key1, key2);") + cursor.execute("ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx") + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(50, 10)") + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(100, 10)") + check_tables_are_synchronized('postgresql_replica', order_by='key1'); + + 
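+ # With REPLICA IDENTITY USING INDEX, PostgreSQL logs the old (key1, key2) values on every update, so replicated rows can still be matched even when one of the indexed key columns changes.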
cursor.execute("UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ") + cursor.execute("UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ") + cursor.execute("UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 ") + cursor.execute("UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 ") + check_tables_are_synchronized('postgresql_replica', order_by='key1'); + + cursor.execute('DELETE FROM postgresql_replica WHERE key2<75;') + check_tables_are_synchronized('postgresql_replica', order_by='key1'); + + drop_materialized_db() + cursor.execute('drop table if exists postgresql_replica;') + + +def test_table_schema_changes(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 5 + + for i in range(NUM_TABLES): + create_postgres_table(cursor, 'postgresql_replica_{}'.format(i), template=postgres_table_template_2); + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {}, {}, {} from numbers(25)".format(i, i, i, i)) + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=["materialized_postgresql_allow_automatic_update = 1"]) + + for i in range(NUM_TABLES): + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 25 + number, {}, {}, {} from numbers(25)".format(i, i, i, i)) + + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + + expected = instance.query("SELECT key, value1, value3 FROM test_database.postgresql_replica_3 ORDER BY key"); + + altered_table = random.randint(0, 4) + cursor.execute("ALTER TABLE postgresql_replica_{} DROP COLUMN value2".format(altered_table)) + + for i in range(NUM_TABLES): + cursor.execute("INSERT INTO postgresql_replica_{} VALUES (50, {}, {})".format(i, i, i)) + cursor.execute("UPDATE postgresql_replica_{} SET value3 = 12 WHERE key%2=0".format(i)) + + assert_nested_table_is_created('postgresql_replica_{}'.format(altered_table)) + check_tables_are_synchronized('postgresql_replica_{}'.format(altered_table)) + print('check1 OK') + + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + + for i in range(NUM_TABLES): + if i != altered_table: + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {}, {} from numbers(49)".format(i, i, i, i)) + else: + instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {} from numbers(49)".format(i, i, i)) + + check_tables_are_synchronized('postgresql_replica_{}'.format(altered_table)); + print('check2 OK') + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + + for i in range(NUM_TABLES): + cursor.execute('drop table postgresql_replica_{};'.format(i)) + + instance.query("DROP DATABASE test_database") + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + +def test_many_concurrent_queries(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 5 + + for i in range(NUM_TABLES): + create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); + instance.query('INSERT INTO postgres_database.postgresql_replica_{} SELECT number, number from numbers(10000)'.format(i)) 
+ n = [10000] + + query_pool = ['DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;', + 'UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;', + 'DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;', + 'UPDATE postgresql_replica_{} SET value = value*5 WHERE key % 2 = 1;', + 'DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;', + 'UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;', + 'DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;', + 'UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;', + 'DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;', + 'UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;', + 'DELETE FROM postgresql_replica_{} WHERE value%5 = 0;'] + + def attack(thread_id): + print('thread {}'.format(thread_id)) + k = 10000 + for i in range(20): + query_id = random.randrange(0, len(query_pool)) + table_id = random.randrange(0, 5) # num tables + + # random update / delete query + cursor.execute(query_pool[query_id].format(table_id)) + print("table {} query {} ok".format(table_id, query_id)) + + # allow some threads to do inserts as well (in a way that does not violate key constraints) + if thread_id < 5: + print("try insert table {}".format(thread_id)) + instance.query('INSERT INTO postgres_database.postgresql_replica_{} SELECT {}*10000*({} + number), number from numbers(1000)'.format(i, thread_id, k)) + k += 1 + print("insert table {} ok".format(thread_id)) + + if i == 5: + # also change primary key value + print("try update primary key {}".format(thread_id)) + cursor.execute("UPDATE postgresql_replica_{} SET key=key%100000+100000*{} WHERE key%{}=0".format(thread_id, i+1, i+1)) + print("update primary key {} ok".format(thread_id)) + + threads = [] + threads_num = 16 + for i in range(threads_num): + threads.append(threading.Thread(target=attack, args=(i,))) + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + n[0] = 50000 + for table_id in range(NUM_TABLES): + n[0] += 1 + instance.query('INSERT INTO postgres_database.postgresql_replica_{} SELECT {} + number, number from numbers(5000)'.format(table_id, n[0])) + #cursor.execute("UPDATE postgresql_replica_{} SET key=key%100000+100000*{} WHERE key%{}=0".format(table_id, table_id+1, table_id+1)) + + for thread in threads: + thread.join() + + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + count1 = instance.query('SELECT count() FROM postgres_database.postgresql_replica_{}'.format(i)) + count2 = instance.query('SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{})'.format(i)) + assert(int(count1) == int(count2)) + print(count1, count2) + + drop_materialized_db() + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + +def test_single_transaction(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True, auto_commit=False) + cursor = conn.cursor() + + create_postgres_table(cursor, 'postgresql_replica_0'); + conn.commit() + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + assert_nested_table_is_created('postgresql_replica_0') + + for query in queries: + print('query {}'.format(query)) + cursor.execute(query.format(0)) + + time.sleep(5) + result = instance.query("select
count() from test_database.postgresql_replica_0") + # no commit yet + assert(int(result) == 0) + + conn.commit() + check_tables_are_synchronized('postgresql_replica_0'); + + drop_materialized_db() + cursor.execute('drop table if exists postgresql_replica_0;') + + +def test_virtual_columns(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica_0'); + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=["materialized_postgresql_allow_automatic_update = 1"]) + assert_nested_table_is_created('postgresql_replica_0') + instance.query("INSERT INTO postgres_database.postgresql_replica_0 SELECT number, number from numbers(10)") + check_tables_are_synchronized('postgresql_replica_0'); + + # just check that it works; there is no comparison with `expected` because _version is taken from the LSN, which will be different each time. + result = instance.query('SELECT key, value, _sign, _version FROM test_database.postgresql_replica_0;') + print(result) + + cursor.execute("ALTER TABLE postgresql_replica_0 ADD COLUMN value2 integer") + instance.query("INSERT INTO postgres_database.postgresql_replica_0 SELECT number, number, number from numbers(10, 10)") + check_tables_are_synchronized('postgresql_replica_0'); + + result = instance.query('SELECT key, value, value2, _sign, _version FROM test_database.postgresql_replica_0;') + print(result) + + instance.query("INSERT INTO postgres_database.postgresql_replica_0 SELECT number, number, number from numbers(20, 10)") + check_tables_are_synchronized('postgresql_replica_0'); + + result = instance.query('SELECT key, value, value2, _sign, _version FROM test_database.postgresql_replica_0;') + print(result) + + drop_materialized_db() + cursor.execute('drop table if exists postgresql_replica_0;') + + +def test_multiple_databases(started_cluster): + drop_materialized_db('test_database_1') + drop_materialized_db('test_database_2') + NUM_TABLES = 5 + + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=False) + cursor = conn.cursor() + create_postgres_db(cursor, 'postgres_database_1') + create_postgres_db(cursor, 'postgres_database_2') + + conn1 = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True, database_name='postgres_database_1') + conn2 = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True, database_name='postgres_database_2') + + cursor1 = conn1.cursor() + cursor2 = conn2.cursor() + + create_clickhouse_postgres_db(cluster.postgres_ip, cluster.postgres_port, 'postgres_database_1') + create_clickhouse_postgres_db(cluster.postgres_ip, cluster.postgres_port, 'postgres_database_2') + + cursors = [cursor1, cursor2] + for cursor_id in range(len(cursors)): + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + create_postgres_table(cursors[cursor_id], table_name); + instance.query("INSERT INTO postgres_database_{}.{} SELECT number, number from numbers(50)".format(cursor_id + 1, table_name)) + print('database 1 tables: ', instance.query('''SELECT name FROM system.tables WHERE database = 'postgres_database_1';''')) + print('database 2 tables: ', instance.query('''SELECT name FROM system.tables WHERE database = 'postgres_database_2';''')) + + 
create_materialized_db(started_cluster.postgres_ip, started_cluster.postgres_port, + 'test_database_1', 'postgres_database_1') + create_materialized_db(started_cluster.postgres_ip, started_cluster.postgres_port, + 'test_database_2', 'postgres_database_2') + + cursors = [cursor1, cursor2] + for cursor_id in range(len(cursors)): + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + instance.query("INSERT INTO postgres_database_{}.{} SELECT 50 + number, number from numbers(50)".format(cursor_id + 1, table_name)) + + for cursor_id in range(len(cursors)): + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + check_tables_are_synchronized( + table_name, 'key', 'postgres_database_{}'.format(cursor_id + 1), 'test_database_{}'.format(cursor_id + 1)); + + for i in range(NUM_TABLES): + cursor1.execute('drop table if exists postgresql_replica_{};'.format(i)) + for i in range(NUM_TABLES): + cursor2.execute('drop table if exists postgresql_replica_{};'.format(i)) + + drop_clickhouse_postgres_db('postgres_database_1') + drop_clickhouse_postgres_db('postgres_database_2') + + drop_materialized_db('test_database_1') + drop_materialized_db('test_database_2') + + +def test_concurrent_transactions(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 6 + + for i in range(NUM_TABLES): + create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); + + def transaction(thread_id): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True, auto_commit=False) + cursor_ = conn.cursor() + for query in queries: + cursor_.execute(query.format(thread_id)) + print('thread {}, query {}'.format(thread_id, query)) + conn.commit() + + threads = [] + threads_num = 6 + for i in range(threads_num): + threads.append(threading.Thread(target=transaction, args=(i,))) + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + for thread in threads: + time.sleep(random.uniform(0, 0.5)) + thread.start() + for thread in threads: + thread.join() + + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + count1 = instance.query('SELECT count() FROM postgres_database.postgresql_replica_{}'.format(i)) + count2 = instance.query('SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{})'.format(i)) + print(int(count1), int(count2), sep=' ') + assert(int(count1) == int(count2)) + + drop_materialized_db() + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + +def test_abrupt_connection_loss_while_heavy_replication(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 6 + + for i in range(NUM_TABLES): + create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); + + def transaction(thread_id): + if thread_id % 2: + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True, auto_commit=True) + else: + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True, auto_commit=False) + cursor_ = conn.cursor() + for query in queries: + cursor_.execute(query.format(thread_id)) + print('thread {}, query 
{}'.format(thread_id, query)) + if thread_id % 2 == 0: + conn.commit() + + threads = [] + threads_num = 6 + for i in range(threads_num): + threads.append(threading.Thread(target=transaction, args=(i,))) + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + for thread in threads: + time.sleep(random.uniform(0, 0.5)) + thread.start() + + # Join here because it takes time for data to reach wal + for thread in threads: + thread.join() + time.sleep(1) + started_cluster.pause_container('postgres1') + + for i in range(NUM_TABLES): + result = instance.query("SELECT count() FROM test_database.postgresql_replica_{}".format(i)) + print(result) # Just debug + + started_cluster.unpause_container('postgres1') + + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + + for i in range(NUM_TABLES): + result = instance.query("SELECT count() FROM test_database.postgresql_replica_{}".format(i)) + print(result) # Just debug + + drop_materialized_db() + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + +def test_drop_database_while_replication_startup_not_finished(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 5 + + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + create_postgres_table(cursor, table_name); + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(100000)".format(table_name)) + + for i in range(6): + create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + time.sleep(0.5 * i) + drop_materialized_db() + + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + +def test_restart_server_while_replication_startup_not_finished(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 5 + + for i in range(NUM_TABLES): + table_name = 'postgresql_replica_{}'.format(i) + create_postgres_table(cursor, table_name); + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(100000)".format(table_name)) + + create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + time.sleep(0.5) + instance.restart_clickhouse() + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + + drop_materialized_db() + for i in range(NUM_TABLES): + cursor.execute('drop table postgresql_replica_{};'.format(i)) + + +def test_abrupt_server_restart_while_heavy_replication(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + NUM_TABLES = 6 + + for i in range(NUM_TABLES): + create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); + + def transaction(thread_id): + if thread_id % 2: + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True, auto_commit=True) + else: + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True, auto_commit=False) + cursor_ = conn.cursor() + for query in queries: + 
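+ # Each worker replays the full DML mix against its own table; even-numbered threads commit only once at the end, so their changes arrive as a single large transaction.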
cursor_.execute(query.format(thread_id)) + print('thread {}, query {}'.format(thread_id, query)) + if thread_id % 2 == 0: + conn.commit() + + threads = [] + threads_num = 6 + for i in range(threads_num): + threads.append(threading.Thread(target=transaction, args=(i,))) + + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + for thread in threads: + time.sleep(random.uniform(0, 0.5)) + thread.start() + + # Join here because it takes time for data to reach wal + for thread in threads: + thread.join() + instance.restart_clickhouse() + + for i in range(NUM_TABLES): + result = instance.query("SELECT count() FROM test_database.postgresql_replica_{}".format(i)) + print(result) # Just debug + + for i in range(NUM_TABLES): + check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + + for i in range(NUM_TABLES): + result = instance.query("SELECT count() FROM test_database.postgresql_replica_{}".format(i)) + print(result) # Just debug + + drop_materialized_db() + for i in range(NUM_TABLES): + cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + + +if __name__ == '__main__': + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() diff --git a/tests/integration/test_profile_events_s3/test.py b/tests/integration/test_profile_events_s3/test.py index 52f14c99b4a..98ad55b3adc 100644 --- a/tests/integration/test_profile_events_s3/test.py +++ b/tests/integration/test_profile_events_s3/test.py @@ -83,7 +83,7 @@ def get_query_stat(instance, hint): result = init_list.copy() instance.query("SYSTEM FLUSH LOGS") events = instance.query(''' - SELECT ProfileEvents.Names, ProfileEvents.Values + SELECT ProfileEvents.keys, ProfileEvents.values FROM system.query_log ARRAY JOIN ProfileEvents WHERE type != 1 AND query LIKE '%{}%' diff --git a/tests/integration/test_remote_prewhere/configs/log_conf.xml b/tests/integration/test_remote_prewhere/configs/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_remote_prewhere/configs/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_remote_prewhere/test.py b/tests/integration/test_remote_prewhere/test.py index 07d05797223..907a9d43d2a 100644 --- a/tests/integration/test_remote_prewhere/test.py +++ b/tests/integration/test_remote_prewhere/test.py @@ -3,8 +3,8 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=['configs/log_conf.xml']) -node2 = cluster.add_instance('node2', main_configs=['configs/log_conf.xml']) +node1 = cluster.add_instance('node1') +node2 = cluster.add_instance('node2') @pytest.fixture(scope="module") diff --git a/tests/integration/test_s3_with_https/configs/config.d/log_conf.xml b/tests/integration/test_s3_with_https/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_s3_with_https/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_s3_with_https/test.py 
b/tests/integration/test_s3_with_https/test.py index 6c7b47ea0b1..4fa8260ed2e 100644 --- a/tests/integration/test_s3_with_https/test.py +++ b/tests/integration/test_s3_with_https/test.py @@ -15,9 +15,8 @@ def check_proxy_logs(cluster, proxy_instance): def cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("node", main_configs=["configs/config.d/storage_conf.xml", "configs/config.d/log_conf.xml", - "configs/config.d/ssl.xml"], with_minio=True, - minio_certs_dir="minio_certs") + cluster.add_instance("node", main_configs=["configs/config.d/storage_conf.xml", "configs/config.d/ssl.xml"], + with_minio=True, minio_certs_dir="minio_certs") logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") diff --git a/tests/integration/test_s3_with_proxy/configs/config.d/log_conf.xml b/tests/integration/test_s3_with_proxy/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_s3_with_proxy/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_s3_with_proxy/test.py b/tests/integration/test_s3_with_proxy/test.py index 7a1a2292eef..33ad981d18d 100644 --- a/tests/integration/test_s3_with_proxy/test.py +++ b/tests/integration/test_s3_with_proxy/test.py @@ -19,7 +19,7 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance("node", - main_configs=["configs/config.d/log_conf.xml", "configs/config.d/storage_conf.xml"], + main_configs=["configs/config.d/storage_conf.xml"], with_minio=True) logging.info("Starting cluster...") cluster.start() diff --git a/tests/integration/test_select_access_rights/test.py b/tests/integration/test_select_access_rights/test.py index 213df529ef7..0272eac5fa1 100644 --- a/tests/integration/test_select_access_rights/test.py +++ b/tests/integration/test_select_access_rights/test.py @@ -177,3 +177,61 @@ def test_select_count(): instance.query("GRANT SELECT ON default.table1 TO A") assert instance.query(select_query, user = 'A') == "0\n" + + +def test_select_where(): + # User should have grants for the columns used in WHERE. + instance.query("CREATE TABLE table1(a String, b UInt8) ENGINE = MergeTree ORDER BY b") + instance.query("INSERT INTO table1 VALUES ('xxx', 0), ('yyy', 1), ('zzz', 0)") + instance.query("GRANT SELECT(a) ON default.table1 TO A") + + select_query = "SELECT a FROM table1 WHERE b = 0" + assert "it's necessary to have grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user = 'A') + + instance.query("GRANT SELECT(b) ON default.table1 TO A") + assert instance.query(select_query, user = 'A') == "xxx\nzzz\n" + + instance.query("REVOKE SELECT ON default.table1 FROM A") + assert "it's necessary to have grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user = 'A') + + instance.query("GRANT SELECT ON default.table1 TO A") + assert instance.query(select_query, user = 'A') == "xxx\nzzz\n" + + +def test_select_prewhere(): + # User should have grants for the columns used in PREWHERE. 
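+ # PREWHERE filters on column b before the remaining columns are read, so the access check still requires the equivalent of: GRANT SELECT(a, b) ON default.table1 TO A.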
+ instance.query("CREATE TABLE table1(a String, b UInt8) ENGINE = MergeTree ORDER BY b") + instance.query("INSERT INTO table1 VALUES ('xxx', 0), ('yyy', 1), ('zzz', 0)") + instance.query("GRANT SELECT(a) ON default.table1 TO A") + + select_query = "SELECT a FROM table1 PREWHERE b = 0" + assert "it's necessary to have grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user = 'A') + + instance.query("GRANT SELECT(b) ON default.table1 TO A") + assert instance.query(select_query, user = 'A') == "xxx\nzzz\n" + + instance.query("REVOKE SELECT ON default.table1 FROM A") + assert "it's necessary to have grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user = 'A') + + instance.query("GRANT SELECT ON default.table1 TO A") + assert instance.query(select_query, user = 'A') == "xxx\nzzz\n" + + +def test_select_with_row_policy(): + # Normal users should not aware of the existence of row policy filters. + instance.query("CREATE TABLE table1(a String, b UInt8) ENGINE = MergeTree ORDER BY b") + instance.query("INSERT INTO table1 VALUES ('xxx', 0), ('yyy', 1), ('zzz', 0)") + instance.query("CREATE ROW POLICY pol1 ON table1 USING b = 0 TO A") + + select_query = "SELECT a FROM table1" + select_query2 = "SELECT count() FROM table1" + assert "it's necessary to have grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user = 'A') + assert "it's necessary to have grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query2, user = 'A') + + instance.query("GRANT SELECT(a) ON default.table1 TO A") + assert instance.query(select_query, user = 'A') == "xxx\nzzz\n" + assert instance.query(select_query2, user = 'A') == "2\n" + + instance.query("REVOKE SELECT(a) ON default.table1 FROM A") + assert "it's necessary to have grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user = 'A') + assert "it's necessary to have grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query2, user = 'A') diff --git a/tests/integration/test_storage_hdfs/configs/log_conf.xml b/tests/integration/test_storage_hdfs/configs/log_conf.xml deleted file mode 100644 index 0de2745ca4c..00000000000 --- a/tests/integration/test_storage_hdfs/configs/log_conf.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 34ced652a01..731644b0987 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -4,7 +4,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', with_hdfs=True, main_configs=['configs/log_conf.xml']) +node1 = cluster.add_instance('node1', with_hdfs=True) @pytest.fixture(scope="module") @@ -15,7 +15,6 @@ def started_cluster(): finally: cluster.shutdown() - def test_read_write_storage(started_cluster): hdfs_api = started_cluster.hdfs_api @@ -235,7 +234,7 @@ def test_virtual_columns(started_cluster): expected = "1\tfile1\thdfs://hdfs1:9000//file1\n2\tfile2\thdfs://hdfs1:9000//file2\n3\tfile3\thdfs://hdfs1:9000//file3\n" assert node1.query("select id, _file as file_name, _path as file_path from virtual_cols order by id") == expected - 
+ def test_read_files_with_spaces(started_cluster): hdfs_api = started_cluster.hdfs_api @@ -246,6 +245,18 @@ def test_read_files_with_spaces(started_cluster): assert node1.query("select * from test order by id") == "1\n2\n3\n" +def test_truncate_table(started_cluster): + hdfs_api = started_cluster.hdfs_api + node1.query( + "create table test_truncate (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/tr', 'TSV')") + node1.query("insert into test_truncate values (1, 'Mark', 72.53)") + assert hdfs_api.read_data("/tr") == "1\tMark\t72.53\n" + assert node1.query("select * from test_truncate") == "1\tMark\t72.53\n" + node1.query("truncate table test_truncate") + assert node1.query("select * from test_truncate") == "" + node1.query("drop table test_truncate") + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_kafka/configs/log_conf.xml b/tests/integration/test_storage_kafka/configs/log_conf.xml deleted file mode 100644 index 95466269afe..00000000000 --- a/tests/integration/test_storage_kafka/configs/log_conf.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - \ No newline at end of file diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 9cbba4b6e90..51b2052baae 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -43,7 +43,7 @@ from . import social_pb2 cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', - main_configs=['configs/kafka.xml', 'configs/log_conf.xml'], + main_configs=['configs/kafka.xml'], with_kafka=True, with_zookeeper=True, # For Replicated Table macros={"kafka_broker":"kafka1", diff --git a/tests/integration/test_storage_kerberized_hdfs/configs/log_conf.xml b/tests/integration/test_storage_kerberized_hdfs/configs/log_conf.xml deleted file mode 100644 index 0de2745ca4c..00000000000 --- a/tests/integration/test_storage_kerberized_hdfs/configs/log_conf.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh index 971491d4053..769056d70b3 100755 --- a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh +++ b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh @@ -242,6 +242,7 @@ sleep 3 /usr/local/hadoop/bin/hdfs dfs -mkdir /user/specuser /usr/local/hadoop/bin/hdfs dfs -chown specuser /user/specuser +echo "chown_completed" | /usr/local/hadoop/bin/hdfs dfs -appendToFile - /preparations_done_marker kdestroy diff --git a/tests/integration/test_storage_kerberized_hdfs/test.py b/tests/integration/test_storage_kerberized_hdfs/test.py index 13dfb4dfe89..d06f971557b 100644 --- a/tests/integration/test_storage_kerberized_hdfs/test.py +++ b/tests/integration/test_storage_kerberized_hdfs/test.py @@ -7,7 +7,7 @@ from helpers.cluster import ClickHouseCluster import subprocess cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', with_kerberized_hdfs=True, user_configs=[], 
main_configs=['configs/log_conf.xml', 'configs/hdfs.xml']) +node1 = cluster.add_instance('node1', with_kerberized_hdfs=True, user_configs=[], main_configs=['configs/hdfs.xml']) @pytest.fixture(scope="module") def started_cluster(): diff --git a/tests/integration/test_storage_kerberized_kafka/configs/log_conf.xml b/tests/integration/test_storage_kerberized_kafka/configs/log_conf.xml deleted file mode 100644 index 95466269afe..00000000000 --- a/tests/integration/test_storage_kerberized_kafka/configs/log_conf.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - \ No newline at end of file diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index ad94399967a..126c52bb1d9 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -21,10 +21,9 @@ import socket cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', - main_configs=['configs/kafka.xml', 'configs/log_conf.xml' ], + main_configs=['configs/kafka.xml'], with_kerberized_kafka=True, - clickhouse_path_dir="clickhouse_path" - ) + clickhouse_path_dir="clickhouse_path") def producer_serializer(x): return x.encode() if isinstance(x, str) else x diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 716f16c6211..307879265df 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -9,8 +9,8 @@ from helpers.test_tools import assert_eq_with_retry from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=["configs/log_conf.xml"], with_postgres=True) -node2 = cluster.add_instance('node2', main_configs=['configs/log_conf.xml'], with_postgres_cluster=True) +node1 = cluster.add_instance('node1', with_postgres=True) +node2 = cluster.add_instance('node2', with_postgres_cluster=True) def get_postgres_conn(cluster, ip, database=False): if database == True: @@ -307,7 +307,7 @@ def test_postgres_distributed(started_cluster): started_cluster.unpause_container('postgres1') assert(result == 'host2\nhost4\n' or result == 'host3\nhost4\n') - + def test_datetime_with_timezone(started_cluster): conn = get_postgres_conn(started_cluster, started_cluster.postgres_ip, True) cursor = conn.cursor() @@ -323,6 +323,22 @@ def test_datetime_with_timezone(started_cluster): assert(node1.query("select * from test_timezone") == "2014-04-04 20:00:00\t2014-04-04 16:00:00\n") +def test_postgres_ndim(started_cluster): + conn = get_postgres_conn(started_cluster, started_cluster.postgres_ip, True) + cursor = conn.cursor() + cursor.execute('CREATE TABLE arr1 (a Integer[])') + cursor.execute("INSERT INTO arr1 SELECT '{{1}, {2}}'") + + # The point is to create the table via 'AS SELECT *': for tables created this way, PostgreSQL does not set attndims correctly.
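+ # With attndims = 0 the array dimensionality is not available from the catalog, so it presumably has to be recovered from the data itself; the query below checks that ClickHouse still reports the full nested Array type.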
+ cursor.execute('CREATE TABLE arr2 AS SELECT * FROM arr1') + cursor.execute("SELECT attndims AS dims FROM pg_attribute WHERE attrelid = 'arr2'::regclass; ") + result = cursor.fetchall()[0] + assert(int(result[0]) == 0) + + result = node1.query('''SELECT toTypeName(a) FROM postgresql('postgres1:5432', 'clickhouse', 'arr2', 'postgres', 'mysecretpassword')''') + assert(result.strip() == "Array(Array(Nullable(Int32)))") + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_postgresql_replica/__init__.py b/tests/integration/test_storage_postgresql_replica/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_rabbitmq/configs/log_conf.xml b/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml similarity index 100% rename from tests/integration/test_storage_rabbitmq/configs/log_conf.xml rename to tests/integration/test_storage_postgresql_replica/configs/log_conf.xml diff --git a/tests/integration/test_storage_postgresql_replica/test.py b/tests/integration/test_storage_postgresql_replica/test.py new file mode 100644 index 00000000000..4602d567b46 --- /dev/null +++ b/tests/integration/test_storage_postgresql_replica/test.py @@ -0,0 +1,600 @@ +import pytest +import time +import psycopg2 +import os.path as p + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry +from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT +from helpers.test_tools import TSV + +import threading + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', main_configs=['configs/log_conf.xml'], with_postgres=True, stay_alive=True) + +postgres_table_template = """ + CREATE TABLE IF NOT EXISTS {} ( + key Integer NOT NULL, value Integer, PRIMARY KEY(key)) + """ + +queries = [ + 'INSERT INTO postgresql_replica select i, i from generate_series(0, 10000) as t(i);', + 'DELETE FROM postgresql_replica WHERE (value*value) % 3 = 0;', + 'UPDATE postgresql_replica SET value = value + 125 WHERE key % 2 = 0;', + "UPDATE postgresql_replica SET key=key+20000 WHERE key%2=0", + 'INSERT INTO postgresql_replica select i, i from generate_series(40000, 50000) as t(i);', + 'DELETE FROM postgresql_replica WHERE key % 10 = 0;', + 'UPDATE postgresql_replica SET value = value + 101 WHERE key % 2 = 1;', + "UPDATE postgresql_replica SET key=key+80000 WHERE key%2=1", + 'DELETE FROM postgresql_replica WHERE value % 2 = 0;', + 'UPDATE postgresql_replica SET value = value + 2000 WHERE key % 5 = 0;', + 'INSERT INTO postgresql_replica select i, i from generate_series(200000, 250000) as t(i);', + 'DELETE FROM postgresql_replica WHERE value % 3 = 0;', + 'UPDATE postgresql_replica SET value = value * 2 WHERE key % 3 = 0;', + "UPDATE postgresql_replica SET key=key+500000 WHERE key%2=1", + 'INSERT INTO postgresql_replica select i, i from generate_series(1000000, 1050000) as t(i);', + 'DELETE FROM postgresql_replica WHERE value % 9 = 2;', + "UPDATE postgresql_replica SET key=key+10000000", + 'UPDATE postgresql_replica SET value = value + 2 WHERE key % 3 = 1;', + 'DELETE FROM postgresql_replica WHERE value%5 = 0;' + ] + + +@pytest.mark.timeout(30) +def check_tables_are_synchronized(table_name, order_by='key', postgres_database='postgres_database'): + expected = instance.query('select * from {}.{} order by {};'.format(postgres_database, table_name, order_by)) + result = instance.query('select * from test.{} order by 
{};'.format(table_name, order_by)) + + while result != expected: + time.sleep(0.5) + result = instance.query('select * from test.{} order by {};'.format(table_name, order_by)) + + assert(result == expected) + +def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database'): + if database == True: + conn_string = "host={} port={} dbname='{}' user='postgres' password='mysecretpassword'".format(ip, port, database_name) + else: + conn_string = "host={} port={} user='postgres' password='mysecretpassword'".format(ip, port) + + conn = psycopg2.connect(conn_string) + if auto_commit: + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + conn.autocommit = True + return conn + +def create_postgres_db(cursor, name): + cursor.execute("CREATE DATABASE {}".format(name)) + +def create_clickhouse_postgres_db(ip, port, name='postgres_database'): + instance.query(''' + CREATE DATABASE {} + ENGINE = PostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword')'''.format(name, ip, port, name)) + +def create_materialized_table(ip, port): + instance.query(''' + CREATE TABLE test.postgresql_replica (key UInt64, value UInt64) + ENGINE = MaterializedPostgreSQL( + '{}:{}', 'postgres_database', 'postgresql_replica', 'postgres', 'mysecretpassword') + PRIMARY KEY key; '''.format(ip, port)) + +def create_postgres_table(cursor, table_name, replica_identity_full=False): + cursor.execute("DROP TABLE IF EXISTS {}".format(table_name)) + cursor.execute(postgres_table_template.format(table_name)) + if replica_identity_full: + cursor.execute('ALTER TABLE {} REPLICA IDENTITY FULL;'.format(table_name)) + + +def postgresql_replica_check_result(result, check=False, ref_file='test_postgresql_replica.reference'): + fpath = p.join(p.dirname(__file__), ref_file) + with open(fpath) as reference: + if check: + assert TSV(result) == TSV(reference) + else: + return TSV(result) == TSV(reference) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + conn = get_postgres_conn(ip=cluster.postgres_ip, + port=cluster.postgres_port) + cursor = conn.cursor() + create_postgres_db(cursor, 'postgres_database') + create_clickhouse_postgres_db(ip=cluster.postgres_ip, + port=cluster.postgres_port) + + instance.query('CREATE DATABASE test') + yield cluster + + finally: + cluster.shutdown() + + +@pytest.mark.timeout(320) +def test_initial_load_from_snapshot(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + while postgresql_replica_check_result(result) == False: + time.sleep(0.2) + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + + cursor.execute('DROP TABLE postgresql_replica;') + postgresql_replica_check_result(result, True) + + +@pytest.mark.timeout(320) +def test_no_connection_at_startup(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + instance.query("INSERT INTO 
postgres_database.postgresql_replica SELECT number, number from numbers(50)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + time.sleep(3) + + instance.query('DETACH TABLE test.postgresql_replica') + started_cluster.pause_container('postgres1') + + instance.query('ATTACH TABLE test.postgresql_replica') + time.sleep(3) + started_cluster.unpause_container('postgres1') + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while int(result) == 0: + time.sleep(0.5); + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + cursor.execute('DROP TABLE postgresql_replica;') + postgresql_replica_check_result(result, True) + + +@pytest.mark.timeout(320) +def test_detach_attach_is_ok(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while (int(result) == 0): + time.sleep(0.2) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + postgresql_replica_check_result(result, True) + + instance.query('DETACH TABLE test.postgresql_replica') + instance.query('ATTACH TABLE test.postgresql_replica') + + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + while postgresql_replica_check_result(result) == False: + time.sleep(0.5) + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + + cursor.execute('DROP TABLE postgresql_replica;') + postgresql_replica_check_result(result, True) + + +@pytest.mark.timeout(320) +def test_replicating_insert_queries(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(10)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while (int(result) != 10): + time.sleep(0.2) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT 10 + number, 10 + number from numbers(10)") + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT 20 + number, 20 + number from numbers(10)") + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while (int(result) != 30): + time.sleep(0.2) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT 30 + number, 30 + number from numbers(10)") + instance.query("INSERT INTO 
postgres_database.postgresql_replica SELECT 40 + number, 40 + number from numbers(10)") + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while (int(result) != 50): + time.sleep(0.2) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + cursor.execute('DROP TABLE postgresql_replica;') + postgresql_replica_check_result(result, True) + + +@pytest.mark.timeout(320) +def test_replicating_delete_queries(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + while postgresql_replica_check_result(result) == False: + time.sleep(0.2) + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, 50 + number from numbers(50)") + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while int(result) != 100: + time.sleep(0.5) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + cursor.execute('DELETE FROM postgresql_replica WHERE key > 49;') + + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + while postgresql_replica_check_result(result) == False: + time.sleep(0.5) + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + + cursor.execute('DROP TABLE postgresql_replica;') + postgresql_replica_check_result(result, True) + + +@pytest.mark.timeout(320) +def test_replicating_update_queries(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number + 10 from numbers(50)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while (int(result) != 50): + time.sleep(0.2) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + cursor.execute('UPDATE postgresql_replica SET value = value - 10;') + + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + while postgresql_replica_check_result(result) == False: + time.sleep(0.5) + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + + cursor.execute('DROP TABLE postgresql_replica;') + postgresql_replica_check_result(result, True) + + +@pytest.mark.timeout(320) +def test_resume_from_written_version(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number + 10 from numbers(50)") + 
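+ # Resuming must pick up from the last written position: changes applied on the PostgreSQL side while the table is detached have to be replayed after ATTACH rather than lost.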
+ instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while (int(result) != 50): + time.sleep(0.2) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, 50 + number from numbers(50)") + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while (int(result) != 100): + time.sleep(0.2) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + instance.query('DETACH TABLE test.postgresql_replica') + + cursor.execute('DELETE FROM postgresql_replica WHERE key > 49;') + cursor.execute('UPDATE postgresql_replica SET value = value - 10;') + + instance.query('ATTACH TABLE test.postgresql_replica') + + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + while postgresql_replica_check_result(result) == False: + time.sleep(0.5) + result = instance.query('SELECT * FROM test.postgresql_replica ORDER BY key;') + + cursor.execute('DROP TABLE postgresql_replica;') + postgresql_replica_check_result(result, True) + + +@pytest.mark.timeout(320) +def test_many_replication_messages(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while (int(result) != 100000): + time.sleep(0.2) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + print("SYNC OK") + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000, 100000)") + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while (int(result) != 200000): + time.sleep(1) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + print("INSERT OK") + + result = instance.query('SELECT key FROM test.postgresql_replica ORDER BY key;') + expected = instance.query("SELECT number from numbers(200000)") + assert(result == expected) + + cursor.execute('UPDATE postgresql_replica SET value = key + 1 WHERE key < 100000;') + + result = instance.query('SELECT key FROM test.postgresql_replica WHERE value = key + 1 ORDER BY key;') + expected = instance.query("SELECT number from numbers(100000)") + + while (result != expected): + time.sleep(1) + result = instance.query('SELECT key FROM test.postgresql_replica WHERE value = key + 1 ORDER BY key;') + print("UPDATE OK") + + cursor.execute('DELETE FROM postgresql_replica WHERE key % 2 = 1;') + cursor.execute('DELETE FROM postgresql_replica WHERE key != value;') + + result = instance.query('SELECT count() FROM (SELECT * FROM test.postgresql_replica);') + while (int(result) != 50000): + time.sleep(1) + result = instance.query('SELECT count() FROM (SELECT * FROM test.postgresql_replica);') + print("DELETE OK") + + cursor.execute('DROP TABLE postgresql_replica;') + + +@pytest.mark.timeout(320) +def test_connection_loss(started_cluster): + conn = 
get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + i = 50 + while i < 100000: + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT {} + number, number from numbers(10000)".format(i)) + i += 10000 + + started_cluster.pause_container('postgres1') + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + print(int(result)) + time.sleep(6) + + started_cluster.unpause_container('postgres1') + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while int(result) < 100050: + time.sleep(1) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + cursor.execute('DROP TABLE postgresql_replica;') + assert(int(result) == 100050) + + +@pytest.mark.timeout(320) +def test_clickhouse_restart(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + i = 50 + while i < 100000: + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT {} + number, number from numbers(10000)".format(i)) + i += 10000 + + instance.restart_clickhouse() + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while int(result) < 100050: + time.sleep(1) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + cursor.execute('DROP TABLE postgresql_replica;') + print(result) + assert(int(result) == 100050) + + +def test_rename_table(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(25)") + + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while int(result) != 25: + time.sleep(0.5) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + instance.query('RENAME TABLE test.postgresql_replica TO test.postgresql_replica_renamed') + assert(int(instance.query('SELECT count() FROM test.postgresql_replica_renamed;')) == 25) + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(25, 25)") + + result = instance.query('SELECT count() FROM test.postgresql_replica_renamed;') + while int(result) != 50: + time.sleep(0.5) + result = instance.query('SELECT count() FROM test.postgresql_replica_renamed;') + + result = instance.query('SELECT * FROM test.postgresql_replica_renamed ORDER BY key;') + postgresql_replica_check_result(result, True) + cursor.execute('DROP 
TABLE postgresql_replica;') + instance.query('DROP TABLE IF EXISTS test.postgresql_replica_renamed') + + +def test_virtual_columns(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(10)") + result = instance.query('SELECT count() FROM test.postgresql_replica;') + while int(result) != 10: + time.sleep(0.5) + result = instance.query('SELECT count() FROM test.postgresql_replica;') + + # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time. + result = instance.query('SELECT key, value, _sign, _version FROM test.postgresql_replica;') + print(result) + cursor.execute('DROP TABLE postgresql_replica;') + + +def test_abrupt_connection_loss_while_heavy_replication(started_cluster): + instance.query("DROP DATABASE IF EXISTS test_database") + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + for i in range(len(queries)): + query = queries[i] + cursor.execute(query) + print('query {}'.format(query)) + + started_cluster.pause_container('postgres1') + + result = instance.query("SELECT count() FROM test.postgresql_replica") + print(result) # Just debug + + started_cluster.unpause_container('postgres1') + + check_tables_are_synchronized('postgresql_replica'); + + result = instance.query("SELECT count() FROM test.postgresql_replica") + print(result) # Just debug + + +def test_abrupt_server_restart_while_heavy_replication(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port) + + for query in queries: + cursor.execute(query) + print('query {}'.format(query)) + + instance.restart_clickhouse() + + result = instance.query("SELECT count() FROM test.postgresql_replica") + print(result) # Just debug + + check_tables_are_synchronized('postgresql_replica'); + + result = instance.query("SELECT count() FROM test.postgresql_replica") + print(result) # Just debug + + +def test_drop_table_immediately(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + create_postgres_table(cursor, 'postgresql_replica'); + instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000)") + + instance.query('DROP TABLE IF EXISTS test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + instance.query('DROP TABLE test.postgresql_replica') + create_materialized_table(ip=started_cluster.postgres_ip, 
port=started_cluster.postgres_port) + check_tables_are_synchronized('postgresql_replica'); + instance.query('DROP TABLE test.postgresql_replica') + + +if __name__ == '__main__': + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() diff --git a/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference b/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference new file mode 100644 index 00000000000..959bb2aad74 --- /dev/null +++ b/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference @@ -0,0 +1,50 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 131611d2720..38c823cd52f 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -18,7 +18,7 @@ from . import rabbitmq_pb2 cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', - main_configs=['configs/rabbitmq.xml', 'configs/log_conf.xml'], + main_configs=['configs/rabbitmq.xml'], with_rabbitmq=True) @@ -751,22 +751,15 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): rabbitmq_routing_key_list = 'insert2', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; - CREATE TABLE test.view_many (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key - SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; - CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS - SELECT * FROM test.rabbitmq_consume; ''') - messages_num = 1000 + messages_num = 10000 + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) def insert(): - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ','.join(values) - while True: try: instance.query("INSERT INTO test.rabbitmq_many VALUES {}".format(values)) @@ -778,18 +771,29 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): raise threads = [] - threads_num = 20 + threads_num = 10 for _ in range(threads_num): threads.append(threading.Thread(target=insert)) for thread in threads: time.sleep(random.uniform(0, 1)) thread.start() + instance.query(''' + CREATE TABLE test.view_many (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS + SELECT * FROM test.rabbitmq_consume; + ''') + + for thread in threads: + thread.join() + while True: result = instance.query('SELECT count() FROM test.view_many') - time.sleep(1) if int(result) == messages_num * threads_num: break + time.sleep(1) instance.query(''' DROP TABLE test.rabbitmq_consume; @@ -798,9 +802,6 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): DROP TABLE test.view_many; ''') - for thread in threads: - thread.join() - assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 52b021a07c5..1ba29975202 100644 --- a/tests/integration/test_storage_s3/test.py +++ 
b/tests/integration/test_storage_s3/test.py @@ -646,3 +646,28 @@ def test_storage_s3_put_gzip(started_cluster, extension, method): f = gzip.GzipFile(fileobj=buf, mode="rb") uncompressed_content = f.read().decode() assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708 + + +def test_truncate_table(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + name = "truncate" + + instance.query("CREATE TABLE {} (id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV')".format( + name, started_cluster.minio_ip, MINIO_INTERNAL_PORT, bucket, name)) + + instance.query("INSERT INTO {} SELECT number FROM numbers(10)".format(name)) + result = instance.query("SELECT * FROM {}".format(name)) + assert result == instance.query("SELECT number FROM numbers(10)") + instance.query("TRUNCATE TABLE {}".format(name)) + + minio = started_cluster.minio_client + timeout = 30 + while timeout > 0: + if len(list(minio.list_objects(started_cluster.minio_bucket, 'truncate/'))) == 0: + return + timeout -= 1 + time.sleep(1) + assert(len(list(minio.list_objects(started_cluster.minio_bucket, 'truncate/'))) == 0) + assert instance.query("SELECT * FROM {}".format(name)) == "" + diff --git a/tests/integration/test_table_functions_access_rights/test.py b/tests/integration/test_table_functions_access_rights/test.py index bd2f767413a..16f18407960 100644 --- a/tests/integration/test_table_functions_access_rights/test.py +++ b/tests/integration/test_table_functions_access_rights/test.py @@ -38,7 +38,7 @@ def test_merge(): assert "it's necessary to have grant CREATE TEMPORARY TABLE ON *.*" in instance.query_and_get_error(select_query, user = 'A') instance.query("GRANT CREATE TEMPORARY TABLE ON *.* TO A") - assert "no one matches regular expression" in instance.query_and_get_error(select_query, user = 'A') + assert "no tables in database matches" in instance.query_and_get_error(select_query, user = 'A') instance.query("GRANT SELECT ON default.table1 TO A") assert instance.query(select_query, user = 'A') == "1\n" diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index de5e5984082..f37c28b2a80 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -351,6 +351,7 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_delete_{suff}', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 3 SECOND + SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100 '''.format(suff=num_run, replica=node.name)) node.query( @@ -359,6 +360,7 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_group_by_{suff}', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 3 SECOND GROUP BY id SET val = sum(val) + SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100 '''.format(suff=num_run, replica=node.name)) node.query( @@ -367,6 +369,7 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_where_{suff}', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 3 SECOND DELETE WHERE id % 2 = 1 + SETTINGS 
max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100 '''.format(suff=num_run, replica=node.name)) node_left.query("INSERT INTO test_ttl_delete VALUES (now(), 1)") @@ -397,9 +400,9 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL") node_right.query("OPTIMIZE TABLE test_ttl_where FINAL") - exec_query_with_retry(node_left, "SYSTEM SYNC REPLICA test_ttl_delete") - node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) - node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) + exec_query_with_retry(node_left, "OPTIMIZE TABLE test_ttl_delete FINAL") + node_left.query("OPTIMIZE TABLE test_ttl_group_by FINAL", timeout=20) + node_left.query("OPTIMIZE TABLE test_ttl_where FINAL", timeout=20) # After OPTIMIZE TABLE, it is not guaranteed that everything is merged. # Possible scenario (for test_ttl_group_by): @@ -414,6 +417,10 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): node_right.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) node_right.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) + exec_query_with_retry(node_left, "SYSTEM SYNC REPLICA test_ttl_delete") + node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) + node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) + assert node_left.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n" assert node_right.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n" diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index dd8e1bc7a9e..4f8a61a5bf0 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -39,8 +39,8 @@ def test_mutate_and_upgrade(start_cluster): node2.restart_with_latest_version(signal=9) # After hard restart table can be in readonly mode - exec_query_with_retry(node2, "INSERT INTO mt VALUES ('2020-02-13', 3)") - exec_query_with_retry(node1, "SYSTEM SYNC REPLICA mt") + exec_query_with_retry(node2, "INSERT INTO mt VALUES ('2020-02-13', 3)", retry_count=60) + exec_query_with_retry(node1, "SYSTEM SYNC REPLICA mt", retry_count=60) assert node1.query("SELECT COUNT() FROM mt") == "2\n" assert node2.query("SELECT COUNT() FROM mt") == "2\n" @@ -79,7 +79,10 @@ def test_upgrade_while_mutation(start_cluster): node3.restart_with_latest_version(signal=9) - exec_query_with_retry(node3, "ALTER TABLE mt1 DELETE WHERE id > 100000", settings={"mutations_sync": "2"}) + # checks for readonly + exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", retry_count=60) + + node3.query("ALTER TABLE mt1 DELETE WHERE id > 100000", settings={"mutations_sync": "2"}) # will delete nothing, but previous async mutation will finish with this query assert_eq_with_retry(node3, "SELECT COUNT() from mt1", "50000\n") diff --git a/tests/performance/dict_join.xml b/tests/performance/dict_join.xml new file mode 100644 index 00000000000..1fa5ce1726c --- /dev/null +++ b/tests/performance/dict_join.xml @@ -0,0 +1,37 @@ + + + CREATE TABLE join_dictionary_source_table (key UInt64, value String) + ENGINE = MergeTree ORDER BY key; + + + + CREATE DICTIONARY join_hashed_dictionary (key UInt64, value String) + PRIMARY KEY key + SOURCE(CLICKHOUSE(DB 'default' TABLE 'join_dictionary_source_table')) + LIFETIME(MIN 0 MAX 1000) + LAYOUT(HASHED()); + + + + INSERT INTO 
join_dictionary_source_table + SELECT number, toString(number) + FROM numbers(1000000); + + + + SELECT COUNT() + FROM join_dictionary_source_table + JOIN join_hashed_dictionary + ON join_dictionary_source_table.key = join_hashed_dictionary.key; + + + + SELECT COUNT() + FROM join_dictionary_source_table + JOIN join_hashed_dictionary + ON join_dictionary_source_table.key = toUInt64(join_hashed_dictionary.key); + + + DROP DICTIONARY IF EXISTS join_hashed_dictionary; + DROP TABLE IF EXISTS join_dictionary_source_table; + diff --git a/tests/performance/jit_aggregate_functions.xml b/tests/performance/jit_aggregate_functions.xml new file mode 100644 index 00000000000..21683ef2004 --- /dev/null +++ b/tests/performance/jit_aggregate_functions.xml @@ -0,0 +1,298 @@ + + + hits_100m_single + + + + 1 + 0 + + + + CREATE TABLE jit_test_memory ( + key UInt64, + value_1 UInt64, + value_2 UInt64, + value_3 UInt64, + value_4 UInt64, + value_5 UInt64, + predicate UInt8 + ) Engine = Memory + + + + CREATE TABLE jit_test_merge_tree ( + key UInt64, + value_1 UInt64, + value_2 UInt64, + value_3 UInt64, + value_4 UInt64, + value_5 UInt64, + predicate UInt8 + ) Engine = MergeTree + ORDER BY key + + + + CREATE TABLE jit_test_merge_tree_nullable ( + key UInt64, + value_1 Nullable(UInt64), + value_2 Nullable(UInt64), + value_3 Nullable(UInt64), + value_4 Nullable(UInt64), + value_5 Nullable(UInt64), + predicate UInt8 + ) Engine = Memory + + + + CREATE TABLE jit_test_memory_nullable ( + key UInt64, + value_1 Nullable(UInt64), + value_2 Nullable(UInt64), + value_3 Nullable(UInt64), + value_4 Nullable(UInt64), + value_5 Nullable(UInt64), + predicate UInt8 + ) Engine = MergeTree + ORDER BY key + + + + + function + + sum + min + max + avg + any + anyLast + count + + + + + table + + jit_test_memory + jit_test_merge_tree + jit_test_memory_nullable + jit_test_merge_tree_nullable + + + + + group_scale + + 1000000 + + + + + + INSERT INTO {table} + SELECT + number % 1000000, + number, + number, + number, + number, + number, + if (number % 2 == 0, 1, 0) + FROM + system.numbers_mt + LIMIT 10000000 + + + + SELECT + {function}(value_1), + {function}(value_2), + {function}(value_3) + FROM {table} + GROUP BY key + FORMAT Null + + + + SELECT + {function}(value_1), + {function}(value_2), + groupBitAnd(value_3), + {function}(value_3) + FROM {table} + GROUP BY key + FORMAT Null + + + + SELECT + {function}If(value_1, predicate), + {function}If(value_2, predicate), + {function}If(value_3, predicate) + FROM {table} + GROUP BY key + FORMAT Null + + + + SELECT + {function}If(value_1, predicate), + {function}If(value_2, predicate), + groupBitAndIf(value_3, predicate), + {function}If(value_3, predicate) + FROM {table} + GROUP BY key + FORMAT Null + + + + SELECT + {function}(value_1), + {function}(value_2), + {function}(value_3), + {function}(value_4), + {function}(value_5) + FROM {table} + GROUP BY key + FORMAT Null + + + + SELECT + {function}(value_1), + {function}(value_2), + groupBitAnd(value_3), + {function}(value_3), + {function}(value_4), + {function}(value_5) + FROM {table} + GROUP BY key + FORMAT Null + + + + SELECT + {function}If(value_1, predicate), + {function}If(value_2, predicate), + {function}If(value_3, predicate), + {function}If(value_4, predicate), + {function}If(value_5, predicate) + FROM {table} + GROUP BY key + FORMAT Null + + + + SELECT + {function}If(value_1, predicate), + {function}If(value_2, predicate), + groupBitAndIf(value_3, predicate), + {function}If(value_3, predicate), + {function}If(value_4, predicate), + 
{function}If(value_5, predicate) + FROM {table} + GROUP BY key + FORMAT Null + + + + + SELECT + {function}(WatchID), + {function}(CounterID), + {function}(ClientIP) + FROM hits_100m_single + GROUP BY intHash32(UserID) % {group_scale} + FORMAT Null + + + + SELECT + {function}(WatchID), + {function}(CounterID), + groupBitAnd(ClientIP), + {function}(ClientIP) + FROM hits_100m_single + GROUP BY intHash32(UserID) % {group_scale} + FORMAT Null + + + + SELECT + {function}(WatchID), + {function}(CounterID), + {function}(ClientIP), + {function}(GoodEvent), + {function}(CounterClass) + FROM hits_100m_single + GROUP BY intHash32(UserID) % {group_scale} + FORMAT Null + + + + SELECT + {function}(WatchID), + {function}(CounterID), + groupBitAnd(ClientIP), + {function}(ClientIP), + {function}(GoodEvent), + {function}(CounterClass) + FROM hits_100m_single + GROUP BY intHash32(UserID) % {group_scale} + FORMAT Null + + + + WITH (WatchID % 2 == 0) AS predicate + SELECT + {function}If(WatchID, predicate), + {function}If(CounterID, predicate), + {function}If(ClientIP, predicate) + FROM hits_100m_single + GROUP BY intHash32(UserID) % {group_scale} + FORMAT Null + + + + WITH (WatchID % 2 == 0) AS predicate + SELECT + {function}If(WatchID, predicate), + {function}If(CounterID, predicate), + groupBitAndIf(ClientIP, predicate), + {function}If(ClientIP, predicate) + FROM hits_100m_single + GROUP BY intHash32(UserID) % {group_scale} + FORMAT Null + + + + WITH (WatchID % 2 == 0) AS predicate + SELECT + {function}If(WatchID, predicate), + {function}If(CounterID, predicate), + {function}If(ClientIP, predicate), + {function}If(GoodEvent, predicate), + {function}If(CounterClass, predicate) + FROM hits_100m_single + GROUP BY intHash32(UserID) % {group_scale} + FORMAT Null + + + + WITH (WatchID % 2 == 0) AS predicate + SELECT + {function}If(WatchID, predicate), + {function}If(CounterID, predicate), + groupBitAndIf(ClientIP, predicate), + {function}If(ClientIP, predicate), + {function}If(GoodEvent, predicate), + {function}If(CounterClass, predicate) + FROM hits_100m_single + GROUP BY intHash32(UserID) % {group_scale} + FORMAT Null + + + DROP TABLE IF EXISTS {table} + diff --git a/tests/performance/nyc_taxi.xml b/tests/performance/nyc_taxi.xml deleted file mode 100644 index b8d9621e3eb..00000000000 --- a/tests/performance/nyc_taxi.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - trips_mergetree - - - SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type - SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count - SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year - SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*) FROM trips_mergetree GROUP BY passenger_count, year, distance ORDER BY year, count(*) DESC - diff --git a/tests/performance/quantile.xml b/tests/performance/quantile.xml index 4718c129db6..120166b600d 100644 --- a/tests/performance/quantile.xml +++ b/tests/performance/quantile.xml @@ -1,6 +1,7 @@ hits_100m_single + hits_10m_single @@ -10,9 +11,17 @@ SearchEngineID RegionID SearchPhrase + + + + + + key_slow + ClientIP + func @@ -26,4 +35,5 @@ SELECT {key} AS k, {func}(ResolutionWidth) FROM hits_100m_single GROUP BY k FORMAT Null + SELECT {key_slow} AS k, {func}(ResolutionWidth) FROM hits_10m_single GROUP BY k FORMAT Null diff --git a/tests/performance/uniq.xml b/tests/performance/uniq.xml index d4521961b9a..e8f3aed62fe 100644 --- a/tests/performance/uniq.xml +++ 
b/tests/performance/uniq.xml @@ -1,6 +1,7 @@ hits_100m_single + hits_10m_single 30000000000 @@ -58,4 +59,5 @@ SELECT {key} AS k, {func}(UserID) FROM hits_100m_single GROUP BY k FORMAT Null + SELECT {key} AS k, uniqTheta(UserID) FROM hits_10m_single GROUP BY k FORMAT Null diff --git a/tests/queries/0_stateless/00109_shard_totals_after_having.sql b/tests/queries/0_stateless/00109_shard_totals_after_having.sql index 72e5e011e19..ae143f594c5 100644 --- a/tests/queries/0_stateless/00109_shard_totals_after_having.sql +++ b/tests/queries/0_stateless/00109_shard_totals_after_having.sql @@ -3,7 +3,9 @@ SET max_block_size = 100001; SET group_by_overflow_mode = 'any'; DROP TABLE IF EXISTS numbers500k; -CREATE VIEW numbers500k AS SELECT number FROM system.numbers LIMIT 500000; +CREATE TABLE numbers500k (number UInt32) ENGINE = TinyLog; + +INSERT INTO numbers500k SELECT number FROM system.numbers LIMIT 500000; SET totals_mode = 'after_having_auto'; SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10; diff --git a/tests/queries/0_stateless/00292_parser_tuple_element.sql b/tests/queries/0_stateless/00292_parser_tuple_element.sql index bb28b771dce..6d43ac9c738 100644 --- a/tests/queries/0_stateless/00292_parser_tuple_element.sql +++ b/tests/queries/0_stateless/00292_parser_tuple_element.sql @@ -1 +1 @@ -SELECT ('a', 'b').2 \ No newline at end of file +SELECT ('a', 'b').2 diff --git a/tests/queries/0_stateless/00505_distributed_secure.data b/tests/queries/0_stateless/00505_distributed_secure.data index dc2d37dc5df..96a96ef4b68 100644 --- a/tests/queries/0_stateless/00505_distributed_secure.data +++ b/tests/queries/0_stateless/00505_distributed_secure.data @@ -1,22 +1,23 @@ -DROP TABLE IF EXISTS test.secure1; -DROP TABLE IF EXISTS test.secure2; -DROP TABLE IF EXISTS test.secure3; +DROP TABLE IF EXISTS secure1; +DROP TABLE IF EXISTS secure2; +DROP TABLE IF EXISTS secure3; -CREATE TABLE test.secure1 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = MergeTree(date, (a, date), 8192); -CREATE TABLE test.secure2 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = Distributed(test_shard_localhost_secure, 'test', 'secure1'); -CREATE TABLE test.secure3 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = Distributed(test_shard_localhost_secure, 'test', 'secure2'); +CREATE TABLE secure1 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = MergeTree(date, (a, date), 8192); +CREATE TABLE secure2 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = Distributed(test_shard_localhost_secure, currentDatabase(), 'secure1'); +CREATE TABLE secure3 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = Distributed(test_shard_localhost_secure, currentDatabase(), 'secure2'); -INSERT INTO test.secure1 VALUES (1, 2, 3, 4, 5); -INSERT INTO test.secure1 VALUES (11,12,13,14,15); -INSERT INTO test.secure2 VALUES (21,22,23,24,25); -INSERT INTO test.secure3 VALUES (31,32,33,34,35); +INSERT INTO secure1 VALUES (1, 2, 3, 4, 5); +INSERT INTO secure1 VALUES (11,12,13,14,15); +INSERT INTO secure2 VALUES (21,22,23,24,25); +INSERT INTO secure3 VALUES (31,32,33,34,35); -SELECT 'sleep', sleep(1); +SYSTEM FLUSH DISTRIBUTED secure2; +SYSTEM FLUSH DISTRIBUTED secure3; -SELECT * FROM test.secure1 ORDER BY a; -SELECT * FROM test.secure2 ORDER BY a; -SELECT * FROM test.secure3 ORDER BY a; +SELECT * FROM secure1 ORDER BY a; +SELECT * FROM secure2 ORDER BY a; +SELECT * FROM secure3 ORDER BY a; -DROP 
TABLE test.secure1; -DROP TABLE test.secure2; -DROP TABLE test.secure3; +DROP TABLE secure1; +DROP TABLE secure2; +DROP TABLE secure3; diff --git a/tests/queries/0_stateless/00505_secure.reference b/tests/queries/0_stateless/00505_secure.reference index c925bdd13bf..9a8656bf491 100644 --- a/tests/queries/0_stateless/00505_secure.reference +++ b/tests/queries/0_stateless/00505_secure.reference @@ -1,8 +1,6 @@ -1 2 3 4 -sleep 0 1970-01-02 2 3 4 5 1970-01-12 12 13 14 15 1970-01-22 22 23 24 25 diff --git a/tests/queries/0_stateless/00505_secure.sh b/tests/queries/0_stateless/00505_secure.sh index 3d9e28ba08d..c1113af761b 100755 --- a/tests/queries/0_stateless/00505_secure.sh +++ b/tests/queries/0_stateless/00505_secure.sh @@ -6,21 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Not default server config needed - - -if [ -n "$CLICKHOUSE_CONFIG_CLIENT" ]; then - USE_CONFIG="--config-file $CLICKHOUSE_CONFIG_CLIENT" -fi - -CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:="$CLICKHOUSE_CLIENT_BINARY $USE_CONFIG --secure --port=$CLICKHOUSE_PORT_TCP_SECURE"} -if [[ $CLICKHOUSE_CLIENT != *"--port"* ]]; then - # Auto port detect. Cant test with re-defined via command line ports - $CLICKHOUSE_CLIENT_BINARY $USE_CONFIG --secure -q "SELECT 1"; -else - echo 1 -fi - $CLICKHOUSE_CLIENT_SECURE -q "SELECT 2;" #disable test diff --git a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh index cc5ece15435..93fd0c4a977 100755 --- a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh +++ b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh @@ -40,16 +40,16 @@ $CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT $settings -q " WITH any(query_duration_ms*1000) AS duration, - sumIf(PV, PN = 'RealTimeMicroseconds') AS threads_realtime, - sumIf(PV, PN IN ('UserTimeMicroseconds', 'SystemTimeMicroseconds', 'OSIOWaitMicroseconds', 'OSCPUWaitMicroseconds')) AS threads_time_user_system_io + sum(ProfileEvents['RealTimeMicroseconds']) AS threads_realtime, + sum(ProfileEvents['UserTimeMicroseconds'] + ProfileEvents['SystemTimeMicroseconds'] + ProfileEvents['OSIOWaitMicroseconds'] + ProfileEvents['OSCPUWaitMicroseconds']) AS threads_time_user_system_io SELECT -- duration, threads_realtime, threads_time_user_system_io, threads_realtime >= 0.99 * duration, threads_realtime >= threads_time_user_system_io, any(length(thread_ids)) >= 1 FROM - (SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE event_date >= today()-2 AND current_database = currentDatabase() AND type=2 ORDER BY event_time DESC LIMIT 1) - ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV" + (SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE event_date >= today()-1 AND current_database = currentDatabase() AND type=2 ORDER BY event_time DESC LIMIT 1) +" # Clean rm "$server_logs_file" diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index c2274c53399..e54326cb59f 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -28,6 +28,7 @@ $CLICKHOUSE_CLIENT $settings -q "$touching_many_parts_query" &> /dev/null $CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS" 
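# ProfileEvents in system.query_log is now a Map(String, UInt64), so the rewritten query below reads ProfileEvents['FileOpen'] directly instead of ARRAY JOINing the old ProfileEvents.Names/ProfileEvents.Values arrays.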
-$CLICKHOUSE_CLIENT $settings -q "SELECT pi.Values FROM system.query_log ARRAY JOIN ProfileEvents as pi WHERE query='$touching_many_parts_query' and current_database = currentDatabase() and pi.Names = 'FileOpen' ORDER BY event_time DESC LIMIT 1;" + +$CLICKHOUSE_CLIENT $settings -q "SELECT ProfileEvents['FileOpen'] FROM system.query_log WHERE query='$touching_many_parts_query' and current_database = currentDatabase() ORDER BY event_time DESC LIMIT 1;" $CLICKHOUSE_CLIENT $settings -q "DROP TABLE IF EXISTS merge_tree_table;" diff --git a/tests/queries/0_stateless/00735_long_conditional.reference b/tests/queries/0_stateless/00735_long_conditional.reference index 6308a48218b..082c2d49de9 100644 --- a/tests/queries/0_stateless/00735_long_conditional.reference +++ b/tests/queries/0_stateless/00735_long_conditional.reference @@ -92,8 +92,8 @@ value vs value 0 1 1 UInt64 Decimal(18, 0) Decimal(38, 0) 0 1 1 UInt64 Decimal(38, 0) Decimal(38, 0) 1970-01-01 1970-01-02 1970-01-02 Date Date Date -2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime -2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime(\'Europe/Moscow\') 1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') column vs value 0 1 1 Int8 Int8 Int8 @@ -189,6 +189,6 @@ column vs value 0 1 1 UInt64 Decimal(18, 0) Decimal(38, 0) 0 1 1 UInt64 Decimal(38, 0) Decimal(38, 0) 1970-01-01 1970-01-02 1970-01-02 Date Date Date -2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime -2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime(\'Europe/Moscow\') 1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') diff --git a/tests/queries/0_stateless/00842_array_with_constant_overflow.sql b/tests/queries/0_stateless/00842_array_with_constant_overflow.sql index b31efb89686..ffd5fecde10 100644 --- a/tests/queries/0_stateless/00842_array_with_constant_overflow.sql +++ b/tests/queries/0_stateless/00842_array_with_constant_overflow.sql @@ -1 +1 @@ -SELECT arrayWithConstant(-231.37104, -138); -- { serverError 128 } \ No newline at end of file +SELECT arrayWithConstant(-231.37104, -138); -- { serverError 128 } diff --git a/tests/queries/0_stateless/00900_long_parquet.reference b/tests/queries/0_stateless/00900_long_parquet.reference index 230d1f5ca48..bf0f66bb217 100644 --- a/tests/queries/0_stateless/00900_long_parquet.reference +++ b/tests/queries/0_stateless/00900_long_parquet.reference @@ -72,3 +72,5 @@ dest from null: 3 [] [] [] [[[1,2,3],[1,2,3]],[[1,2,3]],[[],[1,2,3]]] [[['Some string','Some string'],[]],[['Some string']],[[]]] [[NULL,1,2],[NULL],[1,2],[]] [['Some string',NULL,'Some string'],[NULL],[]] [[[1,2,3],[1,2,3]],[[1,2,3]],[[],[1,2,3]]] [[['Some string','Some string'],[]],[['Some string']],[[]]] [[NULL,1,2],[NULL],[1,2],[]] [['Some string',NULL,'Some string'],[NULL],[]] +0.1230 0.12312312 0.1231231231230000 
0.12312312312312312300000000000000 +0.1230 0.12312312 0.1231231231230000 0.12312312312312312300000000000000 diff --git a/tests/queries/0_stateless/00900_long_parquet.sh b/tests/queries/0_stateless/00900_long_parquet.sh index 8c19c7cecab..c30e1148abe 100755 --- a/tests/queries/0_stateless/00900_long_parquet.sh +++ b/tests/queries/0_stateless/00900_long_parquet.sh @@ -166,3 +166,11 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_nested_arrays VALUES ([[[1,2,3 ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_nested_arrays FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_nested_arrays FORMAT Parquet" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_nested_arrays" ${CLICKHOUSE_CLIENT} --query="DROP TABLE parquet_nested_arrays" + + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_decimal" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_decimal (d1 Decimal32(4), d2 Decimal64(8), d3 Decimal128(16), d4 Decimal256(32)) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="INSERT INTO TABLE parquet_decimal VALUES (0.123, 0.123123123, 0.123123123123, 0.123123123123123123)" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_decimal FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_decimal FORMAT Arrow" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_decimal" +${CLICKHOUSE_CLIENT} --query="DROP TABLE parquet_decimal" diff --git a/tests/queries/0_stateless/00900_orc_arrow_parquet_maps.sh b/tests/queries/0_stateless/00900_orc_arrow_parquet_maps.sh index 9330a5924a9..913789dedb0 100755 --- a/tests/queries/0_stateless/00900_orc_arrow_parquet_maps.sh +++ b/tests/queries/0_stateless/00900_orc_arrow_parquet_maps.sh @@ -19,9 +19,9 @@ formats="Arrow Parquet ORC"; for format in ${formats}; do echo $format - ${CLICKHOUSE_CLIENT} --query="SELECT * FROM maps FORMAT Parquet" > "${CLICKHOUSE_TMP}"/maps + ${CLICKHOUSE_CLIENT} --query="SELECT * FROM maps FORMAT $format" > "${CLICKHOUSE_TMP}"/maps ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE maps" - cat "${CLICKHOUSE_TMP}"/maps | ${CLICKHOUSE_CLIENT} -q "INSERT INTO maps FORMAT Parquet" + cat "${CLICKHOUSE_TMP}"/maps | ${CLICKHOUSE_CLIENT} -q "INSERT INTO maps FORMAT $format" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM maps" done diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index a3beb2967d4..d9b34df8bf8 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -57,11 +57,21 @@ Friday (3,0) (3,5) (3,0) +(123456,3.55) +(1,'417ddc5d-e556-4d27-95dd-a34d84e46a50') +hello +(3333.6,'test') +(3333.6333333333,'test') +123456.1234 Decimal(20, 4) +123456789012345.1136 123456789012345.1136 +1234567890.12345677879616925706 (1234567890.12345677879616925706,'test') +1234567890.123456695758468374595199311875 (1234567890.123456695758468374595199311875,'test') --JSONExtractKeysAndValues-- [('a','hello')] [('b',[-100,200,300])] [('a','hello'),('b','world')] [('a',5),('b',7),('c',11)] +[('a','hello'),('b','world')] --JSONExtractRaw-- {"a":"hello","b":[-100,200,300]} "hello" diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index dbaa5b6a80a..f548b9e5e66 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -66,12 +66,22 @@ SELECT JSONExtract('{"a":3,"b":5,"c":7}', 'Tuple(Int, Int)'); SELECT JSONExtract('{"a":3}', 'Tuple(Int, Int)'); 
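-- The new cases below exercise JSONExtract with Decimal(P, S), UUID and LowCardinality target types, both as scalars and inside Tuple extraction.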
SELECT JSONExtract('[3,5,7]', 'Tuple(Int, Int)'); SELECT JSONExtract('[3]', 'Tuple(Int, Int)'); +SELECT JSONExtract('{"a":123456, "b":3.55}', 'Tuple(a LowCardinality(Int32), b Decimal(5, 2))'); +SELECT JSONExtract('{"a":1, "b":"417ddc5d-e556-4d27-95dd-a34d84e46a50"}', 'Tuple(a Int8, b UUID)'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'a', 'LowCardinality(String)'); +SELECT JSONExtract('{"a":3333.6333333333333333333333, "b":"test"}', 'Tuple(a Decimal(10,1), b LowCardinality(String))'); +SELECT JSONExtract('{"a":3333.6333333333333333333333, "b":"test"}', 'Tuple(a Decimal(20,10), b LowCardinality(String))'); +SELECT JSONExtract('{"a":123456.123456}', 'a', 'Decimal(20, 4)') as a, toTypeName(a); +SELECT toDecimal64(123456789012345.12, 4), JSONExtract('{"a":123456789012345.12}', 'a', 'Decimal(30, 4)'); +SELECT toDecimal128(1234567890.12345678901234567890, 20), JSONExtract('{"a":1234567890.12345678901234567890, "b":"test"}', 'Tuple(a Decimal(35,20), b LowCardinality(String))'); +SELECT toDecimal256(1234567890.123456789012345678901234567890, 30), JSONExtract('{"a":1234567890.12345678901234567890, "b":"test"}', 'Tuple(a Decimal(45,30), b LowCardinality(String))'); SELECT '--JSONExtractKeysAndValues--'; SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'String'); SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Float64)'); SELECT JSONExtractKeysAndValues('{"a": "hello", "b": "world"}', 'String'); SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8'); +SELECT JSONExtractKeysAndValues('{"a": "hello", "b": "world"}', 'LowCardinality(String)'); SELECT '--JSONExtractRaw--'; SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}'); diff --git a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh index 9865baaa1b9..36ac490c80f 100755 --- a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh +++ b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh @@ -20,7 +20,8 @@ $CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --query_id="test-query-uncompresse $CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS" -$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND current_database = currentDatabase() AND (type = 2) AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1" + +$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents['Seek'], ProfileEvents['ReadCompressedBytes'], ProfileEvents['UncompressedCacheHits'] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') and current_database = currentDatabase() AND (type = 2) AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS small_table" diff --git a/tests/queries/0_stateless/00980_merge_alter_settings.reference b/tests/queries/0_stateless/00980_merge_alter_settings.reference index 20146ed9d1e..7a958c40651 100644 --- a/tests/queries/0_stateless/00980_merge_alter_settings.reference +++ b/tests/queries/0_stateless/00980_merge_alter_settings.reference @@ -4,3 +4,9 @@ CREATE TABLE default.table_for_alter\n(\n `id` UInt64,\n `Data` String\n)\ 2 CREATE TABLE 
default.table_for_alter\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 30 CREATE TABLE default.table_for_alter\n(\n `id` UInt64,\n `Data` String,\n `Data2` UInt64\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 15 +CREATE TABLE default.table_for_reset_setting\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 4096 +CREATE TABLE default.table_for_reset_setting\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 1, parts_to_delay_insert = 1 +CREATE TABLE default.table_for_reset_setting\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 4096 +CREATE TABLE default.table_for_reset_setting\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 4096 +CREATE TABLE default.table_for_reset_setting\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 4096, merge_with_ttl_timeout = 300, max_concurrent_queries = 1 +CREATE TABLE default.table_for_reset_setting\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 4096 diff --git a/tests/queries/0_stateless/00980_merge_alter_settings.sql b/tests/queries/0_stateless/00980_merge_alter_settings.sql index 6bf043fe4c8..755983ac62b 100644 --- a/tests/queries/0_stateless/00980_merge_alter_settings.sql +++ b/tests/queries/0_stateless/00980_merge_alter_settings.sql @@ -53,3 +53,50 @@ SHOW CREATE TABLE table_for_alter; DROP TABLE IF EXISTS table_for_alter; + +DROP TABLE IF EXISTS table_for_reset_setting; + +CREATE TABLE table_for_reset_setting ( + id UInt64, + Data String +) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity=4096; + +ALTER TABLE table_for_reset_setting MODIFY SETTING index_granularity=555; -- { serverError 472 } + +SHOW CREATE TABLE table_for_reset_setting; + +INSERT INTO table_for_reset_setting VALUES (1, '1'); +INSERT INTO table_for_reset_setting VALUES (2, '2'); + +ALTER TABLE table_for_reset_setting MODIFY SETTING parts_to_throw_insert = 1, parts_to_delay_insert = 1; + +SHOW CREATE TABLE table_for_reset_setting; + +INSERT INTO table_for_reset_setting VALUES (1, '1'); -- { serverError 252 } + +ALTER TABLE table_for_reset_setting RESET SETTING parts_to_delay_insert, parts_to_throw_insert; + +SHOW CREATE TABLE table_for_reset_setting; + +INSERT INTO table_for_reset_setting VALUES (1, '1'); +INSERT INTO table_for_reset_setting VALUES (2, '2'); + +DETACH TABLE table_for_reset_setting; +ATTACH TABLE table_for_reset_setting; + +SHOW CREATE TABLE table_for_reset_setting; + +ALTER TABLE table_for_reset_setting RESET SETTING index_granularity; -- { serverError 472 } + +-- ignore undefined setting +ALTER TABLE table_for_reset_setting RESET SETTING merge_with_ttl_timeout, unknown_setting; + +ALTER TABLE table_for_reset_setting MODIFY SETTING merge_with_ttl_timeout = 300, max_concurrent_queries = 1; + +SHOW CREATE TABLE table_for_reset_setting; + +ALTER TABLE table_for_reset_setting RESET SETTING max_concurrent_queries, merge_with_ttl_timeout; + +SHOW CREATE TABLE table_for_reset_setting; + +DROP TABLE IF EXISTS table_for_reset_setting; \ No newline at end of file diff --git 
a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference index ee5a8024a92..357d1bef78d 100644 --- a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference +++ b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference @@ -10,3 +10,12 @@ CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64,\n `Data CREATE TABLE default.replicated_table_for_alter2\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_alter\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64,\n `Data` String,\n `Data2` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1, check_delay_period = 15 CREATE TABLE default.replicated_table_for_alter2\n(\n `id` UInt64,\n `Data` String,\n `Data2` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_alter\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 +CREATE TABLE default.replicated_table_for_reset_setting1\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_reset_setting\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.replicated_table_for_reset_setting2\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_reset_setting\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.replicated_table_for_reset_setting1\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_reset_setting\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.replicated_table_for_reset_setting1\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_reset_setting\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192, merge_with_ttl_timeout = 100 +CREATE TABLE default.replicated_table_for_reset_setting2\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_reset_setting\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, merge_with_ttl_timeout = 200 +CREATE TABLE default.replicated_table_for_reset_setting1\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_reset_setting\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192, merge_with_ttl_timeout = 100 +CREATE TABLE default.replicated_table_for_reset_setting2\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_reset_setting\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, merge_with_ttl_timeout = 200 +CREATE TABLE default.replicated_table_for_reset_setting1\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_reset_setting\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE 
default.replicated_table_for_reset_setting2\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00980/replicated_table_for_reset_setting\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql index 60e4cfff4e1..6ad8860227d 100644 --- a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql +++ b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql @@ -67,3 +67,51 @@ SHOW CREATE TABLE replicated_table_for_alter2; DROP TABLE IF EXISTS replicated_table_for_alter2; DROP TABLE IF EXISTS replicated_table_for_alter1; + +DROP TABLE IF EXISTS replicated_table_for_reset_setting1; +DROP TABLE IF EXISTS replicated_table_for_reset_setting2; + +SET replication_alter_partitions_sync = 2; + +CREATE TABLE replicated_table_for_reset_setting1 ( + id UInt64, + Data String +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00980/replicated_table_for_reset_setting', '1') ORDER BY id; + +CREATE TABLE replicated_table_for_reset_setting2 ( + id UInt64, + Data String +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00980/replicated_table_for_reset_setting', '2') ORDER BY id; + +SHOW CREATE TABLE replicated_table_for_reset_setting1; +SHOW CREATE TABLE replicated_table_for_reset_setting2; + +ALTER TABLE replicated_table_for_reset_setting1 MODIFY SETTING index_granularity = 4096; -- { serverError 472 } + +SHOW CREATE TABLE replicated_table_for_reset_setting1; + +ALTER TABLE replicated_table_for_reset_setting1 MODIFY SETTING merge_with_ttl_timeout = 100; +ALTER TABLE replicated_table_for_reset_setting2 MODIFY SETTING merge_with_ttl_timeout = 200; + +SHOW CREATE TABLE replicated_table_for_reset_setting1; +SHOW CREATE TABLE replicated_table_for_reset_setting2; + +DETACH TABLE replicated_table_for_reset_setting2; +ATTACH TABLE replicated_table_for_reset_setting2; + +DETACH TABLE replicated_table_for_reset_setting1; +ATTACH TABLE replicated_table_for_reset_setting1; + +SHOW CREATE TABLE replicated_table_for_reset_setting1; +SHOW CREATE TABLE replicated_table_for_reset_setting2; + +-- ignore undefined setting +ALTER TABLE replicated_table_for_reset_setting1 RESET SETTING check_delay_period, unknown_setting; +ALTER TABLE replicated_table_for_reset_setting1 RESET SETTING merge_with_ttl_timeout; +ALTER TABLE replicated_table_for_reset_setting2 RESET SETTING merge_with_ttl_timeout; + +SHOW CREATE TABLE replicated_table_for_reset_setting1; +SHOW CREATE TABLE replicated_table_for_reset_setting2; + +DROP TABLE IF EXISTS replicated_table_for_reset_setting2; +DROP TABLE IF EXISTS replicated_table_for_reset_setting1; diff --git a/tests/queries/0_stateless/01076_array_join_prewhere_const_folding.sql b/tests/queries/0_stateless/01076_array_join_prewhere_const_folding.sql index 2ab87fa883a..7c4f69a64fb 100644 --- a/tests/queries/0_stateless/01076_array_join_prewhere_const_folding.sql +++ b/tests/queries/0_stateless/01076_array_join_prewhere_const_folding.sql @@ -2,9 +2,7 @@ SET log_queries = 1; SELECT 1 LIMIT 0; SYSTEM FLUSH LOGS; -SELECT arrayJoin AS kv_key -FROM system.query_log -ARRAY JOIN ProfileEvents.Names AS arrayJoin -PREWHERE current_database = currentDatabase() AND has(arrayMap(key -> key, ProfileEvents.Names), 'Query') -WHERE arrayJoin = 'Query' +SELECT * FROM system.query_log +PREWHERE ProfileEvents['Query'] > 0 and current_database = currentDatabase() + LIMIT 0; diff --git 
a/tests/queries/0_stateless/01149_zookeeper_mutation_stuck_after_replace_partition.sql b/tests/queries/0_stateless/01149_zookeeper_mutation_stuck_after_replace_partition.sql index 178f9b81ead..fd3f1f3fcfe 100644 --- a/tests/queries/0_stateless/01149_zookeeper_mutation_stuck_after_replace_partition.sql +++ b/tests/queries/0_stateless/01149_zookeeper_mutation_stuck_after_replace_partition.sql @@ -4,7 +4,7 @@ drop table if exists rmt sync; create table mt (n UInt64, s String) engine = MergeTree partition by intDiv(n, 10) order by n; insert into mt values (3, '3'), (4, '4'); -create table rmt (n UInt64, s String) engine = ReplicatedMergeTree('/clickhouse/test_01149/rmt', 'r1') partition by intDiv(n, 10) order by n; +create table rmt (n UInt64, s String) engine = ReplicatedMergeTree('/clickhouse/test_01149_{database}/rmt', 'r1') partition by intDiv(n, 10) order by n; insert into rmt values (1,'1'), (2, '2'); select * from rmt; @@ -16,6 +16,8 @@ alter table rmt update s = 's'||toString(n) where 1; select * from rmt; alter table rmt replace partition '0' from mt; +system sync replica rmt; + select table, partition_id, name, rows from system.parts where database=currentDatabase() and table in ('mt', 'rmt') and active=1 order by table, name; alter table rmt drop column s; @@ -26,7 +28,7 @@ select * from rmt; drop table rmt sync; set replication_alter_partitions_sync=0; -create table rmt (n UInt64, s String) engine = ReplicatedMergeTree('/clickhouse/test_01149/rmt', 'r1') partition by intDiv(n, 10) order by n; +create table rmt (n UInt64, s String) engine = ReplicatedMergeTree('/clickhouse/test_01149_{database}/rmt', 'r1') partition by intDiv(n, 10) order by n; insert into rmt values (1,'1'), (2, '2'); alter table rmt update s = 's'||toString(n) where 1; diff --git a/tests/queries/0_stateless/01154_move_partition_long.sh b/tests/queries/0_stateless/01154_move_partition_long.sh index 66ebbacee42..dd16b2dc63d 100755 --- a/tests/queries/0_stateless/01154_move_partition_long.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -12,10 +12,10 @@ engines[2]="ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{shard} for ((i=0; i<16; i++)) do $CLICKHOUSE_CLIENT -q "CREATE TABLE dst_$i (p UInt64, k UInt64, v UInt64) ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst', '$i') - PARTITION BY p % 10 ORDER BY k" 2>&1| grep -Pv "Retrying createReplica|created by another server at the same moment, will retry" & + PARTITION BY p % 10 ORDER BY k" 2>&1| grep -Pv "Retrying createReplica|created by another server at the same moment, will retry|is already started to be removing" 2>&1 & engine=${engines[$((i % ${#engines[@]}))]} $CLICKHOUSE_CLIENT -q "CREATE TABLE src_$i (p UInt64, k UInt64, v UInt64) ENGINE=$engine - PARTITION BY p % 10 ORDER BY k" 2>&1| grep -Pv "Retrying createReplica|created by another server at the same moment, will retry" & + PARTITION BY p % 10 ORDER BY k" 2>&1| grep -Pv "Retrying createReplica|created by another server at the same moment, will retry|is already started to be removing" 2>&1 & done wait @@ -85,12 +85,23 @@ function optimize_thread() done } +function drop_part_thread() +{ + while true; do + REPLICA=$(($RANDOM % 16)) + part=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.parts WHERE active AND database='$CLICKHOUSE_DATABASE' and table='dst_$REPLICA' ORDER BY rand() LIMIT 1") + $CLICKHOUSE_CLIENT -q "ALTER TABLE dst_$REPLICA DROP PART '$part'" 2>/dev/null + sleep 0.$RANDOM; + done +} + #export -f create_drop_thread; export -f insert_thread; 
export -f move_partition_src_dst_thread; export -f replace_partition_src_src_thread; export -f drop_partition_thread; export -f optimize_thread; +export -f drop_part_thread; TIMEOUT=60 @@ -102,6 +113,7 @@ timeout $TIMEOUT bash -c move_partition_src_dst_thread & timeout $TIMEOUT bash -c replace_partition_src_src_thread & timeout $TIMEOUT bash -c drop_partition_thread & timeout $TIMEOUT bash -c optimize_thread & +timeout $TIMEOUT bash -c drop_part_thread & wait for ((i=0; i<16; i++)) do diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.reference b/tests/queries/0_stateless/01155_rename_move_materialized_view.reference index 942cedf8696..635fd16620d 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.reference +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.reference @@ -1,10 +1,12 @@ 1 .inner.mv1 before moving tablesmv1 +1 dist before moving tables 1 dst before moving tablesmv2 1 mv1 before moving tablesmv1 1 mv2 before moving tablesmv2 1 src before moving tables ordinary: .inner.mv1 +dist dst mv1 mv2 @@ -12,12 +14,16 @@ src ordinary after rename: atomic after rename: .inner_id. +dist dst mv1 mv2 src 3 .inner_id. after renaming databasemv1 3 .inner_id. before moving tablesmv1 +3 dist after moving tables +3 dist after renaming database +3 dist before moving tables 3 dst after renaming databasemv2 3 dst before moving tablesmv2 3 mv1 after renaming databasemv1 @@ -28,6 +34,7 @@ src 3 src after renaming database 3 src before moving tables .inner_id. +dist dst mv1 mv2 @@ -36,6 +43,10 @@ CREATE DATABASE test_01155_atomic\nENGINE = Atomic 4 .inner.mv1 after renaming databasemv1 4 .inner.mv1 after renaming tablesmv1 4 .inner.mv1 before moving tablesmv1 +4 dist after moving tables +4 dist after renaming database +4 dist after renaming tables +4 dist before moving tables 4 dst after renaming databasemv2 4 dst after renaming tablesmv2 4 dst before moving tablesmv2 @@ -51,6 +62,7 @@ CREATE DATABASE test_01155_atomic\nENGINE = Atomic 4 src before moving tables test_01155_ordinary: .inner.mv1 +dist dst mv1 mv2 diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql index 2ede0fbcedf..882be2702d8 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql @@ -9,8 +9,11 @@ CREATE TABLE src (s String) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY s; CREATE MATERIALIZED VIEW mv1 (s String) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY s AS SELECT (*,).1 || 'mv1' as s FROM src; CREATE TABLE dst (s String) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY s; CREATE MATERIALIZED VIEW mv2 TO dst (s String) AS SELECT (*,).1 || 'mv2' as s FROM src; -INSERT INTO src VALUES ('before moving tables'); -SELECT 1, substr(_table, 1, 10), s FROM merge('test_01155_ordinary', '') ORDER BY _table, s; +CREATE TABLE dist (s String) Engine=Distributed(test_shard_localhost, test_01155_ordinary, src); +INSERT INTO dist VALUES ('before moving tables'); +SYSTEM FLUSH DISTRIBUTED dist; +-- FIXME Cannot convert column `1` because it is non constant in source stream but must be constant in result +SELECT materialize(1), substr(_table, 1, 10), s FROM merge('test_01155_ordinary', '') ORDER BY _table, s; -- Move tables with materialized views from Ordinary to Atomic SELECT 'ordinary:'; @@ -19,6 +22,7 @@ RENAME TABLE test_01155_ordinary.mv1 TO test_01155_atomic.mv1; RENAME TABLE 
test_01155_ordinary.mv2 TO test_01155_atomic.mv2; RENAME TABLE test_01155_ordinary.dst TO test_01155_atomic.dst; RENAME TABLE test_01155_ordinary.src TO test_01155_atomic.src; +RENAME TABLE test_01155_ordinary.dist TO test_01155_atomic.dist; SELECT 'ordinary after rename:'; SELECT substr(name, 1, 10) FROM system.tables WHERE database='test_01155_ordinary'; SELECT 'atomic after rename:'; @@ -27,13 +31,14 @@ DROP DATABASE test_01155_ordinary; USE default; INSERT INTO test_01155_atomic.src VALUES ('after moving tables'); -SELECT 2, substr(_table, 1, 10), s FROM merge('test_01155_atomic', '') ORDER BY _table, s; -- { serverError 81 } +SELECT materialize(2), substr(_table, 1, 10), s FROM merge('test_01155_atomic', '') ORDER BY _table, s; -- { serverError 81 } RENAME DATABASE test_01155_atomic TO test_01155_ordinary; USE test_01155_ordinary; -INSERT INTO src VALUES ('after renaming database'); -SELECT 3, substr(_table, 1, 10), s FROM merge('test_01155_ordinary', '') ORDER BY _table, s; +INSERT INTO dist VALUES ('after renaming database'); +SYSTEM FLUSH DISTRIBUTED dist; +SELECT materialize(3), substr(_table, 1, 10), s FROM merge('test_01155_ordinary', '') ORDER BY _table, s; SELECT substr(name, 1, 10) FROM system.tables WHERE database='test_01155_ordinary'; @@ -47,9 +52,11 @@ RENAME TABLE test_01155_atomic.mv1 TO test_01155_ordinary.mv1; RENAME TABLE test_01155_atomic.mv2 TO test_01155_ordinary.mv2; RENAME TABLE test_01155_atomic.dst TO test_01155_ordinary.dst; RENAME TABLE test_01155_atomic.src TO test_01155_ordinary.src; +RENAME TABLE test_01155_atomic.dist TO test_01155_ordinary.dist; -INSERT INTO src VALUES ('after renaming tables'); -SELECT 4, substr(_table, 1, 10), s FROM merge('test_01155_ordinary', '') ORDER BY _table, s; +INSERT INTO dist VALUES ('after renaming tables'); +SYSTEM FLUSH DISTRIBUTED dist; +SELECT materialize(4), substr(_table, 1, 10), s FROM merge('test_01155_ordinary', '') ORDER BY _table, s; SELECT 'test_01155_ordinary:'; SHOW TABLES FROM test_01155_ordinary; SELECT 'test_01155_atomic:'; diff --git a/tests/queries/0_stateless/01156_pcg_deserialization.reference b/tests/queries/0_stateless/01156_pcg_deserialization.reference new file mode 100644 index 00000000000..e43b7ca3ceb --- /dev/null +++ b/tests/queries/0_stateless/01156_pcg_deserialization.reference @@ -0,0 +1,3 @@ +5 5 +5 5 +5 5 diff --git a/tests/queries/0_stateless/01156_pcg_deserialization.sh b/tests/queries/0_stateless/01156_pcg_deserialization.sh new file mode 100755 index 00000000000..9c8ac29f32e --- /dev/null +++ b/tests/queries/0_stateless/01156_pcg_deserialization.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +declare -a engines=("Memory" "MergeTree order by n" "Log") + +for engine in "${engines[@]}" +do + $CLICKHOUSE_CLIENT -q "drop table if exists t"; + $CLICKHOUSE_CLIENT -q "create table t (n UInt8, a1 AggregateFunction(groupArraySample(1), UInt8)) engine=$engine" + $CLICKHOUSE_CLIENT -q "insert into t select number % 5 as n, groupArraySampleState(1)(toUInt8(number)) from numbers(10) group by n" + + $CLICKHOUSE_CLIENT -q "select * from t format TSV" | $CLICKHOUSE_CLIENT -q "insert into t format TSV" + $CLICKHOUSE_CLIENT -q "select countDistinct(n), countDistinct(a1) from t" + + $CLICKHOUSE_CLIENT -q "drop table t"; +done diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.sql b/tests/queries/0_stateless/01231_log_queries_min_type.sql index b3540f3354b..c2470bb9a56 100644 --- a/tests/queries/0_stateless/01231_log_queries_min_type.sql +++ b/tests/queries/0_stateless/01231_log_queries_min_type.sql @@ -29,6 +29,7 @@ set max_rows_to_read=0; select count() from system.query_log where current_database = currentDatabase() and query like 'select \'01231_log_queries_min_type w/ Settings/EXCEPTION_WHILE_PROCESSING%' and + query not like '%system.query_log%' and event_date >= yesterday() and type = 'ExceptionWhileProcessing' and - has(Settings.Names, 'max_rows_to_read'); + Settings['max_rows_to_read'] != ''; diff --git a/tests/queries/0_stateless/01249_flush_interactive.reference b/tests/queries/0_stateless/01249_flush_interactive.reference index 6d6abb2d37f..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01249_flush_interactive.reference +++ b/tests/queries/0_stateless/01249_flush_interactive.reference @@ -1,12 +0,0 @@ -0 -1 -2 -3 -4 ---- -0 -1 -2 -3 -4 ---- diff --git a/tests/queries/0_stateless/01249_flush_interactive.sh b/tests/queries/0_stateless/01249_flush_interactive.sh index 2af75dbcbe5..89167002ed5 100755 --- a/tests/queries/0_stateless/01249_flush_interactive.sh +++ b/tests/queries/0_stateless/01249_flush_interactive.sh @@ -11,7 +11,20 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # unless the my-program will try to output a thousand more lines overflowing pipe buffer and terminating with Broken Pipe. # But if my program just output 5 (or slightly more) lines and hang up, the pipeline is not terminated. -timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --query "SELECT DISTINCT number % 5 FROM system.numbers" ||: -echo '---' -timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10" --data-binary "SELECT DISTINCT number % 5 FROM system.numbers" ||: -echo '---' +function test() +{ + timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --query " + SELECT DISTINCT number % 5 FROM system.numbers" ||: + echo '---' + timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10" --data-binary " + SELECT DISTINCT number % 5 FROM system.numbers" ||: + echo '---' +} + +# The test depends on timeouts. And there is a chance that under high system load the query +# will not be able to finish in 5 seconds (this will lead to test flakiness). +# Let's check that is will be able to show the expected result at least once. 
+while true; do + [[ $(test) == $(echo -ne "0\n1\n2\n3\n4\n---\n0\n1\n2\n3\n4\n---\n") ]] && break + sleep 1 +done diff --git a/tests/queries/0_stateless/01268_procfs_metrics.sh b/tests/queries/0_stateless/01268_procfs_metrics.sh index cad9b786667..d5bd99724ca 100755 --- a/tests/queries/0_stateless/01268_procfs_metrics.sh +++ b/tests/queries/0_stateless/01268_procfs_metrics.sh @@ -24,7 +24,7 @@ function show_processes_func() # These two system metrics for the generating query above are guaranteed to be nonzero when ProcFS is mounted at /proc $CLICKHOUSE_CLIENT -q " SELECT count() > 0 FROM system.processes\ - WHERE has(ProfileEvents.Names, 'OSCPUVirtualTimeMicroseconds') AND has(ProfileEvents.Names, 'OSReadChars')\ + WHERE ProfileEvents['OSCPUVirtualTimeMicroseconds'] > 0 AND ProfileEvents['OSReadChars'] > 0 \ SETTINGS max_threads = 1 " | grep '1' && break; done diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 343d8ceeca3..035cb902bff 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -31,7 +31,7 @@ ALTER DROP CONSTRAINT ['DROP CONSTRAINT'] TABLE ALTER CONSTRAINT ALTER CONSTRAINT ['CONSTRAINT'] \N ALTER TABLE ALTER TTL ['ALTER MODIFY TTL','MODIFY TTL'] TABLE ALTER TABLE ALTER MATERIALIZE TTL ['MATERIALIZE TTL'] TABLE ALTER TABLE -ALTER SETTINGS ['ALTER SETTING','ALTER MODIFY SETTING','MODIFY SETTING'] TABLE ALTER TABLE +ALTER SETTINGS ['ALTER SETTING','ALTER MODIFY SETTING','MODIFY SETTING','RESET SETTING'] TABLE ALTER TABLE ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTER TABLE ALTER FETCH PARTITION ['ALTER FETCH PART','FETCH PARTITION'] TABLE ALTER TABLE ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE diff --git a/tests/queries/0_stateless/01271_show_privileges.sql b/tests/queries/0_stateless/01271_show_privileges.sql index efd6ddb200c..e3210a7ae00 100644 --- a/tests/queries/0_stateless/01271_show_privileges.sql +++ b/tests/queries/0_stateless/01271_show_privileges.sql @@ -1 +1 @@ -SHOW PRIVILEGES; \ No newline at end of file +SHOW PRIVILEGES; diff --git a/tests/queries/0_stateless/01273_arrow_decimal.reference b/tests/queries/0_stateless/01273_arrow_decimal.reference new file mode 100644 index 00000000000..a512796de07 --- /dev/null +++ b/tests/queries/0_stateless/01273_arrow_decimal.reference @@ -0,0 +1,2 @@ +0.1230 0.12312312 0.1231231231230000 0.12312312312312312300000000000000 +0.1230 0.12312312 0.1231231231230000 0.12312312312312312300000000000000 diff --git a/tests/queries/0_stateless/01273_arrow_decimal.sh b/tests/queries/0_stateless/01273_arrow_decimal.sh new file mode 100755 index 00000000000..22496035ea9 --- /dev/null +++ b/tests/queries/0_stateless/01273_arrow_decimal.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_decimal" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_decimal (d1 Decimal32(4), d2 Decimal64(8), d3 Decimal128(16), d4 Decimal256(32)) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="INSERT INTO TABLE arrow_decimal VALUES (0.123, 0.123123123, 0.123123123123, 0.123123123123123123)" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_decimal FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_decimal FORMAT Arrow" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_decimal" +${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_decimal" + diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.reference b/tests/queries/0_stateless/01280_ttl_where_group_by.reference index 7fe00709dee..65e7e5b158f 100644 --- a/tests/queries/0_stateless/01280_ttl_where_group_by.reference +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.reference @@ -16,11 +16,11 @@ ttl_01280_3 2 1 0 3 3 1 8 2 ttl_01280_4 -1 1 0 4 -10 2 13 9 +0 4 +13 9 ttl_01280_5 1 2 7 5 2 3 6 5 ttl_01280_6 -1 5 3 5 -2 10 3 5 +1 3 5 +2 3 5 diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.sh b/tests/queries/0_stateless/01280_ttl_where_group_by.sh index 9f30c7c5872..c9936ce7afd 100755 --- a/tests/queries/0_stateless/01280_ttl_where_group_by.sh +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.sh @@ -80,7 +80,7 @@ insert into ttl_01280_4 values (1, 5, 4, 9, now())" sleep 2 optimize "ttl_01280_4" -$CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_4 ORDER BY a, b, x, y" +$CLICKHOUSE_CLIENT --query "select x, y from ttl_01280_4 ORDER BY a, b, x, y" $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_5" @@ -107,7 +107,7 @@ insert into ttl_01280_6 values (1, 5, 3, 5, now())" sleep 2 optimize "ttl_01280_6" -$CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_6 ORDER BY a, b, x, y" +$CLICKHOUSE_CLIENT --query "select a, x, y from ttl_01280_6 ORDER BY a, b, x, y" $CLICKHOUSE_CLIENT -q "DROP TABLE ttl_01280_1" $CLICKHOUSE_CLIENT -q "DROP TABLE ttl_01280_2" diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract.sql b/tests/queries/0_stateless/01318_map_add_map_subtract.sql index 40c08e0a147..6ead7a2db46 100644 --- a/tests/queries/0_stateless/01318_map_add_map_subtract.sql +++ b/tests/queries/0_stateless/01318_map_add_map_subtract.sql @@ -2,7 +2,7 @@ drop table if exists map_test; create table map_test engine=TinyLog() as (select ([1, number], [toInt32(2),2]) as map from numbers(1, 10)); -- mapAdd -select mapAdd([1], [1]); -- { serverError 42 } +select mapAdd([1], [1]); -- { serverError 43 } select mapAdd(([1], [1])); -- { serverError 42 } select mapAdd(([1], [1]), map) from map_test; -- { serverError 43 } select mapAdd(([toUInt64(1)], [1]), map) from map_test; -- { serverError 43 } @@ -27,7 +27,7 @@ select mapAdd(([toInt64(1), 2], [toInt64(1), 1]), ([toInt64(1), 2], [toInt64(1), select mapAdd(([1, 2], [toFloat32(1.1), 1]), ([1, 2], [2.2, 1])) as res, toTypeName(res); select mapAdd(([1, 2], [toFloat64(1.1), 1]), ([1, 2], [2.2, 1])) as res, toTypeName(res); -select mapAdd(([toFloat32(1), 2], [toFloat64(1.1), 1]), ([toFloat32(1), 2], [2.2, 1])) as res, toTypeName(res); -- { serverError 44 } +select mapAdd(([toFloat32(1), 2], [toFloat64(1.1), 1]), ([toFloat32(1), 2], [2.2, 1])) as res, toTypeName(res); -- { serverError 43 } select mapAdd(([1, 2], [toFloat64(1.1), 1]), ([1, 2], [1, 1])) as res, toTypeName(res); -- { serverError 43 } select mapAdd((['a', 'b'], [1, 1]), ([key], [1])) from 
values('key String', ('b'), ('c'), ('d')); select mapAdd((cast(['a', 'b'], 'Array(FixedString(1))'), [1, 1]), ([key], [1])) as res, toTypeName(res) from values('key FixedString(1)', ('b'), ('c'), ('d')); diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference new file mode 100644 index 00000000000..96bafc2c79c --- /dev/null +++ b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference @@ -0,0 +1,55 @@ +{1:5} +{1:3,2:2} +{1:3,3:2} +{1:3,4:2} +{1:3,5:2} +{1:3,6:2} +{1:3,7:2} +{1:3,8:2} +{1:3,9:2} +{1:3,10:2} +{1:5,2:2} +{1:3,2:4} +{1:3,2:2,3:2} +{1:3,2:2,4:2} +{1:3,2:2,5:2} +{1:3,2:2,6:2} +{1:3,2:2,7:2} +{1:3,2:2,8:2} +{1:3,2:2,9:2} +{1:3,2:2,10:2} +{1:2,2:2} Map(UInt8,UInt64) +{1:2,2:2} Map(UInt16,UInt64) +{1:2,2:2} Map(UInt32,UInt64) +{1:2,2:2} Map(UInt64,UInt64) +{1:2,2:2} Map(UInt128,UInt128) +{1:2,2:2} Map(UInt256,UInt256) +{1:2,2:2} Map(Int16,UInt64) +{1:2,2:2} Map(Int16,Int64) +{1:2,2:2} Map(Int32,Int64) +{1:2,2:2} Map(Int64,Int64) +{1:2,2:2} Map(Int128,Int128) +{1:2,2:2} Map(Int256,Int256) +{1:3.300000023841858,2:2} Map(UInt8,Float64) +{1:3.3000000000000003,2:2} Map(UInt8,Float64) +{'a':1,'b':2} +{'a':1,'b':1,'c':1} +{'a':1,'b':1,'d':1} +{'a':1,'b':2} Map(String,UInt64) +{'a':1,'b':1,'c':1} Map(String,UInt64) +{'a':1,'b':1,'d':1} Map(String,UInt64) +{'a':1,'b':2} +{'a':1,'b':1,'c':1} +{'a':1,'b':1,'d':1} +{'a':2} Map(Enum16(\'a\' = 1, \'b\' = 2),Int64) +{'b':2} Map(Enum16(\'a\' = 1, \'b\' = 2),Int64) +{'a':2} Map(Enum8(\'a\' = 1, \'b\' = 2),Int64) +{'b':2} Map(Enum8(\'a\' = 1, \'b\' = 2),Int64) +{'00000000-89ab-cdef-0123-456789abcdef':2} Map(UUID,Int64) +{'11111111-89ab-cdef-0123-456789abcdef':4} Map(UUID,Int64) +{1:0,2:0} Map(UInt8,UInt64) +{1:18446744073709551615,2:18446744073709551615} Map(UInt8,UInt64) +{1:-1,2:-1} Map(UInt8,Int64) +{1:-1.0999999761581423,2:0} Map(UInt8,Float64) +{1:-1,2:-1} Map(UInt8,Int64) +{1:-2,2:-2,3:1} Map(UInt8,Int64) diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql new file mode 100644 index 00000000000..9f0f1cb0489 --- /dev/null +++ b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql @@ -0,0 +1,46 @@ +drop table if exists mapop_test; +set allow_experimental_map_type = 1; +create table mapop_test engine=TinyLog() as (select map(1, toInt32(2), number, 2) as m from numbers(1, 10)); + +-- mapAdd +select mapAdd(map(1, 1)); -- { serverError 42 } +select mapAdd(map(1, 1), m) from mapop_test; -- { serverError 43 } + +select mapAdd(map(toUInt64(1), toInt32(1)), m) from mapop_test; +select mapAdd(cast(m, 'Map(UInt8, UInt8)'), map(1, 1), map(2,2)) from mapop_test; + +-- cleanup +drop table mapop_test; + +-- check types +select mapAdd(map(toUInt8(1), 1, 2, 1), map(toUInt8(1), 1, 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt16(1), toUInt16(1), 2, 1), map(toUInt16(1), toUInt16(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt32(1), toUInt32(1), 2, 1), map(toUInt32(1), toUInt32(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt64(1), toUInt64(1), 2, 1), map(toUInt64(1), toUInt64(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt128(1), toUInt128(1), 2, 1), map(toUInt128(1), toUInt128(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt256(1), toUInt256(1), 2, 1), map(toUInt256(1), toUInt256(1), 2, 1)) as res, toTypeName(res); + +select mapAdd(map(toInt8(1), 1, 2, 1), 
map(toInt8(1), 1, 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt16(1), toInt16(1), 2, 1), map(toInt16(1), toInt16(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt32(1), toInt32(1), 2, 1), map(toInt32(1), toInt32(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt64(1), toInt64(1), 2, 1), map(toInt64(1), toInt64(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt128(1), toInt128(1), 2, 1), map(toInt128(1), toInt128(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt256(1), toInt256(1), 2, 1), map(toInt256(1), toInt256(1), 2, 1)) as res, toTypeName(res); + +select mapAdd(map(1, toFloat32(1.1), 2, 1), map(1, 2.2, 2, 1)) as res, toTypeName(res); +select mapAdd(map(1, toFloat64(1.1), 2, 1), map(1, 2.2, 2, 1)) as res, toTypeName(res); +select mapAdd(map(1, toFloat64(1.1), 2, 1), map(1, 1, 2, 1)) as res, toTypeName(res); -- { serverError 43 } +select mapAdd(map('a', 1, 'b', 1), map(key, 1)) from values('key String', ('b'), ('c'), ('d')); +select mapAdd(map(cast('a', 'FixedString(1)'), 1, 'b', 1), map(key, 1)) as res, toTypeName(res) from values('key String', ('b'), ('c'), ('d')); +select mapAdd(map(cast('a', 'LowCardinality(String)'), 1, 'b', 1), map(key, 1)) from values('key String', ('b'), ('c'), ('d')); +select mapAdd(map(key, val), map(key, val)) as res, toTypeName(res) from values ('key Enum16(\'a\'=1, \'b\'=2), val Int16', ('a', 1), ('b', 1)); +select mapAdd(map(key, val), map(key, val)) as res, toTypeName(res) from values ('key Enum8(\'a\'=1, \'b\'=2), val Int16', ('a', 1), ('b', 1)); +select mapAdd(map(key, val), map(key, val)) as res, toTypeName(res) from values ('key UUID, val Int32', ('00000000-89ab-cdef-0123-456789abcdef', 1), ('11111111-89ab-cdef-0123-456789abcdef', 2)); + +-- mapSubtract, same rules as mapAdd +select mapSubtract(map(toUInt8(1), 1, 2, 1), map(toUInt8(1), 1, 2, 1)) as res, toTypeName(res); +select mapSubtract(map(toUInt8(1), 1, 2, 1), map(toUInt8(1), 2, 2, 2)) as res, toTypeName(res); -- overflow +select mapSubtract(map(toUInt8(1), toInt32(1), 2, 1), map(toUInt8(1), toInt16(2), 2, 2)) as res, toTypeName(res); +select mapSubtract(map(1, toFloat32(1.1), 2, 1), map(1, 2.2, 2, 1)) as res, toTypeName(res); +select mapSubtract(map(toUInt8(1), toInt32(1), 2, 1), map(toUInt8(1), toInt16(2), 2, 2)) as res, toTypeName(res); +select mapSubtract(map(toUInt8(3), toInt32(1)), map(toUInt8(1), toInt32(2), 2, 2)) as res, toTypeName(res); diff --git a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql index ef39605f4d2..cbcb5c643fe 100644 --- a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql +++ b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql @@ -6,6 +6,6 @@ SET min_bytes_to_use_mmap_io = 1; SELECT * FROM test_01343; SYSTEM FLUSH LOGS; -SELECT PE.Values FROM system.query_log ARRAY JOIN ProfileEvents AS PE WHERE current_database = currentDatabase() AND event_date >= yesterday() AND event_time >= now() - 300 AND query LIKE 'SELECT * FROM test_01343%' AND PE.Names = 'CreatedReadBufferMMap' AND type = 2 ORDER BY event_time DESC LIMIT 1; +SELECT ProfileEvents['CreatedReadBufferMMap'] AS value FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND event_time >= now() - 300 AND query LIKE 'SELECT * FROM test_01343%' AND type = 2 ORDER BY event_time DESC LIMIT 1; DROP TABLE test_01343; diff --git a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql 
b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql index 7aab991d203..3d148527270 100644 --- a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql +++ b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql @@ -6,6 +6,7 @@ SET min_bytes_to_use_mmap_io = 1; SELECT * FROM test_01344 WHERE x = 'Hello, world'; SYSTEM FLUSH LOGS; -SELECT PE.Values FROM system.query_log ARRAY JOIN ProfileEvents AS PE WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE 'SELECT * FROM test_01344 WHERE x = ''Hello, world''%' AND PE.Names = 'CreatedReadBufferMMap' AND type = 2 ORDER BY event_time DESC LIMIT 1; +SELECT ProfileEvents['CreatedReadBufferMMap'] as value FROM system.query_log + WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE 'SELECT * FROM test_01344 WHERE x = ''Hello, world''%' AND type = 2 ORDER BY event_time DESC LIMIT 1; DROP TABLE test_01344; diff --git a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql index 3380f04f8c9..5f4855c6119 100644 --- a/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql +++ b/tests/queries/0_stateless/01360_materialized_view_with_join_on_query_log.sql @@ -8,7 +8,9 @@ SET log_queries=1; SELECT 1; SYSTEM FLUSH LOGS; + -- NOTE: can be rewritten using log_queries_min_query_duration_ms + CREATE MATERIALIZED VIEW slow_log Engine=Memory AS ( SELECT * FROM diff --git a/tests/queries/0_stateless/01413_rows_events.sql b/tests/queries/0_stateless/01413_rows_events.sql index 9e77d3fced9..0a0da9b4b12 100644 --- a/tests/queries/0_stateless/01413_rows_events.sql +++ b/tests/queries/0_stateless/01413_rows_events.sql @@ -6,23 +6,24 @@ SYSTEM FLUSH LOGS; SELECT written_rows FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE 'INSERT INTO /* test 01413, query 1 */ rows_events_test%' AND type = 2 AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1; -SELECT ProfileEvents.Values as value FROM system.query_log ARRAY JOIN ProfileEvents - WHERE current_database = currentDatabase() AND ProfileEvents.Names = 'InsertedRows' AND query LIKE 'INSERT INTO /* test 01413, query 1 */ rows_events_test%' AND type = 2 AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1; +SELECT ProfileEvents['InsertedRows'] as value FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE 'INSERT INTO /* test 01413, query 1 */ rows_events_test%' AND type = 2 AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1; + INSERT INTO /* test 01413, query 2 */ rows_events_test VALUES (2,2), (3,3); SYSTEM FLUSH LOGS; SELECT written_rows FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE 'INSERT INTO /* test 01413, query 2 */ rows_events_test%' AND type = 2 AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1; -SELECT ProfileEvents.Values as value FROM system.query_log ARRAY JOIN ProfileEvents - WHERE current_database = currentDatabase() AND ProfileEvents.Names = 'InsertedRows' AND query LIKE 'INSERT INTO /* test 01413, query 2 */ rows_events_test%' AND type = 2 AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1; +SELECT ProfileEvents['InsertedRows'] as value FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE 'INSERT INTO /* test 01413, query 2 */ rows_events_test%' AND type = 2 AND event_date >= yesterday() ORDER BY event_time 
DESC LIMIT 1; + SELECT * FROM /* test 01413, query 3 */ rows_events_test WHERE v = 2; SYSTEM FLUSH LOGS; SELECT read_rows FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE 'SELECT * FROM /* test 01413, query 3 */ rows_events_test%' AND type = 2 AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1; -SELECT ProfileEvents.Values as value FROM system.query_log ARRAY JOIN ProfileEvents - WHERE current_database = currentDatabase() AND ProfileEvents.Names = 'SelectedRows' AND query LIKE 'SELECT * FROM /* test 01413, query 3 */ rows_events_test%' AND type = 2 AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1; + +SELECT ProfileEvents['SelectedRows'] as value FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE 'SELECT * FROM /* test 01413, query 3 */ rows_events_test%' AND type = 2 AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1; + DROP TABLE rows_events_test; diff --git a/tests/queries/0_stateless/01414_optimize_any_bug.reference b/tests/queries/0_stateless/01414_optimize_any_bug.reference index 573541ac970..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01414_optimize_any_bug.reference +++ b/tests/queries/0_stateless/01414_optimize_any_bug.reference @@ -1 +0,0 @@ -0 diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.reference b/tests/queries/0_stateless/01442_merge_detach_attach_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01442_merge_detach_attach.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh similarity index 100% rename from tests/queries/0_stateless/01442_merge_detach_attach.sh rename to tests/queries/0_stateless/01442_merge_detach_attach_long.sh diff --git a/tests/queries/0_stateless/01447_json_strings.reference b/tests/queries/0_stateless/01447_json_strings.reference index ab88e2f3696..7892cb82922 100644 --- a/tests/queries/0_stateless/01447_json_strings.reference +++ b/tests/queries/0_stateless/01447_json_strings.reference @@ -14,7 +14,7 @@ "type": "Array(UInt8)" }, { - "name": "tuple(1, 'a')", + "name": "(1, 'a')", "type": "Tuple(UInt8, String)" }, { @@ -33,7 +33,7 @@ "1": "1", "'a'": "a", "[1, 2, 3]": "[1,2,3]", - "tuple(1, 'a')": "(1,'a')", + "(1, 'a')": "(1,'a')", "NULL": "ᴺᵁᴸᴸ", "nan": "nan" } diff --git a/tests/queries/0_stateless/01449_json_compact_strings.reference b/tests/queries/0_stateless/01449_json_compact_strings.reference index 1c6f073c0d0..53dba71d6ff 100644 --- a/tests/queries/0_stateless/01449_json_compact_strings.reference +++ b/tests/queries/0_stateless/01449_json_compact_strings.reference @@ -14,7 +14,7 @@ "type": "Array(UInt8)" }, { - "name": "tuple(1, 'a')", + "name": "(1, 'a')", "type": "Tuple(UInt8, String)" }, { diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index bf1d5b31682..8f034b0bf61 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -28,8 +28,8 @@ select count(*) "'"'"initial query spans with proper parent"'"'" from (select *, attribute_name, attribute_value from system.opentelemetry_span_log - array join attribute.names as attribute_name, - attribute.values as attribute_value) o + array join mapKeys(attribute) as attribute_name, + mapValues(attribute) as attribute_value) o join system.query_log on query_id = o.attribute_value where trace_id = 
reinterpretAsUUID(reverse(unhex('$trace_id'))) @@ -45,7 +45,7 @@ select count(*) "'"'"initial query spans with proper parent"'"'" -- same non-empty value for all 'query' spans in this trace. select uniqExact(value) "'"'"unique non-empty tracestate values"'"'" from system.opentelemetry_span_log - array join attribute.names as name, attribute.values as value + array join mapKeys(attribute) as name, mapValues(attribute) as value where trace_id = reinterpretAsUUID(reverse(unhex('$trace_id'))) and operation_name = 'query' @@ -108,14 +108,11 @@ wait ${CLICKHOUSE_CLIENT} -q "system flush logs" ${CLICKHOUSE_CLIENT} -q " - with count(*) as c -- expect 200 * 0.1 = 20 sampled events on average - select if(c > 1 and c < 50, 'OK', 'fail: ' || toString(c)) + select if(count() > 1 and count() < 50, 'OK', 'Fail') from system.opentelemetry_span_log - array join attribute.names as name, attribute.values as value - where name = 'clickhouse.query_id' - and operation_name = 'query' + where operation_name = 'query' and parent_span_id = 0 -- only account for the initial queries - and value like '$query_id-%' + and attribute['clickhouse.query_id'] like '$query_id-%' ; " diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index 3457d17dba1..6e2c8d458ae 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -7,7 +7,7 @@ SYSTEM DROP MARK CACHE; SELECT a.size0 FROM t_arr; SYSTEM FLUSH LOGS; -SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'FileOpen')] +SELECT ProfileEvents['FileOpen'] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT a.size0 FROM %t_arr%')) AND current_database = currentDatabase(); @@ -24,7 +24,7 @@ SYSTEM DROP MARK CACHE; SELECT t.u FROM t_tup; SYSTEM FLUSH LOGS; -SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'FileOpen')] +SELECT ProfileEvents['FileOpen'] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT t._ FROM %t_tup%')) AND current_database = currentDatabase(); @@ -38,7 +38,7 @@ SYSTEM DROP MARK CACHE; SELECT n.null FROM t_nul; SYSTEM FLUSH LOGS; -SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'FileOpen')] +SELECT ProfileEvents['FileOpen'] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT n.null FROM %t_nul%')) AND current_database = currentDatabase(); @@ -57,7 +57,7 @@ SYSTEM DROP MARK CACHE; SELECT m.values FROM t_map; SYSTEM FLUSH LOGS; -SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'FileOpen')] +SELECT ProfileEvents['FileOpen'] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT m.% FROM %t_map%')) AND current_database = currentDatabase(); diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 0ddb0cfbfb4..82049243006 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -33,7 +33,7 @@ SELECT col1.a FROM nested FORMAT Null; -- 4 files: (col1.size0, col1.a) x2 SYSTEM FLUSH LOGS; -SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'FileOpen')] +SELECT ProfileEvents['FileOpen'] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT col1.a FROM %nested%')) AND event_date >= yesterday() AND current_database = currentDatabase(); @@ -43,7 +43,7 @@ SELECT col3.n2.s FROM nested FORMAT Null; -- 6 
files: (col3.size0, col3.n2.size1, col3.n2.s) x2 SYSTEM FLUSH LOGS; -SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'FileOpen')] +SELECT ProfileEvents['FileOpen'] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT col3.n2.s FROM %nested%')) AND event_date >= yesterday() AND current_database = currentDatabase(); diff --git a/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference b/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference index 8863d3b57c7..63f00b6f9c5 100644 --- a/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference +++ b/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference @@ -1,6 +1,6 @@ AlterQuery t1 (children 1) ExpressionList (children 1) - AlterCommand 30 (children 1) + AlterCommand 31 (children 1) Function equals (children 1) ExpressionList (children 2) Identifier date diff --git a/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.sql b/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.sql index 413acb789e0..41939123c92 100644 --- a/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.sql +++ b/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.sql @@ -1 +1,2 @@ +explain ast; -- { clientError 62 } explain ast alter table t1 delete where date = today() diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql new file mode 100644 index 00000000000..8bbc9ec0a14 --- /dev/null +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql @@ -0,0 +1,12 @@ +drop table if exists data_01641; + +create table data_01641 (key Int, value String) engine=MergeTree order by (key, repeat(value, 40)) settings old_parts_lifetime=0, min_bytes_for_wide_part=0; + +SET max_block_size = 1000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; +insert into data_01641 select number, toString(number) from numbers(120000); + +-- This should definitely fail, which proves that memory is tracked in the OPTIMIZE query.
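-- Editorial aside, not part of the patch: server error 241 is MEMORY_LIMIT_EXCEEDED;
-- max_untracked_memory = 0 routes every allocation through the memory tracker, so
-- the 10 MiB cap set below should reliably trip while merging these wide parts.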
+set max_memory_usage='10Mi', max_untracked_memory=0; +optimize table data_01641 final; -- { serverError 241 } + +drop table data_01641; diff --git a/tests/queries/0_stateless/01668_test_toMonth_mysql_dialect.sql b/tests/queries/0_stateless/01668_test_toMonth_mysql_dialect.sql index fa2e1e41555..bdde3e7b825 100644 --- a/tests/queries/0_stateless/01668_test_toMonth_mysql_dialect.sql +++ b/tests/queries/0_stateless/01668_test_toMonth_mysql_dialect.sql @@ -1 +1 @@ -SELECT MONTH(toDateTime('2016-06-15 23:00:00')); \ No newline at end of file +SELECT MONTH(toDateTime('2016-06-15 23:00:00')); diff --git a/tests/queries/0_stateless/01669_test_toYear_mysql_dialect.sql b/tests/queries/0_stateless/01669_test_toYear_mysql_dialect.sql index f7cd84314e2..afd79e2c1ba 100644 --- a/tests/queries/0_stateless/01669_test_toYear_mysql_dialect.sql +++ b/tests/queries/0_stateless/01669_test_toYear_mysql_dialect.sql @@ -1 +1 @@ -SELECT YEAR(toDateTime('2016-06-15 23:00:00')); \ No newline at end of file +SELECT YEAR(toDateTime('2016-06-15 23:00:00')); diff --git a/tests/queries/0_stateless/01670_test_repeat_mysql_dialect.sql b/tests/queries/0_stateless/01670_test_repeat_mysql_dialect.sql index 29fe81012ec..ae2bdb46412 100644 --- a/tests/queries/0_stateless/01670_test_repeat_mysql_dialect.sql +++ b/tests/queries/0_stateless/01670_test_repeat_mysql_dialect.sql @@ -1 +1 @@ -SELECT REPEAT('Test', 3); \ No newline at end of file +SELECT REPEAT('Test', 3); diff --git a/tests/queries/0_stateless/01671_test_toQuarter_mysql_dialect.sql b/tests/queries/0_stateless/01671_test_toQuarter_mysql_dialect.sql index b6fa41f8b49..369f2b47723 100644 --- a/tests/queries/0_stateless/01671_test_toQuarter_mysql_dialect.sql +++ b/tests/queries/0_stateless/01671_test_toQuarter_mysql_dialect.sql @@ -1 +1 @@ -SELECT QUARTER(toDateTime('2016-06-15 23:00:00')); \ No newline at end of file +SELECT QUARTER(toDateTime('2016-06-15 23:00:00')); diff --git a/tests/queries/0_stateless/01672_test_toSecond_mysql_dialect.sql b/tests/queries/0_stateless/01672_test_toSecond_mysql_dialect.sql index adb72b9843c..0306fde14cd 100644 --- a/tests/queries/0_stateless/01672_test_toSecond_mysql_dialect.sql +++ b/tests/queries/0_stateless/01672_test_toSecond_mysql_dialect.sql @@ -1 +1 @@ -SELECT SECOND(toDateTime('2016-06-15 23:00:00')); \ No newline at end of file +SELECT SECOND(toDateTime('2016-06-15 23:00:00')); diff --git a/tests/queries/0_stateless/01673_test_toMinute_mysql_dialect.sql b/tests/queries/0_stateless/01673_test_toMinute_mysql_dialect.sql index 4ac7106158a..5d188b5b95b 100644 --- a/tests/queries/0_stateless/01673_test_toMinute_mysql_dialect.sql +++ b/tests/queries/0_stateless/01673_test_toMinute_mysql_dialect.sql @@ -1 +1 @@ -SELECT MINUTE(toDateTime('2016-06-15 23:00:00')); \ No newline at end of file +SELECT MINUTE(toDateTime('2016-06-15 23:00:00')); diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference index 95479cf37ba..71d10397326 100644 --- a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference +++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference @@ -1,9 +1,12 @@ +optimize_move_to_prewhere_if_final = 1 + SELECT x, y, z FROM prewhere_move_select_final PREWHERE y > 100 + SELECT x, y, @@ -11,6 +14,7 @@ SELECT FROM prewhere_move_select_final FINAL PREWHERE y > 100 + SELECT x, y, @@ -18,6 +22,7 @@ SELECT FROM prewhere_move_select_final FINAL WHERE z > 400 + 
SELECT x, y, @@ -26,3 +31,36 @@ FROM prewhere_move_select_final FINAL PREWHERE y > 100 WHERE (y > 100) AND (z > 400) + +optimize_move_to_prewhere_if_final = 0 + +SELECT + x, + y, + z +FROM prewhere_move_select_final +PREWHERE y > 100 + +SELECT + x, + y, + z +FROM prewhere_move_select_final +FINAL +WHERE y > 100 + +SELECT + x, + y, + z +FROM prewhere_move_select_final +FINAL +WHERE z > 400 + +SELECT + x, + y, + z +FROM prewhere_move_select_final +FINAL +WHERE (y > 100) AND (z > 400) diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql index a3a882c461a..ecc11c625e3 100644 --- a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql +++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql @@ -1,15 +1,36 @@ DROP TABLE IF EXISTS prewhere_move_select_final; + CREATE TABLE prewhere_move_select_final (x Int, y Int, z Int) ENGINE = ReplacingMergeTree() ORDER BY (x, y); INSERT INTO prewhere_move_select_final SELECT number, number * 2, number * 3 FROM numbers(1000); +select 'optimize_move_to_prewhere_if_final = 1'; +SET optimize_move_to_prewhere_if_final = 1; + -- order key can be pushed down with final +select ''; EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE y > 100; +select ''; EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100; -- can not be pushed down +select ''; EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400; -- only y can be pushed down +select ''; +EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400; + +select ''; +select 'optimize_move_to_prewhere_if_final = 0'; +SET optimize_move_to_prewhere_if_final = 0; + +select ''; +EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE y > 100; +select ''; +EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100; +select ''; +EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400; +select ''; EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400; DROP TABLE prewhere_move_select_final; diff --git a/tests/queries/0_stateless/01787_map_remote.sql b/tests/queries/0_stateless/01787_map_remote.sql index 854eafa0a50..748316c8044 100644 --- a/tests/queries/0_stateless/01787_map_remote.sql +++ b/tests/queries/0_stateless/01787_map_remote.sql @@ -1 +1 @@ -SELECT map('a', 1, 'b', 2) FROM remote('127.0.0.{1,2}', system, one); \ No newline at end of file +SELECT map('a', 1, 'b', 2) FROM remote('127.0.0.{1,2}', system, one); diff --git a/tests/queries/0_stateless/01866_view_persist_settings.reference b/tests/queries/0_stateless/01866_view_persist_settings.reference new file mode 100644 index 00000000000..07c96e76875 --- /dev/null +++ b/tests/queries/0_stateless/01866_view_persist_settings.reference @@ -0,0 +1,34 @@ +join_use_nulls = 1 +- +1 11 0 +2 12 22 +3 0 23 +- +1 11 0 +2 12 22 +3 0 23 +- +1 11 \N +2 12 22 +3 \N 23 +- +1 11 0 +2 12 22 +3 0 23 +join_use_nulls = 0 +- +1 11 0 +2 12 22 +3 0 23 +- +1 11 0 +2 12 22 +3 0 23 +- +1 11 \N +2 12 22 +3 \N 23 +- +1 11 0 +2 12 22 +3 0 23 diff --git a/tests/queries/0_stateless/01866_view_persist_settings.sql b/tests/queries/0_stateless/01866_view_persist_settings.sql new file mode 100644 index 00000000000..71d6c856b9b --- /dev/null +++ b/tests/queries/0_stateless/01866_view_persist_settings.sql @@ -0,0 +1,61 @@ +DROP TABLE IF EXISTS view_no_nulls; +DROP TABLE IF EXISTS 
view_no_nulls_set; +DROP TABLE IF EXISTS view_nulls_set; +DROP TABLE IF EXISTS view_nulls; + +SET join_use_nulls = 0; + +CREATE OR REPLACE VIEW view_no_nulls AS +SELECT * FROM ( SELECT number + 1 AS a, number + 11 AS b FROM numbers(2) ) AS t1 +FULL JOIN ( SELECT number + 2 AS a, number + 22 AS c FROM numbers(2) ) AS t2 +USING a ORDER BY a; + +CREATE OR REPLACE VIEW view_nulls_set AS +SELECT * FROM ( SELECT number + 1 AS a, number + 11 AS b FROM numbers(2) ) AS t1 +FULL JOIN ( SELECT number + 2 AS a, number + 22 AS c FROM numbers(2) ) AS t2 +USING a ORDER BY a +SETTINGS join_use_nulls = 1; + +SET join_use_nulls = 1; + +CREATE OR REPLACE VIEW view_nulls AS +SELECT * FROM ( SELECT number + 1 AS a, number + 11 AS b FROM numbers(2) ) AS t1 +FULL JOIN ( SELECT number + 2 AS a, number + 22 AS c FROM numbers(2) ) AS t2 +USING a ORDER BY a; + +CREATE OR REPLACE VIEW view_no_nulls_set AS +SELECT * FROM ( SELECT number + 1 AS a, number + 11 AS b FROM numbers(2) ) AS t1 +FULL JOIN ( SELECT number + 2 AS a, number + 22 AS c FROM numbers(2) ) AS t2 +USING a ORDER BY a +SETTINGS join_use_nulls = 0; + +SET join_use_nulls = 1; + +SELECT 'join_use_nulls = 1'; + +SELECT '-'; +SELECT * FROM view_no_nulls; +SELECT '-'; +SELECT * FROM view_no_nulls_set; +SELECT '-'; +SELECT * FROM view_nulls_set; +SELECT '-'; +SELECT * FROM view_nulls; + +SET join_use_nulls = 0; + +SELECT 'join_use_nulls = 0'; + +SELECT '-'; +SELECT * FROM view_no_nulls; +SELECT '-'; +SELECT * FROM view_no_nulls_set; +SELECT '-'; +SELECT * FROM view_nulls_set; +SELECT '-'; +SELECT * FROM view_nulls; + +DROP TABLE IF EXISTS view_no_nulls; +DROP TABLE IF EXISTS view_no_nulls_set; +DROP TABLE IF EXISTS view_nulls_set; +DROP TABLE IF EXISTS view_nulls; diff --git a/tests/queries/0_stateless/01889_sql_json_functions.reference b/tests/queries/0_stateless/01889_sql_json_functions.reference new file mode 100644 index 00000000000..593f2fb2d20 --- /dev/null +++ b/tests/queries/0_stateless/01889_sql_json_functions.reference @@ -0,0 +1,43 @@ +--JSON_VALUE-- + +1 +1.2 +true +"world" +null + + + + +--JSON_QUERY-- +[{"hello":1}] +[1] +[1.2] +[true] +["world"] +[null] +[["world","world2"]] +[{"world":"!"}] + + +[0, 1, 4, 0, -1, -4] +--JSON_EXISTS-- +1 +0 +1 +1 +1 +0 +1 +0 +0 +1 +1 +0 +1 +0 +1 +--MANY ROWS-- +0 ["Vasily", "Kostya"] +1 ["Tihon", "Ernest"] +2 ["Katya", "Anatoliy"] diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql new file mode 100644 index 00000000000..1c5069ccfde --- /dev/null +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -0,0 +1,50 @@ +SELECT '--JSON_VALUE--'; +SELECT JSON_VALUE('$', '{"hello":1}'); -- root is a complex object => default value (empty string) +SELECT JSON_VALUE('$.hello', '{"hello":1}'); +SELECT JSON_VALUE('$.hello', '{"hello":1.2}'); +SELECT JSON_VALUE('$.hello', '{"hello":true}'); +SELECT JSON_VALUE('$.hello', '{"hello":"world"}'); +SELECT JSON_VALUE('$.hello', '{"hello":null}'); +SELECT JSON_VALUE('$.hello', '{"hello":["world","world2"]}'); +SELECT JSON_VALUE('$.hello', '{"hello":{"world":"!"}}'); +SELECT JSON_VALUE('$.hello', '{hello:world}'); -- invalid json => default value (empty string) +SELECT JSON_VALUE('$.hello', ''); + +SELECT '--JSON_QUERY--'; +SELECT JSON_QUERY('$', '{"hello":1}'); +SELECT JSON_QUERY('$.hello', '{"hello":1}'); +SELECT JSON_QUERY('$.hello', '{"hello":1.2}'); +SELECT JSON_QUERY('$.hello', '{"hello":true}'); +SELECT JSON_QUERY('$.hello', '{"hello":"world"}'); +SELECT JSON_QUERY('$.hello', '{"hello":null}'); 
+SELECT JSON_QUERY('$.hello', '{"hello":["world","world2"]}'); +SELECT JSON_QUERY('$.hello', '{"hello":{"world":"!"}}'); +SELECT JSON_QUERY('$.hello', '{hello:{"world":"!"}}}'); -- invalid json => default value (empty string) +SELECT JSON_QUERY('$.hello', ''); +SELECT JSON_QUERY('$.array[*][0 to 2, 4]', '{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}'); + +SELECT '--JSON_EXISTS--'; +SELECT JSON_EXISTS('$', '{"hello":1}'); +SELECT JSON_EXISTS('$', ''); +SELECT JSON_EXISTS('$', '{}'); +SELECT JSON_EXISTS('$.hello', '{"hello":1}'); +SELECT JSON_EXISTS('$.world', '{"hello":1,"world":2}'); +SELECT JSON_EXISTS('$.world', '{"hello":{"world":1}}'); +SELECT JSON_EXISTS('$.hello.world', '{"hello":{"world":1}}'); +SELECT JSON_EXISTS('$.hello', '{hello:world}'); -- invalid json => default value (zero integer) +SELECT JSON_EXISTS('$.hello', ''); +SELECT JSON_EXISTS('$.hello[*]', '{"hello":["world"]}'); +SELECT JSON_EXISTS('$.hello[0]', '{"hello":["world"]}'); +SELECT JSON_EXISTS('$.hello[1]', '{"hello":["world"]}'); +SELECT JSON_EXISTS('$.a[*].b', '{"a":[{"b":1},{"c":2}]}'); +SELECT JSON_EXISTS('$.a[*].f', '{"a":[{"b":1},{"c":2}]}'); +SELECT JSON_EXISTS('$.a[*][0].h', '{"a":[[{"b":1}, {"g":1}],[{"h":1},{"y":1}]]}'); + +SELECT '--MANY ROWS--'; +DROP TABLE IF EXISTS 01889_sql_json; +CREATE TABLE 01889_sql_json (id UInt8, json String) ENGINE = MergeTree ORDER BY id; +INSERT INTO 01889_sql_json(id, json) VALUES(0, '{"name":"Ivan","surname":"Ivanov","friends":["Vasily","Kostya","Artyom"]}'); +INSERT INTO 01889_sql_json(id, json) VALUES(1, '{"name":"Katya","surname":"Baltica","friends":["Tihon","Ernest","Innokentiy"]}'); +INSERT INTO 01889_sql_json(id, json) VALUES(2, '{"name":"Vitali","surname":"Brown","friends":["Katya","Anatoliy","Ivan","Oleg"]}'); +SELECT id, JSON_QUERY('$.friends[0 to 2]', json) FROM 01889_sql_json ORDER BY id; +DROP TABLE 01889_sql_json; diff --git a/tests/queries/0_stateless/01890_jit_aggregation_function_sum_long.reference b/tests/queries/0_stateless/01890_jit_aggregation_function_sum_long.reference new file mode 100644 index 00000000000..4897a71df3c --- /dev/null +++ b/tests/queries/0_stateless/01890_jit_aggregation_function_sum_long.reference @@ -0,0 +1,26 @@ +Test unsigned integer values +0 2340 2340 2340 2340 +1 2380 2380 2380 2380 +2 2420 2420 2420 2420 +Test signed integer values +0 2340 2340 2340 2340 +1 2380 2380 2380 2380 +2 2420 2420 2420 2420 +Test float values +0 2340 2340 +1 2380 2380 +2 2420 2420 +Test nullable unsigned integer values +0 2340 2340 2340 2340 +1 2380 2380 2380 2380 +2 2420 2420 2420 2420 +Test nullable signed integer values +0 2340 2340 2340 2340 +1 2380 2380 2380 2380 +2 2420 2420 2420 2420 +Test nullable float values +0 2340 2340 +1 2380 2380 +2 2420 2420 +Test null specifics +0 6 4 \N diff --git a/tests/queries/0_stateless/01890_jit_aggregation_function_sum_long.sql b/tests/queries/0_stateless/01890_jit_aggregation_function_sum_long.sql new file mode 100644 index 00000000000..0f61ab168f5 --- /dev/null +++ b/tests/queries/0_stateless/01890_jit_aggregation_function_sum_long.sql @@ -0,0 +1,119 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +SELECT 'Test unsigned integer values'; + +DROP TABLE IF EXISTS test_table_unsigned_values; +CREATE TABLE test_table_unsigned_values +( + id UInt64, + + value1 UInt8, + value2 UInt16, + value3 UInt32, + value4 UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; 
+SELECT id, sum(value1), sum(value2), sum(value3), sum(value4) FROM test_table_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_unsigned_values; + +SELECT 'Test signed integer values'; + +DROP TABLE IF EXISTS test_table_signed_values; +CREATE TABLE test_table_signed_values +( + id UInt64, + + value1 Int8, + value2 Int16, + value3 Int32, + value4 Int64 +) ENGINE=TinyLog; + +INSERT INTO test_table_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, sum(value1), sum(value2), sum(value3), sum(value4) FROM test_table_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_signed_values; + +SELECT 'Test float values'; + +DROP TABLE IF EXISTS test_table_float_values; +CREATE TABLE test_table_float_values +( + id UInt64, + + value1 Float32, + value2 Float64 +) ENGINE=TinyLog; + +INSERT INTO test_table_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, sum(value1), sum(value2) FROM test_table_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_float_values; + +SELECT 'Test nullable unsigned integer values'; + +DROP TABLE IF EXISTS test_table_nullable_unsigned_values; +CREATE TABLE test_table_nullable_unsigned_values +( + id UInt64, + + value1 Nullable(UInt8), + value2 Nullable(UInt16), + value3 Nullable(UInt32), + value4 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, sum(value1), sum(value2), sum(value3), sum(value4) FROM test_table_nullable_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_unsigned_values; + +SELECT 'Test nullable signed integer values'; + +DROP TABLE IF EXISTS test_table_nullable_signed_values; +CREATE TABLE test_table_nullable_signed_values +( + id UInt64, + + value1 Nullable(Int8), + value2 Nullable(Int16), + value3 Nullable(Int32), + value4 Nullable(Int64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, sum(value1), sum(value2), sum(value3), sum(value4) FROM test_table_nullable_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_signed_values; + +SELECT 'Test nullable float values'; + +DROP TABLE IF EXISTS test_table_nullable_float_values; +CREATE TABLE test_table_nullable_float_values +( + id UInt64, + + value1 Nullable(Float32), + value2 Nullable(Float64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, sum(value1), sum(value2) FROM test_table_nullable_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_float_values; + +SELECT 'Test null specifics'; + +DROP TABLE IF EXISTS test_table_null_specifics; +CREATE TABLE test_table_null_specifics +( + id UInt64, + + value1 Nullable(UInt64), + value2 Nullable(UInt64), + value3 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_null_specifics VALUES (0, 1, 1, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 2, NULL, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 3, 3, NULL); + +SELECT id, sum(value1), sum(value2), sum(value3) FROM test_table_null_specifics GROUP BY id ORDER BY id; +DROP TABLE IF EXISTS test_table_null_specifics; diff --git a/tests/queries/0_stateless/01891_jit_aggregation_function_any_long.reference b/tests/queries/0_stateless/01891_jit_aggregation_function_any_long.reference
new file mode 100644 index 00000000000..d3ea3d46376 --- /dev/null +++ b/tests/queries/0_stateless/01891_jit_aggregation_function_any_long.reference @@ -0,0 +1,26 @@ +Test unsigned integer values +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +Test signed integer values +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +Test float values +0 0 0 +1 1 1 +2 2 2 +Test nullable unsigned integer values +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +Test nullable signed integer values +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +Test nullable float values +0 0 0 +1 1 1 +2 2 2 +Test null specifics +0 1 1 \N diff --git a/tests/queries/0_stateless/01891_jit_aggregation_function_any_long.sql b/tests/queries/0_stateless/01891_jit_aggregation_function_any_long.sql new file mode 100644 index 00000000000..28e81640993 --- /dev/null +++ b/tests/queries/0_stateless/01891_jit_aggregation_function_any_long.sql @@ -0,0 +1,119 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +SELECT 'Test unsigned integer values'; + +DROP TABLE IF EXISTS test_table_unsigned_values; +CREATE TABLE test_table_unsigned_values +( + id UInt64, + + value1 UInt8, + value2 UInt16, + value3 UInt32, + value4 UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, any(value1), any(value2), any(value3), any(value4) FROM test_table_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_unsigned_values; + +SELECT 'Test signed integer values'; + +DROP TABLE IF EXISTS test_table_signed_values; +CREATE TABLE test_table_signed_values +( + id UInt64, + + value1 Int8, + value2 Int16, + value3 Int32, + value4 Int64 +) ENGINE=TinyLog; + +INSERT INTO test_table_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, any(value1), any(value2), any(value3), any(value4) FROM test_table_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_signed_values; + +SELECT 'Test float values'; + +DROP TABLE IF EXISTS test_table_float_values; +CREATE TABLE test_table_float_values +( + id UInt64, + + value1 Float32, + value2 Float64 +) ENGINE=TinyLog; + +INSERT INTO test_table_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, any(value1), any(value2) FROM test_table_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_float_values; + +SELECT 'Test nullable unsigned integer values'; + +DROP TABLE IF EXISTS test_table_nullable_unsigned_values; +CREATE TABLE test_table_nullable_unsigned_values +( + id UInt64, + + value1 Nullable(UInt8), + value2 Nullable(UInt16), + value3 Nullable(UInt32), + value4 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, any(value1), any(value2), any(value3), any(value4) FROM test_table_nullable_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_unsigned_values; + +SELECT 'Test nullable signed integer values'; + +DROP TABLE IF EXISTS test_table_nullable_signed_values; +CREATE TABLE test_table_nullable_signed_values +( + id UInt64, + + value1 Nullable(Int8), + value2 Nullable(Int16), + value3 Nullable(Int32), + value4 Nullable(Int64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, any(value1), any(value2), any(value3), any(value4) FROM test_table_nullable_signed_values GROUP 
BY id ORDER BY id; +DROP TABLE test_table_nullable_signed_values; + +SELECT 'Test nullable float values'; + +DROP TABLE IF EXISTS test_table_nullable_float_values; +CREATE TABLE test_table_nullable_float_values +( + id UInt64, + + value1 Nullable(Float32), + value2 Nullable(Float64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, any(value1), any(value2) FROM test_table_nullable_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_float_values; + +SELECT 'Test null specifics'; + +DROP TABLE IF EXISTS test_table_null_specifics; +CREATE TABLE test_table_null_specifics +( + id UInt64, + + value1 Nullable(UInt64), + value2 Nullable(UInt64), + value3 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_null_specifics VALUES (0, 1, 1, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 2, NULL, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 3, 3, NULL); + +SELECT id, any(value1), any(value2), any(value3) FROM test_table_null_specifics GROUP BY id ORDER BY id; +DROP TABLE IF EXISTS test_table_null_specifics; diff --git a/tests/queries/0_stateless/01892_jit_aggregation_function_any_last_long.reference b/tests/queries/0_stateless/01892_jit_aggregation_function_any_last_long.reference new file mode 100644 index 00000000000..bdf0499e1f3 --- /dev/null +++ b/tests/queries/0_stateless/01892_jit_aggregation_function_any_last_long.reference @@ -0,0 +1,26 @@ +Test unsigned integer values +0 117 117 117 117 +1 118 118 118 118 +2 119 119 119 119 +Test signed integer values +0 117 117 117 117 +1 118 118 118 118 +2 119 119 119 119 +Test float values +0 117 117 +1 118 118 +2 119 119 +Test nullable unsigned integer values +0 117 117 117 117 +1 118 118 118 118 +2 119 119 119 119 +Test nullable signed integer values +0 117 117 117 117 +1 118 118 118 118 +2 119 119 119 119 +Test nullable float values +0 117 117 +1 118 118 +2 119 119 +Test null specifics +0 3 3 \N diff --git a/tests/queries/0_stateless/01892_jit_aggregation_function_any_last_long.sql b/tests/queries/0_stateless/01892_jit_aggregation_function_any_last_long.sql new file mode 100644 index 00000000000..c02ed8f18ee --- /dev/null +++ b/tests/queries/0_stateless/01892_jit_aggregation_function_any_last_long.sql @@ -0,0 +1,119 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +SELECT 'Test unsigned integer values'; + +DROP TABLE IF EXISTS test_table_unsigned_values; +CREATE TABLE test_table_unsigned_values +( + id UInt64, + + value1 UInt8, + value2 UInt16, + value3 UInt32, + value4 UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, anyLast(value1), anyLast(value2), anyLast(value3), anyLast(value4) FROM test_table_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_unsigned_values; + +SELECT 'Test signed integer values'; + +DROP TABLE IF EXISTS test_table_signed_values; +CREATE TABLE test_table_signed_values +( + id UInt64, + + value1 Int8, + value2 Int16, + value3 Int32, + value4 Int64 +) ENGINE=TinyLog; + +INSERT INTO test_table_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, anyLast(value1), anyLast(value2), anyLast(value3), anyLast(value4) FROM test_table_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_signed_values; + +SELECT 'Test float values'; + +DROP TABLE IF EXISTS 
test_table_float_values; +CREATE TABLE test_table_float_values +( + id UInt64, + + value1 Float32, + value2 Float64 +) ENGINE=TinyLog; + +INSERT INTO test_table_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, anyLast(value1), anyLast(value2) FROM test_table_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_float_values; + +SELECT 'Test nullable unsigned integer values'; + +DROP TABLE IF EXISTS test_table_nullable_unsigned_values; +CREATE TABLE test_table_nullable_unsigned_values +( + id UInt64, + + value1 Nullable(UInt8), + value2 Nullable(UInt16), + value3 Nullable(UInt32), + value4 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, anyLast(value1), anyLast(value2), anyLast(value3), anyLast(value4) FROM test_table_nullable_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_unsigned_values; + +SELECT 'Test nullable signed integer values'; + +DROP TABLE IF EXISTS test_table_nullable_signed_values; +CREATE TABLE test_table_nullable_signed_values +( + id UInt64, + + value1 Nullable(Int8), + value2 Nullable(Int16), + value3 Nullable(Int32), + value4 Nullable(Int64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, anyLast(value1), anyLast(value2), anyLast(value3), anyLast(value4) FROM test_table_nullable_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_signed_values; + +SELECT 'Test nullable float values'; + +DROP TABLE IF EXISTS test_table_nullable_float_values; +CREATE TABLE test_table_nullable_float_values +( + id UInt64, + + value1 Nullable(Float32), + value2 Nullable(Float64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, anyLast(value1), anyLast(value2) FROM test_table_nullable_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_float_values; + +SELECT 'Test null specifics'; + +DROP TABLE IF EXISTS test_table_null_specifics; +CREATE TABLE test_table_null_specifics +( + id UInt64, + + value1 Nullable(UInt64), + value2 Nullable(UInt64), + value3 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_null_specifics VALUES (0, 1, 1, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 2, NULL, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 3, 3, NULL); + +SELECT id, anyLast(value1), anyLast(value2), anyLast(value3) FROM test_table_null_specifics GROUP BY id ORDER BY id; +DROP TABLE IF EXISTS test_table_null_specifics; diff --git a/tests/queries/0_stateless/01893_jit_aggregation_function_min_long.reference b/tests/queries/0_stateless/01893_jit_aggregation_function_min_long.reference new file mode 100644 index 00000000000..d3ea3d46376 --- /dev/null +++ b/tests/queries/0_stateless/01893_jit_aggregation_function_min_long.reference @@ -0,0 +1,26 @@ +Test unsigned integer values +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +Test signed integer values +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +Test float values +0 0 0 +1 1 1 +2 2 2 +Test nullable unsigned integer values +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +Test nullable signed integer values +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +Test nullable float values +0 0 0 +1 1 1 +2 2 2 +Test null specifics +0 1 1 \N diff --git a/tests/queries/0_stateless/01893_jit_aggregation_function_min_long.sql 
b/tests/queries/0_stateless/01893_jit_aggregation_function_min_long.sql new file mode 100644 index 00000000000..5e700e537eb --- /dev/null +++ b/tests/queries/0_stateless/01893_jit_aggregation_function_min_long.sql @@ -0,0 +1,119 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +SELECT 'Test unsigned integer values'; + +DROP TABLE IF EXISTS test_table_unsigned_values; +CREATE TABLE test_table_unsigned_values +( + id UInt64, + + value1 UInt8, + value2 UInt16, + value3 UInt32, + value4 UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, min(value1), min(value2), min(value3), min(value4) FROM test_table_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_unsigned_values; + +SELECT 'Test signed integer values'; + +DROP TABLE IF EXISTS test_table_signed_values; +CREATE TABLE test_table_signed_values +( + id UInt64, + + value1 Int8, + value2 Int16, + value3 Int32, + value4 Int64 +) ENGINE=TinyLog; + +INSERT INTO test_table_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, min(value1), min(value2), min(value3), min(value4) FROM test_table_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_signed_values; + +SELECT 'Test float values'; + +DROP TABLE IF EXISTS test_table_float_values; +CREATE TABLE test_table_float_values +( + id UInt64, + + value1 Float32, + value2 Float64 +) ENGINE=TinyLog; + +INSERT INTO test_table_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, min(value1), min(value2) FROM test_table_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_float_values; + +SELECT 'Test nullable unsigned integer values'; + +DROP TABLE IF EXISTS test_table_nullable_unsigned_values; +CREATE TABLE test_table_nullable_unsigned_values +( + id UInt64, + + value1 Nullable(UInt8), + value2 Nullable(UInt16), + value3 Nullable(UInt32), + value4 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, min(value1), min(value2), min(value3), min(value4) FROM test_table_nullable_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_unsigned_values; + +SELECT 'Test nullable signed integer values'; + +DROP TABLE IF EXISTS test_table_nullable_signed_values; +CREATE TABLE test_table_nullable_signed_values +( + id UInt64, + + value1 Nullable(Int8), + value2 Nullable(Int16), + value3 Nullable(Int32), + value4 Nullable(Int64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, min(value1), min(value2), min(value3), min(value4) FROM test_table_nullable_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_signed_values; + +SELECT 'Test nullable float values'; + +DROP TABLE IF EXISTS test_table_nullable_float_values; +CREATE TABLE test_table_nullable_float_values +( + id UInt64, + + value1 Nullable(Float32), + value2 Nullable(Float64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, min(value1), min(value2) FROM test_table_nullable_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_float_values; + +SELECT 'Test null specifics'; + +DROP TABLE IF EXISTS 
test_table_null_specifics; +CREATE TABLE test_table_null_specifics +( + id UInt64, + + value1 Nullable(UInt64), + value2 Nullable(UInt64), + value3 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_null_specifics VALUES (0, 1, 1, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 2, NULL, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 3, 3, NULL); + +SELECT id, min(value1), min(value2), min(value3) FROM test_table_null_specifics GROUP BY id ORDER BY id; +DROP TABLE IF EXISTS test_table_null_specifics; diff --git a/tests/queries/0_stateless/01894_jit_aggregation_function_max_long.reference b/tests/queries/0_stateless/01894_jit_aggregation_function_max_long.reference new file mode 100644 index 00000000000..321d1fa7196 --- /dev/null +++ b/tests/queries/0_stateless/01894_jit_aggregation_function_max_long.reference @@ -0,0 +1,26 @@ +Test unsigned integer values +0 117 117 117 117 +1 118 118 118 118 +2 119 119 119 119 +Test signed integer values +0 117 117 117 117 +1 118 118 118 118 +2 119 119 119 119 +Test float values +0 117 117 +1 118 118 +2 119 119 +Test nullable unsigned integer values +0 117 117 117 117 +1 118 118 118 118 +2 119 119 119 119 +Test nullable signed integer values +0 117 117 117 117 +1 118 118 118 118 +2 119 119 119 119 +Test nullable float values +0 117 117 +1 118 118 +2 119 119 +Test null specifics +0 3 3 \N diff --git a/tests/queries/0_stateless/01894_jit_aggregation_function_max_long.sql b/tests/queries/0_stateless/01894_jit_aggregation_function_max_long.sql new file mode 100644 index 00000000000..8ba11f4c643 --- /dev/null +++ b/tests/queries/0_stateless/01894_jit_aggregation_function_max_long.sql @@ -0,0 +1,119 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +SELECT 'Test unsigned integer values'; + +DROP TABLE IF EXISTS test_table_unsigned_values; +CREATE TABLE test_table_unsigned_values +( + id UInt64, + + value1 UInt8, + value2 UInt16, + value3 UInt32, + value4 UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, max(value1), max(value2), max(value3), max(value4) FROM test_table_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_unsigned_values; + +SELECT 'Test signed integer values'; + +DROP TABLE IF EXISTS test_table_signed_values; +CREATE TABLE test_table_signed_values +( + id UInt64, + + value1 Int8, + value2 Int16, + value3 Int32, + value4 Int64 +) ENGINE=TinyLog; + +INSERT INTO test_table_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, max(value1), max(value2), max(value3), max(value4) FROM test_table_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_signed_values; + +SELECT 'Test float values'; + +DROP TABLE IF EXISTS test_table_float_values; +CREATE TABLE test_table_float_values +( + id UInt64, + + value1 Float32, + value2 Float64 +) ENGINE=TinyLog; + +INSERT INTO test_table_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, max(value1), max(value2) FROM test_table_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_float_values; + +SELECT 'Test nullable unsigned integer values'; + +DROP TABLE IF EXISTS test_table_nullable_unsigned_values; +CREATE TABLE test_table_nullable_unsigned_values +( + id UInt64, + + value1 Nullable(UInt8), + value2 Nullable(UInt16), + value3 Nullable(UInt32), + value4 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO
test_table_nullable_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, max(value1), max(value2), max(value3), max(value4) FROM test_table_nullable_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_unsigned_values; + +SELECT 'Test nullable signed integer values'; + +DROP TABLE IF EXISTS test_table_nullable_signed_values; +CREATE TABLE test_table_nullable_signed_values +( + id UInt64, + + value1 Nullable(Int8), + value2 Nullable(Int16), + value3 Nullable(Int32), + value4 Nullable(Int64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, max(value1), max(value2), max(value3), max(value4) FROM test_table_nullable_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_signed_values; + +SELECT 'Test nullable float values'; + +DROP TABLE IF EXISTS test_table_nullable_float_values; +CREATE TABLE test_table_nullable_float_values +( + id UInt64, + + value1 Nullable(Float32), + value2 Nullable(Float64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, max(value1), max(value2) FROM test_table_nullable_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_float_values; + +SELECT 'Test null specifics'; + +DROP TABLE IF EXISTS test_table_null_specifics; +CREATE TABLE test_table_null_specifics +( + id UInt64, + + value1 Nullable(UInt64), + value2 Nullable(UInt64), + value3 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_null_specifics VALUES (0, 1, 1, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 2, NULL, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 3, 3, NULL); + +SELECT id, max(value1), max(value2), max(value3) FROM test_table_null_specifics GROUP BY id ORDER BY id; +DROP TABLE IF EXISTS test_table_null_specifics; diff --git a/tests/queries/0_stateless/01895_jit_aggregation_function_avg_long.reference b/tests/queries/0_stateless/01895_jit_aggregation_function_avg_long.reference new file mode 100644 index 00000000000..e1eac2fe41b --- /dev/null +++ b/tests/queries/0_stateless/01895_jit_aggregation_function_avg_long.reference @@ -0,0 +1,26 @@ +Test unsigned integer values +0 58.5 58.5 58.5 58.5 +1 59.5 59.5 59.5 59.5 +2 60.5 60.5 60.5 60.5 +Test signed integer values +0 58.5 58.5 58.5 58.5 +1 59.5 59.5 59.5 59.5 +2 60.5 60.5 60.5 60.5 +Test float values +0 58.5 58.5 +1 59.5 59.5 +2 60.5 60.5 +Test nullable unsigned integer values +0 58.5 58.5 58.5 58.5 +1 59.5 59.5 59.5 59.5 +2 60.5 60.5 60.5 60.5 +Test nullable signed integer values +0 58.5 58.5 58.5 58.5 +1 59.5 59.5 59.5 59.5 +2 60.5 60.5 60.5 60.5 +Test nullable float values +0 58.5 58.5 +1 59.5 59.5 +2 60.5 60.5 +Test null specifics +0 2 2 \N diff --git a/tests/queries/0_stateless/01895_jit_aggregation_function_avg_long.sql b/tests/queries/0_stateless/01895_jit_aggregation_function_avg_long.sql new file mode 100644 index 00000000000..903a7c65f21 --- /dev/null +++ b/tests/queries/0_stateless/01895_jit_aggregation_function_avg_long.sql @@ -0,0 +1,119 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +SELECT 'Test unsigned integer values'; + +DROP TABLE IF EXISTS test_table_unsigned_values; +CREATE TABLE test_table_unsigned_values +( + id UInt64, + + value1 UInt8, + value2 UInt16, + value3 UInt32, + value4 UInt64 +) ENGINE=TinyLog; + +INSERT INTO 
test_table_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, avg(value1), avg(value2), avg(value3), avg(value4) FROM test_table_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_unsigned_values; + +SELECT 'Test signed integer values'; + +DROP TABLE IF EXISTS test_table_signed_values; +CREATE TABLE test_table_signed_values +( + id UInt64, + + value1 Int8, + value2 Int16, + value3 Int32, + value4 Int64 +) ENGINE=TinyLog; + +INSERT INTO test_table_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, avg(value1), avg(value2), avg(value3), avg(value4) FROM test_table_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_signed_values; + +SELECT 'Test float values'; + +DROP TABLE IF EXISTS test_table_float_values; +CREATE TABLE test_table_float_values +( + id UInt64, + + value1 Float32, + value2 Float64 +) ENGINE=TinyLog; + +INSERT INTO test_table_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, avg(value1), avg(value2) FROM test_table_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_float_values; + +SELECT 'Test nullable unsigned integer values'; + +DROP TABLE IF EXISTS test_table_nullable_unsigned_values; +CREATE TABLE test_table_nullable_unsigned_values +( + id UInt64, + + value1 Nullable(UInt8), + value2 Nullable(UInt16), + value3 Nullable(UInt32), + value4 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_unsigned_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, avg(value1), avg(value2), avg(value3), avg(value4) FROM test_table_nullable_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_unsigned_values; + +SELECT 'Test nullable signed integer values'; + +DROP TABLE IF EXISTS test_table_nullable_signed_values; +CREATE TABLE test_table_nullable_signed_values +( + id UInt64, + + value1 Nullable(Int8), + value2 Nullable(Int16), + value3 Nullable(Int32), + value4 Nullable(Int64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_signed_values SELECT number % 3, number, number, number, number FROM system.numbers LIMIT 120; +SELECT id, avg(value1), avg(value2), avg(value3), avg(value4) FROM test_table_nullable_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_signed_values; + +SELECT 'Test nullable float values'; + +DROP TABLE IF EXISTS test_table_nullable_float_values; +CREATE TABLE test_table_nullable_float_values +( + id UInt64, + + value1 Nullable(Float32), + value2 Nullable(Float64) +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_float_values SELECT number % 3, number, number FROM system.numbers LIMIT 120; +SELECT id, avg(value1), avg(value2) FROM test_table_nullable_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_float_values; + +SELECT 'Test null specifics'; + +DROP TABLE IF EXISTS test_table_null_specifics; +CREATE TABLE test_table_null_specifics +( + id UInt64, + + value1 Nullable(UInt64), + value2 Nullable(UInt64), + value3 Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_null_specifics VALUES (0, 1, 1, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 2, NULL, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 3, 3, NULL); + +SELECT id, avg(value1), avg(value2), avg(value3) FROM test_table_null_specifics GROUP BY id ORDER BY id; +DROP TABLE IF EXISTS test_table_null_specifics; diff --git 
a/tests/queries/0_stateless/01896_jit_aggregation_function_if_long.reference b/tests/queries/0_stateless/01896_jit_aggregation_function_if_long.reference new file mode 100644 index 00000000000..153adc0f998 --- /dev/null +++ b/tests/queries/0_stateless/01896_jit_aggregation_function_if_long.reference @@ -0,0 +1,28 @@ +Test unsigned integer values +0 1140 1140 1140 1140 +1 1220 1220 1220 1220 +2 1180 1180 1180 1180 +Test signed integer values +0 1140 1140 1140 1140 +1 1220 1220 1220 1220 +2 1180 1180 1180 1180 +Test float values +0 1140 1140 +1 1220 1220 +2 1180 1180 +Test nullable unsigned integer values +0 1140 1140 1140 1140 +1 1220 1220 1220 1220 +2 1180 1180 1180 1180 +Test nullable signed integer values +0 1140 1140 1140 1140 +1 1220 1220 1220 1220 +2 1180 1180 1180 1180 +Test nullable float values +0 1140 1140 +1 1220 1220 +2 1180 1180 +Test null specifics +0 6 4 \N +Test null variadic +0 2.3333333333333335 2.5 \N diff --git a/tests/queries/0_stateless/01896_jit_aggregation_function_if_long.sql b/tests/queries/0_stateless/01896_jit_aggregation_function_if_long.sql new file mode 100644 index 00000000000..8b5618230f0 --- /dev/null +++ b/tests/queries/0_stateless/01896_jit_aggregation_function_if_long.sql @@ -0,0 +1,197 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +SELECT 'Test unsigned integer values'; + +DROP TABLE IF EXISTS test_table_unsigned_values; +CREATE TABLE test_table_unsigned_values +( + id UInt64, + + value1 UInt8, + value2 UInt16, + value3 UInt32, + value4 UInt64, + + predicate_value UInt8 +) ENGINE=TinyLog; + +INSERT INTO test_table_unsigned_values SELECT number % 3, number, number, number, number, if(number % 2 == 0, 1, 0) FROM system.numbers LIMIT 120; +SELECT + id, + sumIf(value1, predicate_value), + sumIf(value2, predicate_value), + sumIf(value3, predicate_value), + sumIf(value4, predicate_value) +FROM test_table_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_unsigned_values; + +SELECT 'Test signed integer values'; + +DROP TABLE IF EXISTS test_table_signed_values; +CREATE TABLE test_table_signed_values +( + id UInt64, + + value1 Int8, + value2 Int16, + value3 Int32, + value4 Int64, + + predicate_value UInt8 +) ENGINE=TinyLog; + +INSERT INTO test_table_signed_values SELECT number % 3, number, number, number, number, if(number % 2 == 0, 1, 0) FROM system.numbers LIMIT 120; +SELECT + id, + sumIf(value1, predicate_value), + sumIf(value2, predicate_value), + sumIf(value3, predicate_value), + sumIf(value4, predicate_value) +FROM test_table_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_signed_values; + +SELECT 'Test float values'; + +DROP TABLE IF EXISTS test_table_float_values; +CREATE TABLE test_table_float_values +( + id UInt64, + + value1 Float32, + value2 Float64, + + predicate_value UInt8 +) ENGINE=TinyLog; + +INSERT INTO test_table_float_values SELECT number % 3, number, number, if(number % 2 == 0, 1, 0) FROM system.numbers LIMIT 120; +SELECT + id, + sumIf(value1, predicate_value), + sumIf(value2, predicate_value) +FROM test_table_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_float_values; + +SELECT 'Test nullable unsigned integer values'; + +DROP TABLE IF EXISTS test_table_nullable_unsigned_values; +CREATE TABLE test_table_nullable_unsigned_values +( + id UInt64, + + value1 Nullable(UInt8), + value2 Nullable(UInt16), + value3 Nullable(UInt32), + value4 Nullable(UInt64), + + predicate_value UInt8 +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_unsigned_values 
SELECT number % 3, number, number, number, number, if(number % 2 == 0, 1, 0) FROM system.numbers LIMIT 120; +SELECT + id, + sumIf(value1, predicate_value), + sumIf(value2, predicate_value), + sumIf(value3, predicate_value), + sumIf(value4, predicate_value) +FROM test_table_nullable_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_unsigned_values; + +SELECT 'Test nullable signed integer values'; + +DROP TABLE IF EXISTS test_table_nullable_signed_values; +CREATE TABLE test_table_nullable_signed_values +( + id UInt64, + + value1 Nullable(Int8), + value2 Nullable(Int16), + value3 Nullable(Int32), + value4 Nullable(Int64), + + predicate_value UInt8 +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_signed_values SELECT number % 3, number, number, number, number, if(number % 2 == 0, 1, 0) FROM system.numbers LIMIT 120; +SELECT + id, + sumIf(value1, predicate_value), + sumIf(value2, predicate_value), + sumIf(value3, predicate_value), + sumIf(value4, predicate_value) +FROM test_table_nullable_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_signed_values; + +SELECT 'Test nullable float values'; + +DROP TABLE IF EXISTS test_table_nullable_float_values; +CREATE TABLE test_table_nullable_float_values +( + id UInt64, + + value1 Nullable(Float32), + value2 Nullable(Float64), + + predicate_value UInt8 +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_float_values SELECT number % 3, number, number, if(number % 2 == 0, 1, 0) FROM system.numbers LIMIT 120; +SELECT + id, + sumIf(value1, predicate_value), + sumIf(value2, predicate_value) +FROM test_table_nullable_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_float_values; + +SELECT 'Test null specifics'; + +DROP TABLE IF EXISTS test_table_null_specifics; +CREATE TABLE test_table_null_specifics +( + id UInt64, + + value1 Nullable(UInt64), + value2 Nullable(UInt64), + value3 Nullable(UInt64), + + predicate_value UInt8 +) ENGINE=TinyLog; + +INSERT INTO test_table_null_specifics VALUES (0, 1, 1, NULL, 1); +INSERT INTO test_table_null_specifics VALUES (0, 2, NULL, NULL, 1); +INSERT INTO test_table_null_specifics VALUES (0, 3, 3, NULL, 1); + +SELECT + id, + sumIf(value1, predicate_value), + sumIf(value2, predicate_value), + sumIf(value3, predicate_value) +FROM test_table_null_specifics GROUP BY id ORDER BY id; +DROP TABLE IF EXISTS test_table_null_specifics; + +SELECT 'Test null variadic'; + +DROP TABLE IF EXISTS test_table_null_specifics; +CREATE TABLE test_table_null_specifics +( + id UInt64, + + value1 Nullable(UInt64), + value2 Nullable(UInt64), + value3 Nullable(UInt64), + + predicate_value UInt8, + weight UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_null_specifics VALUES (0, 1, 1, NULL, 1, 1); +INSERT INTO test_table_null_specifics VALUES (0, 2, NULL, NULL, 1, 2); +INSERT INTO test_table_null_specifics VALUES (0, 3, 3, NULL, 1, 3); + +SELECT + id, + avgWeightedIf(value1, weight, predicate_value), + avgWeightedIf(value2, weight, predicate_value), + avgWeightedIf(value3, weight, predicate_value) +FROM test_table_null_specifics GROUP BY id ORDER BY id; +DROP TABLE IF EXISTS test_table_null_specifics; diff --git a/tests/queries/0_stateless/01897_jit_aggregation_function_avg_weighted_long.reference b/tests/queries/0_stateless/01897_jit_aggregation_function_avg_weighted_long.reference new file mode 100644 index 00000000000..fec5cc09859 --- /dev/null +++ b/tests/queries/0_stateless/01897_jit_aggregation_function_avg_weighted_long.reference @@ -0,0 +1,26 @@ +Test unsigned integer 
values +0 nan nan nan nan +1 59.5 59.5 59.5 59.5 +2 60.5 60.5 60.5 60.5 +Test signed integer values +0 nan nan nan nan +1 59.5 59.5 59.5 59.5 +2 60.5 60.5 60.5 60.5 +Test float values +0 nan nan +1 59.5 59.5 +2 60.5 60.5 +Test nullable unsigned integer values +0 nan nan nan nan +1 59.5 59.5 59.5 59.5 +2 60.5 60.5 60.5 60.5 +Test nullable signed integer values +0 nan nan nan nan +1 59.5 59.5 59.5 59.5 +2 60.5 60.5 60.5 60.5 +Test nullable float values +0 nan nan +1 59.5 59.5 +2 60.5 60.5 +Test null specifics +0 2.3333333333333335 2.5 \N 2.5 2.5 \N diff --git a/tests/queries/0_stateless/01897_jit_aggregation_function_avg_weighted_long.sql b/tests/queries/0_stateless/01897_jit_aggregation_function_avg_weighted_long.sql new file mode 100644 index 00000000000..04b8a818382 --- /dev/null +++ b/tests/queries/0_stateless/01897_jit_aggregation_function_avg_weighted_long.sql @@ -0,0 +1,167 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +SELECT 'Test unsigned integer values'; + +DROP TABLE IF EXISTS test_table_unsigned_values; +CREATE TABLE test_table_unsigned_values +( + id UInt64, + + value1 UInt8, + value2 UInt16, + value3 UInt32, + value4 UInt64, + + weight UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_unsigned_values SELECT number % 3, number, number, number, number, number % 3 FROM system.numbers LIMIT 120; +SELECT + id, + avgWeighted(value1, weight), + avgWeighted(value2, weight), + avgWeighted(value3, weight), + avgWeighted(value4, weight) +FROM test_table_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_unsigned_values; + +SELECT 'Test signed integer values'; + +DROP TABLE IF EXISTS test_table_signed_values; +CREATE TABLE test_table_signed_values +( + id UInt64, + + value1 Int8, + value2 Int16, + value3 Int32, + value4 Int64, + + weight UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_signed_values SELECT number % 3, number, number, number, number, number % 3 FROM system.numbers LIMIT 120; +SELECT + id, + avgWeighted(value1, weight), + avgWeighted(value2, weight), + avgWeighted(value3, weight), + avgWeighted(value4, weight) +FROM test_table_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_signed_values; + +SELECT 'Test float values'; + +DROP TABLE IF EXISTS test_table_float_values; +CREATE TABLE test_table_float_values +( + id UInt64, + + value1 Float32, + value2 Float64, + + weight UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_float_values SELECT number % 3, number, number, number % 3 FROM system.numbers LIMIT 120; +SELECT id, avgWeighted(value1, weight), avgWeighted(value2, weight) FROM test_table_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_float_values; + +SELECT 'Test nullable unsigned integer values'; + +DROP TABLE IF EXISTS test_table_nullable_unsigned_values; +CREATE TABLE test_table_nullable_unsigned_values +( + id UInt64, + + value1 Nullable(UInt8), + value2 Nullable(UInt16), + value3 Nullable(UInt32), + value4 Nullable(UInt64), + + weight UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_unsigned_values SELECT number % 3, number, number, number, number, number % 3 FROM system.numbers LIMIT 120; +SELECT + id, + avgWeighted(value1, weight), + avgWeighted(value2, weight), + avgWeighted(value3, weight), + avgWeighted(value4, weight) +FROM test_table_nullable_unsigned_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_unsigned_values; + +SELECT 'Test nullable signed integer values'; + +DROP TABLE IF EXISTS test_table_nullable_signed_values; +CREATE TABLE 
test_table_nullable_signed_values +( + id UInt64, + + value1 Nullable(Int8), + value2 Nullable(Int16), + value3 Nullable(Int32), + value4 Nullable(Int64), + + weight UInt64 +) ENGINE=TinyLog; + + +INSERT INTO test_table_nullable_signed_values SELECT number % 3, number, number, number, number, number % 3 FROM system.numbers LIMIT 120; +SELECT + id, + avgWeighted(value1, weight), + avgWeighted(value2, weight), + avgWeighted(value3, weight), + avgWeighted(value4, weight) +FROM test_table_nullable_signed_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_signed_values; + +SELECT 'Test nullable float values'; + +DROP TABLE IF EXISTS test_table_nullable_float_values; +CREATE TABLE test_table_nullable_float_values +( + id UInt64, + + value1 Nullable(Float32), + value2 Nullable(Float64), + + weight UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_nullable_float_values SELECT number % 3, number, number, number % 3 FROM system.numbers LIMIT 120; +SELECT id, avgWeighted(value1, weight), avgWeighted(value2, weight) FROM test_table_nullable_float_values GROUP BY id ORDER BY id; +DROP TABLE test_table_nullable_float_values; + +SELECT 'Test null specifics'; + +DROP TABLE IF EXISTS test_table_null_specifics; +CREATE TABLE test_table_null_specifics +( + id UInt64, + + value1 Nullable(UInt64), + value2 Nullable(UInt64), + value3 Nullable(UInt64), + + weight UInt64, + weight_nullable Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_table_null_specifics VALUES (0, 1, 1, NULL, 1, 1); +INSERT INTO test_table_null_specifics VALUES (0, 2, NULL, NULL, 2, NULL); +INSERT INTO test_table_null_specifics VALUES (0, 3, 3, NULL, 3, 3); + +SELECT + id, + avgWeighted(value1, weight), + avgWeighted(value2, weight), + avgWeighted(value3, weight), + avgWeighted(value1, weight_nullable), + avgWeighted(value2, weight_nullable), + avgWeighted(value3, weight_nullable) +FROM test_table_null_specifics GROUP BY id ORDER BY id; +DROP TABLE IF EXISTS test_table_null_specifics; diff --git a/tests/queries/0_stateless/01902_table_function_merge_db_repr.reference b/tests/queries/0_stateless/01902_table_function_merge_db_repr.reference new file mode 100644 index 00000000000..4fd27ceec77 --- /dev/null +++ b/tests/queries/0_stateless/01902_table_function_merge_db_repr.reference @@ -0,0 +1,195 @@ +CREATE TABLE t_merge as 01902_db.t ENGINE=Merge(REGEXP(^01902_db), ^t) +SELECT _database, _table, n FROM 01902_db.t_merge ORDER BY _database, _table, n +01902_db t 0 +01902_db t 1 +01902_db t 2 +01902_db t 3 +01902_db t 4 +01902_db t 5 +01902_db t 6 +01902_db t 7 +01902_db t 8 +01902_db t 9 +01902_db1 t1 0 +01902_db1 t1 1 +01902_db1 t1 2 +01902_db1 t1 3 +01902_db1 t1 4 +01902_db1 t1 5 +01902_db1 t1 6 +01902_db1 t1 7 +01902_db1 t1 8 +01902_db1 t1 9 +01902_db2 t2 0 +01902_db2 t2 1 +01902_db2 t2 2 +01902_db2 t2 3 +01902_db2 t2 4 +01902_db2 t2 5 +01902_db2 t2 6 +01902_db2 t2 7 +01902_db2 t2 8 +01902_db2 t2 9 +01902_db3 t3 0 +01902_db3 t3 1 +01902_db3 t3 2 +01902_db3 t3 3 +01902_db3 t3 4 +01902_db3 t3 5 +01902_db3 t3 6 +01902_db3 t3 7 +01902_db3 t3 8 +01902_db3 t3 9 +SHOW CREATE TABLE 01902_db.t_merge +CREATE TABLE `01902_db`.t_merge\n(\n `n` Int8\n)\nENGINE = Merge(REGEXP(\'^01902_db\'), \'^t\') +SELECT _database, _table, n FROM merge(REGEXP(^01902_db), ^t) ORDER BY _database, _table, n +01902_db t 0 +01902_db t 1 +01902_db t 2 +01902_db t 3 +01902_db t 4 +01902_db t 5 +01902_db t 6 +01902_db t 7 +01902_db t 8 +01902_db t 9 +01902_db t_merge 0 +01902_db t_merge 0 +01902_db t_merge 0 +01902_db t_merge 0 +01902_db t_merge 1 
+01902_db t_merge 1 +01902_db t_merge 1 +01902_db t_merge 1 +01902_db t_merge 2 +01902_db t_merge 2 +01902_db t_merge 2 +01902_db t_merge 2 +01902_db t_merge 3 +01902_db t_merge 3 +01902_db t_merge 3 +01902_db t_merge 3 +01902_db t_merge 4 +01902_db t_merge 4 +01902_db t_merge 4 +01902_db t_merge 4 +01902_db t_merge 5 +01902_db t_merge 5 +01902_db t_merge 5 +01902_db t_merge 5 +01902_db t_merge 6 +01902_db t_merge 6 +01902_db t_merge 6 +01902_db t_merge 6 +01902_db t_merge 7 +01902_db t_merge 7 +01902_db t_merge 7 +01902_db t_merge 7 +01902_db t_merge 8 +01902_db t_merge 8 +01902_db t_merge 8 +01902_db t_merge 8 +01902_db t_merge 9 +01902_db t_merge 9 +01902_db t_merge 9 +01902_db t_merge 9 +01902_db1 t1 0 +01902_db1 t1 1 +01902_db1 t1 2 +01902_db1 t1 3 +01902_db1 t1 4 +01902_db1 t1 5 +01902_db1 t1 6 +01902_db1 t1 7 +01902_db1 t1 8 +01902_db1 t1 9 +01902_db2 t2 0 +01902_db2 t2 1 +01902_db2 t2 2 +01902_db2 t2 3 +01902_db2 t2 4 +01902_db2 t2 5 +01902_db2 t2 6 +01902_db2 t2 7 +01902_db2 t2 8 +01902_db2 t2 9 +01902_db3 t3 0 +01902_db3 t3 1 +01902_db3 t3 2 +01902_db3 t3 3 +01902_db3 t3 4 +01902_db3 t3 5 +01902_db3 t3 6 +01902_db3 t3 7 +01902_db3 t3 8 +01902_db3 t3 9 +SELECT _database, _table, n FROM 01902_db.t_merge WHERE _database = 01902_db1 ORDER BY _database, _table, n +01902_db1 t1 0 +01902_db1 t1 1 +01902_db1 t1 2 +01902_db1 t1 3 +01902_db1 t1 4 +01902_db1 t1 5 +01902_db1 t1 6 +01902_db1 t1 7 +01902_db1 t1 8 +01902_db1 t1 9 +SELECT _database, _table, n FROM 01902_db.t_merge WHERE _table = t1 ORDER BY _database, _table, n +01902_db1 t1 0 +01902_db1 t1 1 +01902_db1 t1 2 +01902_db1 t1 3 +01902_db1 t1 4 +01902_db1 t1 5 +01902_db1 t1 6 +01902_db1 t1 7 +01902_db1 t1 8 +01902_db1 t1 9 +CREATE TABLE t_merge1 as 01902_db.t ENGINE=Merge(01902_db, ^t$) +SELECT _database, _table, n FROM 01902_db.t_merge1 ORDER BY _database, _table, n +01902_db t 0 +01902_db t 1 +01902_db t 2 +01902_db t 3 +01902_db t 4 +01902_db t 5 +01902_db t 6 +01902_db t 7 +01902_db t 8 +01902_db t 9 +SELECT _database, _table, n FROM merge(01902_db, ^t$) ORDER BY _database, _table, n +01902_db t 0 +01902_db t 1 +01902_db t 2 +01902_db t 3 +01902_db t 4 +01902_db t 5 +01902_db t 6 +01902_db t 7 +01902_db t 8 +01902_db t 9 +CREATE TABLE t_merge_1 as 01902_db.t ENGINE=Merge(currentDatabase(), ^t) +SELECT _database, _table, n FROM 01902_db.t_merge_1 ORDER BY _database, _table, n +01902_db1 t1 0 +01902_db1 t1 1 +01902_db1 t1 2 +01902_db1 t1 3 +01902_db1 t1 4 +01902_db1 t1 5 +01902_db1 t1 6 +01902_db1 t1 7 +01902_db1 t1 8 +01902_db1 t1 9 +SHOW CREATE TABLE 01902_db.t_merge_1 +CREATE TABLE `01902_db`.t_merge_1\n(\n `n` Int8\n)\nENGINE = Merge(\'01902_db1\', \'^t\') +SELECT _database, _table, n FROM merge(currentDatabase(), ^t) ORDER BY _database, _table, n +01902_db1 t1 0 +01902_db1 t1 1 +01902_db1 t1 2 +01902_db1 t1 3 +01902_db1 t1 4 +01902_db1 t1 5 +01902_db1 t1 6 +01902_db1 t1 7 +01902_db1 t1 8 +01902_db1 t1 9 diff --git a/tests/queries/0_stateless/01902_table_function_merge_db_repr.sql b/tests/queries/0_stateless/01902_table_function_merge_db_repr.sql new file mode 100644 index 00000000000..3aabf1a1f36 --- /dev/null +++ b/tests/queries/0_stateless/01902_table_function_merge_db_repr.sql @@ -0,0 +1,66 @@ +DROP DATABASE IF EXISTS 01902_db; +DROP DATABASE IF EXISTS 01902_db1; +DROP DATABASE IF EXISTS 01902_db2; +DROP DATABASE IF EXISTS 01902_db3; + +CREATE DATABASE 01902_db; +CREATE DATABASE 01902_db1; +CREATE DATABASE 01902_db2; +CREATE DATABASE 01902_db3; + +CREATE TABLE 01902_db.t (n Int8) ENGINE=MergeTree ORDER BY n; +CREATE TABLE 
01902_db1.t1 (n Int8) ENGINE=MergeTree ORDER BY n; +CREATE TABLE 01902_db2.t2 (n Int8) ENGINE=MergeTree ORDER BY n; +CREATE TABLE 01902_db3.t3 (n Int8) ENGINE=MergeTree ORDER BY n; + +INSERT INTO 01902_db.t SELECT * FROM numbers(10); +INSERT INTO 01902_db1.t1 SELECT * FROM numbers(10); +INSERT INTO 01902_db2.t2 SELECT * FROM numbers(10); +INSERT INTO 01902_db3.t3 SELECT * FROM numbers(10); + +SELECT 'CREATE TABLE t_merge as 01902_db.t ENGINE=Merge(REGEXP(^01902_db), ^t)'; +CREATE TABLE 01902_db.t_merge as 01902_db.t ENGINE=Merge(REGEXP('^01902_db'), '^t'); + +SELECT 'SELECT _database, _table, n FROM 01902_db.t_merge ORDER BY _database, _table, n'; +SELECT _database, _table, n FROM 01902_db.t_merge ORDER BY _database, _table, n; + +SELECT 'SHOW CREATE TABLE 01902_db.t_merge'; +SHOW CREATE TABLE 01902_db.t_merge; + +SELECT 'SELECT _database, _table, n FROM merge(REGEXP(^01902_db), ^t) ORDER BY _database, _table, n'; +SELECT _database, _table, n FROM merge(REGEXP('^01902_db'), '^t') ORDER BY _database, _table, n; + +SELECT 'SELECT _database, _table, n FROM 01902_db.t_merge WHERE _database = 01902_db1 ORDER BY _database, _table, n'; +SELECT _database, _table, n FROM 01902_db.t_merge WHERE _database = '01902_db1' ORDER BY _database, _table, n; + +SELECT 'SELECT _database, _table, n FROM 01902_db.t_merge WHERE _table = t1 ORDER BY _database, _table, n'; +SELECT _database, _table, n FROM 01902_db.t_merge WHERE _table = 't1' ORDER BY _database, _table, n; + +-- not regexp +SELECT 'CREATE TABLE t_merge1 as 01902_db.t ENGINE=Merge(01902_db, ^t$)'; +CREATE TABLE 01902_db.t_merge1 as 01902_db.t ENGINE=Merge('01902_db', '^t$'); + +SELECT 'SELECT _database, _table, n FROM 01902_db.t_merge1 ORDER BY _database, _table, n'; +SELECT _database, _table, n FROM 01902_db.t_merge1 ORDER BY _database, _table, n; + +SELECT 'SELECT _database, _table, n FROM merge(01902_db, ^t$) ORDER BY _database, _table, n'; +SELECT _database, _table, n FROM merge('01902_db', '^t$') ORDER BY _database, _table, n; + +USE 01902_db1; + +SELECT 'CREATE TABLE t_merge_1 as 01902_db.t ENGINE=Merge(currentDatabase(), ^t)'; +CREATE TABLE 01902_db.t_merge_1 as 01902_db.t ENGINE=Merge(currentDatabase(), '^t'); + +SELECT 'SELECT _database, _table, n FROM 01902_db.t_merge_1 ORDER BY _database, _table, n'; +SELECT _database, _table, n FROM 01902_db.t_merge_1 ORDER BY _database, _table, n; + +SELECT 'SHOW CREATE TABLE 01902_db.t_merge_1'; +SHOW CREATE TABLE 01902_db.t_merge_1; + +SELECT 'SELECT _database, _table, n FROM merge(currentDatabase(), ^t) ORDER BY _database, _table, n'; +SELECT _database, _table, n FROM merge(currentDatabase(), '^t') ORDER BY _database, _table, n; + +DROP DATABASE 01902_db; +DROP DATABASE 01902_db1; +DROP DATABASE 01902_db2; +DROP DATABASE 01902_db3; diff --git a/tests/queries/0_stateless/01913_names_of_tuple_literal.reference b/tests/queries/0_stateless/01913_names_of_tuple_literal.reference new file mode 100644 index 00000000000..a4c05ad853a --- /dev/null +++ b/tests/queries/0_stateless/01913_names_of_tuple_literal.reference @@ -0,0 +1,4 @@ +((1, 2), (2, 3), (3, 4)) +((1,2),(2,3),(3,4)) +tuple(tuple(1, 2), tuple(2, 3), tuple(3, 4)) +((1,2),(2,3),(3,4)) diff --git a/tests/queries/0_stateless/01913_names_of_tuple_literal.sql b/tests/queries/0_stateless/01913_names_of_tuple_literal.sql new file mode 100644 index 00000000000..09de9e8cf37 --- /dev/null +++ b/tests/queries/0_stateless/01913_names_of_tuple_literal.sql @@ -0,0 +1,2 @@ +SELECT ((1, 2), (2, 3), (3, 4)) FORMAT TSVWithNames; +SELECT ((1, 2), (2, 3), (3, 4)) 
FORMAT TSVWithNames SETTINGS legacy_column_name_of_tuple_literal = 1; diff --git a/tests/queries/0_stateless/01913_replace_dictionary.reference b/tests/queries/0_stateless/01913_replace_dictionary.reference new file mode 100644 index 00000000000..2d33c16ccc2 --- /dev/null +++ b/tests/queries/0_stateless/01913_replace_dictionary.reference @@ -0,0 +1,2 @@ +0 Value0 +0 Value1 diff --git a/tests/queries/0_stateless/01913_replace_dictionary.sql b/tests/queries/0_stateless/01913_replace_dictionary.sql new file mode 100644 index 00000000000..43dd460707a --- /dev/null +++ b/tests/queries/0_stateless/01913_replace_dictionary.sql @@ -0,0 +1,51 @@ +DROP DATABASE IF EXISTS 01913_db; +CREATE DATABASE 01913_db ENGINE=Atomic; + +DROP TABLE IF EXISTS 01913_db.test_source_table_1; +CREATE TABLE 01913_db.test_source_table_1 +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO 01913_db.test_source_table_1 VALUES (0, 'Value0'); + +DROP DICTIONARY IF EXISTS 01913_db.test_dictionary; +CREATE DICTIONARY 01913_db.test_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +LAYOUT(DIRECT()) +SOURCE(CLICKHOUSE(DB '01913_db' TABLE 'test_source_table_1')); + +SELECT * FROM 01913_db.test_dictionary; + +DROP TABLE IF EXISTS 01913_db.test_source_table_2; +CREATE TABLE 01913_db.test_source_table_2 +( + id UInt64, + value_1 String +) ENGINE=TinyLog; + +INSERT INTO 01913_db.test_source_table_2 VALUES (0, 'Value1'); + +REPLACE DICTIONARY 01913_db.test_dictionary +( + id UInt64, + value_1 String +) +PRIMARY KEY id +LAYOUT(HASHED()) +SOURCE(CLICKHOUSE(DB '01913_db' TABLE 'test_source_table_2')) +LIFETIME(0); + +SELECT * FROM 01913_db.test_dictionary; + +DROP DICTIONARY 01913_db.test_dictionary; + +DROP TABLE 01913_db.test_source_table_1; +DROP TABLE 01913_db.test_source_table_2; + +DROP DATABASE 01913_db; diff --git a/tests/queries/0_stateless/01914_exchange_dictionaries.reference b/tests/queries/0_stateless/01914_exchange_dictionaries.reference new file mode 100644 index 00000000000..9278d0abeed --- /dev/null +++ b/tests/queries/0_stateless/01914_exchange_dictionaries.reference @@ -0,0 +1,4 @@ +1 Table1 +2 Table2 +2 Table2 +1 Table1 diff --git a/tests/queries/0_stateless/01914_exchange_dictionaries.sql b/tests/queries/0_stateless/01914_exchange_dictionaries.sql new file mode 100644 index 00000000000..77bcf53ab5e --- /dev/null +++ b/tests/queries/0_stateless/01914_exchange_dictionaries.sql @@ -0,0 +1,39 @@ +DROP DATABASE IF EXISTS 01914_db; +CREATE DATABASE 01914_db ENGINE=Atomic; + +DROP TABLE IF EXISTS 01914_db.table_1; +CREATE TABLE 01914_db.table_1 (id UInt64, value String) ENGINE=TinyLog; + +DROP TABLE IF EXISTS 01914_db.table_2; +CREATE TABLE 01914_db.table_2 (id UInt64, value String) ENGINE=TinyLog; + +INSERT INTO 01914_db.table_1 VALUES (1, 'Table1'); +INSERT INTO 01914_db.table_2 VALUES (2, 'Table2'); + +DROP DICTIONARY IF EXISTS 01914_db.dictionary_1; +CREATE DICTIONARY 01914_db.dictionary_1 (id UInt64, value String) +PRIMARY KEY id +LAYOUT(DIRECT()) +SOURCE(CLICKHOUSE(DB '01914_db' TABLE 'table_1')); + +DROP DICTIONARY IF EXISTS 01914_db.dictionary_2; +CREATE DICTIONARY 01914_db.dictionary_2 (id UInt64, value String) +PRIMARY KEY id +LAYOUT(DIRECT()) +SOURCE(CLICKHOUSE(DB '01914_db' TABLE 'table_2')); + +SELECT * FROM 01914_db.dictionary_1; +SELECT * FROM 01914_db.dictionary_2; + +EXCHANGE DICTIONARIES 01914_db.dictionary_1 AND 01914_db.dictionary_2; + +SELECT * FROM 01914_db.dictionary_1; +SELECT * FROM 01914_db.dictionary_2; + +DROP DICTIONARY 01914_db.dictionary_1; +DROP DICTIONARY 
01914_db.dictionary_2; + +DROP TABLE 01914_db.table_1; +DROP TABLE 01914_db.table_2; + +DROP DATABASE 01914_db; diff --git a/tests/queries/0_stateless/01915_create_or_replace_dictionary.reference b/tests/queries/0_stateless/01915_create_or_replace_dictionary.reference new file mode 100644 index 00000000000..2d33c16ccc2 --- /dev/null +++ b/tests/queries/0_stateless/01915_create_or_replace_dictionary.reference @@ -0,0 +1,2 @@ +0 Value0 +0 Value1 diff --git a/tests/queries/0_stateless/01915_create_or_replace_dictionary.sql b/tests/queries/0_stateless/01915_create_or_replace_dictionary.sql new file mode 100644 index 00000000000..c9df6114ec9 --- /dev/null +++ b/tests/queries/0_stateless/01915_create_or_replace_dictionary.sql @@ -0,0 +1,51 @@ +DROP DATABASE IF EXISTS 01915_db; +CREATE DATABASE 01915_db ENGINE=Atomic; + +DROP TABLE IF EXISTS 01915_db.test_source_table_1; +CREATE TABLE 01915_db.test_source_table_1 +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO 01915_db.test_source_table_1 VALUES (0, 'Value0'); + +DROP DICTIONARY IF EXISTS 01915_db.test_dictionary; +CREATE OR REPLACE DICTIONARY 01915_db.test_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +LAYOUT(DIRECT()) +SOURCE(CLICKHOUSE(DB '01915_db' TABLE 'test_source_table_1')); + +SELECT * FROM 01915_db.test_dictionary; + +DROP TABLE IF EXISTS 01915_db.test_source_table_2; +CREATE TABLE 01915_db.test_source_table_2 +( + id UInt64, + value_1 String +) ENGINE=TinyLog; + +INSERT INTO 01915_db.test_source_table_2 VALUES (0, 'Value1'); + +CREATE OR REPLACE DICTIONARY 01915_db.test_dictionary +( + id UInt64, + value_1 String +) +PRIMARY KEY id +LAYOUT(HASHED()) +SOURCE(CLICKHOUSE(DB '01915_db' TABLE 'test_source_table_2')) +LIFETIME(0); + +SELECT * FROM 01915_db.test_dictionary; + +DROP DICTIONARY 01915_db.test_dictionary; + +DROP TABLE 01915_db.test_source_table_1; +DROP TABLE 01915_db.test_source_table_2; + +DROP DATABASE 01915_db; diff --git a/tests/queries/0_stateless/01917_prewhere_column_type.reference b/tests/queries/0_stateless/01917_prewhere_column_type.reference new file mode 100644 index 00000000000..58c9bdf9d01 --- /dev/null +++ b/tests/queries/0_stateless/01917_prewhere_column_type.reference @@ -0,0 +1 @@ +111 diff --git a/tests/queries/0_stateless/01917_prewhere_column_type.sql b/tests/queries/0_stateless/01917_prewhere_column_type.sql new file mode 100644 index 00000000000..5147e6093a9 --- /dev/null +++ b/tests/queries/0_stateless/01917_prewhere_column_type.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 ( s String, f Float32, e UInt16 ) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = '100G'; + +INSERT INTO t1 VALUES ('111', 1, 1); + +SELECT s FROM t1 WHERE f AND (e = 1); -- { serverError 59 } +SELECT s FROM t1 PREWHERE f; -- { serverError 59 } +SELECT s FROM t1 PREWHERE f WHERE (e = 1); -- { serverError 59 } +SELECT s FROM t1 PREWHERE f WHERE f AND (e = 1); -- { serverError 59 } + +SELECT s FROM t1 WHERE e AND (e = 1); +SELECT s FROM t1 PREWHERE e; -- { serverError 59 } +SELECT s FROM t1 PREWHERE e WHERE (e = 1); -- { serverError 59 } +SELECT s FROM t1 PREWHERE e WHERE f AND (e = 1); -- { serverError 59 } + diff --git a/tests/queries/0_stateless/01917_system_data_skipping_indices.reference b/tests/queries/0_stateless/01917_system_data_skipping_indices.reference new file mode 100644 index 00000000000..b5a4b596a97 --- /dev/null +++ b/tests/queries/0_stateless/01917_system_data_skipping_indices.reference @@ -0,0 +1,10 @@ +default data_01917 d1_idx minmax d1 1 
+default data_01917 d1_null_idx minmax assumeNotNull(d1_null) 1 +default data_01917_2 memory set frequency * length(name) 5 +default data_01917_2 sample_index1 minmax length(name), name 4 +default data_01917_2 sample_index2 ngrambf_v1 lower(name), name 4 +2 +3 +d1_idx +d1_null_idx +sample_index1 diff --git a/tests/queries/0_stateless/01917_system_data_skipping_indices.sql b/tests/queries/0_stateless/01917_system_data_skipping_indices.sql new file mode 100644 index 00000000000..bfe9d6398b3 --- /dev/null +++ b/tests/queries/0_stateless/01917_system_data_skipping_indices.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS data_01917; +DROP TABLE IF EXISTS data_01917_2; + +CREATE TABLE data_01917 +( + key Int, + d1 Int, + d1_null Nullable(Int), + INDEX d1_idx d1 TYPE minmax GRANULARITY 1, + INDEX d1_null_idx assumeNotNull(d1_null) TYPE minmax GRANULARITY 1 +) +Engine=MergeTree() +ORDER BY key; + +CREATE TABLE data_01917_2 +( + name String, + frequency UInt64, + INDEX memory (frequency * length(name)) TYPE set(1000) GRANULARITY 5, + INDEX sample_index1 (length(name), name) TYPE minmax GRANULARITY 4, + INDEX sample_index2 (lower(name), name) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4 +) +Engine=MergeTree() +ORDER BY name; + +SELECT * FROM system.data_skipping_indices WHERE database = currentDatabase(); + +SELECT count(*) FROM system.data_skipping_indices WHERE table = 'data_01917' AND database = currentDatabase(); +SELECT count(*) FROM system.data_skipping_indices WHERE table = 'data_01917_2' AND database = currentDatabase(); + +SELECT name FROM system.data_skipping_indices WHERE type = 'minmax' AND database = currentDatabase(); + +DROP TABLE data_01917; +DROP TABLE data_01917_2; + diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh new file mode 100755 index 00000000000..13086879e0d --- /dev/null +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +NUM_REPLICAS=5 + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS ttl_table$i" & +done + +wait + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -n --query "CREATE TABLE ttl_table$i( + key DateTime + ) + ENGINE ReplicatedMergeTree('/test/01921_concurrent_ttl_and_normal_merges/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/ttl_table', '$i') + ORDER BY tuple() + TTL key + INTERVAL 1 SECOND + SETTINGS merge_with_ttl_timeout=1, max_replicated_merges_with_ttl_in_queue=100, max_number_of_merges_with_ttl_in_pool=100, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" +done + +function optimize_thread +{ + while true; do + REPLICA=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE ttl_table$REPLICA FINAl" + done +} + +function insert_thread +{ + while true; do + REPLICA=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --optimize_on_insert=0 --query "INSERT INTO ttl_table$REPLICA SELECT now() + rand() % 5 - rand() % 3 FROM numbers(5)" + $CLICKHOUSE_CLIENT --optimize_on_insert=0 --query "INSERT INTO ttl_table$REPLICA SELECT now() + rand() % 5 - rand() % 3 FROM numbers(5)" + $CLICKHOUSE_CLIENT --optimize_on_insert=0 --query "INSERT INTO ttl_table$REPLICA SELECT now() + rand() % 5 - rand() % 3 FROM numbers(5)" + done +} + + +export -f insert_thread; +export -f optimize_thread; + +TIMEOUT=30 + +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & +timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & +timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & +timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & +timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & + +wait + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA ttl_table$i" +done + +$CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue where table like 'ttl_table%' and database = '${CLICKHOUSE_DATABASE}' and type='MERGE_PARTS' and last_exception != '' FORMAT Vertical" +$CLICKHOUSE_CLIENT --query "SELECT COUNT() > 0 FROM system.part_log where table like 'ttl_table%' and database = '${CLICKHOUSE_DATABASE}'" + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS ttl_table$i" & +done + +wait diff --git a/tests/queries/0_stateless/01922_client_param.reference b/tests/queries/0_stateless/01922_client_param.reference new file mode 100644 index 00000000000..0d55bed3a35 --- /dev/null +++ b/tests/queries/0_stateless/01922_client_param.reference @@ -0,0 +1,2 @@ +foo +foo diff --git a/tests/queries/0_stateless/01922_client_param.sh b/tests/queries/0_stateless/01922_client_param.sh new file mode 100755 index 00000000000..bb0abfb2191 --- /dev/null +++ b/tests/queries/0_stateless/01922_client_param.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --param_foo |& grep -q -x 'Code: 36. 
DB::Exception: Parameter requires value' +$CLICKHOUSE_CLIENT --param_foo foo -q 'select {foo:String}' +$CLICKHOUSE_CLIENT -q 'select {foo:String}' --param_foo foo diff --git a/tests/queries/0_stateless/01923_different_expression_name_alias.reference b/tests/queries/0_stateless/01923_different_expression_name_alias.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/01923_different_expression_name_alias.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/01923_different_expression_name_alias.sql b/tests/queries/0_stateless/01923_different_expression_name_alias.sql new file mode 100644 index 00000000000..09108cef483 --- /dev/null +++ b/tests/queries/0_stateless/01923_different_expression_name_alias.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS distributed_tbl; +DROP TABLE IF EXISTS merge_tree_table; + +CREATE TABLE merge_tree_table +( + Date Date, + SomeType UInt8, + Alternative1 UInt64, + Alternative2 UInt64, + User UInt32, + CharID UInt64 ALIAS multiIf(SomeType IN (3, 4, 11), 0, SomeType IN (7, 8), Alternative1, Alternative2) +) +ENGINE = MergeTree() +ORDER BY tuple(); + +INSERT INTO merge_tree_table VALUES(toDate('2016-03-01'), 4, 0, 0, 1486392); + +SELECT count() FROM merge_tree_table; + +CREATE TABLE distributed_tbl +( + Date Date, + SomeType UInt8, + Alternative1 UInt64, + Alternative2 UInt64, + CharID UInt64, + User UInt32 +) +ENGINE = Distributed(test_shard_localhost, currentDatabase(), merge_tree_table); + +SELECT identity(CharID) AS x +FROM distributed_tbl +WHERE (Date = toDate('2016-03-01')) AND (User = 1486392) AND (x = 0); + +DROP TABLE IF EXISTS distributed_tbl; +DROP TABLE IF EXISTS merge_tree_table; diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh new file mode 100755 index 00000000000..bcb3775f86a --- /dev/null +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" + +# Rate limit is chosen for operation to spend more than one second. +seq 1 1000 | pv --quiet --rate-limit 1000 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" + +# We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting for data from the client. +${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; + WITH ProfileEvents['NetworkReceiveElapsedMicroseconds'] AS time + SELECT time >= 1000000 ? 
1 : time FROM system.query_log + WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE t" diff --git a/tests/queries/0_stateless/01923_ttl_with_modify_column.reference b/tests/queries/0_stateless/01923_ttl_with_modify_column.reference new file mode 100644 index 00000000000..2e55ea564b9 --- /dev/null +++ b/tests/queries/0_stateless/01923_ttl_with_modify_column.reference @@ -0,0 +1,2 @@ +2 ['Int16'] +2 ['Date'] diff --git a/tests/queries/0_stateless/01923_ttl_with_modify_column.sql b/tests/queries/0_stateless/01923_ttl_with_modify_column.sql new file mode 100644 index 00000000000..ed2812d2a39 --- /dev/null +++ b/tests/queries/0_stateless/01923_ttl_with_modify_column.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS t_ttl_modify_column; + +CREATE TABLE t_ttl_modify_column +( + InsertionDateTime DateTime, + TTLDays Int32 DEFAULT CAST(365, 'Int32') +) +ENGINE = MergeTree +ORDER BY tuple() +TTL InsertionDateTime + toIntervalDay(TTLDays) +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_ttl_modify_column VALUES (now(), 23); + +SET mutations_sync = 2; + +ALTER TABLE t_ttl_modify_column modify column TTLDays Int16 DEFAULT CAST(365, 'Int16'); + +INSERT INTO t_ttl_modify_column VALUES (now(), 23); + +SELECT sum(rows), groupUniqArray(type) FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_ttl_modify_column' AND column = 'TTLDays' AND active; + +DROP TABLE IF EXISTS t_ttl_modify_column; + +CREATE TABLE t_ttl_modify_column (InsertionDateTime DateTime) +ENGINE = MergeTree +ORDER BY tuple() +TTL InsertionDateTime + INTERVAL 3 DAY +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_ttl_modify_column VALUES (now()); + +ALTER TABLE t_ttl_modify_column MODIFY COLUMN InsertionDateTime Date; + +INSERT INTO t_ttl_modify_column VALUES (now()); + +SELECT sum(rows), groupUniqArray(type) FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_ttl_modify_column' AND column = 'InsertionDateTime' AND active; + +ALTER TABLE t_ttl_modify_column MODIFY COLUMN InsertionDateTime String; -- { serverError 43 } + +DROP TABLE IF EXISTS t_ttl_modify_column; diff --git a/tests/queries/0_stateless/01925_date_date_time_comparison.reference b/tests/queries/0_stateless/01925_date_date_time_comparison.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/01925_date_date_time_comparison.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/01925_date_date_time_comparison.sql b/tests/queries/0_stateless/01925_date_date_time_comparison.sql new file mode 100644 index 00000000000..13e856384d2 --- /dev/null +++ b/tests/queries/0_stateless/01925_date_date_time_comparison.sql @@ -0,0 +1,2 @@ +SELECT toDate('2000-01-01') < toDateTime('2000-01-01 00:00:01', 'Europe/Moscow'); +SELECT toDate('2000-01-01') < toDateTime64('2000-01-01 00:00:01', 0, 'Europe/Moscow'); diff --git a/tests/queries/0_stateless/01925_jit_aggregation_function_count_long.reference b/tests/queries/0_stateless/01925_jit_aggregation_function_count_long.reference new file mode 100644 index 00000000000..f3b78aeb71e --- /dev/null +++ b/tests/queries/0_stateless/01925_jit_aggregation_function_count_long.reference @@ -0,0 +1,3 @@ +0 40 20 +1 40 20 +2 40 20 diff --git a/tests/queries/0_stateless/01925_jit_aggregation_function_count_long.sql b/tests/queries/0_stateless/01925_jit_aggregation_function_count_long.sql new file mode 
100644 index 00000000000..03a3c4f8b65 --- /dev/null +++ b/tests/queries/0_stateless/01925_jit_aggregation_function_count_long.sql @@ -0,0 +1,15 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + + value UInt8, + value_nullable Nullable(UInt8) +) ENGINE=TinyLog; + +INSERT INTO test_table SELECT number % 3, number, if (number % 2 == 0, number, NULL) FROM system.numbers LIMIT 120; +SELECT id, count(value), count(value_nullable) FROM test_table GROUP BY id ORDER BY id; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/01925_merge_prewhere_table.reference b/tests/queries/0_stateless/01925_merge_prewhere_table.reference new file mode 100644 index 00000000000..368be4d48e2 --- /dev/null +++ b/tests/queries/0_stateless/01925_merge_prewhere_table.reference @@ -0,0 +1,2 @@ +x_1 10 +x_2 10 diff --git a/tests/queries/0_stateless/01925_merge_prewhere_table.sql b/tests/queries/0_stateless/01925_merge_prewhere_table.sql new file mode 100644 index 00000000000..4862a7bb426 --- /dev/null +++ b/tests/queries/0_stateless/01925_merge_prewhere_table.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS x_1; +DROP TABLE IF EXISTS x_2; +DROP TABLE IF EXISTS x; + +create table x_1 engine=Log as select * from numbers(10); +create table x_2 engine=Log as select * from numbers(10); +create table x engine=Merge(currentDatabase(), '^x_(1|2)$') as x_1; + +select _table, count() from x group by _table order by _table; + +DROP TABLE x_1; +DROP TABLE x_2; +DROP TABLE x; diff --git a/tests/queries/0_stateless/01925_test_group_by_const_consistency.reference b/tests/queries/0_stateless/01925_test_group_by_const_consistency.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01925_test_group_by_const_consistency.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01925_test_group_by_const_consistency.sql b/tests/queries/0_stateless/01925_test_group_by_const_consistency.sql new file mode 100644 index 00000000000..8a5de0e7c4f --- /dev/null +++ b/tests/queries/0_stateless/01925_test_group_by_const_consistency.sql @@ -0,0 +1,2 @@ +SELECT 1 as a, count() FROM numbers(10) WHERE 0 GROUP BY a; +SELECT count() FROM numbers(10) WHERE 0 diff --git a/tests/queries/0_stateless/01925_test_storage_merge_aliases.reference b/tests/queries/0_stateless/01925_test_storage_merge_aliases.reference new file mode 100644 index 00000000000..b0fea25ed4b --- /dev/null +++ b/tests/queries/0_stateless/01925_test_storage_merge_aliases.reference @@ -0,0 +1,10 @@ +alias1 +1 4 16 23 +23 16 4 1 +2020-02-02 1 4 2 16 3 23 +alias2 +1 3 4 4 +4 4 3 1 +23 16 4 1 +2020-02-01 1 3 2 4 3 4 +2020-02-02 1 4 2 16 3 23 diff --git a/tests/queries/0_stateless/01925_test_storage_merge_aliases.sql b/tests/queries/0_stateless/01925_test_storage_merge_aliases.sql new file mode 100644 index 00000000000..f3a5b2db62e --- /dev/null +++ b/tests/queries/0_stateless/01925_test_storage_merge_aliases.sql @@ -0,0 +1,57 @@ +drop table if exists merge; +create table merge +( + dt Date, + colAlias0 Int32, + colAlias1 Int32, + col2 Int32, + colAlias2 UInt32, + col3 Int32, + colAlias3 UInt32 +) +engine = Merge(currentDatabase(), '^alias_'); + +drop table if exists alias_1; +drop table if exists alias_2; + +create table alias_1 +( + dt Date, + col Int32, + colAlias0 UInt32 alias col, + colAlias1 UInt32 alias col3 + colAlias0, + col2 Int32, + colAlias2 Int32 alias colAlias1 + col2 + 10, + col3 Int32, + 
colAlias3 Int32 alias colAlias2 + colAlias1 + col3 +) +engine = MergeTree() +order by (dt); + +insert into alias_1 (dt, col, col2, col3) values ('2020-02-02', 1, 2, 3); + +select 'alias1'; +select colAlias0, colAlias1, colAlias2, colAlias3 from alias_1; +select colAlias3, colAlias2, colAlias1, colAlias0 from merge; +select * from merge; + +create table alias_2 +( + dt Date, + col Int32, + col2 Int32, + colAlias0 UInt32 alias col, + colAlias3 Int32 alias col3 + colAlias0, + colAlias1 UInt32 alias colAlias0 + col2, + colAlias2 Int32 alias colAlias0 + colAlias1, + col3 Int32 +) +engine = MergeTree() +order by (dt); + +insert into alias_2 (dt, col, col2, col3) values ('2020-02-01', 1, 2, 3); + +select 'alias2'; +select colAlias0, colAlias1, colAlias2, colAlias3 from alias_2; +select colAlias3, colAlias2, colAlias1, colAlias0 from merge order by dt; +select * from merge order by dt; diff --git a/tests/queries/0_stateless/01926_bin_unbin.reference b/tests/queries/0_stateless/01926_bin_unbin.reference new file mode 100644 index 00000000000..f84a858e449 --- /dev/null +++ b/tests/queries/0_stateless/01926_bin_unbin.reference @@ -0,0 +1,35 @@ + +00000000 +00000001 +00001010 +01111111 +11111111 +0000000100000000 +0000000111111111 +0000001000000000 +00110000 +0011000100110000 +111001101011010110001011111010001010111110010101 +11100110101101011000101111101000101011111001010100000000000000000000000000000000 +10011010100110011001100100111111 +0011001100110011001100110011001100110011001100111111001100111111 +00000000000011100010011100000111 +0000000000000000000011000011110101011101010100111010101000000001 +0011000100110010001100110011001100110010001101000011001000110100 +0011000100110010001100110011001100110010001101000011001000110100 +0011000100110010001100110011001100110010001101000011001000110100 +0011000100110010001100110011001100110010001101000011001000110100 + +1 +0 +10 +测试 +0 +0 +0 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01926_bin_unbin.sql b/tests/queries/0_stateless/01926_bin_unbin.sql new file mode 100644 index 00000000000..555770d09c6 --- /dev/null +++ b/tests/queries/0_stateless/01926_bin_unbin.sql @@ -0,0 +1,39 @@ +select bin(''); +select bin(0); +select bin(1); +select bin(10); +select bin(127); +select bin(255); +select bin(256); +select bin(511); +select bin(512); +select bin('0'); +select bin('10'); +select bin('测试'); +select bin(toFixedString('测试', 10)); +select bin(toFloat32(1.2)); +select bin(toFloat64(1.2)); +select bin(toDecimal32(1.2, 8)); +select bin(toDecimal64(1.2, 17)); +select bin('12332424'); +select bin(materialize('12332424')); +select bin(toNullable(materialize('12332424'))); +select bin(toLowCardinality(materialize('12332424'))); + +select unbin(''); +select unbin('0') == '\0'; +select unbin('00110000'); -- 0 +select unbin('0011000100110000'); -- 10 +select unbin('111001101011010110001011111010001010111110010101'); -- 测试 +select unbin(materialize('00110000')); +select unbin(toNullable(materialize('00110000'))); +select unbin(toLowCardinality(materialize('00110000'))); + +select unbin(bin('')) == ''; +select bin(unbin('')) == ''; +select bin(unbin('0')) == '00000000'; + +-- hex and bin consistent for corner cases +select hex('') == bin(''); +select unhex('') == unbin(''); +select unhex('0') == unbin('0'); diff --git a/tests/queries/0_stateless/01926_date_date_time_supertype.reference b/tests/queries/0_stateless/01926_date_date_time_supertype.reference new file mode 100644 index 00000000000..ec9933dfbd2 --- /dev/null +++ 
b/tests/queries/0_stateless/01926_date_date_time_supertype.reference @@ -0,0 +1,12 @@ +Array +Array(DateTime(\'Europe/Moscow\')) +Array(DateTime64(5, \'Europe/Moscow\')) +Array(DateTime64(6, \'Europe/Moscow\')) +If +2000-01-01 00:00:00 DateTime(\'Europe/Moscow\') +2000-01-01 00:00:00 DateTime(\'Europe/Moscow\') +2000-01-01 00:00:00.00000 DateTime64(5, \'Europe/Moscow\') +2000-01-01 00:00:00.00000 DateTime64(5, \'Europe/Moscow\') +Cast +2000-01-01 00:00:00 DateTime(\'UTC\') +2000-01-01 00:00:00.00000 DateTime64(5, \'UTC\') diff --git a/tests/queries/0_stateless/01926_date_date_time_supertype.sql b/tests/queries/0_stateless/01926_date_date_time_supertype.sql new file mode 100644 index 00000000000..559cd465ebb --- /dev/null +++ b/tests/queries/0_stateless/01926_date_date_time_supertype.sql @@ -0,0 +1,24 @@ +SELECT 'Array'; + +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow'), toDateTime64('2000-01-01', 5, 'Europe/Moscow')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow'), toDateTime64('2000-01-01', 5, 'Europe/Moscow'), toDateTime64('2000-01-01', 6, 'Europe/Moscow')]); + +DROP TABLE IF EXISTS predicate_table; +CREATE TABLE predicate_table (value UInt8) ENGINE=TinyLog; + +INSERT INTO predicate_table VALUES (0), (1); + +SELECT 'If'; + +WITH toDate('2000-01-01') as a, toDateTime('2000-01-01', 'Europe/Moscow') as b +SELECT if(value, b, a) as result, toTypeName(result) +FROM predicate_table; + +WITH toDateTime('2000-01-01') as a, toDateTime64('2000-01-01', 5, 'Europe/Moscow') as b +SELECT if(value, b, a) as result, toTypeName(result) +FROM predicate_table; + +SELECT 'Cast'; +SELECT CAST(toDate('2000-01-01') AS DateTime('UTC')) AS x, toTypeName(x); +SELECT CAST(toDate('2000-01-01') AS DateTime64(5, 'UTC')) AS x, toTypeName(x); diff --git a/tests/queries/0_stateless/01926_union_all_schmak.reference b/tests/queries/0_stateless/01926_union_all_schmak.reference new file mode 100644 index 00000000000..17d07eb79ef --- /dev/null +++ b/tests/queries/0_stateless/01926_union_all_schmak.reference @@ -0,0 +1,2 @@ +1 2 +3 4 diff --git a/tests/queries/0_stateless/01926_union_all_schmak.sql b/tests/queries/0_stateless/01926_union_all_schmak.sql new file mode 100644 index 00000000000..feab81ccac2 --- /dev/null +++ b/tests/queries/0_stateless/01926_union_all_schmak.sql @@ -0,0 +1,8 @@ +SELECT * FROM ( + SELECT 1 AS a, 2 AS b FROM system.one + JOIN system.one USING dummy + UNION ALL + SELECT 3 AS a, 4 AS b FROM system.one +) +WHERE a != 10 +ORDER BY a, b; diff --git a/tests/queries/0_stateless/01931_storage_merge_no_columns.reference b/tests/queries/0_stateless/01931_storage_merge_no_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01931_storage_merge_no_columns.sql b/tests/queries/0_stateless/01931_storage_merge_no_columns.sql new file mode 100644 index 00000000000..787316e299c --- /dev/null +++ b/tests/queries/0_stateless/01931_storage_merge_no_columns.sql @@ -0,0 +1,4 @@ +drop table if exists data; +create table data (key Int) engine=MergeTree() order by key; +select 1 from merge(currentDatabase(), '^data$') prewhere _table in (NULL); +drop table data; diff --git a/tests/queries/0_stateless/01932_alter_index_with_order.reference b/tests/queries/0_stateless/01932_alter_index_with_order.reference new file mode 100644 index 00000000000..07e1aab3df9 --- /dev/null +++ 
b/tests/queries/0_stateless/01932_alter_index_with_order.reference @@ -0,0 +1,9 @@ +default alter_index_test index_a set a 1 +default alter_index_test index_b minmax b 1 +default alter_index_test index_c set c 2 +default alter_index_test index_a set a 1 +default alter_index_test index_d set d 1 +default alter_index_test index_b minmax b 1 +default alter_index_test index_c set c 2 +default alter_index_test index_a set a 1 +default alter_index_test index_d set d 1 diff --git a/tests/queries/0_stateless/01932_alter_index_with_order.sql b/tests/queries/0_stateless/01932_alter_index_with_order.sql new file mode 100644 index 00000000000..0f2953b53f9 --- /dev/null +++ b/tests/queries/0_stateless/01932_alter_index_with_order.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS alter_index_test; + +CREATE TABLE alter_index_test ( + a UInt32, + b Date, + c UInt32, + d UInt32, + INDEX index_a a TYPE set(0) GRANULARITY 1 +) +ENGINE = MergeTree() +ORDER BY tuple(); + +SELECT * FROM system.data_skipping_indices WHERE table = 'alter_index_test' AND database = currentDatabase(); + +ALTER TABLE alter_index_test ADD INDEX index_b b type minmax granularity 1 FIRST; + +ALTER TABLE alter_index_test ADD INDEX index_c c type set(0) granularity 2 AFTER index_b; + +ALTER TABLE alter_index_test ADD INDEX index_d d type set(0) granularity 1; + +SELECT * FROM system.data_skipping_indices WHERE table = 'alter_index_test' AND database = currentDatabase(); + +DETACH TABLE alter_index_test; +ATTACH TABLE alter_index_test; + +SELECT * FROM system.data_skipping_indices WHERE table = 'alter_index_test' AND database = currentDatabase(); + +DROP TABLE IF EXISTS alter_index_test; diff --git a/tests/queries/0_stateless/01932_null_valid_identifier.reference b/tests/queries/0_stateless/01932_null_valid_identifier.reference new file mode 100644 index 00000000000..8600160f48c --- /dev/null +++ b/tests/queries/0_stateless/01932_null_valid_identifier.reference @@ -0,0 +1,3 @@ +1 +1 +1 \N diff --git a/tests/queries/0_stateless/01932_null_valid_identifier.sql b/tests/queries/0_stateless/01932_null_valid_identifier.sql new file mode 100644 index 00000000000..31f1a771675 --- /dev/null +++ b/tests/queries/0_stateless/01932_null_valid_identifier.sql @@ -0,0 +1,3 @@ +SELECT `null` FROM remote('127.0.0.2', view(SELECT 1 AS `null`)); +SELECT `NULL` FROM remote('127.0.0.2', view(SELECT 1 AS `NULL`)); +SELECT `nULl`, null FROM remote('127.0.0.2', view(SELECT 1 AS `nULl`)); diff --git a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect new file mode 100755 index 00000000000..890d024847f --- /dev/null +++ b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect @@ -0,0 +1,33 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 60 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} +set basedir [file dirname $argv0] + +exec bash -c "echo select 1 > $argv0.txt" +exec bash -c "echo select 1 >> $argv0.txt" +exec bash -c "echo select 1 >> $argv0.txt" + +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$argv0.txt" +expect "The history file ($argv0.txt) is in old format. 3 lines, 1 unique lines." 
+expect ":) " +send -- "\4" +expect eof + +spawn bash -c "wc -l $argv0.txt" +# The following lines are expected: +# +# ### YYYY-MM-DD HH:MM:SS.SSS +# select 1 +# +expect "2" +expect eof + +exec bash -c "rm $argv0.txt" diff --git a/tests/queries/0_stateless/01933_client_replxx_convert_history.reference b/tests/queries/0_stateless/01933_client_replxx_convert_history.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01933_invalid_date.reference b/tests/queries/0_stateless/01933_invalid_date.reference new file mode 100644 index 00000000000..829e7e8c420 --- /dev/null +++ b/tests/queries/0_stateless/01933_invalid_date.reference @@ -0,0 +1 @@ +2019-07-08 diff --git a/tests/queries/0_stateless/01933_invalid_date.sql b/tests/queries/0_stateless/01933_invalid_date.sql new file mode 100644 index 00000000000..aac09c99e60 --- /dev/null +++ b/tests/queries/0_stateless/01933_invalid_date.sql @@ -0,0 +1,10 @@ +SELECT toDate('07-08-2019'); -- { serverError 6 } +SELECT toDate('2019-0708'); -- { serverError 38 } +SELECT toDate('201907-08'); -- { serverError 38 } +SELECT toDate('2019^7^8'); + +CREATE TEMPORARY TABLE test (d Date); +INSERT INTO test VALUES ('2018-01-01'); + +SELECT * FROM test WHERE d >= '07-08-2019'; -- { serverError 53 } +SELECT * FROM test WHERE d >= '2019-07-08'; diff --git a/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference new file mode 100644 index 00000000000..61be3e78ae7 --- /dev/null +++ b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference @@ -0,0 +1,2 @@ +[0,1,2,3,4] +[0,1,2,3,4] diff --git a/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql new file mode 100644 index 00000000000..3ab969ca256 --- /dev/null +++ b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql @@ -0,0 +1,11 @@ +SELECT groupArray(2 + 3)(number) FROM numbers(10); +SELECT groupArray('5'::UInt8)(number) FROM numbers(10); + +SELECT groupArray()(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(NULL)(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(NULL + NULL)(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray([])(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(throwIf(1))(number) FROM numbers(10); -- { serverError 395 } + +-- Not the best error message, can be improved. +SELECT groupArray(number)(number) FROM numbers(10); -- { serverError 47 } diff --git a/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh new file mode 100755 index 00000000000..bbc24af1214 --- /dev/null +++ b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS -XPOST "${CLICKHOUSE_URL}¶m_lim=2" --data-binary 'select length(topKArray({lim:UInt32})([1,1,2,3,4,5,6,7,7,7]))' diff --git a/tests/queries/0_stateless/01936_empty_function_support_uuid.reference b/tests/queries/0_stateless/01936_empty_function_support_uuid.reference new file mode 100644 index 00000000000..30373074c1f --- /dev/null +++ b/tests/queries/0_stateless/01936_empty_function_support_uuid.reference @@ -0,0 +1,4 @@ +1 +0 +1 2 +2 1 diff --git a/tests/queries/0_stateless/01936_empty_function_support_uuid.sql b/tests/queries/0_stateless/01936_empty_function_support_uuid.sql new file mode 100644 index 00000000000..c67f38b776a --- /dev/null +++ b/tests/queries/0_stateless/01936_empty_function_support_uuid.sql @@ -0,0 +1,35 @@ +SELECT empty(toUUID('00000000-0000-0000-0000-000000000000')); +SELECT notEmpty(toUUID('00000000-0000-0000-0000-000000000000')); +SELECT uniqIf(uuid, empty(uuid)), uniqIf(uuid, notEmpty(uuid)) +FROM +( + SELECT toUUID('00000000-0000-0000-0000-000000000002') AS uuid + UNION ALL + SELECT toUUID('00000000-0000-0000-0000-000000000000') AS uuid + UNION ALL + SELECT toUUID('00000000-0000-0000-0000-000000000001') AS uuid +); + +DROP TABLE IF EXISTS users; +DROP TABLE IF EXISTS orders; + +CREATE TABLE users (user_id UUID) ENGINE = Memory; +CREATE TABLE orders (order_id UUID, user_id UUID) ENGINE = Memory; + +INSERT INTO users VALUES ('00000000-0000-0000-0000-000000000001'); +INSERT INTO users VALUES ('00000000-0000-0000-0000-000000000002'); +INSERT INTO orders VALUES ('00000000-0000-0000-0000-000000000003', '00000000-0000-0000-0000-000000000001'); + +SELECT + uniq(user_id) AS users, + uniqIf(order_id, notEmpty(order_id)) AS orders +FROM +( + SELECT * FROM users +) t1 ALL LEFT JOIN ( + SELECT * FROM orders +) t2 USING (user_id); + +DROP TABLE users; +DROP TABLE orders; + diff --git a/tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference new file mode 100644 index 00000000000..f9b4a3157f7 --- /dev/null +++ b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference @@ -0,0 +1,4 @@ +[nan] +[nan] +[nan] +[nan] diff --git a/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql new file mode 100644 index 00000000000..81ac6224268 --- /dev/null +++ b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql @@ -0,0 +1,9 @@ +set aggregate_functions_null_for_empty=0; + +SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0); +SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10); + +set aggregate_functions_null_for_empty=1; + +SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0); +SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10); diff --git a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.reference b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.reference new file mode 100644 index 00000000000..bbf76e61257 --- /dev/null +++ b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.reference @@ -0,0 +1 @@ +still alive diff --git a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql new file mode 100644 index 00000000000..d2ca771edc5 --- /dev/null +++ 
b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql @@ -0,0 +1,7 @@ +SELECT dictGet(t.nest.a, concat(currentDatabase(), '.dict.dict'), 's', number) FROM numbers(5); -- { serverError 47 } + +SELECT dictGetFloat64(t.b.s, 'database_for_dict.dict1', dictGetFloat64('Ta\0', toUInt64('databas\0_for_dict.dict1databas\0_for_dict.dict1', dictGetFloat64('', '', toUInt64(1048577), toDate(NULL)), NULL), toDate(dictGetFloat64(257, 'database_for_dict.dict1database_for_dict.dict1', '', toUInt64(NULL), 2, toDate(NULL)), '2019-05-2\0')), NULL, toUInt64(dictGetFloat64('', '', toUInt64(-9223372036854775808), toDate(NULL)), NULL)); -- { serverError 47 } + +SELECT NULL AND (2147483648 AND NULL) AND -2147483647, toUUID(((1048576 AND NULL) AND (2147483647 AND 257 AND NULL AND -2147483649) AND NULL) IN (test_01103.t1_distr.id), '00000000-e1fe-11e\0-bb8f\0853d60c00749'), stringToH3('89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff'); -- { serverError 47 } + +SELECT 'still alive'; diff --git a/tests/queries/0_stateless/01937_nested_chinese.reference b/tests/queries/0_stateless/01937_nested_chinese.reference new file mode 100644 index 00000000000..54b6175d7fc --- /dev/null +++ b/tests/queries/0_stateless/01937_nested_chinese.reference @@ -0,0 +1,12 @@ +id String +products.产品 Array(Array(String)) +products.销量 Array(Array(Int32)) +id String +products.产品 Array(Array(String)) +products.销量 Array(Array(Int32)) +id String +products.产品 Array(String) +products.销量 Array(Int32) +p.产品 Array(String) +p.销量 Array(Int32) +0 diff --git a/tests/queries/0_stateless/01937_nested_chinese.sql b/tests/queries/0_stateless/01937_nested_chinese.sql new file mode 100644 index 00000000000..94c6598480e --- /dev/null +++ b/tests/queries/0_stateless/01937_nested_chinese.sql @@ -0,0 +1,8 @@ +CREATE TEMPORARY TABLE test (`id` String, `products` Nested (`产品` Array(String), `销量` Array(Int32))); + +DESCRIBE test; +DESCRIBE (SELECT * FROM test); +DESCRIBE (SELECT * FROM test ARRAY JOIN products); +DESCRIBE (SELECT p.`产品`, p.`销量` FROM test ARRAY JOIN products AS p); +SELECT * FROM test ARRAY JOIN products; +SELECT count() FROM (SELECT * FROM test ARRAY JOIN products); diff --git a/tests/queries/0_stateless/01938_joins_identifiers.reference b/tests/queries/0_stateless/01938_joins_identifiers.reference new file mode 100644 index 00000000000..4ce2f5c2505 --- /dev/null +++ b/tests/queries/0_stateless/01938_joins_identifiers.reference @@ -0,0 +1 @@ +0 0 1 diff --git a/tests/queries/0_stateless/01938_joins_identifiers.sql b/tests/queries/0_stateless/01938_joins_identifiers.sql new file mode 100644 index 00000000000..b518080b116 --- /dev/null +++ b/tests/queries/0_stateless/01938_joins_identifiers.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS "/t0"; +DROP TABLE IF EXISTS "/t1"; + +create table "/t0" (a Int64, b Int64) engine = MergeTree() partition by a order by a; +create table "/t1" (a Int64, b Int64) engine = MergeTree() partition by a order by a; + +insert into "/t0" values (0, 0); +insert into "/t1" values (0, 1); + +select * from "/t0" join "/t1" using a; + +DROP TABLE "/t0"; +DROP TABLE "/t1"; diff --git a/tests/queries/0_stateless/01939_type_map_json.reference b/tests/queries/0_stateless/01939_type_map_json.reference new file mode 100644 index 00000000000..9b831c29608 --- /dev/null +++ b/tests/queries/0_stateless/01939_type_map_json.reference @@ -0,0 +1,8 @@ +{"m":{"1":2,"3":4}} +{1:2,3:4} {"1":2,"3":4} 1 +{"m":{"key1":"1","key2":"2"}} +{'key1':1,'key2':2} 
{"key1":"1","key2":"2"} 1 +{"m":{"key1":1,"key2":2}} +{'key1':1,'key2':2} {"key1":1,"key2":2} 1 +{"m1":{"k1":"1","k2":"2"},"m2":{"1":2,"2":3}} +{"m1":{"k1":1,"k2":2},"m2":{"1":2,"2":3}} diff --git a/tests/queries/0_stateless/01939_type_map_json.sql b/tests/queries/0_stateless/01939_type_map_json.sql new file mode 100644 index 00000000000..4ad25f3c073 --- /dev/null +++ b/tests/queries/0_stateless/01939_type_map_json.sql @@ -0,0 +1,19 @@ +SELECT map(1, 2, 3, 4) AS m FORMAT JSONEachRow; +SELECT map(1, 2, 3, 4) AS m, toJSONString(m) AS s, isValidJSON(s); + +SELECT map('key1', number, 'key2', number * 2) AS m FROM numbers(1, 1) FORMAT JSONEachRow; +SELECT map('key1', number, 'key2', number * 2) AS m, toJSONString(m) AS s, isValidJSON(s) FROM numbers(1, 1); + +SELECT map('key1', number, 'key2', number * 2) AS m FROM numbers(1, 1) + FORMAT JSONEachRow + SETTINGS output_format_json_quote_64bit_integers = 0; + +SELECT map('key1', number, 'key2', number * 2) AS m, toJSONString(m) AS s, isValidJSON(s) FROM numbers(1, 1) + SETTINGS output_format_json_quote_64bit_integers = 0; + +CREATE TEMPORARY TABLE map_json (m1 Map(String, UInt64), m2 Map(UInt32, UInt32)); + +INSERT INTO map_json FORMAT JSONEachRow {"m1" : {"k1" : 1, "k2" : 2}, "m2" : {"1" : 2, "2" : 3}}; + +SELECT m1, m2 FROM map_json FORMAT JSONEachRow; +SELECT m1, m2 FROM map_json FORMAT JSONEachRow SETTINGS output_format_json_quote_64bit_integers = 0; diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 0f3861c0bbe..838a2da9aff 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -92,6 +92,7 @@ 01129_dict_get_join_lose_constness 01138_join_on_distributed_and_tmp 01153_attach_mv_uuid +01155_rename_move_materialized_view 01191_rename_dictionary 01200_mutations_memory_consumption 01211_optimize_skip_unused_shards_type_mismatch @@ -248,3 +249,8 @@ 01824_prefer_global_in_and_join 01576_alias_column_rewrite 01924_argmax_bitmap_state +01914_exchange_dictionaries +01923_different_expression_name_alias +01932_null_valid_identifier +00918_json_functions +01889_sql_json_functions diff --git a/tests/queries/1_stateful/00165_jit_aggregate_functions.reference b/tests/queries/1_stateful/00165_jit_aggregate_functions.reference new file mode 100644 index 00000000000..2d94ad190ca --- /dev/null +++ b/tests/queries/1_stateful/00165_jit_aggregate_functions.reference @@ -0,0 +1,128 @@ +Aggregation using JIT compilation +Simple functions +1704509 4611700827100483880 9223360787015464643 10441337359398154812 19954243669348.844 9648741.579254271 523264 +732797 4611701940806302259 9223355550934604746 977192643464016658 2054229034942.3723 51998323.94457991 475698 +598875 4611701407242345792 9223362250391155632 9312163881623734456 27615161624211.875 12261797.824844675 337212 +792887 4611699550286611812 9223290551912005343 6930300520201292824 27479710385933.586 53095331.60360441 252197 +3807842 4611710821592843606 9223326163906184987 16710274896338005145 85240848090850.69 22373416.533275086 196036 +25703952 4611709443519524003 9223353913449113943 9946868158853570839 67568783303242.086 3154349.826950714 147211 +716829 4611852156092872082 9223361623076951140 15381015774917924786 170693446547158.72 201431892.4773785 90109 +59183 4611730685242027332 9223354909338698162 8078812522502896568 94622946187035.42 1425270865.0901496 85379 +33010362 4611704682869732882 9223268545373999677 2064452191838585926 26532987929602.555 3695122.4062526934 77807 +800784 
4611752907938305166 9223340418389788041 18082918611792817587 233352070043266.62 36535786.81446395 77492 +20810645 4611712185532639162 9223218900001937412 4996531385439292694 68246505203164.63 6316535.831023813 73213 +25843850 4611690025407720929 9223346023778617822 12755881190906812868 185015319325648.16 9962165.34831339 68945 +23447120 4611796031755620254 9223329309291309758 17231649548755339966 255019232629204.38 7937191.271698021 67570 +14739804 4611692230555590277 9223313509005166531 2458378896777063244 38308020331864.36 14590240.469105456 64174 +32077710 4611884228437061959 9223352444952988904 12965822147651192908 214467085941034.7 7257521.096258734 60456 +22446879 4611846229717089436 9223124373140579096 13530160492087688838 231724477077663.4 4737362.521046629 58389 +170282 4611833225706935900 9223371583739401906 8076893424988479310 141657635880324.8 1613795518.1065989 57017 +11482817 4611708000353743073 9223337838355779113 14841435427430843458 283531099960470.8 9938452.835998287 52345 +63469 4611695097019173921 9223353530156141191 6296784708578574520 120762239817777.88 579655378.4603049 52142 +29103473 4611744585914335132 9223333530281362537 5908285283932344933 123712996438970.34 867841.595541967 47758 +Simple functions with non compilable function +1704509 4611700827100483880 9223360787015464643 10441337359398154812 4611686018427387904 19954243669348.844 9648741.579254271 523264 +732797 4611701940806302259 9223355550934604746 977192643464016658 4611686018427387904 2054229034942.3723 51998323.94457991 475698 +598875 4611701407242345792 9223362250391155632 9312163881623734456 4611686018427387904 27615161624211.875 12261797.824844675 337212 +792887 4611699550286611812 9223290551912005343 6930300520201292824 4611686018427387904 27479710385933.586 53095331.60360441 252197 +3807842 4611710821592843606 9223326163906184987 16710274896338005145 4611686018427387904 85240848090850.69 22373416.533275086 196036 +25703952 4611709443519524003 9223353913449113943 9946868158853570839 4611686018427387904 67568783303242.086 3154349.826950714 147211 +716829 4611852156092872082 9223361623076951140 15381015774917924786 4611686018427387904 170693446547158.72 201431892.4773785 90109 +59183 4611730685242027332 9223354909338698162 8078812522502896568 4611686018427387904 94622946187035.42 1425270865.0901496 85379 +33010362 4611704682869732882 9223268545373999677 2064452191838585926 4611686018427387904 26532987929602.555 3695122.4062526934 77807 +800784 4611752907938305166 9223340418389788041 18082918611792817587 4611686018427387904 233352070043266.62 36535786.81446395 77492 +20810645 4611712185532639162 9223218900001937412 4996531385439292694 4611686018427387904 68246505203164.63 6316535.831023813 73213 +25843850 4611690025407720929 9223346023778617822 12755881190906812868 4611686018427387904 185015319325648.16 9962165.34831339 68945 +23447120 4611796031755620254 9223329309291309758 17231649548755339966 4611686018427387904 255019232629204.38 7937191.271698021 67570 +14739804 4611692230555590277 9223313509005166531 2458378896777063244 4611686018427387904 38308020331864.36 14590240.469105456 64174 +32077710 4611884228437061959 9223352444952988904 12965822147651192908 4611686018427387904 214467085941034.7 7257521.096258734 60456 +22446879 4611846229717089436 9223124373140579096 13530160492087688838 4611686018427387904 231724477077663.4 4737362.521046629 58389 +170282 4611833225706935900 9223371583739401906 8076893424988479310 4611686018427387904 141657635880324.8 1613795518.1065989 57017 +11482817 
4611708000353743073 9223337838355779113 14841435427430843458 4611686018427387904 283531099960470.8 9938452.835998287 52345 +63469 4611695097019173921 9223353530156141191 6296784708578574520 4611686018427387904 120762239817777.88 579655378.4603049 52142 +29103473 4611744585914335132 9223333530281362537 5908285283932344933 4611686018427387904 123712996438970.34 867841.595541967 47758 +Simple functions if combinator +1704509 4611700827100483880 9223310246721229500 16398241567152875142 62618822667209.71 2224726.7626273884 261874 +732797 4611721382223060002 9223355550934604746 16281585268876620522 68472164943295.68 5898616.931652982 237784 +598875 4611701407242345792 9223362250391155632 3577699408183553052 21300140553347.42 53771550.26565126 167966 +792887 4611699550286611812 9223164887726235740 7088177025760385824 56461952267903.89 92835869.96920013 125539 +3807842 4611710821592843606 9223283397553859544 5756765290752687660 58835559208469.4 39794091.419183925 97845 +25703952 4611784761593342388 9223241341744449690 4782279928971192568 65182094768443.91 9276773.708181158 73368 +716829 4611852156092872082 9223361623076951140 8613712481895484190 191445613359755.62 291083243.75407773 44993 +59183 4611730685242027332 9223354909338698162 18369075291092794110 429013599530392 5925109959.715378 42817 +33010362 4611704682869732882 9223092117352620518 9991152681891671022 257099731913529.5 12412830.045471078 38861 +800784 4611752907938305166 9223309994342931384 5251877538869750510 135472890315726.03 53535427.52018088 38767 +20810645 4611712185532639162 9223218900001937412 11803718472901310700 323593455407553 10496765.20741332 36477 +25843850 4611744529689964352 9223346023778617822 127137885677350808 3700925266420.715 18966925.191309396 34353 +23447120 4611796031755620254 9223329309291309758 1841522159325376278 54534534450526.42 6271211.193812284 33768 +14739804 4611762063154116632 9223007205463222212 16302703534054321116 506987919332451.8 6885575.861759452 32156 +32077710 4612033458080771112 9223352444952988904 421072759851674408 13955745719596.793 12220152.393889504 30172 +22446879 4611846229717089436 9223124373140579096 6577134317587565298 224866980668999.47 2482202.163802278 29249 +170282 4611833225706935900 9223371583739401906 15764226366913732386 551447384017691 2515144222.953728 28587 +11482817 4611990575414646848 9223302669582414438 9828522700609834800 378121905921203.2 34845264.2080656 25993 +63469 4612175339998036670 9222961628400798084 17239621485933250238 663164390134376.5 7825349797.6059 25996 +29103473 4611744585914335132 9223035551850347954 12590190375872647672 525927999326314.7 26049107.15514301 23939 +Aggregation without JIT compilation +Simple functions +1704509 4611700827100483880 9223360787015464643 10441337359398154812 19954243669348.844 9648741.579254271 523264 +732797 4611701940806302259 9223355550934604746 977192643464016658 2054229034942.3723 51998323.94457991 475698 +598875 4611701407242345792 9223362250391155632 9312163881623734456 27615161624211.875 12261797.824844675 337212 +792887 4611699550286611812 9223290551912005343 6930300520201292824 27479710385933.586 53095331.60360441 252197 +3807842 4611710821592843606 9223326163906184987 16710274896338005145 85240848090850.69 22373416.533275086 196036 +25703952 4611709443519524003 9223353913449113943 9946868158853570839 67568783303242.086 3154349.826950714 147211 +716829 4611852156092872082 9223361623076951140 15381015774917924786 170693446547158.72 201431892.4773785 90109 +59183 4611730685242027332 9223354909338698162 
8078812522502896568 94622946187035.42 1425270865.0901496 85379 +33010362 4611704682869732882 9223268545373999677 2064452191838585926 26532987929602.555 3695122.4062526934 77807 +800784 4611752907938305166 9223340418389788041 18082918611792817587 233352070043266.62 36535786.81446395 77492 +20810645 4611712185532639162 9223218900001937412 4996531385439292694 68246505203164.63 6316535.831023813 73213 +25843850 4611690025407720929 9223346023778617822 12755881190906812868 185015319325648.16 9962165.34831339 68945 +23447120 4611796031755620254 9223329309291309758 17231649548755339966 255019232629204.38 7937191.271698021 67570 +14739804 4611692230555590277 9223313509005166531 2458378896777063244 38308020331864.36 14590240.469105456 64174 +32077710 4611884228437061959 9223352444952988904 12965822147651192908 214467085941034.7 7257521.096258734 60456 +22446879 4611846229717089436 9223124373140579096 13530160492087688838 231724477077663.4 4737362.521046629 58389 +170282 4611833225706935900 9223371583739401906 8076893424988479310 141657635880324.8 1613795518.1065989 57017 +11482817 4611708000353743073 9223337838355779113 14841435427430843458 283531099960470.8 9938452.835998287 52345 +63469 4611695097019173921 9223353530156141191 6296784708578574520 120762239817777.88 579655378.4603049 52142 +29103473 4611744585914335132 9223333530281362537 5908285283932344933 123712996438970.34 867841.595541967 47758 +Simple functions with non compilable function +1704509 4611700827100483880 9223360787015464643 10441337359398154812 4611686018427387904 19954243669348.844 9648741.579254271 523264 +732797 4611701940806302259 9223355550934604746 977192643464016658 4611686018427387904 2054229034942.3723 51998323.94457991 475698 +598875 4611701407242345792 9223362250391155632 9312163881623734456 4611686018427387904 27615161624211.875 12261797.824844675 337212 +792887 4611699550286611812 9223290551912005343 6930300520201292824 4611686018427387904 27479710385933.586 53095331.60360441 252197 +3807842 4611710821592843606 9223326163906184987 16710274896338005145 4611686018427387904 85240848090850.69 22373416.533275086 196036 +25703952 4611709443519524003 9223353913449113943 9946868158853570839 4611686018427387904 67568783303242.086 3154349.826950714 147211 +716829 4611852156092872082 9223361623076951140 15381015774917924786 4611686018427387904 170693446547158.72 201431892.4773785 90109 +59183 4611730685242027332 9223354909338698162 8078812522502896568 4611686018427387904 94622946187035.42 1425270865.0901496 85379 +33010362 4611704682869732882 9223268545373999677 2064452191838585926 4611686018427387904 26532987929602.555 3695122.4062526934 77807 +800784 4611752907938305166 9223340418389788041 18082918611792817587 4611686018427387904 233352070043266.62 36535786.81446395 77492 +20810645 4611712185532639162 9223218900001937412 4996531385439292694 4611686018427387904 68246505203164.63 6316535.831023813 73213 +25843850 4611690025407720929 9223346023778617822 12755881190906812868 4611686018427387904 185015319325648.16 9962165.34831339 68945 +23447120 4611796031755620254 9223329309291309758 17231649548755339966 4611686018427387904 255019232629204.38 7937191.271698021 67570 +14739804 4611692230555590277 9223313509005166531 2458378896777063244 4611686018427387904 38308020331864.36 14590240.469105456 64174 +32077710 4611884228437061959 9223352444952988904 12965822147651192908 4611686018427387904 214467085941034.7 7257521.096258734 60456 +22446879 4611846229717089436 9223124373140579096 13530160492087688838 4611686018427387904 
231724477077663.4 4737362.521046629 58389 +170282 4611833225706935900 9223371583739401906 8076893424988479310 4611686018427387904 141657635880324.8 1613795518.1065989 57017 +11482817 4611708000353743073 9223337838355779113 14841435427430843458 4611686018427387904 283531099960470.8 9938452.835998287 52345 +63469 4611695097019173921 9223353530156141191 6296784708578574520 4611686018427387904 120762239817777.88 579655378.4603049 52142 +29103473 4611744585914335132 9223333530281362537 5908285283932344933 4611686018427387904 123712996438970.34 867841.595541967 47758 +Simple functions if combinator +1704509 4611700827100483880 9223310246721229500 16398241567152875142 2224726.7626273884 261874 +732797 4611721382223060002 9223355550934604746 16281585268876620522 5898616.931652982 237784 +598875 4611701407242345792 9223362250391155632 3577699408183553052 53771550.26565126 167966 +792887 4611699550286611812 9223164887726235740 7088177025760385824 92835869.96920013 125539 +3807842 4611710821592843606 9223283397553859544 5756765290752687660 39794091.419183925 97845 +25703952 4611784761593342388 9223241341744449690 4782279928971192568 9276773.708181158 73368 +716829 4611852156092872082 9223361623076951140 8613712481895484190 291083243.75407773 44993 +59183 4611730685242027332 9223354909338698162 18369075291092794110 5925109959.715378 42817 +33010362 4611704682869732882 9223092117352620518 9991152681891671022 12412830.045471078 38861 +800784 4611752907938305166 9223309994342931384 5251877538869750510 53535427.52018088 38767 +20810645 4611712185532639162 9223218900001937412 11803718472901310700 10496765.20741332 36477 +25843850 4611744529689964352 9223346023778617822 127137885677350808 18966925.191309396 34353 +23447120 4611796031755620254 9223329309291309758 1841522159325376278 6271211.193812284 33768 +14739804 4611762063154116632 9223007205463222212 16302703534054321116 6885575.861759452 32156 +32077710 4612033458080771112 9223352444952988904 421072759851674408 12220152.393889504 30172 +22446879 4611846229717089436 9223124373140579096 6577134317587565298 2482202.163802278 29249 +170282 4611833225706935900 9223371583739401906 15764226366913732386 2515144222.953728 28587 +11482817 4611990575414646848 9223302669582414438 9828522700609834800 34845264.2080656 25993 +63469 4612175339998036670 9222961628400798084 17239621485933250238 7825349797.6059 25996 +29103473 4611744585914335132 9223035551850347954 12590190375872647672 26049107.15514301 23939 diff --git a/tests/queries/1_stateful/00165_jit_aggregate_functions.sql b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql new file mode 100644 index 00000000000..90917209d1b --- /dev/null +++ b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql @@ -0,0 +1,39 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; + +SELECT 'Aggregation using JIT compilation'; + +SELECT 'Simple functions'; + +SELECT CounterID, min(WatchID), max(WatchID), sum(WatchID), avg(WatchID), avgWeighted(WatchID, CounterID), count(WatchID) FROM test.hits +GROUP BY CounterID ORDER BY count() DESC LIMIT 20; + +SELECT 'Simple functions with non compilable function'; + +SELECT CounterID, min(WatchID), max(WatchID), sum(WatchID), groupBitAnd(WatchID), avg(WatchID), avgWeighted(WatchID, CounterID), count(WatchID) FROM test.hits +GROUP BY CounterID ORDER BY count() DESC LIMIT 20; + +SELECT 'Simple functions if combinator'; + +WITH (WatchID % 2 == 0) AS predicate +SELECT CounterID, minIf(WatchID,predicate), maxIf(WatchID, predicate), 
sumIf(WatchID, predicate), avgIf(WatchID, predicate), avgWeightedIf(WatchID, CounterID, predicate), countIf(WatchID, predicate) FROM test.hits +GROUP BY CounterID ORDER BY count() DESC LIMIT 20; + +SET compile_aggregate_expressions = 0; + +SELECT 'Aggregation without JIT compilation'; + +SELECT 'Simple functions'; + +SELECT CounterID, min(WatchID), max(WatchID), sum(WatchID), avg(WatchID), avgWeighted(WatchID, CounterID), count(WatchID) FROM test.hits +GROUP BY CounterID ORDER BY count() DESC LIMIT 20; + +SELECT 'Simple functions with non compilable function'; +SELECT CounterID, min(WatchID), max(WatchID), sum(WatchID), groupBitAnd(WatchID), avg(WatchID), avgWeighted(WatchID, CounterID), count(WatchID) FROM test.hits +GROUP BY CounterID ORDER BY count() DESC LIMIT 20; + +SELECT 'Simple functions if combinator'; + +WITH (WatchID % 2 == 0) AS predicate +SELECT CounterID, minIf(WatchID,predicate), maxIf(WatchID, predicate), sumIf(WatchID, predicate), avgWeightedIf(WatchID, CounterID, predicate), countIf(WatchID, predicate) FROM test.hits +GROUP BY CounterID ORDER BY count() DESC LIMIT 20; diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index b7a3701c326..e768a773255 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -73,6 +73,8 @@ export CLICKHOUSE_PORT_MYSQL=${CLICKHOUSE_PORT_MYSQL:="9004"} export CLICKHOUSE_PORT_POSTGRESQL=${CLICKHOUSE_PORT_POSTGRESQL:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=postgresql_port 2>/dev/null)} 2>/dev/null export CLICKHOUSE_PORT_POSTGRESQL=${CLICKHOUSE_PORT_POSTGRESQL:="9005"} +export CLICKHOUSE_CLIENT_SECURE=${CLICKHOUSE_CLIENT_SECURE:=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--port=${CLICKHOUSE_PORT_TCP}"'/'"--secure --port=${CLICKHOUSE_PORT_TCP_SECURE}"'/g')} + # Add database and log comment to url params if [ -v CLICKHOUSE_URL_PARAMS ] then diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index e38089230f4..b2f00dcfb87 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -18,7 +18,6 @@ "functions_bad_arguments", /// Too long for TSan "01603_read_with_backoff_bug", /// Too long for TSan "01646_system_restart_replicas_smoke", /// RESTART REPLICAS can acquire too much locks, while only 64 is possible from one thread under TSan - "01641_memory_tracking_insert_optimize", /// INSERT lots of rows is too heavy for TSan "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage ], "address-sanitizer": [ @@ -71,8 +70,7 @@ "hyperscan", "01193_metadata_loading", "01473_event_time_microseconds", - "01396_inactive_replica_cleanup_nodes", - "01641_memory_tracking_insert_optimize" /// INSERT lots of rows is too heavy in debug build + "01396_inactive_replica_cleanup_nodes" ], "unbundled-build": [ "00429", @@ -112,7 +110,8 @@ "00738_lock_for_inner_table", "01153_attach_mv_uuid", /// Sometimes cannot lock file most likely due to concurrent or adjacent tests, but we don't care how it works in Ordinary database. 
- "rocksdb" + "rocksdb", + "01914_exchange_dictionaries" /// Requires Atomic database ], "database-replicated": [ /// Unclassified @@ -182,336 +181,6 @@ "01508_partition_pruning_long", /// bug, shoud be fixed "01482_move_to_prewhere_and_cast" /// bug, shoud be fixed ], - "antlr": [ - "00011_array_join_alias", - "00186_very_long_arrays", - "00233_position_function_sql_comparibilty", - "00417_kill_query", - "00534_functions_bad_arguments12", - "00534_functions_bad_arguments2", - "00534_functions_bad_arguments4", - "00534_functions_bad_arguments9", - "00564_temporary_table_management", - "00600_replace_running_query", - "00626_replace_partition_from_table_zookeeper", - "00652_replicated_mutations_zookeeper", - "00687_top_and_offset", - "00746_sql_fuzzy", - "00763_create_query_as_table_engine_bug", - "00765_sql_compatibility_aliases", - "00825_protobuf_format_array_3dim", - "00825_protobuf_format_array_of_arrays", - "00825_protobuf_format_enum_mapping", - "00825_protobuf_format_map", - "00825_protobuf_format_nested_in_nested", - "00825_protobuf_format_nested_optional", - "00825_protobuf_format_no_length_delimiter", - "00825_protobuf_format_persons", - "00825_protobuf_format_splitted_nested", - "00825_protobuf_format_squares", - "00825_protobuf_format_table_default", - "00826_cross_to_inner_join", - "00834_not_between", - "00855_join_with_array_join", - "00909_kill_not_initialized_query", - "00938_template_input_format", - "00939_limit_by_offset", - "00943_materialize_index", - "00944_clear_index_in_partition", - "00952_input_function", - "00953_constraints_operations", - "00954_client_prepared_statements", - "00956_sensitive_data_masking", - "00969_columns_clause", - "00975_indices_mutation_replicated_zookeeper_long", - "00975_values_list", - "00976_system_stop_ttl_merges", - "00977_int_div", - "00978_table_function_values_alias", - "00980_merge_alter_settings", - "00980_zookeeper_merge_tree_alter_settings", - "00982_array_enumerate_uniq_ranked", - "00984_materialized_view_to_columns", - "00988_constraints_replication_zookeeper", - "00995_order_by_with_fill", - "01001_enums_in_in_section", - "01011_group_uniq_array_memsan", - "01011_test_create_as_skip_indices", - "01014_format_custom_separated", - "01015_attach_part", - "01015_database_bad_tables", - "01017_uniqCombined_memory_usage", - "01018_ddl_dictionaries_concurrent_requrests", /// Cannot parse ATTACH DICTIONARY IF NOT EXISTS - "01019_alter_materialized_view_atomic", - "01019_alter_materialized_view_consistent", - "01019_alter_materialized_view_query", - "01021_tuple_parser", - "01025_array_compact_generic", - "01030_limit_by_with_ties_error", - "01033_quota_dcl", - "01034_with_fill_and_push_down_predicate", - "01035_avg_weighted_long", - "01039_row_policy_dcl", - "01039_test_setting_parse", - "01042_system_reload_dictionary_reloads_completely", - "01045_dictionaries_restrictions", - "01053_ssd_dictionary", - "01055_compact_parts_1", - "01056_create_table_as", - "01066_bit_count", - "01070_materialize_ttl", - "01070_mutations_with_dependencies", - "01073_grant_and_revoke", - "01073_show_tables_not_like", - "01074_partial_revokes", - "01075_allowed_client_hosts", - "01083_expressions_in_engine_arguments", - "01085_regexp_input_format", - "01086_regexp_input_format_skip_unmatched", - "01089_alter_settings_old_format", - "01095_tpch_like_smoke", - "01107_atomic_db_detach_attach", - "01109_exchange_tables", - "01109_sc0rp10_string_hash_map_zero_bytes", - "01110_dictionary_layout_without_arguments", - "01114_database_atomic", - 
"01114_materialize_clear_index_compact_parts", - "01115_join_with_dictionary", - "01117_comma_and_others_join_mix", - "01125_dict_ddl_cannot_add_column", - "01130_in_memory_parts", - "01144_multiple_joins_rewriter_v2_and_lambdas", - "01144_multiword_data_types", - "01145_with_fill_const", - "01149_zookeeper_mutation_stuck_after_replace_partition", - "01150_ddl_guard_rwr", - "01153_attach_mv_uuid", - "01155_old_mutation_parts_to_do", - "01155_rename_move_materialized_view", - "01182_materialized_view_different_structure", - "01185_create_or_replace_table", - "01187_set_profile_as_setting", - "01188_attach_table_from_path", - "01190_full_attach_syntax", - "01191_rename_dictionary", - "01192_rename_database_zookeeper", - "01213_alter_rename_column", - "01232_untuple", - "01244_optimize_distributed_group_by_sharding_key", - "01254_dict_load_after_detach_attach", - "01256_misspell_layout_name_podshumok", - "01257_dictionary_mismatch_types", - "01267_alter_default_key_columns_zookeeper", - "01268_mv_scalars", - "01269_create_with_null", - "01271_show_privileges", - "01272_offset_without_limit", - "01277_alter_rename_column_constraint_zookeeper", - "01278_min_insert_block_size_rows_for_materialized_views", - "01280_min_map_max_map", - "01280_null_in", - "01280_ssd_complex_key_dictionary", - "01280_ttl_where_group_by_negative", - "01280_ttl_where_group_by", - "01280_unicode_whitespaces_lexer", - "01292_create_user", - "01293_create_role", - "01293_pretty_max_value_width", - "01293_show_clusters", - "01293_show_settings", - "01294_create_settings_profile", - "01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long", - "01294_system_distributed_on_cluster", - "01295_create_row_policy", - "01296_create_row_policy_in_current_database", - "01297_create_quota", - "01308_row_policy_and_trivial_count_query", - "01317_no_password_in_command_line", - "01318_map_add_map_subtract", - "01322_any_input_optimize", - "01324_if_transform_strings_to_enum", - "01337_mysql_global_variables", - "01355_alter_column_with_order", - "01355_ilike", - "01373_is_zero_or_null", - "01374_if_nullable_filimonov", - "01378_alter_rename_with_ttl_zookeeper", - "01379_with_fill_several_columns", - "01397_in_bad_arguments", - "01412_mod_float", - "01415_table_function_view", - "01417_freeze_partition_verbose_zookeeper", - "01417_freeze_partition_verbose", - "01418_custom_settings", - "01419_merge_tree_settings_sanity_check", - "01430_modify_sample_by_zookeeper", - "01447_json_strings", - "01449_json_compact_strings", - "01451_detach_drop_part", - "01451_replicated_detach_drop_and_quorum", - "01451_replicated_detach_drop_part", - "01457_create_as_table_function_structure", - "01460_allow_dollar_and_number_in_identifier", - "01463_test_alter_live_view_refresh", - "01465_ttl_recompression", - "01470_columns_transformers", - "01470_columns_transformers2", - "01470_explain", - "01470_show_databases_like", - "01470_test_insert_select_asterisk", - "01475_read_subcolumns_2", - "01475_read_subcolumns_3", - "01475_read_subcolumns_storages", - "01475_read_subcolumns", - "01480_binary_operator_monotonicity", - "01491_nested_multiline_comments", - "01493_table_function_null", - "01495_subqueries_in_with_statement_2", - "01495_subqueries_in_with_statement_3", - "01495_subqueries_in_with_statement", - "01501_clickhouse_client_INSERT_exception", - "01504_compression_multiple_streams", - "01504_rocksdb", - "01506_ttl_same_with_order_by", - "01508_explain_header", - "01508_partition_pruning_long", - 
"01509_check_parallel_quorum_inserts_long", - "01509_dictionary_preallocate", - "01509_parallel_quorum_and_merge_long", - "01515_mv_and_array_join_optimisation_bag", - "01515_with_global_and_with_propagation", - "01516_create_table_primary_key", - "01517_drop_mv_with_inner_table", - "01523_client_local_queries_file_parameter", - "01523_interval_operator_support_string_literal", - "01525_select_with_offset_fetch_clause", - "01526_client_start_and_exit", - "01527_dist_sharding_key_dictGet_reload", - "01529_union_distinct_and_setting_union_default_mode", - "01530_drop_database_atomic_sync", - "01532_execute_merges_on_single_replica", - "01532_primary_key_without_order_by_zookeeper", - "01533_multiple_nested", - "01541_max_memory_usage_for_user_long", - "01551_mergetree_read_in_order_spread", - "01552_dict_fixedstring", - "01554_bloom_filter_index_big_integer_uuid", - "01556_explain_select_with_union_query", - "01561_aggregate_functions_of_key_with_join", - "01562_optimize_monotonous_functions_in_order_by", - "01568_window_functions_distributed", - "01571_window_functions", - "01576_alias_column_rewrite", - "01581_deduplicate_by_columns_local", - "01581_deduplicate_by_columns_replicated", - "01582_any_join_supertype", - "01582_distinct_optimization", - "01590_countSubstrings", - "01591_window_functions", - "01592_long_window_functions1", - "01592_window_functions", - "01593_insert_settings", - "01594_too_low_memory_limits", - "01596_setting_limit_offset", - "01600_log_queries_with_extensive_info", - "01600_quota_by_forwarded_ip", - "01601_detach_permanently", - "01602_show_create_view", - "01603_read_with_backoff_bug", - "01604_explain_ast_of_nonselect_query", - "01605_drop_settings_profile_while_assigned", - "01605_skip_idx_compact_parts", - "01606_git_import", - "01606_merge_from_wide_to_compact", - "01614_with_fill_with_limit", - "01622_multiple_ttls", - "01630_disallow_floating_point_as_partition_key", - "01632_max_partitions_to_read", - "01632_select_all_syntax", - "01638_div_mod_ambiguities", - "01642_if_nullable_regression", - "01643_system_suspend", - "01646_system_restart_replicas_smoke", - "01650_drop_part_and_deduplication_zookeeper", - "01650_fetch_patition_with_macro_in_zk_path", - "01651_lc_insert_tiny_log", - "01655_plan_optimizations", - "01656_test_query_log_factories_info", - "01658_values_ubsan", - "01663_quantile_weighted_overflow", - "01666_blns", - "01666_merge_tree_max_query_limit", - "01666_merge_tree_max_query_limit", - "01674_clickhouse_client_query_param_cte", - "01674_unicode_asan", - "01676_dictget_in_default_expression", - "01680_predicate_pushdown_union_distinct_subquery", - "01681_cache_dictionary_simple_key", - "01682_cache_dictionary_complex_key", - "01683_flat_dictionary", - "01684_ssd_cache_dictionary_simple_key", - "01685_ssd_cache_dictionary_complex_key", - "01686_rocksdb", - "01699_timezoneOffset", - "01702_bitmap_native_integers", - "01702_system_query_log", - "01710_projections", - "01711_cte_subquery_fix", - "01712_no_adaptive_granularity_vertical_merge", - "01715_table_function_view_fix", - "01720_dictionary_create_source_with_functions", - "01720_union_distinct_with_limit", - "01721_dictionary_decimal_p_s", - "01721_engine_file_truncate_on_insert", - "01730_distributed_group_by_no_merge_order_by_long", - "01732_explain_syntax_union_query", - "01732_union_and_union_all", - "01747_join_view_filter_dictionary", - "01748_dictionary_table_dot", - "01753_direct_dictionary_simple_key", - "01754_direct_dictionary_complex_key", - 
"01756_optimize_skip_unused_shards_rewrite_in", - "01757_optimize_skip_unused_shards_limit", - "01759_dictionary_unique_attribute_names", - "01760_polygon_dictionaries", - "01760_system_dictionaries", - "01763_long_ttl_group_by", - "01765_hashed_dictionary_simple_key", - "01766_hashed_dictionary_complex_key", - "01774_tuple_null_in", - "01778_hierarchical_dictionaries", - "01778_mmap_cache_infra", - "01780_clickhouse_dictionary_source_loop", - "01781_merge_tree_deduplication", - "01785_dictionary_element_count", - "01786_explain_merge_tree", - "01801_distinct_group_by_shard", - "01802_rank_corr_mann_whitney_over_window", - "01802_test_postgresql_protocol_with_row_policy", /// It cannot parse DROP ROW POLICY - "01818_move_partition_simple", - "01821_table_comment", - "01823_explain_json", - "01837_database_memory_ddl_dictionaries", - "01839_join_to_subqueries_rewriter_columns_matcher", - "01840_tupleElement_formatting_fuzzer", - "01851_fix_row_policy_empty_result", - "01851_hedged_connections_external_tables", - "01852_cast_operator_bad_cases", - "01852_cast_operator", - "01861_explain_pipeline", - "01868_order_by_fill_with_datetime64", - "01870_buffer_flush", - "01871_merge_tree_compile_expressions", - "01872_functions_to_subcolumns", - "01881_union_header_mismatch_bug", - "01883_subcolumns_distributed", - "01889_postgresql_protocol_null_fields", - "01889_check_row_policy_defined_using_user_function", - "01892_setting_limit_offset_distributed", - "01902_dictionary_array_type", - "01903_ssd_cache_dictionary_array_type", - "01905_to_json_string" - ], "parallel": [ /// Pessimistic list of tests which work badly in parallel. @@ -836,6 +505,13 @@ "01870_modulo_partition_key", "01870_buffer_flush", // creates database "01889_postgresql_protocol_null_fields", - "01889_check_row_policy_defined_using_user_function" + "01889_check_row_policy_defined_using_user_function", + "01921_concurrent_ttl_and_normal_merges_zookeeper_long", // heavy test, better to run sequentially + "01913_replace_dictionary", + "01914_exchange_dictionaries", + "01915_create_or_replace_dictionary", + "01925_test_storage_merge_aliases", + "01933_client_replxx_convert_history", /// Uses non unique history file + "01902_table_function_merge_db_repr" ] } diff --git a/tests/testflows/extended_precision_data_types/snapshots/common.py.tests.snapshot b/tests/testflows/extended_precision_data_types/snapshots/common.py.tests.snapshot index d0b7b3423d8..c8b57ffdd1c 100644 --- a/tests/testflows/extended_precision_data_types/snapshots/common.py.tests.snapshot +++ b/tests/testflows/extended_precision_data_types/snapshots/common.py.tests.snapshot @@ -653,7 +653,7 @@ a """ Inline___Int128___arrayReduceInRanges__sum_____1__5____ = r""" -arrayReduceInRanges(\'sum\', array(tuple(1, 5)), array(toInt128(\'3\'), toInt128(\'2\'), toInt128(\'1\'))) +arrayReduceInRanges(\'sum\', array((1, 5)), array(toInt128(\'3\'), toInt128(\'2\'), toInt128(\'1\'))) [6] """ @@ -1181,7 +1181,7 @@ a """ Inline___Int256___arrayReduceInRanges__sum_____1__5____ = r""" -arrayReduceInRanges(\'sum\', array(tuple(1, 5)), array(toInt256(\'3\'), toInt256(\'2\'), toInt256(\'1\'))) +arrayReduceInRanges(\'sum\', array((1, 5)), array(toInt256(\'3\'), toInt256(\'2\'), toInt256(\'1\'))) [6] """ @@ -1709,7 +1709,7 @@ a """ Inline___UInt128___arrayReduceInRanges__sum_____1__5____ = r""" -arrayReduceInRanges(\'sum\', array(tuple(1, 5)), array(toUInt128(\'3\'), toUInt128(\'2\'), toUInt128(\'1\'))) +arrayReduceInRanges(\'sum\', array((1, 5)), array(toUInt128(\'3\'), toUInt128(\'2\'), 
toUInt128(\'1\'))) [6] """ @@ -2237,7 +2237,7 @@ a """ Inline___UInt256___arrayReduceInRanges__sum_____1__5____ = r""" -arrayReduceInRanges(\'sum\', array(tuple(1, 5)), array(toUInt256(\'3\'), toUInt256(\'2\'), toUInt256(\'1\'))) +arrayReduceInRanges(\'sum\', array((1, 5)), array(toUInt256(\'3\'), toUInt256(\'2\'), toUInt256(\'1\'))) [6] """ @@ -2765,7 +2765,7 @@ a """ Inline___Decimal256_0____arrayReduceInRanges__sum_____1__5____ = r""" -arrayReduceInRanges(\'sum\', array(tuple(1, 5)), array(toDecimal256(\'3\', 0), toDecimal256(\'2\', 0), toDecimal256(\'1\', 0))) +arrayReduceInRanges(\'sum\', array((1, 5)), array(toDecimal256(\'3\', 0), toDecimal256(\'2\', 0), toDecimal256(\'1\', 0))) [6] """ diff --git a/tests/testflows/extended_precision_data_types/tests/arithmetic.py b/tests/testflows/extended_precision_data_types/tests/arithmetic.py index 49d7ee1fcb3..c57f3d7d8e1 100644 --- a/tests/testflows/extended_precision_data_types/tests/arithmetic.py +++ b/tests/testflows/extended_precision_data_types/tests/arithmetic.py @@ -141,7 +141,7 @@ def inline_check_dec(self, arithmetic_func, expected_result, node=None): if node is None: node = self.context.node - if arithmetic_func is 'negate' or arithmetic_func is 'abs': + if arithmetic_func in ['negate','abs']: with When(f"I check {arithmetic_func} with toDecimal256"): output = node.query(f"SELECT {arithmetic_func}(toDecimal256(1,0))").output diff --git a/tests/testflows/extended_precision_data_types/tests/rounding.py b/tests/testflows/extended_precision_data_types/tests/rounding.py index f01d6898b32..e32f4e941d3 100644 --- a/tests/testflows/extended_precision_data_types/tests/rounding.py +++ b/tests/testflows/extended_precision_data_types/tests/rounding.py @@ -25,7 +25,7 @@ def round_int_inline(self, func, expected_result, supported, int_type, min, max, if node is None: node = self.context.node - if func is 'roundDown': + if func == 'roundDown': with When(f"I check roundDown with {int_type}"): node.query(f"SELECT roundDown(to{int_type}(1), [0,2]), roundDown(to{int_type}(\'{max}\'), [0,2]), roundDown(to{int_type}(\'{min}\'), [0,2])", @@ -62,7 +62,7 @@ def round_int_table(self, func, expected_result, supported, int_type, min, max, with Given("I have a table"): table(name = table_name, data_type = int_type) - if func is 'roundDown': + if func == 'roundDown': for value in [1,max,min]: @@ -101,7 +101,7 @@ def round_dec_inline(self, func, expected_result, supported, node=None): if node is None: node = self.context.node - if func is 'roundDown': + if func == 'roundDown': with When(f"I check roundDown with Decimal256"): node.query(f"""SELECT roundDown(toDecimal256(1,0), [toDecimal256(0,0),toDecimal256(2,0)]), @@ -142,7 +142,7 @@ def round_dec_table(self, func, expected_result, supported, node=None): with Given("I have a table"): table(name = table_name, data_type = 'Decimal256(0)') - if func is 'roundDown': + if func == 'roundDown': for value in [1, max, min]: diff --git a/tests/testflows/regression.py b/tests/testflows/regression.py index c2e143a4b1c..eef6dadb4bb 100755 --- a/tests/testflows/regression.py +++ b/tests/testflows/regression.py @@ -23,14 +23,14 @@ def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): with Pool(8) as pool: try: run_scenario(pool, tasks, Feature(test=load("example.regression", "regression")), args) - run_scenario(pool, tasks, Feature(test=load("ldap.regression", "regression")), args) - run_scenario(pool, tasks, Feature(test=load("rbac.regression", "regression")), args) - run_scenario(pool, tasks, 
Feature(test=load("aes_encryption.regression", "regression")), args) - run_scenario(pool, tasks, Feature(test=load("map_type.regression", "regression")), args) - run_scenario(pool, tasks, Feature(test=load("window_functions.regression", "regression")), args) - run_scenario(pool, tasks, Feature(test=load("datetime64_extended_range.regression", "regression")), args) + #run_scenario(pool, tasks, Feature(test=load("ldap.regression", "regression")), args) + #run_scenario(pool, tasks, Feature(test=load("rbac.regression", "regression")), args) + #run_scenario(pool, tasks, Feature(test=load("aes_encryption.regression", "regression")), args) + #run_scenario(pool, tasks, Feature(test=load("map_type.regression", "regression")), args) + #run_scenario(pool, tasks, Feature(test=load("window_functions.regression", "regression")), args) + #run_scenario(pool, tasks, Feature(test=load("datetime64_extended_range.regression", "regression")), args) #run_scenario(pool, tasks, Feature(test=load("kerberos.regression", "regression")), args) - run_scenario(pool, tasks, Feature(test=load("extended_precision_data_types.regression", "regression")), args) + #run_scenario(pool, tasks, Feature(test=load("extended_precision_data_types.regression", "regression")), args) finally: join(tasks) diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index bd6453e406b..a6bf2843e9a 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -12,7 +12,6 @@ include(../cmake/limit_jobs.cmake) # Utils used in package add_subdirectory (config-processor) add_subdirectory (report) -add_subdirectory (syntax-analyzer) # Not used in package if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) diff --git a/src/Parsers/New/ClickHouseLexer.g4 b/utils/antlr/ClickHouseLexer.g4 similarity index 100% rename from src/Parsers/New/ClickHouseLexer.g4 rename to utils/antlr/ClickHouseLexer.g4 diff --git a/src/Parsers/New/ClickHouseParser.g4 b/utils/antlr/ClickHouseParser.g4 similarity index 100% rename from src/Parsers/New/ClickHouseParser.g4 rename to utils/antlr/ClickHouseParser.g4 diff --git a/src/Parsers/New/README.md b/utils/antlr/README.md similarity index 100% rename from src/Parsers/New/README.md rename to utils/antlr/README.md diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index f7111cc28e4..541dea23698 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,6 +1,9 @@ +v21.6.6.51-stable 2021-07-02 v21.6.5.37-stable 2021-06-19 v21.6.4.26-stable 2021-06-11 v21.6.3.14-stable 2021-06-04 +v21.5.8.21-stable 2021-07-02 +v21.5.7.9-stable 2021-06-22 v21.5.6.6-stable 2021-05-29 v21.5.5.12-stable 2021-05-20 v21.4.7.3-stable 2021-05-19 @@ -8,6 +11,8 @@ v21.4.6.55-stable 2021-04-30 v21.4.5.46-stable 2021-04-24 v21.4.4.30-stable 2021-04-16 v21.4.3.21-stable 2021-04-12 +v21.3.14.1-lts 2021-07-01 +v21.3.13.9-lts 2021-06-22 v21.3.12.2-lts 2021-05-25 v21.3.11.5-lts 2021-05-14 v21.3.10.1-lts 2021-05-09 diff --git a/utils/syntax-analyzer/CMakeLists.txt b/utils/syntax-analyzer/CMakeLists.txt deleted file mode 100644 index 77068f528be..00000000000 --- a/utils/syntax-analyzer/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_executable(syntax-analyzer main.cpp) - -target_link_libraries(syntax-analyzer PRIVATE clickhouse_parsers_new dbms) diff --git a/utils/syntax-analyzer/main.cpp b/utils/syntax-analyzer/main.cpp deleted file mode 100644 index cf264160407..00000000000 --- a/utils/syntax-analyzer/main.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include -#include -#include 
-#include - -#include - -#include -#include -#include - -using namespace DB; - -int main(int argc, const char **) - { - if (argc > 1) - { - std::cerr << "No arguments needed. Reads query from input until EOF" << std::endl; - return 1; - } - - std::istreambuf_iterator<char> begin(std::cin), end; - std::string query(begin, end); - - { - std::vector<std::string> queries; - splitMultipartQuery(query, queries, 10000000, 10000); - for (const auto & q : queries) - { - std::cout << std::endl << "Query:" << std::endl; - std::cout << q << std::endl; - - ParserQuery parser(q.data() + q.size()); - ASTPtr orig_ast = parseQuery(parser, q, 10000000, 10000); - - std::cout << std::endl << "New AST:" << std::endl; - auto new_ast = parseQuery(q, ""); - new_ast->dump(); - - auto old_ast = new_ast->convertToOld(); - if (orig_ast) - { - std::cout << std::endl << "Original AST:" << std::endl; - WriteBufferFromOStream buf(std::cout, 1); - orig_ast->dumpTree(buf); - std::cout << std::endl << "Original query:" << std::endl; - orig_ast->format({buf, false}); - std::cout << std::endl; - } - if (old_ast) - { - std::cout << std::endl << "Converted AST:" << std::endl; - WriteBufferFromOStream buf(std::cout, 1); - old_ast->dumpTree(buf); - std::cout << std::endl << "Converted query:" << std::endl; - old_ast->format({buf, false}); - std::cout << std::endl; - } - } - } -} diff --git a/website/README.md b/website/README.md index a09a00379d1..9f808c6f658 100644 --- a/website/README.md +++ b/website/README.md @@ -12,7 +12,7 @@ sudo npm install -g purify-css amphtml-validator sudo apt install wkhtmltopdf virtualenv build -./build.py --skip-multi-page --skip-single-page --skip-amp --skip-pdf --skip-blog --skip-git-log --skip-docs --skip-test-templates --livereload 8080 +./build.py --skip-multi-page --skip-single-page --skip-amp --skip-pdf --skip-blog --skip-git-log --skip-docs --livereload 8080 # Open the web browser and go to http://localhost:8080/ ``` @@ -20,11 +20,11 @@ virtualenv build # How to quickly test the blog ``` -./build.py --skip-multi-page --skip-single-page --skip-amp --skip-pdf --skip-git-log --skip-docs --skip-test-templates --livereload 8080 +./build.py --skip-multi-page --skip-single-page --skip-amp --skip-pdf --skip-git-log --skip-docs --livereload 8080 ``` # How to quickly test the ugly annoying broken links in docs ``` -./build.py --skip-multi-page --skip-amp --skip-pdf --skip-blog --skip-git-log --skip-test-templates --lang en --livereload 8080 +./build.py --skip-multi-page --skip-amp --skip-pdf --skip-blog --skip-git-log --lang en --livereload 8080 ``` diff --git a/website/css/highlight.css b/website/css/highlight.css index 7cc8a4865dd..52f65bfc74e 100644 --- a/website/css/highlight.css +++ b/website/css/highlight.css @@ -1,99 +1,76 @@ /* - - Name: Base16 Paraiso Light - Author: Jan T. Sott - + Name: Base16 Eighties Dark + Author: Chris Kempson (http://chriskempson.com) Pygments template by Jan T. Sott (https://github.com/idleberg) Created with Base16 Builder by Chris Kempson (https://github.com/chriskempson/base16-builder) - - Edited for ClickHouse to improve legibility.
*/ -.syntax .hll { background-color: #b9b6b0 } -.syntax { background: #f8f9fa; color: #2f1e2e } -.syntax .c { color: #8d8687 } /* Comment */ -.syntax .err {} /* Error */ -.syntax .k { color: #000000; font-weight: bold } /* Keyword */ -.syntax .l { color: #0088ff } /* Literal */ -.syntax .n { color: #2f1e2e } /* Name */ -.syntax .o { color: #880000 } /* Operator */ -.syntax .p { color: #2f1e2e } /* Punctuation */ -.syntax .cm { color: #8d8687 } /* Comment.Multiline */ -.syntax .cp { color: #8d8687 } /* Comment.Preproc */ -.syntax .c1 { color: #8d8687 } /* Comment.Single */ -.syntax .cs { color: #8d8687 } /* Comment.Special */ -.syntax .gd { color: #ef6155 } /* Generic.Deleted */ -.syntax .ge { font-style: italic } /* Generic.Emph */ -.syntax .gh { color: #2f1e2e; font-weight: bold } /* Generic.Heading */ -.syntax .gi { color: #48b685 } /* Generic.Inserted */ -.syntax .gp { color: #8d8687; font-weight: bold } /* Generic.Prompt */ -.syntax .gs { font-weight: bold } /* Generic.Strong */ -.syntax .gu { color: #5bc4bf; font-weight: bold } /* Generic.Subheading */ -.syntax .kc { color: #815ba4 } /* Keyword.Constant */ -.syntax .kd { color: #815ba4 } /* Keyword.Declaration */ -.syntax .kn { color: #5bc4bf } /* Keyword.Namespace */ -.syntax .kp { color: #815ba4 } /* Keyword.Pseudo */ -.syntax .kr { color: #815ba4 } /* Keyword.Reserved */ -.syntax .kt { color: #fec418 } /* Keyword.Type */ -.syntax .ld { color: #48b685 } /* Literal.Date */ -.syntax .m { color: #0088ff } /* Literal.Number */ -.syntax .s { color: #48b685 } /* Literal.String */ -.syntax .na { color: #06b6ef } /* Name.Attribute */ -.syntax .nb { color: #2f1e2e } /* Name.Builtin */ -.syntax .nc { color: #fec418 } /* Name.Class */ -.syntax .no { color: #ef6155 } /* Name.Constant */ -.syntax .nd { color: #5bc4bf } /* Name.Decorator */ -.syntax .ni { color: #2f1e2e } /* Name.Entity */ -.syntax .ne { color: #ef6155 } /* Name.Exception */ -.syntax .nf { color: #06b6ef } /* Name.Function */ -.syntax .nl { color: #2f1e2e } /* Name.Label */ -.syntax .nn { color: #fec418 } /* Name.Namespace */ -.syntax .nx { color: #06b6ef } /* Name.Other */ -.syntax .py { color: #2f1e2e } /* Name.Property */ -.syntax .nt { color: #5bc4bf } /* Name.Tag */ -.syntax .nv { color: #ef6155 } /* Name.Variable */ -.syntax .ow { color: #5bc4bf } /* Operator.Word */ -.syntax .w { color: #2f1e2e } /* Text.Whitespace */ -.syntax .mf { color: #0088ff } /* Literal.Number.Float */ -.syntax .mh { color: #0088ff } /* Literal.Number.Hex */ -.syntax .mi { color: #0088ff } /* Literal.Number.Integer */ -.syntax .mo { color: #0088ff } /* Literal.Number.Oct */ -.syntax .sb { color: #48b685 } /* Literal.String.Backtick */ -.syntax .sc { color: #2f1e2e } /* Literal.String.Char */ -.syntax .sd { color: #8d8687 } /* Literal.String.Doc */ -.syntax .s2 { color: #48b685 } /* Literal.String.Double */ -.syntax .se { color: #0088ff } /* Literal.String.Escape */ -.syntax .sh { color: #48b685 } /* Literal.String.Heredoc */ -.syntax .si { color: #0088ff } /* Literal.String.Interpol */ -.syntax .sx { color: #48b685 } /* Literal.String.Other */ -.syntax .sr { color: #48b685 } /* Literal.String.Regex */ -.syntax .s1 { color: #008800 } /* Literal.String.Single */ -.syntax .ss { color: #48b685 } /* Literal.String.Symbol */ -.syntax .bp { color: #2f1e2e } /* Name.Builtin.Pseudo */ -.syntax .vc { color: #ef6155 } /* Name.Variable.Class */ -.syntax .vg { color: #ef6155 } /* Name.Variable.Global */ -.syntax .vi { color: #ef6155 } /* Name.Variable.Instance */ -.syntax .il { color: #0088ff } /* 
Literal.Number.Integer.Long */ @media (prefers-color-scheme: dark) { -.syntax .k { color: #c78cff } /* Keyword */ -.syntax .gi { color: #64ffbb } /* Generic.Inserted */ -.syntax .ld { color: #64ffbb } /* Literal.Date */ -.syntax .s { color: #64ffbb } /* Literal.String */ -.syntax .sb { color: #64ffbb } /* Literal.String.Backtick */ -.syntax .s2 { color: #64ffbb } /* Literal.String.Double */ -.syntax .sh { color: #64ffbb } /* Literal.String.Heredoc */ -.syntax .sx { color: #64ffbb } /* Literal.String.Other */ -.syntax .sr { color: #64ffbb } /* Literal.String.Regex */ -.syntax .s1 { color: #64ffbb } /* Literal.String.Single */ -.syntax .ss { color: #64ffbb } /* Literal.String.Symbol */ -.syntax .c { color: #64ffbb } /* Comment */ -.syntax .n { color: #f8f9fa } /* Name */ -.syntax .p { color: #f8f9fa } /* Punctuation */ -.syntax .gh { color: #f8f9fa; font-weight: bold } /* Generic.Heading */ -.syntax .nb { color: #f8f9fa } /* Name.Builtin */ -.syntax .ni { color: #f8f9fa } /* Name.Entity */ -.syntax .nl { color: #f8f9fa } /* Name.Label */ -.syntax .py { color: #f8f9fa } /* Name.Property */ -.syntax .w { color: #f8f9fa } /* Text.Whitespace */ -.syntax .sc { color: #f8f9fa } /* Literal.String.Char */ + +.syntax .hll { background-color: #515151 } +.syntax { background: #2d2d2d; color: #f2f0ec } +.syntax .c { color: #747369 } /* Comment */ +.syntax .err { color: #f2777a } /* Error */ +.syntax .k { color: #cc99cc } /* Keyword */ +.syntax .l { color: #f99157 } /* Literal */ +.syntax .n { color: #f2f0ec } /* Name */ +.syntax .o { color: #66cccc } /* Operator */ +.syntax .p { color: #f2f0ec } /* Punctuation */ +.syntax .cm { color: #747369 } /* Comment.Multiline */ +.syntax .cp { color: #747369 } /* Comment.Preproc */ +.syntax .c1 { color: #747369 } /* Comment.Single */ +.syntax .cs { color: #747369 } /* Comment.Special */ +.syntax .gd { color: #f2777a } /* Generic.Deleted */ +.syntax .ge { font-style: italic } /* Generic.Emph */ +.syntax .gh { color: #f2f0ec; font-weight: bold } /* Generic.Heading */ +.syntax .gi { color: #99cc99 } /* Generic.Inserted */ +.syntax .gp { color: #747369; font-weight: bold } /* Generic.Prompt */ +.syntax .gs { font-weight: bold } /* Generic.Strong */ +.syntax .gu { color: #66cccc; font-weight: bold } /* Generic.Subheading */ +.syntax .kc { color: #cc99cc } /* Keyword.Constant */ +.syntax .kd { color: #cc99cc } /* Keyword.Declaration */ +.syntax .kn { color: #66cccc } /* Keyword.Namespace */ +.syntax .kp { color: #cc99cc } /* Keyword.Pseudo */ +.syntax .kr { color: #cc99cc } /* Keyword.Reserved */ +.syntax .kt { color: #ffcc66 } /* Keyword.Type */ +.syntax .ld { color: #99cc99 } /* Literal.Date */ +.syntax .m { color: #f99157 } /* Literal.Number */ +.syntax .s { color: #99cc99 } /* Literal.String */ +.syntax .na { color: #6699cc } /* Name.Attribute */ +.syntax .nb { color: #f2f0ec } /* Name.Builtin */ +.syntax .nc { color: #ffcc66 } /* Name.Class */ +.syntax .no { color: #f2777a } /* Name.Constant */ +.syntax .nd { color: #66cccc } /* Name.Decorator */ +.syntax .ni { color: #f2f0ec } /* Name.Entity */ +.syntax .ne { color: #f2777a } /* Name.Exception */ +.syntax .nf { color: #6699cc } /* Name.Function */ +.syntax .nl { color: #f2f0ec } /* Name.Label */ +.syntax .nn { color: #ffcc66 } /* Name.Namespace */ +.syntax .nx { color: #6699cc } /* Name.Other */ +.syntax .py { color: #f2f0ec } /* Name.Property */ +.syntax .nt { color: #66cccc } /* Name.Tag */ +.syntax .nv { color: #f2777a } /* Name.Variable */ +.syntax .ow { color: #66cccc } /* Operator.Word */ +.syntax .w { 
color: #f2f0ec } /* Text.Whitespace */ +.syntax .mf { color: #f99157 } /* Literal.Number.Float */ +.syntax .mh { color: #f99157 } /* Literal.Number.Hex */ +.syntax .mi { color: #f99157 } /* Literal.Number.Integer */ +.syntax .mo { color: #f99157 } /* Literal.Number.Oct */ +.syntax .sb { color: #99cc99 } /* Literal.String.Backtick */ +.syntax .sc { color: #f2f0ec } /* Literal.String.Char */ +.syntax .sd { color: #747369 } /* Literal.String.Doc */ +.syntax .s2 { color: #99cc99 } /* Literal.String.Double */ +.syntax .se { color: #f99157 } /* Literal.String.Escape */ +.syntax .sh { color: #99cc99 } /* Literal.String.Heredoc */ +.syntax .si { color: #f99157 } /* Literal.String.Interpol */ +.syntax .sx { color: #99cc99 } /* Literal.String.Other */ +.syntax .sr { color: #99cc99 } /* Literal.String.Regex */ +.syntax .s1 { color: #99cc99 } /* Literal.String.Single */ +.syntax .ss { color: #99cc99 } /* Literal.String.Symbol */ +.syntax .bp { color: #f2f0ec } /* Name.Builtin.Pseudo */ +.syntax .vc { color: #f2777a } /* Name.Variable.Class */ +.syntax .vg { color: #f2777a } /* Name.Variable.Global */ +.syntax .vi { color: #f2777a } /* Name.Variable.Instance */ +.syntax .il { color: #f99157 } /* Literal.Number.Integer.Long */ + } diff --git a/website/templates/index/community.html b/website/templates/index/community.html index a71e4097a68..28e9f12ce93 100644 --- a/website/templates/index/community.html +++ b/website/templates/index/community.html @@ -66,7 +66,7 @@
[The closing hunk to website/templates/index/community.html lost its HTML markup during extraction; only the visible text survives, and the document ends mid-hunk:]
Quick start
System requirements for pre-built packages: Linux, x86_64 with SSE 4.2.
For other operating systems the easiest way to get started is using
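
For anyone reproducing the reference output of the 00165_jit_aggregate_functions test above by hand, the two settings the patch introduces can be toggled per session. The following is a minimal sketch, not part of the patch itself; it assumes a local server with the stateful `test.hits` dataset loaded, as the 1_stateful suite requires:

```sql
-- Minimal sketch, assuming the 1_stateful test.hits dataset is available.
-- Force JIT compilation of aggregate expressions even for a single occurrence.
SET compile_aggregate_expressions = 1;
SET min_count_to_compile_aggregate_expression = 0;

-- min/max/sum/avg are compilable aggregates; groupBitAnd is the test's
-- "non compilable function", so including it exercises the mixed
-- compiled/interpreted aggregation pipeline.
SELECT
    CounterID,
    min(WatchID),
    max(WatchID),
    sum(WatchID),
    avg(WatchID),
    groupBitAnd(WatchID)
FROM test.hits
GROUP BY CounterID
ORDER BY count() DESC
LIMIT 5;

-- Re-run the same query with compilation disabled; both runs should
-- produce identical rows, which is what the test's reference file checks.
SET compile_aggregate_expressions = 0;
```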