Merge branch 'master' into annadevyatova-DOCSUP-7502-replicated

This commit is contained in:
Anna 2021-07-04 07:31:06 +03:00 committed by GitHub
commit b2f059b6bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2068 changed files with 30524 additions and 48892 deletions

5
.gitignore vendored
View File

@ -14,6 +14,11 @@
/build-*
/tests/venv
# logs
*.log
*.stderr
*.stdout
/docs/build
/docs/publish
/docs/edit

12
.gitmodules vendored
View File

@ -103,7 +103,7 @@
url = https://github.com/ClickHouse-Extras/fastops
[submodule "contrib/orc"]
path = contrib/orc
url = https://github.com/apache/orc
url = https://github.com/ClickHouse-Extras/orc
[submodule "contrib/sparsehash-c11"]
path = contrib/sparsehash-c11
url = https://github.com/sparsehash/sparsehash-c11.git
@ -168,9 +168,6 @@
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git
[submodule "contrib/antlr4-runtime"]
path = contrib/antlr4-runtime
url = https://github.com/ClickHouse-Extras/antlr4-runtime.git
[submodule "contrib/sentry-native"]
path = contrib/sentry-native
url = https://github.com/ClickHouse-Extras/sentry-native.git
@ -210,9 +207,6 @@
[submodule "contrib/fast_float"]
path = contrib/fast_float
url = https://github.com/fastfloat/fast_float
[submodule "contrib/libpqxx"]
path = contrib/libpqxx
url = https://github.com/jtv/libpqxx
[submodule "contrib/libpq"]
path = contrib/libpq
url = https://github.com/ClickHouse-Extras/libpq
@ -228,7 +222,9 @@
[submodule "contrib/datasketches-cpp"]
path = contrib/datasketches-cpp
url = https://github.com/ClickHouse-Extras/datasketches-cpp.git
[submodule "contrib/yaml-cpp"]
path = contrib/yaml-cpp
url = https://github.com/ClickHouse-Extras/yaml-cpp.git
[submodule "contrib/libpqxx"]
path = contrib/libpqxx
url = https://github.com/ClickHouse-Extras/libpqxx.git

View File

@ -763,6 +763,7 @@
* Allow using extended integer types (`Int128`, `Int256`, `UInt256`) in `avg` and `avgWeighted` functions. Also allow using different types (integer, decimal, floating point) for value and for weight in `avgWeighted` function. This is a backward-incompatible change: now the `avg` and `avgWeighted` functions always return `Float64` (as documented). Before this change the return type for `Decimal` arguments was also `Decimal`. [#15419](https://github.com/ClickHouse/ClickHouse/pull/15419) ([Mike](https://github.com/myrrc)).
* Expression `toUUID(N)` no longer works. Replace with `toUUID('00000000-0000-0000-0000-000000000000')`. This change is motivated by non-obvious results of `toUUID(N)` where N is non zero.
* SSL Certificates with incorrect "key usage" are rejected. In previous versions they are used to work. See [#19262](https://github.com/ClickHouse/ClickHouse/issues/19262).
* `incl` references to substitutions file (`/etc/metrika.xml`) were removed from the default config (`<remote_servers>`, `<zookeeper>`, `<macros>`, `<compression>`, `<networks>`). If you were using substitutions file and were relying on those implicit references, you should put them back manually and explicitly by adding corresponding sections with `incl="..."` attributes before the update. See [#18740](https://github.com/ClickHouse/ClickHouse/pull/18740) ([alexey-milovidov](https://github.com/alexey-milovidov)).
#### New Feature

View File

@ -183,24 +183,37 @@ endif ()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
if (OS_LINUX)
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (OBJCOPY_PATH)
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (ARCH_AMD64)
set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386)
elseif (ARCH_AARCH64)
set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64)
if (NOT OBJCOPY_PATH AND OS_DARWIN)
find_program (BREW_PATH NAMES "brew")
if (BREW_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
if (LLVM_PREFIX)
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
if (NOT OBJCOPY_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
if (BINUTILS_PREFIX)
find_program (OBJCOPY_PATH NAMES "objcopy" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
endif ()
else ()
message(FATAL_ERROR "Cannot find objcopy.")
endif ()
endif ()
if (OBJCOPY_PATH)
message (STATUS "Using objcopy: ${OBJCOPY_PATH}")
else ()
message (FATAL_ERROR "Cannot find objcopy.")
endif ()
if (OS_DARWIN)
set(WHOLE_ARCHIVE -all_load)
set(NO_WHOLE_ARCHIVE -noall_load)
# The `-all_load` flag forces loading of all symbols from all libraries,
# and leads to multiply-defined symbols. This flag allows force loading
# from a _specific_ library, which is what we need.
set(WHOLE_ARCHIVE -force_load)
# The `-noall_load` flag is the default and now obsolete.
set(NO_WHOLE_ARCHIVE "")
else ()
set(WHOLE_ARCHIVE --whole-archive)
set(NO_WHOLE_ARCHIVE --no-whole-archive)
@ -528,7 +541,6 @@ include (cmake/find/libpqxx.cmake)
include (cmake/find/nuraft.cmake)
include (cmake/find/yaml-cpp.cmake)
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
set (ENABLE_ORC OFF CACHE INTERNAL "")
endif()

View File

@ -8,11 +8,11 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster.
* [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-qfort0u8-TWqK4wIP0YSdoDE0btKa1w) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [SF Bay Area ClickHouse Community Meetup (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/278144089/) on 16 June 2021.
* [China ClickHouse Community Meetup (online)](http://hdxu.cn/rhbfZ) on 26 June 2021.

View File

@ -1,14 +1,22 @@
#include "IBridge.h"
#include <IO/ReadHelpers.h>
#include <boost/program_options.hpp>
#include <Poco/Net/NetException.h>
#include <Poco/Util/HelpFormatter.h>
#include <Common/StringUtils/StringUtils.h>
#include <Formats/registerFormats.h>
#include <common/logger_useful.h>
#include <common/range.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/SensitiveDataMasker.h>
#include <common/errnoToString.h>
#include <IO/ReadHelpers.h>
#include <Formats/registerFormats.h>
#include <Server/HTTP/HTTPServer.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <sys/time.h>
#include <sys/resource.h>
#if USE_ODBC
# include <Poco/Data/ODBC/Connector.h>
@ -163,6 +171,31 @@ void IBridge::initialize(Application & self)
max_server_connections = config().getUInt("max-server-connections", 1024);
keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10);
struct rlimit limit;
const UInt64 gb = 1024 * 1024 * 1024;
/// Set maximum RSS to 1 GiB.
limit.rlim_max = limit.rlim_cur = gb;
if (setrlimit(RLIMIT_RSS, &limit))
LOG_WARNING(log, "Unable to set maximum RSS to 1GB: {} (current rlim_cur={}, rlim_max={})",
errnoToString(errno), limit.rlim_cur, limit.rlim_max);
if (!getrlimit(RLIMIT_RSS, &limit))
LOG_INFO(log, "RSS limit: cur={}, max={}", limit.rlim_cur, limit.rlim_max);
try
{
const auto oom_score = toString(config().getUInt64("bridge_oom_score", 500));
WriteBufferFromFile buf("/proc/self/oom_score_adj");
buf.write(oom_score.data(), oom_score.size());
buf.close();
LOG_INFO(log, "OOM score is set to {}", oom_score);
}
catch (const Exception & e)
{
LOG_WARNING(log, "Failed to set OOM score, error: {}", e.what());
}
initializeTerminationAndSignalProcessing();
ServerApplication::initialize(self); // NOLINT
@ -214,7 +247,7 @@ int IBridge::main(const std::vector<std::string> & /*args*/)
server.stop();
for (size_t count : ext::range(1, 6))
for (size_t count : collections::range(1, 6))
{
if (server.currentConnections() == 0)
break;

View File

@ -17,7 +17,7 @@ class DateLUT : private boost::noncopyable
{
public:
/// Return singleton DateLUTImpl instance for the default time zone.
static ALWAYS_INLINE const DateLUTImpl & instance()
static ALWAYS_INLINE const DateLUTImpl & instance() // -V1071
{
const auto & date_lut = getInstance();
return *date_lut.default_impl.load(std::memory_order_acquire);

View File

@ -119,11 +119,16 @@ private:
}
public:
/// We use Int64 instead of time_t because time_t is mapped to the different types (long or long long)
/// on Linux and Darwin (on both of them, long and long long are 64 bit and behaves identically,
/// but they are different types in C++ and this affects function overload resolution).
using Time = Int64;
/// The order of fields matters for alignment and sizeof.
struct Values
{
/// time_t at beginning of the day.
Int64 date;
/// Time at beginning of the day.
Time date;
/// Properties of the day.
UInt16 year;
@ -182,20 +187,20 @@ private:
LUTIndex years_months_lut[DATE_LUT_YEARS * 12];
/// UTC offset at beginning of the Unix epoch. The same as unix timestamp of 1970-01-01 00:00:00 local time.
time_t offset_at_start_of_epoch;
Time offset_at_start_of_epoch;
/// UTC offset at the beginning of the first supported year.
time_t offset_at_start_of_lut;
Time offset_at_start_of_lut;
bool offset_is_whole_number_of_hours_during_epoch;
/// Time zone name.
std::string time_zone;
inline LUTIndex findIndex(time_t t) const
inline LUTIndex findIndex(Time t) const
{
/// First guess.
Int64 guess = (t / 86400) + daynum_offset_epoch;
Time guess = (t / 86400) + daynum_offset_epoch;
/// For negative time_t the integer division was rounded up, so the guess is offset by one.
/// For negative Time the integer division was rounded up, so the guess is offset by one.
if (unlikely(t < 0))
--guess;
@ -227,7 +232,7 @@ private:
return LUTIndex{static_cast<UInt32>(d + daynum_offset_epoch) & date_lut_mask};
}
inline LUTIndex toLUTIndex(time_t t) const
inline LUTIndex toLUTIndex(Time t) const
{
return findIndex(t);
}
@ -280,7 +285,7 @@ public:
/// Round down to start of monday.
template <typename DateOrTime>
inline time_t toFirstDayOfWeek(DateOrTime v) const
inline Time toFirstDayOfWeek(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return lut[i - (lut[i].day_of_week - 1)].date;
@ -295,7 +300,7 @@ public:
/// Round down to start of month.
template <typename DateOrTime>
inline time_t toFirstDayOfMonth(DateOrTime v) const
inline Time toFirstDayOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
return lut[i - (lut[i].day_of_month - 1)].date;
@ -332,13 +337,13 @@ public:
}
template <typename DateOrTime>
inline time_t toFirstDayOfQuarter(DateOrTime v) const
inline Time toFirstDayOfQuarter(DateOrTime v) const
{
return toDate(toFirstDayOfQuarterIndex(v));
}
/// Round down to start of year.
inline time_t toFirstDayOfYear(time_t t) const
inline Time toFirstDayOfYear(Time t) const
{
return lut[years_lut[lut[findIndex(t)].year - DATE_LUT_MIN_YEAR]].date;
}
@ -355,14 +360,14 @@ public:
return toDayNum(toFirstDayNumOfYearIndex(v));
}
inline time_t toFirstDayOfNextMonth(time_t t) const
inline Time toFirstDayOfNextMonth(Time t) const
{
LUTIndex index = findIndex(t);
index += 32 - lut[index].day_of_month;
return lut[index - (lut[index].day_of_month - 1)].date;
}
inline time_t toFirstDayOfPrevMonth(time_t t) const
inline Time toFirstDayOfPrevMonth(Time t) const
{
LUTIndex index = findIndex(t);
index -= lut[index].day_of_month;
@ -389,16 +394,16 @@ public:
/** Round to start of day, then shift for specified amount of days.
*/
inline time_t toDateAndShift(time_t t, Int32 days) const
inline Time toDateAndShift(Time t, Int32 days) const
{
return lut[findIndex(t) + days].date;
}
inline time_t toTime(time_t t) const
inline Time toTime(Time t) const
{
const LUTIndex index = findIndex(t);
time_t res = t - lut[index].date;
Time res = t - lut[index].date;
if (res >= lut[index].time_at_offset_change())
res += lut[index].amount_of_offset_change();
@ -406,11 +411,11 @@ public:
return res - offset_at_start_of_epoch; /// Starting at 1970-01-01 00:00:00 local time.
}
inline unsigned toHour(time_t t) const
inline unsigned toHour(Time t) const
{
const LUTIndex index = findIndex(t);
time_t time = t - lut[index].date;
Time time = t - lut[index].date;
if (time >= lut[index].time_at_offset_change())
time += lut[index].amount_of_offset_change();
@ -426,7 +431,7 @@ public:
* then subtract the former from the latter to get the offset result.
* The boundaries when meets DST(daylight saving time) change should be handled very carefully.
*/
inline time_t timezoneOffset(time_t t) const
inline Time timezoneOffset(Time t) const
{
const LUTIndex index = findIndex(t);
@ -434,7 +439,7 @@ public:
/// Because the "amount_of_offset_change" in LUT entry only exists in the change day, it's costly to scan it from the very begin.
/// but we can figure out all the accumulated offsets from 1970-01-01 to that day just by get the whole difference between lut[].date,
/// and then, we can directly subtract multiple 86400s to get the real DST offsets for the leap seconds is not considered now.
time_t res = (lut[index].date - lut[daynum_offset_epoch].date) % 86400;
Time res = (lut[index].date - lut[daynum_offset_epoch].date) % 86400;
/// As so far to know, the maximal DST offset couldn't be more than 2 hours, so after the modulo operation the remainder
/// will sits between [-offset --> 0 --> offset] which respectively corresponds to moving clock forward or backward.
@ -448,7 +453,7 @@ public:
}
inline unsigned toSecond(time_t t) const
inline unsigned toSecond(Time t) const
{
auto res = t % 60;
if (likely(res >= 0))
@ -456,7 +461,7 @@ public:
return res + 60;
}
inline unsigned toMinute(time_t t) const
inline unsigned toMinute(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return (t / 60) % 60;
@ -474,27 +479,27 @@ public:
}
/// NOTE: Assuming timezone offset is a multiple of 15 minutes.
inline time_t toStartOfMinute(time_t t) const { return roundDown(t, 60); }
inline time_t toStartOfFiveMinute(time_t t) const { return roundDown(t, 300); }
inline time_t toStartOfFifteenMinutes(time_t t) const { return roundDown(t, 900); }
inline Time toStartOfMinute(Time t) const { return roundDown(t, 60); }
inline Time toStartOfFiveMinute(Time t) const { return roundDown(t, 300); }
inline Time toStartOfFifteenMinutes(Time t) const { return roundDown(t, 900); }
inline time_t toStartOfTenMinutes(time_t t) const
inline Time toStartOfTenMinutes(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return t / 600 * 600;
/// More complex logic is for Nepal - it has offset 05:45. Australia/Eucla is also unfortunate.
Int64 date = find(t).date;
Time date = find(t).date;
return date + (t - date) / 600 * 600;
}
/// NOTE: Assuming timezone transitions are multiple of hours. Lord Howe Island in Australia is a notable exception.
inline time_t toStartOfHour(time_t t) const
inline Time toStartOfHour(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return t / 3600 * 3600;
Int64 date = find(t).date;
Time date = find(t).date;
return date + (t - date) / 3600 * 3600;
}
@ -506,11 +511,11 @@ public:
* because the same calendar day starts/ends at different timestamps in different time zones)
*/
inline time_t fromDayNum(DayNum d) const { return lut[toLUTIndex(d)].date; }
inline time_t fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }
inline Time fromDayNum(DayNum d) const { return lut[toLUTIndex(d)].date; }
inline Time fromDayNum(ExtendedDayNum d) const { return lut[toLUTIndex(d)].date; }
template <typename DateOrTime>
inline time_t toDate(DateOrTime v) const { return lut[toLUTIndex(v)].date; }
inline Time toDate(DateOrTime v) const { return lut[toLUTIndex(v)].date; }
template <typename DateOrTime>
inline unsigned toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }
@ -578,7 +583,7 @@ public:
return toDayNum(toFirstDayNumOfISOYearIndex(v));
}
inline time_t toFirstDayOfISOYear(time_t t) const
inline Time toFirstDayOfISOYear(Time t) const
{
return lut[toFirstDayNumOfISOYearIndex(t)].date;
}
@ -773,7 +778,7 @@ public:
}
/// We count all hour-length intervals, unrelated to offset changes.
inline time_t toRelativeHourNum(time_t t) const
inline Time toRelativeHourNum(Time t) const
{
if (t >= 0 && offset_is_whole_number_of_hours_during_epoch)
return t / 3600;
@ -784,18 +789,18 @@ public:
}
template <typename DateOrTime>
inline time_t toRelativeHourNum(DateOrTime v) const
inline Time toRelativeHourNum(DateOrTime v) const
{
return toRelativeHourNum(lut[toLUTIndex(v)].date);
}
inline time_t toRelativeMinuteNum(time_t t) const
inline Time toRelativeMinuteNum(Time t) const
{
return (t + DATE_LUT_ADD) / 60 - (DATE_LUT_ADD / 60);
}
template <typename DateOrTime>
inline time_t toRelativeMinuteNum(DateOrTime v) const
inline Time toRelativeMinuteNum(DateOrTime v) const
{
return toRelativeMinuteNum(lut[toLUTIndex(v)].date);
}
@ -842,14 +847,14 @@ public:
return ExtendedDayNum(4 + (d - 4) / days * days);
}
inline time_t toStartOfDayInterval(ExtendedDayNum d, UInt64 days) const
inline Time toStartOfDayInterval(ExtendedDayNum d, UInt64 days) const
{
if (days == 1)
return toDate(d);
return lut[toLUTIndex(ExtendedDayNum(d / days * days))].date;
}
inline time_t toStartOfHourInterval(time_t t, UInt64 hours) const
inline Time toStartOfHourInterval(Time t, UInt64 hours) const
{
if (hours == 1)
return toStartOfHour(t);
@ -867,7 +872,7 @@ public:
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
{
/// Align to new hour numbers before rounding.
@ -892,7 +897,7 @@ public:
return values.date + time;
}
inline time_t toStartOfMinuteInterval(time_t t, UInt64 minutes) const
inline Time toStartOfMinuteInterval(Time t, UInt64 minutes) const
{
if (minutes == 1)
return toStartOfMinute(t);
@ -909,7 +914,7 @@ public:
return roundDown(t, seconds);
}
inline time_t toStartOfSecondInterval(time_t t, UInt64 seconds) const
inline Time toStartOfSecondInterval(Time t, UInt64 seconds) const
{
if (seconds == 1)
return t;
@ -934,14 +939,14 @@ public:
return toDayNum(makeLUTIndex(year, month, day_of_month));
}
inline time_t makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
inline Time makeDate(Int16 year, UInt8 month, UInt8 day_of_month) const
{
return lut[makeLUTIndex(year, month, day_of_month)].date;
}
/** Does not accept daylight saving time as argument: in case of ambiguity, it choose greater timestamp.
*/
inline time_t makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
inline Time makeDateTime(Int16 year, UInt8 month, UInt8 day_of_month, UInt8 hour, UInt8 minute, UInt8 second) const
{
size_t index = makeLUTIndex(year, month, day_of_month);
UInt32 time_offset = hour * 3600 + minute * 60 + second;
@ -969,7 +974,7 @@ public:
return values.year * 10000 + values.month * 100 + values.day_of_month;
}
inline time_t YYYYMMDDToDate(UInt32 num) const
inline Time YYYYMMDDToDate(UInt32 num) const
{
return makeDate(num / 10000, num / 100 % 100, num % 100);
}
@ -1000,13 +1005,13 @@ public:
TimeComponents time;
};
inline DateComponents toDateComponents(time_t t) const
inline DateComponents toDateComponents(Time t) const
{
const Values & values = getValues(t);
return { values.year, values.month, values.day_of_month };
}
inline DateTimeComponents toDateTimeComponents(time_t t) const
inline DateTimeComponents toDateTimeComponents(Time t) const
{
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
@ -1017,7 +1022,7 @@ public:
res.date.month = values.month;
res.date.day = values.day_of_month;
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
time += values.amount_of_offset_change();
@ -1042,7 +1047,7 @@ public:
}
inline UInt64 toNumYYYYMMDDhhmmss(time_t t) const
inline UInt64 toNumYYYYMMDDhhmmss(Time t) const
{
DateTimeComponents components = toDateTimeComponents(t);
@ -1055,7 +1060,7 @@ public:
+ UInt64(components.date.year) * 10000000000;
}
inline time_t YYYYMMDDhhmmssToTime(UInt64 num) const
inline Time YYYYMMDDhhmmssToTime(UInt64 num) const
{
return makeDateTime(
num / 10000000000,
@ -1069,12 +1074,12 @@ public:
/// Adding calendar intervals.
/// Implementation specific behaviour when delta is too big.
inline NO_SANITIZE_UNDEFINED time_t addDays(time_t t, Int64 delta) const
inline NO_SANITIZE_UNDEFINED Time addDays(Time t, Int64 delta) const
{
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
time += values.amount_of_offset_change();
@ -1086,7 +1091,7 @@ public:
return lut[new_index].date + time;
}
inline NO_SANITIZE_UNDEFINED time_t addWeeks(time_t t, Int64 delta) const
inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const
{
return addDays(t, delta * 7);
}
@ -1131,14 +1136,14 @@ public:
/// If resulting month has less deys than source month, then saturation can happen.
/// Example: 31 Aug + 1 month = 30 Sep.
inline time_t NO_SANITIZE_UNDEFINED addMonths(time_t t, Int64 delta) const
inline Time NO_SANITIZE_UNDEFINED addMonths(Time t, Int64 delta) const
{
const auto result_day = addMonthsIndex(t, delta);
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
time += values.amount_of_offset_change();
@ -1153,7 +1158,7 @@ public:
return toDayNum(addMonthsIndex(d, delta));
}
inline time_t NO_SANITIZE_UNDEFINED addQuarters(time_t t, Int64 delta) const
inline Time NO_SANITIZE_UNDEFINED addQuarters(Time t, Int64 delta) const
{
return addMonths(t, delta * 3);
}
@ -1180,14 +1185,14 @@ public:
}
/// Saturation can occur if 29 Feb is mapped to non-leap year.
inline time_t addYears(time_t t, Int64 delta) const
inline Time addYears(Time t, Int64 delta) const
{
auto result_day = addYearsIndex(t, delta);
const LUTIndex index = findIndex(t);
const Values & values = lut[index];
time_t time = t - values.date;
Time time = t - values.date;
if (time >= values.time_at_offset_change())
time += values.amount_of_offset_change();
@ -1203,7 +1208,7 @@ public:
}
inline std::string timeToString(time_t t) const
inline std::string timeToString(Time t) const
{
DateTimeComponents components = toDateTimeComponents(t);
@ -1228,7 +1233,7 @@ public:
return s;
}
inline std::string dateToString(time_t t) const
inline std::string dateToString(Time t) const
{
const Values & values = getValues(t);

View File

@ -91,10 +91,12 @@ struct DecomposedFloat
/// Compare float with integer of arbitrary width (both signed and unsigned are supported). Assuming two's complement arithmetic.
/// This function is generic, big integers (128, 256 bit) are supported as well.
/// Infinities are compared correctly. NaNs are treat similarly to infinities, so they can be less than all numbers.
/// (note that we need total order)
/// Returns -1, 0 or 1.
template <typename Int>
int compare(Int rhs)
int compare(Int rhs) const
{
if (rhs == 0)
return sign();
@ -137,10 +139,11 @@ struct DecomposedFloat
if (normalized_exponent() >= static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>))
return is_negative() ? -1 : 1;
using UInt = make_unsigned_t<Int>;
using UInt = std::conditional_t<(sizeof(Int) > sizeof(typename Traits::UInt)), make_unsigned_t<Int>, typename Traits::UInt>;
UInt uint_rhs = rhs < 0 ? -rhs : rhs;
/// Smaller octave: abs(rhs) < abs(float)
/// FYI, TIL: octave is also called "binade", https://en.wikipedia.org/wiki/Binade
if (uint_rhs < (static_cast<UInt>(1) << normalized_exponent()))
return is_negative() ? -1 : 1;
@ -154,11 +157,11 @@ struct DecomposedFloat
bool large_and_always_integer = normalized_exponent() >= static_cast<int16_t>(Traits::mantissa_bits);
typename Traits::UInt a = large_and_always_integer
? mantissa() << (normalized_exponent() - Traits::mantissa_bits)
: mantissa() >> (Traits::mantissa_bits - normalized_exponent());
UInt a = large_and_always_integer
? static_cast<UInt>(mantissa()) << (normalized_exponent() - Traits::mantissa_bits)
: static_cast<UInt>(mantissa()) >> (Traits::mantissa_bits - normalized_exponent());
typename Traits::UInt b = uint_rhs - (static_cast<UInt>(1) << normalized_exponent());
UInt b = uint_rhs - (static_cast<UInt>(1) << normalized_exponent());
if (a < b)
return is_negative() ? 1 : -1;
@ -175,37 +178,37 @@ struct DecomposedFloat
template <typename Int>
bool equals(Int rhs)
bool equals(Int rhs) const
{
return compare(rhs) == 0;
}
template <typename Int>
bool notEquals(Int rhs)
bool notEquals(Int rhs) const
{
return compare(rhs) != 0;
}
template <typename Int>
bool less(Int rhs)
bool less(Int rhs) const
{
return compare(rhs) < 0;
}
template <typename Int>
bool greater(Int rhs)
bool greater(Int rhs) const
{
return compare(rhs) > 0;
}
template <typename Int>
bool lessOrEquals(Int rhs)
bool lessOrEquals(Int rhs) const
{
return compare(rhs) <= 0;
}
template <typename Int>
bool greaterOrEquals(Int rhs)
bool greaterOrEquals(Int rhs) const
{
return compare(rhs) >= 0;
}

View File

@ -1,6 +1,6 @@
#include <common/ReadlineLineReader.h>
#include <common/errnoToString.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <errno.h>
#include <signal.h>

View File

@ -1,8 +1,9 @@
#include <common/ReplxxLineReader.h>
#include <common/errnoToString.h>
#include <errno.h>
#include <string.h>
#include <chrono>
#include <cerrno>
#include <cstring>
#include <unistd.h>
#include <functional>
#include <sys/file.h>
@ -24,6 +25,94 @@ void trim(String & s)
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end());
}
/// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx.
/// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org)
/// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com)
/// Copyright (c) 2010, Pieter Noordhuis (pcnoordhuis at gmail dot com)
std::string replxx_now_ms_str()
{
std::chrono::milliseconds ms(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()));
time_t t = ms.count() / 1000;
tm broken;
if (!localtime_r(&t, &broken))
{
return std::string();
}
static int const BUFF_SIZE(32);
char str[BUFF_SIZE];
strftime(str, BUFF_SIZE, "%Y-%m-%d %H:%M:%S.", &broken);
snprintf(str + sizeof("YYYY-mm-dd HH:MM:SS"), 5, "%03d", static_cast<int>(ms.count() % 1000));
return str;
}
/// Convert from readline to replxx format.
///
/// replxx requires each history line to prepended with time line:
///
/// ### YYYY-MM-DD HH:MM:SS.SSS
/// select 1
///
/// And w/o those service lines it will load all lines from history file as
/// one history line for suggestion. And if there are lots of lines in file it
/// will take lots of time (getline() + tons of reallocations).
///
/// NOTE: this code uses std::ifstream/std::ofstream like original replxx code.
void convertHistoryFile(const std::string & path, replxx::Replxx & rx)
{
std::ifstream in(path);
if (!in)
{
rx.print("Cannot open %s reading (for conversion): %s\n",
path.c_str(), errnoToString(errno).c_str());
return;
}
std::string line;
if (!getline(in, line).good())
{
rx.print("Cannot read from %s (for conversion): %s\n",
path.c_str(), errnoToString(errno).c_str());
return;
}
/// This is the marker of the date, no need to convert.
static char const REPLXX_TIMESTAMP_PATTERN[] = "### dddd-dd-dd dd:dd:dd.ddd";
if (line.starts_with("### ") && line.size() == strlen(REPLXX_TIMESTAMP_PATTERN))
{
return;
}
std::vector<std::string> lines;
in.seekg(0);
while (getline(in, line).good())
{
lines.push_back(line);
}
in.close();
size_t lines_size = lines.size();
std::sort(lines.begin(), lines.end());
lines.erase(std::unique(lines.begin(), lines.end()), lines.end());
rx.print("The history file (%s) is in old format. %zu lines, %zu unique lines.\n",
path.c_str(), lines_size, lines.size());
std::ofstream out(path);
if (!out)
{
rx.print("Cannot open %s for writing (for conversion): %s\n",
path.c_str(), errnoToString(errno).c_str());
return;
}
const std::string & timestamp = replxx_now_ms_str();
for (const auto & out_line : lines)
{
out << "### " << timestamp << "\n" << out_line << std::endl;
}
out.close();
}
}
ReplxxLineReader::ReplxxLineReader(
@ -47,6 +136,8 @@ ReplxxLineReader::ReplxxLineReader(
}
else
{
convertHistoryFile(history_file_path, rx);
if (flock(history_file_fd, LOCK_SH))
{
rx.print("Shared lock of history file failed: %s\n", errnoToString(errno).c_str());

View File

@ -3,7 +3,7 @@
#include <map>
#include <tuple>
#include <mutex>
#include <ext/function_traits.h>
#include <common/function_traits.h>
/** The simplest cache for a free function.
@ -32,10 +32,11 @@ public:
template <typename... Args>
Result operator() (Args &&... args)
{
Key key{std::forward<Args>(args)...};
{
std::lock_guard lock(mutex);
Key key{std::forward<Args>(args)...};
auto it = cache.find(key);
if (cache.end() != it)
@ -43,7 +44,7 @@ public:
}
/// The calculations themselves are not done under mutex.
Result res = f(std::forward<Args>(args)...);
Result res = std::apply(f, key);
{
std::lock_guard lock(mutex);
@ -57,11 +58,12 @@ public:
template <typename... Args>
void update(Args &&... args)
{
Result res = f(std::forward<Args>(args)...);
Key key{std::forward<Args>(args)...};
Result res = std::apply(f, key);
{
std::lock_guard lock(mutex);
Key key{std::forward<Args>(args)...};
cache[key] = std::move(res);
}
}

7
base/common/arraySize.h Normal file
View File

@ -0,0 +1,7 @@
#pragma once
#include <cstdlib>
/** \brief Returns number of elements in an automatic array. */
template <typename T, std::size_t N>
constexpr size_t arraySize(const T (&)[N]) noexcept { return N; }

27
base/common/bit_cast.h Normal file
View File

@ -0,0 +1,27 @@
#pragma once
#include <string.h>
#include <algorithm>
#include <type_traits>
/** \brief Returns value `from` converted to type `To` while retaining bit representation.
* `To` and `From` must satisfy `CopyConstructible`.
*/
template <typename To, typename From>
std::decay_t<To> bit_cast(const From & from)
{
To res {};
memcpy(static_cast<void*>(&res), &from, std::min(sizeof(res), sizeof(from)));
return res;
}
/** \brief Returns value `from` converted to type `To` while retaining bit representation.
* `To` and `From` must satisfy `CopyConstructible`.
*/
template <typename To, typename From>
std::decay_t<To> safe_bit_cast(const From & from)
{
static_assert(sizeof(To) == sizeof(From), "bit cast on types of different width");
return bit_cast<To, From>(from);
}

46
base/common/chrono_io.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <chrono>
#include <string>
#include <sstream>
#include <cctz/time_zone.h>
inline std::string to_string(const std::time_t & time)
{
return cctz::format("%Y-%m-%d %H:%M:%S", std::chrono::system_clock::from_time_t(time), cctz::local_time_zone());
}
template <typename Clock, typename Duration = typename Clock::duration>
std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
{
// Don't use DateLUT because it shows weird characters for
// TimePoint::max(). I wish we could use C++20 format, but it's not
// there yet.
// return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp));
auto in_time_t = std::chrono::system_clock::to_time_t(tp);
return to_string(in_time_t);
}
template <typename Rep, typename Period = std::ratio<1>>
std::string to_string(const std::chrono::duration<Rep, Period> & duration)
{
auto seconds_as_int = std::chrono::duration_cast<std::chrono::seconds>(duration);
if (seconds_as_int == duration)
return std::to_string(seconds_as_int.count()) + "s";
auto seconds_as_double = std::chrono::duration_cast<std::chrono::duration<double>>(duration);
return std::to_string(seconds_as_double.count()) + "s";
}
template <typename Clock, typename Duration = typename Clock::duration>
std::ostream & operator<<(std::ostream & o, const std::chrono::time_point<Clock, Duration> & tp)
{
return o << to_string(tp);
}
template <typename Rep, typename Period = std::ratio<1>>
std::ostream & operator<<(std::ostream & o, const std::chrono::duration<Rep, Period> & duration)
{
return o << to_string(duration);
}

View File

@ -4,23 +4,42 @@
#include <string>
#include <boost/algorithm/string/replace.hpp>
std::string_view getResource(std::string_view name)
{
// Convert the resource file name into the form generated by `ld -r -b binary`.
std::string name_replaced(name);
std::replace(name_replaced.begin(), name_replaced.end(), '/', '_');
std::replace(name_replaced.begin(), name_replaced.end(), '-', '_');
std::replace(name_replaced.begin(), name_replaced.end(), '.', '_');
boost::replace_all(name_replaced, "+", "_PLUS_");
/// These are the names that are generated by "ld -r -b binary"
std::string symbol_name_data = "_binary_" + name_replaced + "_start";
std::string symbol_name_size = "_binary_" + name_replaced + "_size";
// In most `dlsym(3)` APIs, one passes the symbol name as it appears via
// something like `nm` or `objdump -t`. For example, a symbol `_foo` would be
// looked up with the string `"_foo"`.
//
// Apple's linker is confusingly different. The NOTES on the man page for
// `dlsym(3)` claim that one looks up the symbol with "the name used in C
// source code". In this example, that would mean using the string `"foo"`.
// This apparently applies even in the case where the symbol did not originate
// from C source, such as the embedded binary resource files used here. So
// the symbol name must not have a leading `_` on Apple platforms. It's not
// clear how this applies to other symbols, such as those which _have_ a leading
// underscore in them by design, many leading underscores, etc.
#if defined OS_DARWIN
std::string prefix = "binary_";
#else
std::string prefix = "_binary_";
#endif
std::string symbol_name_start = prefix + name_replaced + "_start";
std::string symbol_name_end = prefix + name_replaced + "_end";
const void * sym_data = dlsym(RTLD_DEFAULT, symbol_name_data.c_str());
const void * sym_size = dlsym(RTLD_DEFAULT, symbol_name_size.c_str());
const char* sym_start = reinterpret_cast<const char*>(dlsym(RTLD_DEFAULT, symbol_name_start.c_str()));
const char* sym_end = reinterpret_cast<const char*>(dlsym(RTLD_DEFAULT, symbol_name_end.c_str()));
if (sym_data && sym_size)
return { static_cast<const char *>(sym_data), unalignedLoad<size_t>(&sym_size) };
if (sym_start && sym_end)
{
auto resource_size = static_cast<size_t>(std::distance(sym_start, sym_end));
return { sym_start, resource_size };
}
return {};
}

52
base/common/map.h Normal file
View File

@ -0,0 +1,52 @@
#pragma once
#include <type_traits>
#include <boost/iterator/transform_iterator.hpp>
namespace collections
{
/// \brief Strip type off top level reference and cv-qualifiers thus allowing storage in containers
template <typename T>
using unqualified_t = std::remove_cv_t<std::remove_reference_t<T>>;
/** \brief Returns collection of the same container-type as the input collection,
* with each element transformed by the application of `mapper`.
*/
template <template <typename...> class Collection, typename... Params, typename Mapper>
auto map(const Collection<Params...> & collection, Mapper && mapper)
{
using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
return Collection<value_type>(
boost::make_transform_iterator(std::begin(collection), std::forward<Mapper>(mapper)),
boost::make_transform_iterator(std::end(collection), std::forward<Mapper>(mapper)));
}
/** \brief Returns collection of specified container-type,
* with each element transformed by the application of `mapper`.
* Allows conversion between different container-types, e.g. std::vector to std::list
*/
template <template <typename...> class ResultCollection, typename Collection, typename Mapper>
auto map(const Collection & collection, Mapper && mapper)
{
using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
return ResultCollection<value_type>(
boost::make_transform_iterator(std::begin(collection), std::forward<Mapper>(mapper)),
boost::make_transform_iterator(std::end(collection), std::forward<Mapper>(mapper)));
}
/** \brief Returns collection of specified type,
* with each element transformed by the application of `mapper`.
* Allows leveraging implicit conversion between the result of applying `mapper` and R::value_type.
*/
template <typename ResultCollection, typename Collection, typename Mapper>
auto map(const Collection & collection, Mapper && mapper)
{
return ResultCollection(
boost::make_transform_iterator(std::begin(collection), std::forward<Mapper>(mapper)),
boost::make_transform_iterator(std::end(collection), std::forward<Mapper>(mapper)));
}
}

View File

@ -4,9 +4,9 @@
#include <boost/range/adaptor/transformed.hpp>
#include <type_traits>
namespace ext
namespace collections
{
namespace internal
{
template <typename ResultType, typename CountingType, typename BeginType, typename EndType>
@ -24,11 +24,11 @@ namespace internal
/// For loop adaptor which is used to iterate through a half-closed interval [begin, end).
/// The parameters `begin` and `end` can have any integral or enum types.
template <typename BeginType,
typename EndType,
typename = std::enable_if_t<
(std::is_integral_v<BeginType> || std::is_enum_v<BeginType>) &&
(std::is_integral_v<EndType> || std::is_enum_v<EndType>) &&
(!std::is_enum_v<BeginType> || !std::is_enum_v<EndType> || std::is_same_v<BeginType, EndType>), void>>
typename EndType,
typename = std::enable_if_t<
(std::is_integral_v<BeginType> || std::is_enum_v<BeginType>) &&
(std::is_integral_v<EndType> || std::is_enum_v<EndType>) &&
(!std::is_enum_v<BeginType> || !std::is_enum_v<EndType> || std::is_same_v<BeginType, EndType>), void>>
inline auto range(BeginType begin, EndType end)
{
if constexpr (std::is_integral_v<BeginType> && std::is_integral_v<EndType>)
@ -51,7 +51,7 @@ inline auto range(BeginType begin, EndType end)
/// The parameter `end` can have any integral or enum type.
/// The same as range(0, end).
template <typename Type,
typename = std::enable_if_t<std::is_integral_v<Type> || std::is_enum_v<Type>, void>>
typename = std::enable_if_t<std::is_integral_v<Type> || std::is_enum_v<Type>, void>>
inline auto range(Type end)
{
if constexpr (std::is_integral_v<Type>)
@ -59,4 +59,5 @@ inline auto range(Type end)
else
return internal::rangeImpl<Type, std::underlying_type_t<Type>>(0, end);
}
}

View File

@ -4,9 +4,6 @@
#include <memory>
#include <utility>
namespace ext
{
template <class F>
class [[nodiscard]] basic_scope_guard
{
@ -105,10 +102,9 @@ using scope_guard = basic_scope_guard<std::function<void(void)>>;
template <class F>
inline basic_scope_guard<F> make_scope_guard(F && function_) { return std::forward<F>(function_); }
}
#define SCOPE_EXIT_CONCAT(n, ...) \
const auto scope_exit##n = ext::make_scope_guard([&] { __VA_ARGS__; })
const auto scope_exit##n = make_scope_guard([&] { __VA_ARGS__; })
#define SCOPE_EXIT_FWD(n, ...) SCOPE_EXIT_CONCAT(n, __VA_ARGS__)
#define SCOPE_EXIT(...) SCOPE_EXIT_FWD(__LINE__, __VA_ARGS__)

View File

@ -1,6 +1,6 @@
#pragma once
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <common/logger_useful.h>
#include <Common/MemoryTracker.h>

View File

@ -2,8 +2,6 @@
#include <memory>
namespace ext
{
/** Allows to make std::shared_ptr from T with protected constructor.
*
@ -36,4 +34,3 @@ struct is_shared_ptr<std::shared_ptr<T>>
template <typename T>
inline constexpr bool is_shared_ptr_v = is_shared_ptr<T>::value;
}

View File

@ -109,10 +109,7 @@ public:
constexpr explicit operator bool() const noexcept;
template <class T>
using _integral_not_wide_integer_class = typename std::enable_if<std::is_arithmetic<T>::value, T>::type;
template <class T, class = _integral_not_wide_integer_class<T>>
template <typename T, typename = std::enable_if_t<std::is_arithmetic_v<T>, T>>
constexpr operator T() const noexcept;
constexpr operator long double() const noexcept;

View File

@ -255,13 +255,13 @@ struct integer<Bits, Signed>::_impl
set_multiplier<double>(self, alpha);
self *= max_int;
self += static_cast<uint64_t>(t - alpha * static_cast<T>(max_int)); // += b_i
self += static_cast<uint64_t>(t - floor(alpha) * static_cast<T>(max_int)); // += b_i
}
constexpr static void wide_integer_from_builtin(integer<Bits, Signed>& self, double rhs) noexcept
constexpr static void wide_integer_from_builtin(integer<Bits, Signed> & self, double rhs) noexcept
{
constexpr int64_t max_int = std::numeric_limits<int64_t>::max();
constexpr int64_t min_int = std::numeric_limits<int64_t>::min();
constexpr int64_t min_int = std::numeric_limits<int64_t>::lowest();
/// There are values in int64 that have more than 53 significant bits (in terms of double
/// representation). Such values, being promoted to double, are rounded up or down. If they are rounded up,
@ -271,14 +271,14 @@ struct integer<Bits, Signed>::_impl
/// The necessary check here is that long double has enough significant (mantissa) bits to store the
/// int64_t max value precisely.
//TODO Be compatible with Apple aarch64
// TODO Be compatible with Apple aarch64
#if not (defined(__APPLE__) && defined(__aarch64__))
static_assert(LDBL_MANT_DIG >= 64,
"On your system long double has less than 64 precision bits,"
"On your system long double has less than 64 precision bits, "
"which may result in UB when initializing double from int64_t");
#endif
if ((rhs > 0 && rhs < static_cast<long double>(max_int)) || (rhs < 0 && rhs > static_cast<long double>(min_int)))
if (rhs > static_cast<long double>(min_int) && rhs < static_cast<long double>(max_int))
{
self = static_cast<int64_t>(rhs);
return;

View File

@ -21,7 +21,7 @@
#include <fstream>
#include <sstream>
#include <memory>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <Poco/Observer.h>
#include <Poco/AutoPtr.h>

View File

@ -1,30 +0,0 @@
#pragma once
#include <string.h>
#include <algorithm>
#include <type_traits>
namespace ext
{
/** \brief Returns value `from` converted to type `To` while retaining bit representation.
* `To` and `From` must satisfy `CopyConstructible`.
*/
template <typename To, typename From>
std::decay_t<To> bit_cast(const From & from)
{
To res {};
memcpy(static_cast<void*>(&res), &from, std::min(sizeof(res), sizeof(from)));
return res;
}
/** \brief Returns value `from` converted to type `To` while retaining bit representation.
* `To` and `From` must satisfy `CopyConstructible`.
*/
template <typename To, typename From>
std::decay_t<To> safe_bit_cast(const From & from)
{
static_assert(sizeof(To) == sizeof(From), "bit cast on types of different width");
return bit_cast<To, From>(from);
}
}

View File

@ -1,49 +0,0 @@
#pragma once
#include <chrono>
#include <string>
#include <sstream>
#include <cctz/time_zone.h>
namespace ext
{
inline std::string to_string(const std::time_t & time)
{
return cctz::format("%Y-%m-%d %H:%M:%S", std::chrono::system_clock::from_time_t(time), cctz::local_time_zone());
}
template <typename Clock, typename Duration = typename Clock::duration>
std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
{
// Don't use DateLUT because it shows weird characters for
// TimePoint::max(). I wish we could use C++20 format, but it's not
// there yet.
// return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp));
auto in_time_t = std::chrono::system_clock::to_time_t(tp);
return to_string(in_time_t);
}
template <typename Rep, typename Period = std::ratio<1>>
std::string to_string(const std::chrono::duration<Rep, Period> & duration)
{
auto seconds_as_int = std::chrono::duration_cast<std::chrono::seconds>(duration);
if (seconds_as_int == duration)
return std::to_string(seconds_as_int.count()) + "s";
auto seconds_as_double = std::chrono::duration_cast<std::chrono::duration<double>>(duration);
return std::to_string(seconds_as_double.count()) + "s";
}
template <typename Clock, typename Duration = typename Clock::duration>
std::ostream & operator<<(std::ostream & o, const std::chrono::time_point<Clock, Duration> & tp)
{
return o << to_string(tp);
}
template <typename Rep, typename Period = std::ratio<1>>
std::ostream & operator<<(std::ostream & o, const std::chrono::duration<Rep, Period> & duration)
{
return o << to_string(duration);
}
}

View File

@ -1,24 +0,0 @@
#pragma once
#include <iterator>
namespace ext
{
/** \brief Returns collection of specified container-type.
* Retains stored value_type, constructs resulting collection using iterator range. */
template <template <typename...> class ResultCollection, typename Collection>
auto collection_cast(const Collection & collection)
{
using value_type = typename Collection::value_type;
return ResultCollection<value_type>(std::begin(collection), std::end(collection));
}
/** \brief Returns collection of specified type.
* Performs implicit conversion of between source and result value_type, if available and required. */
template <typename ResultCollection, typename Collection>
auto collection_cast(const Collection & collection)
{
return ResultCollection(std::begin(collection), std::end(collection));
}
}

View File

@ -1,60 +0,0 @@
#pragma once
#include <ext/size.h>
#include <type_traits>
#include <utility>
#include <iterator>
/** \brief Provides a wrapper view around a container, allowing to iterate over it's elements and indices.
* Allow writing code like shown below:
*
* std::vector<T> v = getVector();
* for (const std::pair<const std::size_t, T &> index_and_value : ext::enumerate(v))
* std::cout << "element " << index_and_value.first << " is " << index_and_value.second << std::endl;
*/
namespace ext
{
template <typename It> struct enumerate_iterator
{
using traits = typename std::iterator_traits<It>;
using iterator_category = typename traits::iterator_category;
using value_type = std::pair<const std::size_t, typename traits::value_type>;
using difference_type = typename traits::difference_type;
using reference = std::pair<const std::size_t, typename traits::reference>;
std::size_t idx;
It it;
enumerate_iterator(const std::size_t idx_, It it_) : idx{idx_}, it{it_} {}
auto operator*() const { return reference(idx, *it); }
bool operator!=(const enumerate_iterator & other) const { return it != other.it; }
enumerate_iterator & operator++() { return ++idx, ++it, *this; }
};
template <typename Collection> struct enumerate_wrapper
{
using underlying_iterator = decltype(std::begin(std::declval<Collection &>()));
using iterator = enumerate_iterator<underlying_iterator>;
Collection & collection;
enumerate_wrapper(Collection & collection_) : collection(collection_) {}
auto begin() { return iterator(0, std::begin(collection)); }
auto end() { return iterator(ext::size(collection), std::end(collection)); }
};
template <typename Collection> auto enumerate(Collection & collection)
{
return enumerate_wrapper<Collection>{collection};
}
template <typename Collection> auto enumerate(const Collection & collection)
{
return enumerate_wrapper<const Collection>{collection};
}
}

View File

@ -1,24 +0,0 @@
#pragma once
#include <utility>
namespace ext
{
/// \brief Identity function for use with other algorithms as a pass-through.
class identity
{
/** \brief Function pointer type template for converting identity to a function pointer.
* Presumably useless, provided for completeness. */
template <typename T> using function_ptr_t = T &&(*)(T &&);
/** \brief Implementation of identity as a non-instance member function for taking function pointer. */
template <typename T> static T && invoke(T && t) { return std::forward<T>(t); }
public:
/** \brief Returns the value passed as a sole argument using perfect forwarding. */
template <typename T> T && operator()(T && t) const { return std::forward<T>(t); }
/** \brief Allows conversion of identity instance to a function pointer. */
template <typename T> operator function_ptr_t<T>() const { return &invoke; };
};
}

View File

@ -1,43 +0,0 @@
#pragma once
#include <utility>
#include <type_traits>
#include <array>
/** \brief Produces std::array of specified size, containing copies of provided object.
* Copy is performed N-1 times, and the last element is being moved.
* This helper allows to initialize std::array in place.
*/
namespace ext
{
namespace detail
{
template<std::size_t size, typename T, std::size_t... indexes>
constexpr auto make_array_n_impl(T && value, std::index_sequence<indexes...>)
{
/// Comma is used to make N-1 copies of value
return std::array<std::decay_t<T>, size>{ (static_cast<void>(indexes), value)..., std::forward<T>(value) };
}
}
template<typename T>
constexpr auto make_array_n(std::integral_constant<std::size_t, 0>, T &&)
{
return std::array<std::decay_t<T>, 0>{};
}
template<std::size_t size, typename T>
constexpr auto make_array_n(std::integral_constant<std::size_t, size>, T && value)
{
return detail::make_array_n_impl<size>(std::forward<T>(value), std::make_index_sequence<size - 1>{});
}
template<std::size_t size, typename T>
constexpr auto make_array_n(T && value)
{
return make_array_n(std::integral_constant<std::size_t, size>{}, std::forward<T>(value));
}
}

View File

@ -1,51 +0,0 @@
#pragma once
#include <type_traits>
#include <boost/iterator/transform_iterator.hpp>
namespace ext
{
/// \brief Strip type off top level reference and cv-qualifiers thus allowing storage in containers
template <typename T>
using unqualified_t = std::remove_cv_t<std::remove_reference_t<T>>;
/** \brief Returns collection of the same container-type as the input collection,
* with each element transformed by the application of `mapper`.
*/
template <template <typename...> class Collection, typename... Params, typename Mapper>
auto map(const Collection<Params...> & collection, const Mapper mapper)
{
using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
return Collection<value_type>(
boost::make_transform_iterator(std::begin(collection), mapper),
boost::make_transform_iterator(std::end(collection), mapper));
}
/** \brief Returns collection of specified container-type,
* with each element transformed by the application of `mapper`.
* Allows conversion between different container-types, e.g. std::vector to std::list
*/
template <template <typename...> class ResultCollection, typename Collection, typename Mapper>
auto map(const Collection & collection, const Mapper mapper)
{
using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
return ResultCollection<value_type>(
boost::make_transform_iterator(std::begin(collection), mapper),
boost::make_transform_iterator(std::end(collection), mapper));
}
/** \brief Returns collection of specified type,
* with each element transformed by the application of `mapper`.
* Allows leveraging implicit conversion between the result of applying `mapper` and R::value_type.
*/
template <typename ResultCollection, typename Collection, typename Mapper>
auto map(const Collection & collection, const Mapper mapper)
{
return ResultCollection(
boost::make_transform_iterator(std::begin(collection), mapper),
boost::make_transform_iterator(std::end(collection), mapper));
}
}

View File

@ -1,25 +0,0 @@
#pragma once
#include <vector>
namespace ext
{
/// Moves all arguments starting from the second to the end of the vector.
/// For example, `push_back(vec, a1, a2, a3)` is a more compact way to write
/// `vec.push_back(a1); vec.push_back(a2); vec.push_back(a3);`
/// This function is like boost::range::push_back() but works for noncopyable types too.
template <typename T>
void push_back(std::vector<T> &)
{
}
template <typename T, typename FirstArg, typename... OtherArgs>
void push_back(std::vector<T> & vec, FirstArg && first, OtherArgs &&... other)
{
vec.reserve(vec.size() + sizeof...(other) + 1);
vec.emplace_back(std::move(first));
push_back(vec, std::move(other)...);
}
}

View File

@ -1,14 +0,0 @@
#pragma once
#include <cstdlib>
namespace ext
{
/** \brief Returns number of elements in an automatic array. */
template <typename T, std::size_t N>
constexpr std::size_t size(const T (&)[N]) noexcept { return N; }
/** \brief Returns number of in a container providing size() member function. */
template <typename T> constexpr auto size(const T & t) { return t.size(); }
}

View File

@ -1,27 +0,0 @@
#pragma once
namespace ext
{
template <typename T>
class unlock_guard
{
public:
unlock_guard(T & mutex_) : mutex(mutex_)
{
mutex.unlock();
}
~unlock_guard()
{
mutex.lock();
}
unlock_guard(const unlock_guard &) = delete;
unlock_guard & operator=(const unlock_guard &) = delete;
private:
T & mutex;
};
}

View File

@ -8,13 +8,6 @@
extern "C" {
#endif
#include <pthread.h>
size_t __pthread_get_minstack(const pthread_attr_t * attr)
{
return 1048576; /// This is a guess. Don't sure it is correct.
}
#include <signal.h>
#include <unistd.h>
#include <string.h>
@ -141,6 +134,8 @@ int __open_2(const char *path, int oflag)
}
#include <pthread.h>
/// No-ops.
int pthread_setname_np(pthread_t thread, const char *name) { return 0; }
int pthread_getname_np(pthread_t thread, char *name, size_t len) { name[0] = '\0'; return 0; };

View File

@ -4,12 +4,14 @@
#include <Core/Block.h>
#include <Interpreters/InternalTextLogsQueue.h>
#include <Interpreters/TextLog.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <sys/time.h>
#include <Poco/Message.h>
#include <Common/CurrentThread.h>
#include <Common/DNSResolver.h>
#include <common/getThreadId.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/IO.h>
namespace DB
{
@ -26,16 +28,48 @@ void OwnSplitChannel::log(const Poco::Message & msg)
auto matches = masker->wipeSensitiveData(message_text);
if (matches > 0)
{
logSplit({msg, message_text}); // we will continue with the copy of original message with text modified
tryLogSplit({msg, message_text}); // we will continue with the copy of original message with text modified
return;
}
}
logSplit(msg);
tryLogSplit(msg);
}
void OwnSplitChannel::tryLogSplit(const Poco::Message & msg)
{
try
{
logSplit(msg);
}
/// It is better to catch the errors here in order to avoid
/// breaking some functionality because of unexpected "File not
/// found" (or similar) error.
///
/// For example StorageDistributedDirectoryMonitor will mark batch
/// as broken, some MergeTree code can also be affected.
///
/// Also note, that we cannot log the exception here, since this
/// will lead to recursion, using regular tryLogCurrentException().
/// but let's log it into the stderr at least.
catch (...)
{
MemoryTracker::LockExceptionInThread lock_memory_tracker(VariableContext::Global);
const std::string & exception_message = getCurrentExceptionMessage(true);
const std::string & message = msg.getText();
/// NOTE: errors are ignored, since nothing can be done.
writeRetry(STDERR_FILENO, "Cannot add message to the log: ");
writeRetry(STDERR_FILENO, message.data(), message.size());
writeRetry(STDERR_FILENO, "\n");
writeRetry(STDERR_FILENO, exception_message.data(), exception_message.size());
writeRetry(STDERR_FILENO, "\n");
}
}
void OwnSplitChannel::logSplit(const Poco::Message & msg)
{
ExtendedLogMessage msg_ext = ExtendedLogMessage::getFrom(msg);

View File

@ -24,6 +24,7 @@ public:
private:
void logSplit(const Poco::Message & msg);
void tryLogSplit(const Poco::Message & msg);
using ChannelPtr = Poco::AutoPtr<Poco::Channel>;
/// Handler and its pointer casted to extended interface

View File

@ -2,7 +2,7 @@
#include <errmsg.h>
#include <mysql.h>
#else
#include <mysql/errmsg.h>
#include <mysql/errmsg.h> //Y_IGNORE
#include <mysql/mysql.h>
#endif

39
base/mysqlxx/ya.make Normal file
View File

@ -0,0 +1,39 @@
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
LIBRARY()
OWNER(g:clickhouse)
CFLAGS(-g0)
PEERDIR(
contrib/restricted/boost/libs
contrib/libs/libmysql_r
contrib/libs/poco/Foundation
contrib/libs/poco/Util
)
ADDINCL(
GLOBAL clickhouse/base
clickhouse/base
contrib/libs/libmysql_r
)
NO_COMPILER_WARNINGS()
NO_UTIL()
SRCS(
Connection.cpp
Exception.cpp
Pool.cpp
PoolFactory.cpp
PoolWithFailover.cpp
Query.cpp
ResultBase.cpp
Row.cpp
UseQueryResult.cpp
Value.cpp
)
END()

28
base/mysqlxx/ya.make.in Normal file
View File

@ -0,0 +1,28 @@
LIBRARY()
OWNER(g:clickhouse)
CFLAGS(-g0)
PEERDIR(
contrib/restricted/boost/libs
contrib/libs/libmysql_r
contrib/libs/poco/Foundation
contrib/libs/poco/Util
)
ADDINCL(
GLOBAL clickhouse/base
clickhouse/base
contrib/libs/libmysql_r
)
NO_COMPILER_WARNINGS()
NO_UTIL()
SRCS(
<? find . -name '*.cpp' | grep -v -F tests/ | grep -v -F examples | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -4,6 +4,7 @@ RECURSE(
common
daemon
loggers
mysqlxx
pcg-random
widechar_width
readpassphrase

View File

@ -1,9 +1,12 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54452)
# This variables autochanged by release_lib.sh:
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54453)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 7)
SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 976ccc2e908ac3bc28f763bfea8134ea0a121b40)
SET(VERSION_DESCRIBE v21.7.1.1-prestable)
SET(VERSION_STRING 21.7.1.1)
SET(VERSION_GITHASH fb895056568e26200629c7d19626e92d2dedc70d)
SET(VERSION_DESCRIBE v21.8.1.1-prestable)
SET(VERSION_STRING 21.8.1.1)
# end of autochange

57
cmake/embed_binary.cmake Normal file
View File

@ -0,0 +1,57 @@
# Embed a set of resource files into a resulting object file.
#
# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)
#
# This will generate a static library target named `<target>`, which contains the contents of
# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
#
# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
# These are:
# 1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
# 2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
# 2. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
#
# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
macro(clickhouse_embed_binaries)
set(one_value_args TARGET RESOURCE_DIR)
set(resources RESOURCES)
cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
if (NOT DEFINED EMBED_TARGET)
message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
endif()
if (NOT DEFINED EMBED_RESOURCE_DIR)
set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
endif()
list(LENGTH EMBED_RESOURCES N_RESOURCES)
if (N_RESOURCES LESS 1)
message(FATAL_ERROR "The list of binary resources to embed may not be empty")
endif()
add_library("${EMBED_TARGET}" STATIC)
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
set(BINARY_FILE_NAME "${RESOURCE_FILE}")
# Normalize the name of the resource.
string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
# Generate the configured assembly file in the output directory.
configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
# Set the include directory for relative paths specified for `.incbin` directive.
set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
endforeach()
endmacro()

View File

@ -1,7 +1,7 @@
if(NOT OS_FREEBSD AND NOT APPLE)
if(NOT OS_FREEBSD)
option(ENABLE_S3 "Enable S3" ${ENABLE_LIBRARIES})
elseif(ENABLE_S3 OR USE_INTERNAL_AWS_S3_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use S3 on Apple or FreeBSD")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use S3 on FreeBSD")
endif()
if(NOT ENABLE_S3)

View File

@ -4,6 +4,6 @@ if (NOT USE_YAML_CPP)
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/yaml-cpp")
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/yaml-cpp/README.md")
message (ERROR "submodule contrib/yaml-cpp is missing. to fix try run: \n git submodule update --init --recursive")
endif()

View File

@ -34,7 +34,6 @@ endif()
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (abseil-cpp-cmake)
add_subdirectory (antlr4-runtime-cmake)
add_subdirectory (boost-cmake)
add_subdirectory (cctz-cmake)
add_subdirectory (consistent-hashing)
@ -61,7 +60,6 @@ endif()
add_subdirectory (poco-cmake)
add_subdirectory (croaring-cmake)
# TODO: refactor the contrib libraries below this comment.
if (USE_INTERNAL_ZSTD_LIBRARY)

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 2a1bf7d87b4a03561fc66fbb49cee8a288983c5d
Subproject commit 976874b7aa7f422bf4ea595bb7d1166c617b1c26

@ -1 +0,0 @@
Subproject commit 672643e9a427ef803abf13bc8cb4989606553d64

View File

@ -1,156 +0,0 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/antlr4-runtime")
set (SRCS
"${LIBRARY_DIR}/ANTLRErrorListener.cpp"
"${LIBRARY_DIR}/ANTLRErrorStrategy.cpp"
"${LIBRARY_DIR}/ANTLRFileStream.cpp"
"${LIBRARY_DIR}/ANTLRInputStream.cpp"
"${LIBRARY_DIR}/atn/AbstractPredicateTransition.cpp"
"${LIBRARY_DIR}/atn/ActionTransition.cpp"
"${LIBRARY_DIR}/atn/AmbiguityInfo.cpp"
"${LIBRARY_DIR}/atn/ArrayPredictionContext.cpp"
"${LIBRARY_DIR}/atn/ATN.cpp"
"${LIBRARY_DIR}/atn/ATNConfig.cpp"
"${LIBRARY_DIR}/atn/ATNConfigSet.cpp"
"${LIBRARY_DIR}/atn/ATNDeserializationOptions.cpp"
"${LIBRARY_DIR}/atn/ATNDeserializer.cpp"
"${LIBRARY_DIR}/atn/ATNSerializer.cpp"
"${LIBRARY_DIR}/atn/ATNSimulator.cpp"
"${LIBRARY_DIR}/atn/ATNState.cpp"
"${LIBRARY_DIR}/atn/AtomTransition.cpp"
"${LIBRARY_DIR}/atn/BasicBlockStartState.cpp"
"${LIBRARY_DIR}/atn/BasicState.cpp"
"${LIBRARY_DIR}/atn/BlockEndState.cpp"
"${LIBRARY_DIR}/atn/BlockStartState.cpp"
"${LIBRARY_DIR}/atn/ContextSensitivityInfo.cpp"
"${LIBRARY_DIR}/atn/DecisionEventInfo.cpp"
"${LIBRARY_DIR}/atn/DecisionInfo.cpp"
"${LIBRARY_DIR}/atn/DecisionState.cpp"
"${LIBRARY_DIR}/atn/EmptyPredictionContext.cpp"
"${LIBRARY_DIR}/atn/EpsilonTransition.cpp"
"${LIBRARY_DIR}/atn/ErrorInfo.cpp"
"${LIBRARY_DIR}/atn/LexerAction.cpp"
"${LIBRARY_DIR}/atn/LexerActionExecutor.cpp"
"${LIBRARY_DIR}/atn/LexerATNConfig.cpp"
"${LIBRARY_DIR}/atn/LexerATNSimulator.cpp"
"${LIBRARY_DIR}/atn/LexerChannelAction.cpp"
"${LIBRARY_DIR}/atn/LexerCustomAction.cpp"
"${LIBRARY_DIR}/atn/LexerIndexedCustomAction.cpp"
"${LIBRARY_DIR}/atn/LexerModeAction.cpp"
"${LIBRARY_DIR}/atn/LexerMoreAction.cpp"
"${LIBRARY_DIR}/atn/LexerPopModeAction.cpp"
"${LIBRARY_DIR}/atn/LexerPushModeAction.cpp"
"${LIBRARY_DIR}/atn/LexerSkipAction.cpp"
"${LIBRARY_DIR}/atn/LexerTypeAction.cpp"
"${LIBRARY_DIR}/atn/LL1Analyzer.cpp"
"${LIBRARY_DIR}/atn/LookaheadEventInfo.cpp"
"${LIBRARY_DIR}/atn/LoopEndState.cpp"
"${LIBRARY_DIR}/atn/NotSetTransition.cpp"
"${LIBRARY_DIR}/atn/OrderedATNConfigSet.cpp"
"${LIBRARY_DIR}/atn/ParseInfo.cpp"
"${LIBRARY_DIR}/atn/ParserATNSimulator.cpp"
"${LIBRARY_DIR}/atn/PlusBlockStartState.cpp"
"${LIBRARY_DIR}/atn/PlusLoopbackState.cpp"
"${LIBRARY_DIR}/atn/PrecedencePredicateTransition.cpp"
"${LIBRARY_DIR}/atn/PredicateEvalInfo.cpp"
"${LIBRARY_DIR}/atn/PredicateTransition.cpp"
"${LIBRARY_DIR}/atn/PredictionContext.cpp"
"${LIBRARY_DIR}/atn/PredictionMode.cpp"
"${LIBRARY_DIR}/atn/ProfilingATNSimulator.cpp"
"${LIBRARY_DIR}/atn/RangeTransition.cpp"
"${LIBRARY_DIR}/atn/RuleStartState.cpp"
"${LIBRARY_DIR}/atn/RuleStopState.cpp"
"${LIBRARY_DIR}/atn/RuleTransition.cpp"
"${LIBRARY_DIR}/atn/SemanticContext.cpp"
"${LIBRARY_DIR}/atn/SetTransition.cpp"
"${LIBRARY_DIR}/atn/SingletonPredictionContext.cpp"
"${LIBRARY_DIR}/atn/StarBlockStartState.cpp"
"${LIBRARY_DIR}/atn/StarLoopbackState.cpp"
"${LIBRARY_DIR}/atn/StarLoopEntryState.cpp"
"${LIBRARY_DIR}/atn/TokensStartState.cpp"
"${LIBRARY_DIR}/atn/Transition.cpp"
"${LIBRARY_DIR}/atn/WildcardTransition.cpp"
"${LIBRARY_DIR}/BailErrorStrategy.cpp"
"${LIBRARY_DIR}/BaseErrorListener.cpp"
"${LIBRARY_DIR}/BufferedTokenStream.cpp"
"${LIBRARY_DIR}/CharStream.cpp"
"${LIBRARY_DIR}/CommonToken.cpp"
"${LIBRARY_DIR}/CommonTokenFactory.cpp"
"${LIBRARY_DIR}/CommonTokenStream.cpp"
"${LIBRARY_DIR}/ConsoleErrorListener.cpp"
"${LIBRARY_DIR}/DefaultErrorStrategy.cpp"
"${LIBRARY_DIR}/dfa/DFA.cpp"
"${LIBRARY_DIR}/dfa/DFASerializer.cpp"
"${LIBRARY_DIR}/dfa/DFAState.cpp"
"${LIBRARY_DIR}/dfa/LexerDFASerializer.cpp"
"${LIBRARY_DIR}/DiagnosticErrorListener.cpp"
"${LIBRARY_DIR}/Exceptions.cpp"
"${LIBRARY_DIR}/FailedPredicateException.cpp"
"${LIBRARY_DIR}/InputMismatchException.cpp"
"${LIBRARY_DIR}/InterpreterRuleContext.cpp"
"${LIBRARY_DIR}/IntStream.cpp"
"${LIBRARY_DIR}/Lexer.cpp"
"${LIBRARY_DIR}/LexerInterpreter.cpp"
"${LIBRARY_DIR}/LexerNoViableAltException.cpp"
"${LIBRARY_DIR}/ListTokenSource.cpp"
"${LIBRARY_DIR}/misc/InterpreterDataReader.cpp"
"${LIBRARY_DIR}/misc/Interval.cpp"
"${LIBRARY_DIR}/misc/IntervalSet.cpp"
"${LIBRARY_DIR}/misc/MurmurHash.cpp"
"${LIBRARY_DIR}/misc/Predicate.cpp"
"${LIBRARY_DIR}/NoViableAltException.cpp"
"${LIBRARY_DIR}/Parser.cpp"
"${LIBRARY_DIR}/ParserInterpreter.cpp"
"${LIBRARY_DIR}/ParserRuleContext.cpp"
"${LIBRARY_DIR}/ProxyErrorListener.cpp"
"${LIBRARY_DIR}/RecognitionException.cpp"
"${LIBRARY_DIR}/Recognizer.cpp"
"${LIBRARY_DIR}/RuleContext.cpp"
"${LIBRARY_DIR}/RuleContextWithAltNum.cpp"
"${LIBRARY_DIR}/RuntimeMetaData.cpp"
"${LIBRARY_DIR}/support/Any.cpp"
"${LIBRARY_DIR}/support/Arrays.cpp"
"${LIBRARY_DIR}/support/CPPUtils.cpp"
"${LIBRARY_DIR}/support/guid.cpp"
"${LIBRARY_DIR}/support/StringUtils.cpp"
"${LIBRARY_DIR}/Token.cpp"
"${LIBRARY_DIR}/TokenSource.cpp"
"${LIBRARY_DIR}/TokenStream.cpp"
"${LIBRARY_DIR}/TokenStreamRewriter.cpp"
"${LIBRARY_DIR}/tree/ErrorNode.cpp"
"${LIBRARY_DIR}/tree/ErrorNodeImpl.cpp"
"${LIBRARY_DIR}/tree/IterativeParseTreeWalker.cpp"
"${LIBRARY_DIR}/tree/ParseTree.cpp"
"${LIBRARY_DIR}/tree/ParseTreeListener.cpp"
"${LIBRARY_DIR}/tree/ParseTreeVisitor.cpp"
"${LIBRARY_DIR}/tree/ParseTreeWalker.cpp"
"${LIBRARY_DIR}/tree/pattern/Chunk.cpp"
"${LIBRARY_DIR}/tree/pattern/ParseTreeMatch.cpp"
"${LIBRARY_DIR}/tree/pattern/ParseTreePattern.cpp"
"${LIBRARY_DIR}/tree/pattern/ParseTreePatternMatcher.cpp"
"${LIBRARY_DIR}/tree/pattern/RuleTagToken.cpp"
"${LIBRARY_DIR}/tree/pattern/TagChunk.cpp"
"${LIBRARY_DIR}/tree/pattern/TextChunk.cpp"
"${LIBRARY_DIR}/tree/pattern/TokenTagToken.cpp"
"${LIBRARY_DIR}/tree/TerminalNode.cpp"
"${LIBRARY_DIR}/tree/TerminalNodeImpl.cpp"
"${LIBRARY_DIR}/tree/Trees.cpp"
"${LIBRARY_DIR}/tree/xpath/XPath.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathLexer.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathLexerErrorListener.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathRuleAnywhereElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathRuleElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathTokenAnywhereElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathTokenElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathWildcardAnywhereElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathWildcardElement.cpp"
"${LIBRARY_DIR}/UnbufferedCharStream.cpp"
"${LIBRARY_DIR}/UnbufferedTokenStream.cpp"
"${LIBRARY_DIR}/Vocabulary.cpp"
"${LIBRARY_DIR}/WritableToken.cpp"
)
add_library (antlr4-runtime ${SRCS})
target_include_directories (antlr4-runtime SYSTEM PUBLIC ${LIBRARY_DIR})

2
contrib/arrow vendored

@ -1 +1 @@
Subproject commit 616b3dc76a0c8450b4027ded8a78e9619d7c845f
Subproject commit debf751a129bdda9ff4d1e895e08957ff77000a1

View File

@ -188,6 +188,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/array/util.cc"
"${LIBRARY_DIR}/array/validate.cc"
"${LIBRARY_DIR}/compute/api_aggregate.cc"
"${LIBRARY_DIR}/compute/api_scalar.cc"
"${LIBRARY_DIR}/compute/api_vector.cc"
"${LIBRARY_DIR}/compute/cast.cc"
@ -198,8 +199,11 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_tdigest.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc"
"${LIBRARY_DIR}/compute/kernels/codegen_internal.cc"
"${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
@ -243,6 +247,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/io/interfaces.cc"
"${LIBRARY_DIR}/io/memory.cc"
"${LIBRARY_DIR}/io/slow.cc"
"${LIBRARY_DIR}/io/transform.cc"
"${LIBRARY_DIR}/tensor/coo_converter.cc"
"${LIBRARY_DIR}/tensor/csf_converter.cc"
@ -256,11 +261,8 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/bitmap_builders.cc"
"${LIBRARY_DIR}/util/bitmap_ops.cc"
"${LIBRARY_DIR}/util/bpacking.cc"
"${LIBRARY_DIR}/util/cancel.cc"
"${LIBRARY_DIR}/util/compression.cc"
"${LIBRARY_DIR}/util/compression_lz4.cc"
"${LIBRARY_DIR}/util/compression_snappy.cc"
"${LIBRARY_DIR}/util/compression_zlib.cc"
"${LIBRARY_DIR}/util/compression_zstd.cc"
"${LIBRARY_DIR}/util/cpu_info.cc"
"${LIBRARY_DIR}/util/decimal.cc"
"${LIBRARY_DIR}/util/delimiting.cc"
@ -268,13 +270,14 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/future.cc"
"${LIBRARY_DIR}/util/int_util.cc"
"${LIBRARY_DIR}/util/io_util.cc"
"${LIBRARY_DIR}/util/iterator.cc"
"${LIBRARY_DIR}/util/key_value_metadata.cc"
"${LIBRARY_DIR}/util/logging.cc"
"${LIBRARY_DIR}/util/memory.cc"
"${LIBRARY_DIR}/util/mutex.cc"
"${LIBRARY_DIR}/util/string_builder.cc"
"${LIBRARY_DIR}/util/string.cc"
"${LIBRARY_DIR}/util/task_group.cc"
"${LIBRARY_DIR}/util/tdigest.cc"
"${LIBRARY_DIR}/util/thread_pool.cc"
"${LIBRARY_DIR}/util/time.cc"
"${LIBRARY_DIR}/util/trie.cc"
@ -368,14 +371,14 @@ set(PARQUET_SRCS
"${LIBRARY_DIR}/column_reader.cc"
"${LIBRARY_DIR}/column_scanner.cc"
"${LIBRARY_DIR}/column_writer.cc"
"${LIBRARY_DIR}/deprecated_io.cc"
"${LIBRARY_DIR}/encoding.cc"
"${LIBRARY_DIR}/encryption.cc"
"${LIBRARY_DIR}/encryption_internal.cc"
"${LIBRARY_DIR}/encryption/encryption.cc"
"${LIBRARY_DIR}/encryption/encryption_internal.cc"
"${LIBRARY_DIR}/encryption/internal_file_decryptor.cc"
"${LIBRARY_DIR}/encryption/internal_file_encryptor.cc"
"${LIBRARY_DIR}/exception.cc"
"${LIBRARY_DIR}/file_reader.cc"
"${LIBRARY_DIR}/file_writer.cc"
"${LIBRARY_DIR}/internal_file_decryptor.cc"
"${LIBRARY_DIR}/internal_file_encryptor.cc"
"${LIBRARY_DIR}/level_conversion.cc"
"${LIBRARY_DIR}/level_comparison.cc"
"${LIBRARY_DIR}/metadata.cc"
@ -385,6 +388,8 @@ set(PARQUET_SRCS
"${LIBRARY_DIR}/properties.cc"
"${LIBRARY_DIR}/schema.cc"
"${LIBRARY_DIR}/statistics.cc"
"${LIBRARY_DIR}/stream_reader.cc"
"${LIBRARY_DIR}/stream_writer.cc"
"${LIBRARY_DIR}/types.cc"
"${GEN_LIBRARY_DIR}/parquet_constants.cpp"

2
contrib/avro vendored

@ -1 +1 @@
Subproject commit 1ee16d8c5a7808acff5cf0475f771195d9aa3faa
Subproject commit e43c46e87fd32eafdc09471e95344555454c5ef8

View File

@ -26,7 +26,7 @@ if (NOT USE_INTERNAL_CCTZ_LIBRARY)
set_property (TARGET cctz PROPERTY IMPORTED_LOCATION ${LIBRARY_CCTZ})
set_property (TARGET cctz PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_CCTZ})
endif()
set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
@ -39,6 +39,7 @@ if (NOT USE_INTERNAL_CCTZ_LIBRARY)
endif()
if (NOT EXTERNAL_CCTZ_LIBRARY_FOUND OR NOT EXTERNAL_CCTZ_LIBRARY_WORKS)
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(USE_INTERNAL_CCTZ_LIBRARY 1)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
@ -70,63 +71,36 @@ if (NOT EXTERNAL_CCTZ_LIBRARY_FOUND OR NOT EXTERNAL_CCTZ_LIBRARY_WORKS)
set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
# remove existing copies so that its generated fresh on each build.
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
# Build a libray with embedded tzdata
if (OS_LINUX)
# get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
set(TZ_OBJS)
# get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
# each file in that dir (except of tab and localtime) store the info about timezone
execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | sort | paste -sd ';'"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES)
set(TIMEZONE_RESOURCE_FILES)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
# each file in that dir (except of tab and localtime) store the info about timezone
execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | sort | paste -sd ';' -"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES)
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
string(REPLACE "/" "_" TIMEZONE_ID ${TIMEZONE})
string(REPLACE "+" "_PLUS_" TIMEZONE_ID ${TIMEZONE_ID})
set(TZ_OBJ ${TIMEZONE_ID}.o)
set(TZ_OBJS ${TZ_OBJS} ${TZ_OBJ})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
# https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake
# PPC64LE fails to do this with objcopy, use ld or lld instead
if (ARCH_PPC64LE)
add_custom_command(OUTPUT ${TZ_OBJ}
COMMAND cp "${TZDIR}/${TIMEZONE}" "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}"
COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && ${CMAKE_LINKER} -m elf64lppc -r -b binary -o ${TZ_OBJ} ${TIMEZONE_ID}
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}")
else()
add_custom_command(OUTPUT ${TZ_OBJ}
COMMAND cp "${TZDIR}/${TIMEZONE}" "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}"
COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS}
--rename-section .data=.rodata,alloc,load,readonly,data,contents ${TIMEZONE_ID} ${TZ_OBJ}
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}")
endif()
set_source_files_properties(${TZ_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
endforeach(TIMEZONE)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
add_library(tzdata STATIC ${TZ_OBJS})
set_target_properties(tzdata PROPERTIES LINKER_LANGUAGE C)
# whole-archive prevents symbols from being discarded for unknown reason
# CMake can shuffle each of target_link_libraries arguments with other
# libraries in linker command. To avoid this we hardcode whole-archive
# library into single string.
add_dependencies(cctz tzdata)
target_link_libraries(cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
else ()
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {nullptr};\n" )
endif ()
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
endforeach(TIMEZONE)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
clickhouse_embed_binaries(
TARGET tzdata
RESOURCE_DIR "${TZDIR}"
RESOURCES ${TIMEZONE_RESOURCE_FILES}
)
add_dependencies(cctz tzdata)
target_link_libraries(cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
endif ()
message (STATUS "Using cctz")

2
contrib/cppkafka vendored

@ -1 +1 @@
Subproject commit 57a599d99c540e647bcd0eb9ea77c523cca011b3
Subproject commit 5a119f689f8a4d90d10a9635e7ee2bee5c127de1

2
contrib/croaring vendored

@ -1 +1 @@
Subproject commit d8402939b5c9fc134fd4fcf058fe0f7006d2b129
Subproject commit 2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0

2
contrib/flatbuffers vendored

@ -1 +1 @@
Subproject commit 22e3ffc66d2d7d72d1414390aa0f04ffd114a5a1
Subproject commit eb3f827948241ce0e701516f16cd67324802bce9

View File

@ -1,6 +1,6 @@
if (SANITIZE OR NOT (
((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE)) OR
(OS_DARWIN AND CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
(OS_DARWIN AND (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" OR CMAKE_BUILD_TYPE STREQUAL "Debug"))
))
if (ENABLE_JEMALLOC)
message (${RECONFIGURE_MESSAGE_LEVEL}

2
contrib/libpq vendored

@ -1 +1 @@
Subproject commit c7624588ddd84f153dd5990e81b886e4568bddde
Subproject commit e071ea570f8985aa00e34f5b9d50a3cfe666327e

View File

@ -8,7 +8,7 @@ set(SRCS
"${LIBPQ_SOURCE_DIR}/fe-lobj.c"
"${LIBPQ_SOURCE_DIR}/fe-misc.c"
"${LIBPQ_SOURCE_DIR}/fe-print.c"
"${LIBPQ_SOURCE_DIR}/fe-protocol2.c"
"${LIBPQ_SOURCE_DIR}/fe-trace.c"
"${LIBPQ_SOURCE_DIR}/fe-protocol3.c"
"${LIBPQ_SOURCE_DIR}/fe-secure.c"
"${LIBPQ_SOURCE_DIR}/fe-secure-common.c"
@ -18,8 +18,12 @@ set(SRCS
"${LIBPQ_SOURCE_DIR}/pqexpbuffer.c"
"${LIBPQ_SOURCE_DIR}/common/scram-common.c"
"${LIBPQ_SOURCE_DIR}/common/sha2_openssl.c"
"${LIBPQ_SOURCE_DIR}/common/sha2.c"
"${LIBPQ_SOURCE_DIR}/common/sha1.c"
"${LIBPQ_SOURCE_DIR}/common/md5.c"
"${LIBPQ_SOURCE_DIR}/common/md5_common.c"
"${LIBPQ_SOURCE_DIR}/common/hmac_openssl.c"
"${LIBPQ_SOURCE_DIR}/common/cryptohash.c"
"${LIBPQ_SOURCE_DIR}/common/saslprep.c"
"${LIBPQ_SOURCE_DIR}/common/unicode_norm.c"
"${LIBPQ_SOURCE_DIR}/common/ip.c"

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit 58d2a028d1600225ac3a478d6b3a06ba2f0c01f6
Subproject commit 357608d11b7a1961c3fb7db2ef9a5dbb2e87da77

View File

@ -64,7 +64,7 @@ set (HDRS
add_library(libpqxx ${SRCS} ${HDRS})
target_link_libraries(libpqxx PUBLIC ${LIBPQ_LIBRARY})
target_include_directories (libpqxx PRIVATE "${LIBRARY_DIR}/include")
target_include_directories (libpqxx SYSTEM PRIVATE "${LIBRARY_DIR}/include")
# crutch
set(CM_CONFIG_H_IN "${LIBRARY_DIR}/include/pqxx/config.h.in")

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit a491c27b33109a842d577c0f7ac5f5f218859181
Subproject commit cdcc3d8c6f6e80a0886082704a0902d61d8d3ffe

View File

@ -1,7 +1,7 @@
add_library(murmurhash
src/murmurhash2.cpp
src/murmurhash3.cpp
include/murmurhash2.h
include/murmurhash3.h)
src/MurmurHash2.cpp
src/MurmurHash3.cpp
include/MurmurHash2.h
include/MurmurHash3.h)
target_include_directories (murmurhash PUBLIC include)

View File

@ -0,0 +1,49 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#ifndef MURMURHASH2_H
#define MURMURHASH2_H
#include <stddef.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed );
uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed );
uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed );
uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed );
uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed );
uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed );
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
#endif // _MURMURHASH2_H_

View File

@ -2,7 +2,10 @@
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#pragma once
#ifndef MURMURHASH3_H
#define MURMURHASH3_H
#include <stddef.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@ -23,20 +26,22 @@ typedef unsigned __int64 uint64_t;
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, size_t len, uint32_t seed, void * out );
void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, size_t len, uint32_t seed, void * out );
void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, size_t len, uint32_t seed, void * out );
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
#endif // _MURMURHASH3_H_

View File

@ -1,31 +0,0 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#pragma once
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
uint32_t MurmurHash2 (const void * key, int len, uint32_t seed);
uint64_t MurmurHash64A (const void * key, int len, uint64_t seed);
uint64_t MurmurHash64B (const void * key, int len, uint64_t seed);
uint32_t MurmurHash2A (const void * key, int len, uint32_t seed);
uint32_t MurmurHashNeutral2 (const void * key, int len, uint32_t seed);
uint32_t MurmurHashAligned2 (const void * key, int len, uint32_t seed);

View File

@ -0,0 +1,523 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
#include "MurmurHash2.h"
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
uint32_t h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
// 64-bit hash for 64-bit platforms
uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed )
{
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t * data = (const uint64_t *)key;
const uint64_t * end = data + (len/8);
while(data != end)
{
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
const unsigned char * data2 = (const unsigned char*)data;
switch(len & 7)
{
case 7: h ^= uint64_t(data2[6]) << 48;
case 6: h ^= uint64_t(data2[5]) << 40;
case 5: h ^= uint64_t(data2[4]) << 32;
case 4: h ^= uint64_t(data2[3]) << 24;
case 3: h ^= uint64_t(data2[2]) << 16;
case 2: h ^= uint64_t(data2[1]) << 8;
case 1: h ^= uint64_t(data2[0]);
h *= m;
};
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
// 64-bit hash for 32-bit platforms
uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h1 = uint32_t(seed) ^ len;
uint32_t h2 = uint32_t(seed >> 32);
const uint32_t * data = (const uint32_t *)key;
while(len >= 8)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
}
if(len >= 4)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
}
switch(len)
{
case 3: h2 ^= ((unsigned char*)data)[2] << 16;
case 2: h2 ^= ((unsigned char*)data)[1] << 8;
case 1: h2 ^= ((unsigned char*)data)[0];
h2 *= m;
};
h1 ^= h2 >> 18; h1 *= m;
h2 ^= h1 >> 22; h2 *= m;
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
uint64_t h = h1;
h = (h << 32) | h2;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHash2A, by Austin Appleby
// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.
// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t l = len;
const unsigned char * data = (const unsigned char *)key;
uint32_t h = seed;
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
mmix(h,k);
data += 4;
len -= 4;
}
uint32_t t = 0;
switch(len)
{
case 3: t ^= data[2] << 16;
case 2: t ^= data[1] << 8;
case 1: t ^= data[0];
};
mmix(h,t);
mmix(h,l);
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// CMurmurHash2A, by Austin Appleby
// This is a sample implementation of MurmurHash2A designed to work
// incrementally.
// Usage -
// CMurmurHash2A hasher
// hasher.Begin(seed);
// hasher.Add(data1,size1);
// hasher.Add(data2,size2);
// ...
// hasher.Add(dataN,sizeN);
// uint32_t hash = hasher.End()
class CMurmurHash2A
{
public:
void Begin ( uint32_t seed = 0 )
{
m_hash = seed;
m_tail = 0;
m_count = 0;
m_size = 0;
}
void Add ( const unsigned char * data, size_t len )
{
m_size += len;
MixTail(data,len);
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
mmix(m_hash,k);
data += 4;
len -= 4;
}
MixTail(data,len);
}
uint32_t End ( void )
{
mmix(m_hash,m_tail);
mmix(m_hash,m_size);
m_hash ^= m_hash >> 13;
m_hash *= m;
m_hash ^= m_hash >> 15;
return m_hash;
}
private:
static const uint32_t m = 0x5bd1e995;
static const int r = 24;
void MixTail ( const unsigned char * & data, size_t & len )
{
while( len && ((len<4) || m_count) )
{
m_tail |= (*data++) << (m_count * 8);
m_count++;
len--;
if(m_count == 4)
{
mmix(m_hash,m_tail);
m_tail = 0;
m_count = 0;
}
}
}
uint32_t m_hash;
uint32_t m_tail;
uint32_t m_count;
uint32_t m_size;
};
//-----------------------------------------------------------------------------
// MurmurHashNeutral2, by Austin Appleby
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.
uint32_t MurmurHashNeutral2 ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h = seed ^ len;
const unsigned char * data = (const unsigned char *)key;
while(len >= 4)
{
uint32_t k;
k = data[0];
k |= data[1] << 8;
k |= data[2] << 16;
k |= data[3] << 24;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.
// Performance will be lower than MurmurHash2
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
const unsigned char * data = (const unsigned char *)key;
uint32_t h = seed ^ len;
size_t align = (uint64_t)data & 3;
if(align && (len >= 4))
{
// Pre-load the temp registers
uint32_t t = 0, d = 0;
switch(align)
{
case 1: t |= data[2] << 16;
case 2: t |= data[1] << 8;
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
int sl = 8 * (4-align);
int sr = 8 * align;
// Mix
while(len >= 4)
{
d = *(uint32_t *)data;
t = (t >> sr) | (d << sl);
uint32_t k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
// Handle leftover data in temp registers
d = 0;
if(len >= align)
{
switch(align)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
}
uint32_t k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
//----------
// Handle tail bytes
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
}
else
{
switch(len)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
case 0: h ^= (t >> sr) | (d << sl);
h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
else
{
while(len >= 4)
{
uint32_t k = *(uint32_t *)data;
MIX(h,k,m);
data += 4;
len -= 4;
}
//----------
// Handle tail bytes
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}
//-----------------------------------------------------------------------------

View File

@ -1,3 +1,4 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
@ -6,8 +7,8 @@
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
#include "murmurhash3.h"
#include <cstring>
#include "MurmurHash3.h"
#include <string.h>
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@ -93,7 +94,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
//-----------------------------------------------------------------------------
void MurmurHash3_x86_32 ( const void * key, int len,
void MurmurHash3_x86_32 ( const void * key, size_t len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -149,7 +150,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
//-----------------------------------------------------------------------------
void MurmurHash3_x86_128 ( const void * key, const int len,
void MurmurHash3_x86_128 ( const void * key, const size_t len,
uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -254,7 +255,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, const int len,
void MurmurHash3_x64_128 ( const void * key, const size_t len,
const uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
@ -332,3 +333,6 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
}
//-----------------------------------------------------------------------------

View File

@ -1,423 +0,0 @@
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
#include "murmurhash2.h"
#include <cstring>
// Platform-specific functions and macros
// Microsoft Visual Studio
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
uint32_t MurmurHash2(const void * key, int len, uint32_t seed)
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
const int r = 24;
// Initialize the hash to a 'random' value
uint32_t h = seed ^ len;
// Mix 4 bytes at a time into the hash
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
while (len >= 4)
{
uint32_t k;
memcpy(&k, data, sizeof(k));
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// Handle the last few bytes of the input array
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
// MurmurHash2, 64-bit versions, by Austin Appleby
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
// 64-bit hash for 64-bit platforms
uint64_t MurmurHash64A(const void * key, int len, uint64_t seed)
{
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t * data = reinterpret_cast<const uint64_t *>(key);
const uint64_t * end = data + (len/8);
while (data != end)
{
uint64_t k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
}
const unsigned char * data2 = reinterpret_cast<const unsigned char *>(data);
switch (len & 7)
{
case 7: h ^= static_cast<uint64_t>(data2[6]) << 48;
case 6: h ^= static_cast<uint64_t>(data2[5]) << 40;
case 5: h ^= static_cast<uint64_t>(data2[4]) << 32;
case 4: h ^= static_cast<uint64_t>(data2[3]) << 24;
case 3: h ^= static_cast<uint64_t>(data2[2]) << 16;
case 2: h ^= static_cast<uint64_t>(data2[1]) << 8;
case 1: h ^= static_cast<uint64_t>(data2[0]);
h *= m;
};
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
// 64-bit hash for 32-bit platforms
uint64_t MurmurHash64B(const void * key, int len, uint64_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h1 = static_cast<uint32_t>(seed) ^ len;
uint32_t h2 = static_cast<uint32_t>(seed >> 32);
const uint32_t * data = reinterpret_cast<const uint32_t *>(key);
while (len >= 8)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
uint32_t k2 = *data++;
k2 *= m; k2 ^= k2 >> r; k2 *= m;
h2 *= m; h2 ^= k2;
len -= 4;
}
if (len >= 4)
{
uint32_t k1 = *data++;
k1 *= m; k1 ^= k1 >> r; k1 *= m;
h1 *= m; h1 ^= k1;
len -= 4;
}
switch (len)
{
case 3: h2 ^= reinterpret_cast<const unsigned char *>(data)[2] << 16;
case 2: h2 ^= reinterpret_cast<const unsigned char *>(data)[1] << 8;
case 1: h2 ^= reinterpret_cast<const unsigned char *>(data)[0];
h2 *= m;
};
h1 ^= h2 >> 18; h1 *= m;
h2 ^= h1 >> 22; h2 *= m;
h1 ^= h2 >> 17; h1 *= m;
h2 ^= h1 >> 19; h2 *= m;
uint64_t h = h1;
h = (h << 32) | h2;
return h;
}
// MurmurHash2A, by Austin Appleby
// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
// construction. Bulk speed should be identical to Murmur2, small-key speed
// will be 10%-20% slower due to the added overhead at the end of the hash.
// This variant fixes a minor issue where null keys were more likely to
// collide with each other than expected, and also makes the function
// more amenable to incremental implementations.
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHash2A(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t l = len;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
uint32_t h = seed;
while (len >= 4)
{
uint32_t k = *reinterpret_cast<const uint32_t *>(data);
mmix(h,k);
data += 4;
len -= 4;
}
uint32_t t = 0;
switch (len)
{
case 3: t ^= data[2] << 16;
case 2: t ^= data[1] << 8;
case 1: t ^= data[0];
};
mmix(h,t);
mmix(h,l);
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
// MurmurHashNeutral2, by Austin Appleby
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.
uint32_t MurmurHashNeutral2(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h = seed ^ len;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
while (len >= 4)
{
uint32_t k;
k = data[0];
k |= data[1] << 8;
k |= data[2] << 16;
k |= data[3] << 24;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.
// Performance will be lower than MurmurHash2
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t MurmurHashAligned2(const void * key, int len, uint32_t seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
const unsigned char * data = reinterpret_cast<const unsigned char *>(key);
uint32_t h = seed ^ len;
int align = reinterpret_cast<uint64_t>(data) & 3;
if (align && (len >= 4))
{
// Pre-load the temp registers
uint32_t t = 0, d = 0;
switch (align)
{
case 1: t |= data[2] << 16;
case 2: t |= data[1] << 8;
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
int sl = 8 * (4-align);
int sr = 8 * align;
// Mix
while (len >= 4)
{
d = *(reinterpret_cast<const uint32_t *>(data));
t = (t >> sr) | (d << sl);
uint32_t k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
// Handle leftover data in temp registers
d = 0;
if (len >= align)
{
switch (align)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
}
uint32_t k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
//----------
// Handle tail bytes
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
}
else
{
switch (len)
{
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
case 0: h ^= (t >> sr) | (d << sl);
h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
else
{
while (len >= 4)
{
uint32_t k = *reinterpret_cast<const uint32_t *>(data);
MIX(h,k,m);
data += 4;
len -= 4;
}
// Handle tail bytes
switch (len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}

2
contrib/orc vendored

@ -1 +1 @@
Subproject commit 5981208e39447df84827f6a961d1da76bacb6078
Subproject commit 0a936f6bbdb9303308973073f8623b5a8d82eae1

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit 2b24f14594d7606792b92544bb112a6322ba34d7
Subproject commit c81be6c68b146f15f2096b7ef80e3f21fe27004c

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (21.7.1.1) unstable; urgency=low
clickhouse (21.8.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 20 May 2021 22:23:29 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 28 Jun 2021 00:50:15 +0300

View File

@ -1 +1 @@
#*/10 * * * * root (which service > /dev/null 2>&1 && (service clickhouse-server condstart ||:)) || /etc/init.d/clickhouse-server condstart > /dev/null 2>&1
#*/10 * * * * root ((which service > /dev/null 2>&1 && (service clickhouse-server condstart ||:)) || /etc/init.d/clickhouse-server condstart) > /dev/null 2>&1

View File

@ -43,29 +43,6 @@ command -v flock >/dev/null && FLOCK=flock
# Override defaults from optional config file
test -f /etc/default/clickhouse && . /etc/default/clickhouse
# On x86_64, check for required instruction set.
if uname -mpi | grep -q 'x86_64'; then
if ! grep -q 'sse4_2' /proc/cpuinfo; then
# On KVM, cpuinfo could falsely not report SSE 4.2 support, so skip the check.
if ! grep -q 'Common KVM processor' /proc/cpuinfo; then
# Some other VMs also report wrong flags in cpuinfo.
# Tricky way to test for instruction set:
# create temporary binary and run it;
# if it get caught illegal instruction signal,
# then required instruction set is not supported really.
#
# Generated this way:
# gcc -xc -Os -static -nostdlib - <<< 'void _start() { __asm__("pcmpgtq %%xmm0, %%xmm1; mov $0x3c, %%rax; xor %%rdi, %%rdi; syscall":::"memory"); }' && strip -R .note.gnu.build-id -R .comment -R .eh_frame -s ./a.out && gzip -c -9 ./a.out | base64 -w0; echo
if ! (echo -n 'H4sICAwAW1cCA2Eub3V0AKt39XFjYmRkgAEmBjsGEI+H0QHMd4CKGyCUAMUsGJiBJDNQNUiYlQEZOKDQclB9cnD9CmCSBYqJBRxQOvBpSQobGfqIAWn8FuYnPI4fsAGyPQz/87MeZtArziguKSpJTGLQK0mtKGGgGHADMSgoYH6AhTMPNHyE0NQzYuEzYzEXFr6CBPQDANAsXKTwAQAA' | base64 -d | gzip -d > /tmp/clickhouse_test_sse42 && chmod a+x /tmp/clickhouse_test_sse42 && /tmp/clickhouse_test_sse42); then
echo 'Warning! SSE 4.2 instruction set is not supported'
#exit 3
fi
fi
fi
fi
die()
{
@ -116,7 +93,7 @@ forcestop()
service_or_func()
{
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
service $PROGRAM $1
systemctl $1 $PROGRAM
else
$1
fi
@ -229,6 +206,7 @@ status()
case "$1" in
status)
status
exit 0
;;
esac

View File

@ -12,7 +12,6 @@ mkdir root
pushd root
mkdir lib lib64 etc tmp root
cp ${BUILD_DIR}/programs/clickhouse .
cp ${SRC_DIR}/programs/server/{config,users}.xml .
cp /lib/x86_64-linux-gnu/{libc.so.6,libdl.so.2,libm.so.6,libpthread.so.0,librt.so.1,libnss_dns.so.2,libresolv.so.2} lib
cp /lib64/ld-linux-x86-64.so.2 lib64
cp /etc/resolv.conf ./etc

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.7.1.*
ARG version=21.8.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -72,7 +72,7 @@ RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
&& cd .. \
&& rm -rf apple-libtapi
# Build and install tools for cross-linking to Darwin
# Build and install tools for cross-linking to Darwin (x86-64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
@ -81,8 +81,17 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd ../.. \
&& rm -rf cctools-port
# Download toolchain for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
# Build and install tools for cross-linking to Darwin (aarch64)
RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& cd cctools-port/cctools \
&& ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=aarch64-apple-darwin \
&& make install \
&& cd ../.. \
&& rm -rf cctools-port
# Download toolchain and SDK for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz
# Download toolchain for ARM
# It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling.

View File

@ -3,7 +3,9 @@
set -x -e
mkdir -p build/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64
mkdir -p build/cmake/toolchain/linux-aarch64
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1

View File

@ -58,6 +58,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
CLANG_PREFIX = "clang"
DARWIN_SUFFIX = "-darwin"
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
ARM_SUFFIX = "-aarch64"
FREEBSD_SUFFIX = "-freebsd"
@ -66,9 +67,10 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
is_clang = compiler.startswith(CLANG_PREFIX)
is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
is_cross_arm = compiler.endswith(ARM_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_cross_compile = is_cross_darwin or is_cross_arm or is_cross_freebsd
is_cross_compile = is_cross_darwin or is_cross_darwin_arm or is_cross_arm or is_cross_freebsd
# Explicitly use LLD with Clang by default.
# Don't force linker for cross-compilation.
@ -82,6 +84,13 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/x86_64-apple-darwin-ranlib")
cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake")
elif is_cross_darwin_arm:
cc = compiler[:-len(DARWIN_ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")
cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/aarch64-apple-darwin-install_name_tool")
cmake_flags.append("-DCMAKE_RANLIB:FILEPATH=/cctools/bin/aarch64-apple-darwin-ranlib")
cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld")
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake")
elif is_cross_arm:
cc = compiler[:-len(ARM_SUFFIX)]
cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake")
@ -185,8 +194,8 @@ if __name__ == "__main__":
parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
parser.add_argument("--output-dir", required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd",
"gcc-10"), default="clang-11")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
"clang-11-freebsd", "gcc-10"), default="clang-11")
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
parser.add_argument("--unbundled", action="store_true")
parser.add_argument("--split-binary", action="store_true")

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.7.1.*
ARG version=21.8.1.*
ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.7.1.*
ARG version=21.8.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -46,6 +46,7 @@ RUN apt-get update \
pigz \
pkg-config \
tzdata \
pv \
--yes --no-install-recommends
# Sanitizer options for services (clickhouse-server)

View File

@ -22,9 +22,9 @@ ENV SHA=nosha
ENV DATA="data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-11 -DCMAKE_C_COMPILER=/usr/bin/clang-11 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-11 -DCMAKE_C_COMPILER=/usr/bin/clang-11 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\
$CODEINDEX $HTML_RESULT_DIRECTORY -d $DATA && \
$CODEINDEX $HTML_RESULT_DIRECTORY -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
mv $HTML_RESULT_DIRECTORY /test_output

View File

@ -113,6 +113,7 @@ function start_server
echo "ClickHouse server pid '$server_pid' started and responded"
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
@ -159,7 +160,6 @@ function clone_submodules
SUBMODULES_TO_UPDATE=(
contrib/abseil-cpp
contrib/antlr4-runtime
contrib/boost
contrib/zlib-ng
contrib/libxml2
@ -381,6 +381,9 @@ function run_tests
# needs psql
01889_postgresql_protocol_null_fields
# needs pv
01923_network_receive_time_metric_insert
)
time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \

View File

@ -97,16 +97,13 @@ function fuzz
NEW_TESTS_OPT="${NEW_TESTS_OPT:-}"
fi
export CLICKHOUSE_WATCHDOG_ENABLE=0 # interferes with gdb
clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
server_pid=$!
kill -0 $server_pid
while ! clickhouse-client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
clickhouse-client --query "select 1"
kill -0 $server_pid
echo Server started
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
@ -115,12 +112,31 @@ thread apply all backtrace
continue
" > script.gdb
gdb -batch -command script.gdb -p "$(pidof clickhouse-server)" &
gdb -batch -command script.gdb -p $server_pid &
# Check connectivity after we attach gdb, because it might cause the server
# to freeze and the fuzzer will fail.
for _ in {1..60}
do
sleep 1
if clickhouse-client --query "select 1"
then
break
fi
done
clickhouse-client --query "select 1" # This checks that the server is responding
kill -0 $server_pid # This checks that it is our server that is started and not some other one
echo Server started and responded
# SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric.
# SC2046: Quote this to prevent word splitting. Actually I need word splitting.
# shellcheck disable=SC2012,SC2046
clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) $NEW_TESTS_OPT \
clickhouse-client \
--receive_timeout=10 \
--receive_data_timeout_ms=10000 \
--query-fuzzer-runs=1000 \
--queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
$NEW_TESTS_OPT \
> >(tail -n 100000 > fuzzer.log) \
2>&1 &
fuzzer_pid=$!
@ -185,7 +201,7 @@ continue
# The server has died.
task_exit_code=210
echo "failure" > status.txt
if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
if ! grep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
then
echo "Lost connection to server. See the logs." > description.txt
fi
@ -198,13 +214,17 @@ continue
echo "success" > status.txt
echo "OK" > description.txt
else
# The server was alive, but the fuzzer returned some error. Probably this
# is a problem in the fuzzer itself. Don't grep the server log in this
# case, because we will find a message about normal server termination
# (Received signal 15), which is confusing.
# The server was alive, but the fuzzer returned some error. This might
# be some client-side error detected by fuzzing, or a problem in the
# fuzzer itself. Don't grep the server log in this case, because we will
# find a message about normal server termination (Received signal 15),
# which is confusing.
task_exit_code=$fuzzer_exit_code
echo "failure" > status.txt
echo "Fuzzer failed ($fuzzer_exit_code). See the logs." > description.txt
{ grep --text -o "Found error:.*" fuzzer.log \
|| grep --text -o "Exception.*" fuzzer.log \
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
fi
}

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-integration-test .
FROM yandex/clickhouse-test-base
SHELL ["/bin/bash", "-c"]
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get -y install \
tzdata \
@ -20,7 +22,9 @@ RUN apt-get update \
krb5-user \
iproute2 \
lsof \
g++
g++ \
default-jre
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
@ -30,6 +34,19 @@ RUN apt-get clean
# Install MySQL ODBC driver
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
# Unfortunately this is required for a single test for conversion data from zookeeper to clickhouse-keeper.
# ZooKeeper is not started by default, but consumes some space in containers.
# 777 perms used to allow anybody to start/stop ZooKeeper
ENV ZOOKEEPER_VERSION='3.6.3'
RUN curl -O "https://mirrors.estointernet.in/apache/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
RUN tar -zxvf apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz && mv apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && chmod -R 777 /opt/zookeeper && rm apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz
RUN echo $'tickTime=2500 \n\
tickTime=2500 \n\
dataDir=/zookeeper \n\
clientPort=2181 \n\
maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg
RUN mkdir /zookeeper && chmod -R 777 /zookeeper
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -4,9 +4,9 @@ services:
image: sequenceiq/hadoop-docker:2.7.0
hostname: hdfs1
restart: always
ports:
- ${HDFS_NAME_EXTERNAL_PORT}:${HDFS_NAME_INTERNAL_PORT} #50070
- ${HDFS_DATA_EXTERNAL_PORT}:${HDFS_DATA_INTERNAL_PORT} #50075
expose:
- ${HDFS_NAME_PORT}
- ${HDFS_DATA_PORT}
entrypoint: /etc/bootstrap.sh -d
volumes:
- type: ${HDFS_FS:-tmpfs}

View File

@ -0,0 +1,23 @@
version: '2.3'
services:
bridge1:
image: yandex/clickhouse-jdbc-bridge
command: |
/bin/bash -c 'cat << EOF > config/datasources/self.json
{
"self": {
"jdbcUrl": "jdbc:clickhouse://instance:8123/test",
"username": "default",
"password": "",
"maximumPoolSize": 5
}
}
EOF
./docker-entrypoint.sh'
ports:
- 9020:9019
healthcheck:
test: ["CMD", "curl", "-s", "localhost:9019/ping"]
interval: 5s
timeout: 3s
retries: 30

View File

@ -14,18 +14,18 @@ services:
- type: ${KERBERIZED_HDFS_FS:-tmpfs}
source: ${KERBERIZED_HDFS_LOGS:-}
target: /var/log/hadoop-hdfs
ports:
- ${KERBERIZED_HDFS_NAME_EXTERNAL_PORT}:${KERBERIZED_HDFS_NAME_INTERNAL_PORT} #50070
- ${KERBERIZED_HDFS_DATA_EXTERNAL_PORT}:${KERBERIZED_HDFS_DATA_INTERNAL_PORT} #1006
expose:
- ${KERBERIZED_HDFS_NAME_PORT}
- ${KERBERIZED_HDFS_DATA_PORT}
depends_on:
- hdfskerberos
entrypoint: /etc/bootstrap.sh -d
hdfskerberos:
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
hostname: hdfskerberos
volumes:
- ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab
- ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
- /dev/urandom:/dev/random
ports: [88, 749]
expose: [88, 749]

View File

@ -10,7 +10,7 @@ echo '{
"storage-driver": "overlay2",
"insecure-registries" : ["dockerhub-proxy.sas.yp-c.yandex.net:5000"],
"registry-mirrors" : ["http://dockerhub-proxy.sas.yp-c.yandex.net:5000"]
}' | dd of=/etc/docker/daemon.json
}' | dd of=/etc/docker/daemon.json 2>/dev/null
dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log &

View File

@ -319,14 +319,14 @@ function get_profiles
wait
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type = 'QueryFinish' format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type = 'QueryFinish' format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
@ -409,10 +409,10 @@ create view right_query_log as select *
'$(cat "right-query-log.tsv.columns")');
create view query_logs as
select 0 version, query_id, ProfileEvents.Names, ProfileEvents.Values,
select 0 version, query_id, ProfileEvents,
query_duration_ms, memory_usage from left_query_log
union all
select 1 version, query_id, ProfileEvents.Names, ProfileEvents.Values,
select 1 version, query_id, ProfileEvents,
query_duration_ms, memory_usage from right_query_log
;
@ -424,7 +424,7 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-
with (
-- sumMapState with the list of all keys with '-0.' values. Negative zero is because
-- sumMap removes keys with positive zeros.
with (select groupUniqArrayArray(ProfileEvents.Names) from query_logs) as all_names
with (select groupUniqArrayArray(mapKeys(ProfileEvents)) from query_logs) as all_names
select arrayReduce('sumMapState', [(all_names, arrayMap(x->-0., all_names))])
) as all_metrics
select test, query_index, version, query_id,
@ -433,8 +433,8 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-
[
all_metrics,
arrayReduce('sumMapState',
[(ProfileEvents.Names,
arrayMap(x->toFloat64(x), ProfileEvents.Values))]
[(mapKeys(ProfileEvents),
arrayMap(x->toFloat64(x), mapValues(ProfileEvents)))]
),
arrayReduce('sumMapState', [(
['client_time', 'server_time', 'memory_usage'],
@ -554,12 +554,6 @@ create table query_metric_stats_denorm engine File(TSVWithNamesAndTypes,
" 2> >(tee -a analyze/errors.log 1>&2)
# Fetch historical query variability thresholds from the CI database
clickhouse-local --query "
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') thresholds
on query_metric_stats.test = thresholds.test
"
if [ -v CHPC_DATABASE_URL ]
then
set +x # Don't show password in the log
@ -577,7 +571,8 @@ then
--date_time_input_format=best_effort)
# Precision is going to be 1.5 times worse for PRs. How do I know it? I ran this:
# Precision is going to be 1.5 times worse for PRs, because we run the queries
# less times. How do I know it? I ran this:
# SELECT quantilesExact(0., 0.1, 0.5, 0.75, 0.95, 1.)(p / m)
# FROM
# (
@ -592,19 +587,27 @@ then
# query_display_name
# HAVING count(*) > 100
# )
# The file can be empty if the server is inaccessible, so we can't use TSVWithNamesAndTypes.
#
# The file can be empty if the server is inaccessible, so we can't use
# TSVWithNamesAndTypes.
#
"${client[@]}" --query "
select test, query_index,
quantileExact(0.99)(abs(diff)) max_diff,
quantileExactIf(0.99)(stat_threshold, abs(diff) < stat_threshold) * 1.5 max_stat_threshold,
quantileExact(0.99)(abs(diff)) * 1.5 AS max_diff,
quantileExactIf(0.99)(stat_threshold, abs(diff) < stat_threshold) * 1.5 AS max_stat_threshold,
query_display_name
from query_metrics_v2
where event_date > now() - interval 1 month
-- We use results at least one week in the past, so that the current
-- changes do not immediately influence the statistics, and we have
-- some time to notice that something is wrong.
where event_date between now() - interval 1 month - interval 1 week
and now() - interval 1 week
and metric = 'client_time'
and pr_number = 0
group by test, query_index, query_display_name
having count(*) > 100
" > analyze/historical-thresholds.tsv
set -x
else
touch analyze/historical-thresholds.tsv
fi
@ -1000,10 +1003,11 @@ create view query_log as select *
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select
test, query_index, query_id,
ProfileEvents.Values value, ProfileEvents.Names metric
from query_log array join ProfileEvents
select test, query_index, query_id, value, metric
from query_log
array join
mapValues(ProfileEvents) as value,
mapKeys(ProfileEvents) as metric
join unstable_query_runs using (query_id)
;
@ -1224,11 +1228,60 @@ unset IFS
function upload_results
{
# Prepare info for the CI checks table.
rm ci-checks.tsv
clickhouse-local --query "
create view queries as select * from file('report/queries.tsv', TSVWithNamesAndTypes,
'changed_fail int, changed_show int, unstable_fail int, unstable_show int,
left float, right float, diff float, stat_threshold float,
test text, query_index int, query_display_name text');
create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
as select
$PR_TO_TEST pull_request_number,
'$SHA_TO_TEST' commit_sha,
'Performance' check_name,
'$(sed -n 's/.*<!--status: \(.*\)-->/\1/p' report.html)' check_status,
-- TODO toDateTime() can't parse output of 'date', so no time for now.
($(date +%s) - $CHPC_CHECK_START_TIMESTAMP) * 1000 check_duration_ms,
fromUnixTimestamp($CHPC_CHECK_START_TIMESTAMP) check_start_time,
test_name,
test_status,
test_duration_ms,
report_url,
$PR_TO_TEST = 0
? 'https://github.com/ClickHouse/ClickHouse/commit/$SHA_TO_TEST'
: 'https://github.com/ClickHouse/ClickHouse/pull/$PR_TO_TEST' pull_request_url,
'' commit_url,
'' task_url,
'' base_ref,
'' base_repo,
'' head_ref,
'' head_repo
from (
select '' test_name,
'$(sed -n 's/.*<!--message: \(.*\)-->/\1/p' report.html)' test_status,
0 test_duration_ms,
'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url
union all
select test || ' #' || toString(query_index), 'slower' test_status, 0 test_duration_ms,
'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.'
|| test || '.' || toString(query_index) report_url
from queries where changed_fail != 0 and diff > 0
union all
select test || ' #' || toString(query_index), 'unstable' test_status, 0 test_duration_ms,
'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.'
|| test || '.' || toString(query_index) report_url
from queries where unstable_fail != 0
)
;
"
if ! [ -v CHPC_DATABASE_URL ]
then
echo Database for test results is not specified, will not upload them.
return 0
fi
fi
set +x # Don't show password in the log
client=(clickhouse-client
@ -1292,6 +1345,10 @@ $REF_SHA $SHA_TO_TEST $(numactl --show | sed -n 's/^cpubind:[[:space:]]\+/numact
$REF_SHA $SHA_TO_TEST $(numactl --hardware | sed -n 's/^available:[[:space:]]\+/numactl-available /p')
EOF
# Also insert some data about the check into the CI checks table.
"${client[@]}" --query "INSERT INTO "'"'"gh-data"'"'".checks FORMAT TSVWithNamesAndTypes" \
< ci-checks.tsv
set -x
}

View File

@ -1,6 +1,9 @@
#!/bin/bash
set -ex
CHPC_CHECK_START_TIMESTAMP="$(date +%s)"
export CHPC_CHECK_START_TIMESTAMP
# Use the packaged repository to find the revision we will compare to.
function find_reference_sha
{

View File

@ -489,7 +489,7 @@ if args.report == 'main':
text = tableStart('Test Times')
text += tableHeader(columns, attrs)
allowed_average_run_time = 1.6 # 30 seconds per test at 7 runs
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
for r in rows:
anchor = f'{currentTableAnchor()}.{r[0]}'
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
@ -561,8 +561,9 @@ if args.report == 'main':
# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
error_tests += very_unstable_queries
status = 'failure'
if very_unstable_queries > 5:
error_tests += very_unstable_queries
status = 'failure'
message_array.append(str(very_unstable_queries) + ' unstable')
error_tests += slow_average_tests

View File

@ -112,12 +112,15 @@ timeout "$MAX_RUN_TIME" bash -c run_tests ||:
./process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz ||:
mv /var/log/clickhouse-server/stderr.log /test_output/ ||:
if [[ -n "$WITH_COVERAGE" ]] && [[ "$WITH_COVERAGE" -eq 1 ]]; then
tar -chf /test_output/clickhouse_coverage.tar.gz /profraw ||:
fi
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||:
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.gz ||:
pigz < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.gz ||:
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:

View File

@ -6,14 +6,16 @@ import argparse
import csv
OK_SIGN = "[ OK "
FAIL_SING = "[ FAIL "
TIMEOUT_SING = "[ Timeout! "
FAIL_SIGN = "[ FAIL "
TIMEOUT_SIGN = "[ Timeout! "
UNKNOWN_SIGN = "[ UNKNOWN "
SKIPPED_SIGN = "[ SKIPPED "
HUNG_SIGN = "Found hung queries in processlist"
NO_TASK_TIMEOUT_SIGN = "All tests have finished"
RETRIES_SIGN = "Some tests were restarted"
def process_test_log(log_path):
total = 0
skipped = 0
@ -21,6 +23,7 @@ def process_test_log(log_path):
failed = 0
success = 0
hung = False
retries = False
task_timeout = True
test_results = []
with open(log_path, 'r') as test_file:
@ -30,7 +33,9 @@ def process_test_log(log_path):
task_timeout = False
if HUNG_SIGN in line:
hung = True
if any(sign in line for sign in (OK_SIGN, FAIL_SING, UNKNOWN_SIGN, SKIPPED_SIGN)):
if RETRIES_SIGN in line:
retries = True
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)):
test_name = line.split(' ')[2].split(':')[0]
test_time = ''
@ -42,10 +47,10 @@ def process_test_log(log_path):
pass
total += 1
if TIMEOUT_SING in line:
if TIMEOUT_SIGN in line:
failed += 1
test_results.append((test_name, "Timeout", test_time))
elif FAIL_SING in line:
elif FAIL_SIGN in line:
failed += 1
test_results.append((test_name, "FAIL", test_time))
elif UNKNOWN_SIGN in line:
@ -57,7 +62,7 @@ def process_test_log(log_path):
else:
success += int(OK_SIGN in line)
test_results.append((test_name, "OK", test_time))
return total, skipped, unknown, failed, success, hung, task_timeout, test_results
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
def process_result(result_path):
test_results = []
@ -73,7 +78,7 @@ def process_result(result_path):
state = "error"
if result_path and os.path.exists(result_path):
total, skipped, unknown, failed, success, hung, task_timeout, test_results = process_test_log(result_path)
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path)
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1))
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
@ -83,9 +88,14 @@ def process_result(result_path):
if hung:
description = "Some queries hung, "
state = "failure"
test_results.append(("Some queries hung", "FAIL", "0"))
elif task_timeout:
description = "Timeout, "
state = "failure"
test_results.append(("Timeout", "FAIL", "0"))
elif retries:
description = "Some tests restarted, "
test_results.append(("Some tests restarted", "SKIPPED", "0"))
else:
description = ""

View File

@ -35,7 +35,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then
# simpliest way to forward env variables to server
sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon
else
service clickhouse-server start
sudo clickhouse start
fi
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
@ -80,6 +80,8 @@ function run_tests()
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--replicated-database')
ADDITIONAL_OPTIONS+=('--jobs')
ADDITIONAL_OPTIONS+=('2')
else
# Too many tests fail for DatabaseReplicated in parallel. All other
# configurations are OK.
@ -101,6 +103,7 @@ timeout "$MAX_RUN_TIME" bash -c run_tests ||:
clickhouse-client -q "system flush logs" ||:
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &
clickhouse-client -q "select * from system.query_log format TSVWithNamesAndTypes" | pigz > /test_output/query-log.tsv.gz &
clickhouse-client -q "select * from system.query_thread_log format TSVWithNamesAndTypes" | pigz > /test_output/query-thread-log.tsv.gz &
@ -138,6 +141,8 @@ tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_l
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||:
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.gz ||:
pigz < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.gz ||:
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:

View File

@ -8,6 +8,7 @@ RUN apt-get update -y && \
python3-wheel \
brotli \
netcat-openbsd \
postgresql-client \
zstd
RUN python3 -m pip install \

View File

@ -1,4 +1,6 @@
#!/bin/bash
# shellcheck disable=SC2094
# shellcheck disable=SC2086
set -x
@ -37,6 +39,17 @@ function stop()
function start()
{
# Rename existing log file - it will be more convenient to read separate files for separate server runs.
if [ -f '/var/log/clickhouse-server/clickhouse-server.log' ]
then
log_file_counter=1
while [ -f "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}" ]
do
log_file_counter=$((log_file_counter + 1))
done
mv '/var/log/clickhouse-server/clickhouse-server.log' "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}"
fi
counter=0
until clickhouse-client --query "SELECT 1"
do
@ -55,6 +68,7 @@ function start()
done
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
@ -140,7 +154,11 @@ zgrep -Fa "########################################" /test_output/* > /dev/null
&& echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
# Put logs into /test_output/
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz
for log_file in /var/log/clickhouse-server/clickhouse-server.log*
do
pigz < "${log_file}" > /test_output/"$(basename ${log_file})".gz
done
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
mv /var/log/clickhouse-server/stderr.log /test_output/
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:

View File

@ -34,6 +34,16 @@ def get_options(i):
if i % 2 == 1:
options.append(" --database=test_{}".format(i))
if i % 7 == 0:
options.append(" --client-option='join_use_nulls=1'")
if i % 14 == 0:
options.append(' --client-option="join_algorithm=\'partial_merge\'"')
if i % 21 == 0:
options.append(' --client-option="join_algorithm=\'auto\'"')
options.append(' --client-option="max_rows_in_join=1000"')
if i == 13:
options.append(" --client-option='memory_tracker_fault_probability=0.00001'")

Some files were not shown because too many files have changed in this diff Show More