mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge branch 'master' into master
This commit is contained in:
commit
b285e3f5df
@ -165,8 +165,14 @@ elseif(GLIBC_COMPATIBILITY)
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
|
||||
endif ()
|
||||
|
||||
# Make sure the final executable has symbols exported
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
|
||||
if (OS_LINUX)
|
||||
# We should not export dynamic symbols, because:
|
||||
# - The main clickhouse binary does not use dlopen,
|
||||
# and whatever is poisoning it by LD_PRELOAD should not link to our symbols.
|
||||
# - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries
|
||||
# should not expose their symbols to ODBC drivers and libraries.
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
|
||||
endif ()
|
||||
|
||||
if (OS_DARWIN)
|
||||
# The `-all_load` flag forces loading of all symbols from all libraries,
|
||||
|
@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols)
|
||||
# Splits debug symbols into separate file, leaves the binary untouched:
|
||||
COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
|
||||
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
|
||||
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
|
||||
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
|
||||
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check.
|
||||
# Also, after we disabled the export of symbols for dynamic linking, we still need to keep a static symbol table for good stack traces.
|
||||
COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
|
||||
# Associate stripped binary with debug symbols:
|
||||
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
|
||||
COMMENT "Stripping clickhouse binary" VERBATIM
|
||||
|
@ -64,7 +64,7 @@ then
|
||||
ninja $NINJA_FLAGS clickhouse-keeper
|
||||
|
||||
ls -la ./programs/
|
||||
ldd ./programs/clickhouse-keeper
|
||||
ldd ./programs/clickhouse-keeper ||:
|
||||
|
||||
if [ -n "$MAKE_DEB" ]; then
|
||||
# No quotes because I want it to expand to nothing if empty.
|
||||
|
@ -11,6 +11,7 @@ RUN apt-get update \
|
||||
pv \
|
||||
ripgrep \
|
||||
zstd \
|
||||
locales \
|
||||
--yes --no-install-recommends
|
||||
|
||||
# Sanitizer options for services (clickhouse-server)
|
||||
@ -28,6 +29,9 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_de
|
||||
ENV UBSAN_OPTIONS='print_stacktrace=1'
|
||||
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
|
||||
|
||||
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
|
||||
|
||||
|
@ -18,10 +18,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
python3-pip \
|
||||
shellcheck \
|
||||
yamllint \
|
||||
locales \
|
||||
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /root/.cache/pip
|
||||
|
||||
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
|
||||
# Architecture of the image when BuildKit/buildx is used
|
||||
ARG TARGETARCH
|
||||
|
||||
|
@ -13,10 +13,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
|
||||
library-bridge.cpp
|
||||
)
|
||||
|
||||
if (OS_LINUX)
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
|
||||
endif ()
|
||||
|
||||
clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
|
||||
|
||||
target_link_libraries(clickhouse-library-bridge PRIVATE
|
||||
|
@ -15,12 +15,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
|
||||
validateODBCConnectionString.cpp
|
||||
)
|
||||
|
||||
if (OS_LINUX)
|
||||
# clickhouse-odbc-bridge is always a separate binary.
|
||||
# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
|
||||
endif ()
|
||||
|
||||
clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
|
||||
|
||||
target_link_libraries(clickhouse-odbc-bridge PRIVATE
|
||||
|
@ -1,7 +1,6 @@
|
||||
#if defined(__ELF__) && !defined(OS_FREEBSD)
|
||||
|
||||
#include <Common/SymbolIndex.h>
|
||||
#include <base/hex.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
@ -62,9 +61,11 @@ Otherwise you will get only exported symbols from program headers.
|
||||
#endif
|
||||
|
||||
#define __msan_unpoison_string(X) // NOLINT
|
||||
#define __msan_unpoison(X, Y) // NOLINT
|
||||
#if defined(ch_has_feature)
|
||||
# if ch_has_feature(memory_sanitizer)
|
||||
# undef __msan_unpoison_string
|
||||
# undef __msan_unpoison
|
||||
# include <sanitizer/msan_interface.h>
|
||||
# endif
|
||||
#endif
|
||||
@ -98,10 +99,13 @@ void collectSymbolsFromProgramHeaders(
|
||||
/* Iterate over all headers of the current shared lib
|
||||
* (first call is for the executable itself)
|
||||
*/
|
||||
__msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum));
|
||||
__msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr));
|
||||
for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index)
|
||||
{
|
||||
/* Further processing is only needed if the dynamic section is reached
|
||||
*/
|
||||
__msan_unpoison(&info->dlpi_phdr[header_index], sizeof(info->dlpi_phdr[header_index]));
|
||||
if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC)
|
||||
continue;
|
||||
|
||||
@ -109,6 +113,7 @@ void collectSymbolsFromProgramHeaders(
|
||||
* Its address is the shared lib's address + the virtual address
|
||||
*/
|
||||
const ElfW(Dyn) * dyn_begin = reinterpret_cast<const ElfW(Dyn) *>(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr);
|
||||
__msan_unpoison(&dyn_begin, sizeof(dyn_begin));
|
||||
|
||||
/// For an unknown reason, addresses are sometimes relative and sometimes absolute.
|
||||
auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr)
|
||||
@ -122,44 +127,53 @@ void collectSymbolsFromProgramHeaders(
|
||||
*/
|
||||
|
||||
size_t sym_cnt = 0;
|
||||
for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it)
|
||||
{
|
||||
ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
|
||||
|
||||
// TODO: this branch leads to invalid address of the hash table. Need further investigation.
|
||||
// if (it->d_tag == DT_HASH)
|
||||
// {
|
||||
// const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
|
||||
// sym_cnt = hash[1];
|
||||
// break;
|
||||
// }
|
||||
if (it->d_tag == DT_GNU_HASH)
|
||||
const auto * it = dyn_begin;
|
||||
while (true)
|
||||
{
|
||||
/// This code is based on Musl-libc.
|
||||
__msan_unpoison(it, sizeof(*it));
|
||||
if (it->d_tag != DT_NULL)
|
||||
break;
|
||||
|
||||
const uint32_t * buckets = nullptr;
|
||||
const uint32_t * hashval = nullptr;
|
||||
ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
|
||||
|
||||
const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
|
||||
|
||||
buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4);
|
||||
|
||||
for (ElfW(Word) i = 0; i < hash[0]; ++i)
|
||||
if (buckets[i] > sym_cnt)
|
||||
sym_cnt = buckets[i];
|
||||
|
||||
if (sym_cnt)
|
||||
if (it->d_tag == DT_GNU_HASH)
|
||||
{
|
||||
sym_cnt -= hash[1];
|
||||
hashval = buckets + hash[0] + sym_cnt;
|
||||
do
|
||||
/// This code is based on Musl-libc.
|
||||
|
||||
const uint32_t * buckets = nullptr;
|
||||
const uint32_t * hashval = nullptr;
|
||||
|
||||
const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
|
||||
|
||||
__msan_unpoison(&hash[0], sizeof(*hash));
|
||||
__msan_unpoison(&hash[1], sizeof(*hash));
|
||||
__msan_unpoison(&hash[2], sizeof(*hash));
|
||||
|
||||
buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4);
|
||||
|
||||
__msan_unpoison(buckets, hash[0] * sizeof(buckets[0]));
|
||||
|
||||
for (ElfW(Word) i = 0; i < hash[0]; ++i)
|
||||
if (buckets[i] > sym_cnt)
|
||||
sym_cnt = buckets[i];
|
||||
|
||||
if (sym_cnt)
|
||||
{
|
||||
++sym_cnt;
|
||||
sym_cnt -= hash[1];
|
||||
hashval = buckets + hash[0] + sym_cnt;
|
||||
__msan_unpoison(&hashval, sizeof(hashval));
|
||||
do
|
||||
{
|
||||
++sym_cnt;
|
||||
}
|
||||
while (!(*hashval++ & 1));
|
||||
}
|
||||
while (!(*hashval++ & 1));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
@ -190,6 +204,8 @@ void collectSymbolsFromProgramHeaders(
|
||||
/* Get the pointer to the first entry of the symbol table */
|
||||
const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(base_address);
|
||||
|
||||
__msan_unpoison(elf_sym, sym_cnt * sizeof(*elf_sym));
|
||||
|
||||
/* Iterate over the symbol table */
|
||||
for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index)
|
||||
{
|
||||
@ -197,6 +213,7 @@ void collectSymbolsFromProgramHeaders(
|
||||
* This is located at the address of st_name relative to the beginning of the string table.
|
||||
*/
|
||||
const char * sym_name = &strtab[elf_sym[sym_index].st_name];
|
||||
__msan_unpoison_string(sym_name);
|
||||
|
||||
if (!sym_name)
|
||||
continue;
|
||||
@ -223,13 +240,18 @@ void collectSymbolsFromProgramHeaders(
|
||||
#if !defined USE_MUSL
|
||||
String getBuildIDFromProgramHeaders(dl_phdr_info * info)
|
||||
{
|
||||
__msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum));
|
||||
__msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr));
|
||||
for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index)
|
||||
{
|
||||
const ElfPhdr & phdr = info->dlpi_phdr[header_index];
|
||||
__msan_unpoison(&phdr, sizeof(phdr));
|
||||
if (phdr.p_type != PT_NOTE)
|
||||
continue;
|
||||
|
||||
return Elf::getBuildID(reinterpret_cast<const char *>(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz);
|
||||
std::string_view view(reinterpret_cast<const char *>(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz);
|
||||
__msan_unpoison(view.data(), view.size());
|
||||
return Elf::getBuildID(view.data(), view.size());
|
||||
}
|
||||
return {};
|
||||
}
|
||||
@ -318,6 +340,7 @@ void collectSymbolsFromELF(
|
||||
build_id = our_build_id;
|
||||
#else
|
||||
/// MSan does not know that the program segments in memory are initialized.
|
||||
__msan_unpoison(info, sizeof(*info));
|
||||
__msan_unpoison_string(info->dlpi_name);
|
||||
|
||||
object_name = info->dlpi_name;
|
||||
|
@ -463,7 +463,7 @@ void CompressionCodecEncrypted::Configuration::loadImpl(
|
||||
|
||||
/// If there is only one key with non zero ID, current_key_id should be defined.
|
||||
if (new_params->keys_storage[method].size() == 1 && !new_params->keys_storage[method].contains(0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. сurrent_key_id is required");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. current_key_id is required");
|
||||
}
|
||||
|
||||
/// Try to find which key will be used for encryption. If there is no current_key and only one key without id
|
||||
|
@ -433,10 +433,10 @@ void DiskEncrypted::applyNewSettings(
|
||||
{
|
||||
auto new_settings = parseDiskEncryptedSettings(name, config, config_prefix, disk_map);
|
||||
if (new_settings->wrapped_disk != delegate)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging wrapped disk on the fly is not supported. Disk {}", name);
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing wrapped disk on the fly is not supported. Disk {}", name);
|
||||
|
||||
if (new_settings->disk_path != disk_path)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. Disk {}", name);
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing disk path on the fly is not supported. Disk {}", name);
|
||||
|
||||
current_settings.set(std::move(new_settings));
|
||||
IDisk::applyNewSettings(config, context, config_prefix, disk_map);
|
||||
|
@ -40,7 +40,7 @@ int readAndPrint(DB::ReadBuffer & in)
|
||||
int main(int, char **)
|
||||
{
|
||||
{
|
||||
std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
|
||||
std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'";
|
||||
DB::ReadBufferFromString in(s);
|
||||
if (readAndPrint(in))
|
||||
std::cout << "readAndPrint from ReadBufferFromString failed" << std::endl;
|
||||
@ -49,7 +49,7 @@ int main(int, char **)
|
||||
|
||||
std::shared_ptr<DB::ReadBufferFromOwnString> in;
|
||||
{
|
||||
std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
|
||||
std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'";
|
||||
in = std::make_shared<DB::ReadBufferFromOwnString>(s);
|
||||
}
|
||||
if (readAndPrint(*in))
|
||||
|
@ -14,7 +14,7 @@ int main(int, char **)
|
||||
{
|
||||
DB::Int64 a = -123456;
|
||||
DB::Float64 b = 123.456;
|
||||
DB::String c = "вася пе\tтя";
|
||||
DB::String c = "вася pe\ttya";
|
||||
DB::String d = "'xyz\\";
|
||||
|
||||
std::stringstream s; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
|
@ -14,7 +14,7 @@ int main(int, char **)
|
||||
{
|
||||
DB::Int64 a = -123456;
|
||||
DB::Float64 b = 123.456;
|
||||
DB::String c = "вася пе\tтя";
|
||||
DB::String c = "вася pe\ttya";
|
||||
DB::String d = "'xyz\\";
|
||||
|
||||
std::ofstream s("test");
|
||||
|
@ -180,7 +180,7 @@ Chunk ParquetMetadataInputFormat::generate()
|
||||
else if (name == names[3])
|
||||
{
|
||||
auto column = types[3]->createColumn();
|
||||
/// Version сan be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6).
|
||||
/// Version can be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6).
|
||||
String version = metadata->version() == parquet::ParquetVersion::PARQUET_1_0 ? "1.0" : "2.6";
|
||||
assert_cast<ColumnString &>(*column).insertData(version.data(), version.size());
|
||||
res.addColumn(std::move(column));
|
||||
|
@ -12,6 +12,7 @@
|
||||
# (simple regexps) to check if the code is likely to have basic style violations.
|
||||
# and then to run formatter only for the specified files.
|
||||
|
||||
LC_ALL="en_US.UTF-8"
|
||||
ROOT_PATH=$(git rev-parse --show-toplevel)
|
||||
EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml'
|
||||
|
||||
@ -413,3 +414,6 @@ find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -
|
||||
|
||||
# Check for bad punctuation: whitespace before comma.
|
||||
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'"
|
||||
|
||||
# Cyrillic characters hiding inside Latin.
|
||||
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place."
|
||||
|
Loading…
Reference in New Issue
Block a user