Merge branch 'master' into upload-build-check-statistics

This commit is contained in:
Alexey Milovidov 2023-07-30 12:48:17 +03:00 committed by GitHub
commit a1b83c51ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 101 additions and 58 deletions

View File

@ -165,8 +165,14 @@ elseif(GLIBC_COMPATIBILITY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration")
endif ()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
if (OS_LINUX)
# We should not export dynamic symbols, because:
# - The main clickhouse binary does not use dlopen,
# and whatever is poisoning it by LD_PRELOAD should not link to our symbols.
# - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries
# should not expose their symbols to ODBC drivers and libraries.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
if (OS_DARWIN)
# The `-all_load` flag forces loading of all symbols from all libraries,

View File

@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols)
# Splits debug symbols into separate file, leaves the binary untouched:
COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check.
# Also, after we disabled the export of symbols for dynamic linking, we still need to keep a static symbol table for good stack traces.
COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Associate stripped binary with debug symbols:
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM

View File

@ -64,7 +64,7 @@ then
ninja $NINJA_FLAGS clickhouse-keeper
ls -la ./programs/
ldd ./programs/clickhouse-keeper
ldd ./programs/clickhouse-keeper ||:
if [ -n "$MAKE_DEB" ]; then
# No quotes because I want it to expand to nothing if empty.

View File

@ -11,6 +11,7 @@ RUN apt-get update \
pv \
ripgrep \
zstd \
locales \
--yes --no-install-recommends
# Sanitizer options for services (clickhouse-server)
@ -28,6 +29,9 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_de
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV TZ=Europe/Moscow
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone

View File

@ -18,9 +18,13 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
locales \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip
&& rm -rf /root/.cache/pip
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

View File

@ -13,10 +13,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
library-bridge.cpp
)
if (OS_LINUX)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
target_link_libraries(clickhouse-library-bridge PRIVATE

View File

@ -15,12 +15,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
validateODBCConnectionString.cpp
)
if (OS_LINUX)
# clickhouse-odbc-bridge is always a separate binary.
# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
target_link_libraries(clickhouse-odbc-bridge PRIVATE

View File

@ -328,7 +328,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root,
}
}
void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with)
bool ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with)
{
Node * config_root = getRootNode(config.get());
Node * with_root = getRootNode(with.get());
@ -343,11 +343,15 @@ void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with)
&& !((config_root_node_name == "yandex" || config_root_node_name == "clickhouse")
&& (merged_root_node_name == "yandex" || merged_root_node_name == "clickhouse")))
{
if (config_root_node_name != "clickhouse" && config_root_node_name != "yandex")
return false;
throw Poco::Exception("Root element doesn't have the corresponding root element as the config file."
" It must be <" + config_root->nodeName() + ">");
}
mergeRecursive(config, config_root, with_root);
return true;
}
void ConfigProcessor::doIncludesRecursive(
@ -645,7 +649,12 @@ XMLDocumentPtr ConfigProcessor::processConfig(
with = dom_parser.parse(merge_file);
}
merge(config, with);
if (!merge(config, with))
{
LOG_DEBUG(log, "Merging bypassed - configuration file '{}' doesn't belong to configuration '{}' - merging root node name '{}' doesn't match '{}'",
merge_file, path, getRootNode(with.get())->nodeName(), getRootNode(config.get())->nodeName());
continue;
}
contributing_files.push_back(merge_file);
}

View File

@ -144,7 +144,9 @@ private:
void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root);
void merge(XMLDocumentPtr config, XMLDocumentPtr with);
/// If the config root node name is not 'clickhouse' and the root node name of the config being merged doesn't match it, bypasses merging and returns false.
/// For compatibility, the root node 'yandex' is considered equal to 'clickhouse'.
bool merge(XMLDocumentPtr config, XMLDocumentPtr with);
void doIncludesRecursive(
XMLDocumentPtr config,

View File

@ -1,7 +1,6 @@
#if defined(__ELF__) && !defined(OS_FREEBSD)
#include <Common/SymbolIndex.h>
#include <base/hex.h>
#include <algorithm>
#include <optional>
@ -62,9 +61,11 @@ Otherwise you will get only exported symbols from program headers.
#endif
#define __msan_unpoison_string(X) // NOLINT
#define __msan_unpoison(X, Y) // NOLINT
#if defined(ch_has_feature)
# if ch_has_feature(memory_sanitizer)
# undef __msan_unpoison_string
# undef __msan_unpoison
# include <sanitizer/msan_interface.h>
# endif
#endif
@ -98,10 +99,13 @@ void collectSymbolsFromProgramHeaders(
/* Iterate over all headers of the current shared lib
* (first call is for the executable itself)
*/
__msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum));
__msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr));
for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index)
{
/* Further processing is only needed if the dynamic section is reached
*/
__msan_unpoison(&info->dlpi_phdr[header_index], sizeof(info->dlpi_phdr[header_index]));
if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC)
continue;
@ -109,6 +113,7 @@ void collectSymbolsFromProgramHeaders(
* Its address is the shared lib's address + the virtual address
*/
const ElfW(Dyn) * dyn_begin = reinterpret_cast<const ElfW(Dyn) *>(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr);
__msan_unpoison(&dyn_begin, sizeof(dyn_begin));
/// For unknown reason, addresses are sometimes relative sometimes absolute.
auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr)
@ -122,44 +127,53 @@ void collectSymbolsFromProgramHeaders(
*/
size_t sym_cnt = 0;
for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it)
{
ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
// TODO: this branch leads to invalid address of the hash table. Need further investigation.
// if (it->d_tag == DT_HASH)
// {
// const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
// sym_cnt = hash[1];
// break;
// }
if (it->d_tag == DT_GNU_HASH)
const auto * it = dyn_begin;
while (true)
{
/// This code based on Musl-libc.
__msan_unpoison(it, sizeof(*it));
/// Stop at the DT_NULL terminator of the dynamic section.
/// The loop must keep going for every other tag (as the former `for (...; it->d_tag != DT_NULL; ++it)` did),
/// otherwise it exits on the very first entry and DT_GNU_HASH is never examined.
if (it->d_tag == DT_NULL)
    break;
const uint32_t * buckets = nullptr;
const uint32_t * hashval = nullptr;
ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr);
const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4);
for (ElfW(Word) i = 0; i < hash[0]; ++i)
if (buckets[i] > sym_cnt)
sym_cnt = buckets[i];
if (sym_cnt)
if (it->d_tag == DT_GNU_HASH)
{
sym_cnt -= hash[1];
hashval = buckets + hash[0] + sym_cnt;
do
/// This code based on Musl-libc.
const uint32_t * buckets = nullptr;
const uint32_t * hashval = nullptr;
const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address);
__msan_unpoison(&hash[0], sizeof(*hash));
__msan_unpoison(&hash[1], sizeof(*hash));
__msan_unpoison(&hash[2], sizeof(*hash));
buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4);
__msan_unpoison(buckets, hash[0] * sizeof(buckets[0]));
for (ElfW(Word) i = 0; i < hash[0]; ++i)
if (buckets[i] > sym_cnt)
sym_cnt = buckets[i];
if (sym_cnt)
{
++sym_cnt;
sym_cnt -= hash[1];
hashval = buckets + hash[0] + sym_cnt;
__msan_unpoison(&hashval, sizeof(hashval));
do
{
++sym_cnt;
}
while (!(*hashval++ & 1));
}
while (!(*hashval++ & 1));
break;
}
break;
++it;
}
}
@ -190,6 +204,8 @@ void collectSymbolsFromProgramHeaders(
/* Get the pointer to the first entry of the symbol table */
const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(base_address);
__msan_unpoison(elf_sym, sym_cnt * sizeof(*elf_sym));
/* Iterate over the symbol table */
for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index)
{
@ -197,6 +213,7 @@ void collectSymbolsFromProgramHeaders(
* This is located at the address of st_name relative to the beginning of the string table.
*/
const char * sym_name = &strtab[elf_sym[sym_index].st_name];
__msan_unpoison_string(sym_name);
if (!sym_name)
continue;
@ -223,13 +240,18 @@ void collectSymbolsFromProgramHeaders(
#if !defined USE_MUSL
String getBuildIDFromProgramHeaders(dl_phdr_info * info)
{
__msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum));
__msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr));
for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index)
{
const ElfPhdr & phdr = info->dlpi_phdr[header_index];
__msan_unpoison(&phdr, sizeof(phdr));
if (phdr.p_type != PT_NOTE)
continue;
return Elf::getBuildID(reinterpret_cast<const char *>(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz);
std::string_view view(reinterpret_cast<const char *>(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz);
__msan_unpoison(view.data(), view.size());
return Elf::getBuildID(view.data(), view.size());
}
return {};
}
@ -318,6 +340,7 @@ void collectSymbolsFromELF(
build_id = our_build_id;
#else
/// MSan does not know that the program segments in memory are initialized.
__msan_unpoison(info, sizeof(*info));
__msan_unpoison_string(info->dlpi_name);
object_name = info->dlpi_name;

View File

@ -463,7 +463,7 @@ void CompressionCodecEncrypted::Configuration::loadImpl(
/// If there is only one key with non zero ID, current_key_id should be defined.
if (new_params->keys_storage[method].size() == 1 && !new_params->keys_storage[method].contains(0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. сurrent_key_id is required");
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. current_key_id is required");
}
/// Try to find which key will be used for encryption. If there is no current_key and only one key without id

View File

@ -433,10 +433,10 @@ void DiskEncrypted::applyNewSettings(
{
auto new_settings = parseDiskEncryptedSettings(name, config, config_prefix, disk_map);
if (new_settings->wrapped_disk != delegate)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging wrapped disk on the fly is not supported. Disk {}", name);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing wrapped disk on the fly is not supported. Disk {}", name);
if (new_settings->disk_path != disk_path)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. Disk {}", name);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing disk path on the fly is not supported. Disk {}", name);
current_settings.set(std::move(new_settings));
IDisk::applyNewSettings(config, context, config_prefix, disk_map);

View File

@ -40,7 +40,7 @@ int readAndPrint(DB::ReadBuffer & in)
int main(int, char **)
{
{
std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'";
DB::ReadBufferFromString in(s);
if (readAndPrint(in))
std::cout << "readAndPrint from ReadBufferFromString failed" << std::endl;
@ -49,7 +49,7 @@ int main(int, char **)
std::shared_ptr<DB::ReadBufferFromOwnString> in;
{
std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'";
in = std::make_shared<DB::ReadBufferFromOwnString>(s);
}
if (readAndPrint(*in))

View File

@ -14,7 +14,7 @@ int main(int, char **)
{
DB::Int64 a = -123456;
DB::Float64 b = 123.456;
DB::String c = "вася пе\tтя";
DB::String c = "вася pe\ttya";
DB::String d = "'xyz\\";
std::stringstream s; // STYLE_CHECK_ALLOW_STD_STRING_STREAM

View File

@ -14,7 +14,7 @@ int main(int, char **)
{
DB::Int64 a = -123456;
DB::Float64 b = 123.456;
DB::String c = "вася пе\tтя";
DB::String c = "вася pe\ttya";
DB::String d = "'xyz\\";
std::ofstream s("test");

View File

@ -180,7 +180,7 @@ Chunk ParquetMetadataInputFormat::generate()
else if (name == names[3])
{
auto column = types[3]->createColumn();
/// Version сan be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6).
/// Version can be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6).
String version = metadata->version() == parquet::ParquetVersion::PARQUET_1_0 ? "1.0" : "2.6";
assert_cast<ColumnString &>(*column).insertData(version.data(), version.size());
res.addColumn(std::move(column));

View File

@ -12,6 +12,7 @@
# (simple regexps) to check if the code is likely to have basic style violations.
# and then to run formatter only for the specified files.
# Export the locale so that child processes see it: the `grep -P` checks below use
# non-ASCII (Cyrillic) character ranges. A plain assignment only reaches child
# processes when LC_ALL was already exported by the caller.
export LC_ALL="en_US.UTF-8"
ROOT_PATH=$(git rev-parse --show-toplevel)
EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml'
@ -413,3 +414,6 @@ find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -
# Check for bad punctuation: whitespace before comma.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'"
# Cyrillic characters hiding inside Latin.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place."