Merge branch 's3_zero_copy_replication' of https://github.com/ianton-ru/ClickHouse into s3_zero_copy_replication

Anton Ivashkin 2021-05-24 12:54:01 +03:00
commit d1be97fd30
209 changed files with 4435 additions and 690 deletions

.gitmodules vendored (4 lines changed)

@ -228,3 +228,7 @@
[submodule "contrib/datasketches-cpp"]
path = contrib/datasketches-cpp
url = https://github.com/ClickHouse-Extras/datasketches-cpp.git
[submodule "contrib/yaml-cpp"]
path = contrib/yaml-cpp
url = https://github.com/ClickHouse-Extras/yaml-cpp.git


@ -36,7 +36,7 @@ option(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION
if(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION)
set(RECONFIGURE_MESSAGE_LEVEL FATAL_ERROR)
else()
set(RECONFIGURE_MESSAGE_LEVEL STATUS)
set(RECONFIGURE_MESSAGE_LEVEL WARNING)
endif()
enable_language(C CXX ASM)
@ -527,6 +527,7 @@ include (cmake/find/nanodbc.cmake)
include (cmake/find/rocksdb.cmake)
include (cmake/find/libpqxx.cmake)
include (cmake/find/nuraft.cmake)
include (cmake/find/yaml-cpp.cmake)
if(NOT USE_INTERNAL_PARQUET_LIBRARY)


@ -3,5 +3,11 @@ add_library (bridge
)
target_include_directories (daemon PUBLIC ..)
target_link_libraries (bridge PRIVATE daemon dbms Poco::Data Poco::Data::ODBC)
target_link_libraries (bridge
PRIVATE
daemon
dbms
Poco::Data
Poco::Data::ODBC
)


@ -0,0 +1,9 @@
option(USE_YAML_CPP "Enable yaml-cpp" ${ENABLE_LIBRARIES})
if (NOT USE_YAML_CPP)
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/yaml-cpp")
message (FATAL_ERROR "submodule contrib/yaml-cpp is missing. To fix, try to run: \n git submodule update --init --recursive")
endif()


@ -50,6 +50,10 @@ add_subdirectory (replxx-cmake)
add_subdirectory (unixodbc-cmake)
add_subdirectory (nanodbc-cmake)
if (USE_YAML_CPP)
add_subdirectory (yaml-cpp-cmake)
endif()
if (USE_INTERNAL_XZ_LIBRARY)
add_subdirectory (xz)
endif()

contrib/yaml-cpp vendored Submodule (1 line changed)

@ -0,0 +1 @@
Subproject commit 0c86adac6d117ee2b4afcedb8ade19036ca0327d


@ -0,0 +1,39 @@
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/yaml-cpp)
set (SRCS
${LIBRARY_DIR}/src/binary.cpp
${LIBRARY_DIR}/src/emitterutils.cpp
${LIBRARY_DIR}/src/null.cpp
${LIBRARY_DIR}/src/scantoken.cpp
${LIBRARY_DIR}/src/convert.cpp
${LIBRARY_DIR}/src/exceptions.cpp
${LIBRARY_DIR}/src/ostream_wrapper.cpp
${LIBRARY_DIR}/src/simplekey.cpp
${LIBRARY_DIR}/src/depthguard.cpp
${LIBRARY_DIR}/src/exp.cpp
${LIBRARY_DIR}/src/parse.cpp
${LIBRARY_DIR}/src/singledocparser.cpp
${LIBRARY_DIR}/src/directives.cpp
${LIBRARY_DIR}/src/memory.cpp
${LIBRARY_DIR}/src/parser.cpp
${LIBRARY_DIR}/src/stream.cpp
${LIBRARY_DIR}/src/emit.cpp
${LIBRARY_DIR}/src/nodebuilder.cpp
${LIBRARY_DIR}/src/regex_yaml.cpp
${LIBRARY_DIR}/src/tag.cpp
${LIBRARY_DIR}/src/emitfromevents.cpp
${LIBRARY_DIR}/src/node.cpp
${LIBRARY_DIR}/src/scanner.cpp
${LIBRARY_DIR}/src/emitter.cpp
${LIBRARY_DIR}/src/node_data.cpp
${LIBRARY_DIR}/src/scanscalar.cpp
${LIBRARY_DIR}/src/emitterstate.cpp
${LIBRARY_DIR}/src/nodeevents.cpp
${LIBRARY_DIR}/src/scantag.cpp
)
add_library (yaml-cpp ${SRCS})
target_include_directories(yaml-cpp PRIVATE ${LIBRARY_DIR}/include/yaml-cpp)
target_include_directories(yaml-cpp SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include)


@ -5,11 +5,11 @@ toc_title: "Функции для шифрования"
# Функции шифрования {#encryption-functions}
Даннвые функции реализуют шифрование и расшифровку данных с помощью AES (Advanced Encryption Standard) алгоритма.
Данные функции реализуют шифрование и расшифровку данных с помощью AES (Advanced Encryption Standard) алгоритма.
Длина ключа зависит от режима шифрования. Он может быть длинной в 16, 24 и 32 байта для режимов шифрования `-128-`, `-196-` и `-256-` соответственно.
Длина инициализирующего вектора всегда 16 байт (лишнии байты игнорируются).
Длина инициализирующего вектора всегда 16 байт (лишние байты игнорируются).
Обратите внимание, что до версии Clickhouse 21.1 эти функции работали медленно.


@ -0,0 +1,86 @@
# We can use 3 main node types in YAML: Scalar, Map and Sequence.

# A Scalar is a simple key-value pair:
scalar: 123
# Here we have a key "scalar" and value "123"
# If we rewrite this in XML, we will get <scalar>123</scalar>

# We can also represent an empty value with '':
key: ''

# A Map is a node, which contains other nodes:
map:
  key1: value1
  key2: value2
  small_map:
    key3: value3
# This map can be converted into:
# <map>
#     <key1>value1</key1>
#     <key2>value2</key2>
#     <small_map>
#         <key3>value3</key3>
#     </small_map>
# </map>

# A Sequence is a node, which also contains other nodes.
# The main difference from Map is that a Sequence can contain simple values as well.
sequence:
  - val1
  - val2
  - key: 123
  - map:
      mkey1: foo
      mkey2: bar
# We can represent it in XML this way:
# <sequence>val1</sequence>
# <sequence>val2</sequence>
# <sequence>
#     <key>123</key>
# </sequence>
# <sequence>
#     <map>
#         <mkey1>foo</mkey1>
#         <mkey2>bar</mkey2>
#     </map>
# </sequence>

# YAML does not have direct support for structures like XML attributes.
# We represent them as nodes with an @ prefix in the key. Note that @ is reserved
# by the YAML standard, so you will need to write double quotes around the key.
# Both Map and Sequence can have attributes as child nodes:
map:
  "@attr1": value1
  "@attr2": value2
  key: 123
# This gives us:
# <map attr1="value1" attr2="value2">
#     <key>123</key>
# </map>
sequence:
  - "@attr1": value1
  - "@attr2": value2
  - 123
  - abc
# And this gives us:
# <sequence attr1="value1" attr2="value2">123</sequence>
# <sequence attr1="value1" attr2="value2">abc</sequence>
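To make the three node types concrete in code, here is a minimal sketch against the yaml-cpp API vendored by this commit (the inline document reuses keys from the example above; this is an illustration, not part of the patch):

#include <iostream>
#include <string>
#include <yaml-cpp/yaml.h>

int main()
{
    // Scalar, Map and Sequence in one small document.
    YAML::Node root = YAML::Load("scalar: 123\nmap:\n  key1: value1\nsequence:\n  - val1\n  - val2\n");

    std::cout << root["scalar"].as<int>() << '\n';               // Scalar -> 123
    std::cout << root["map"]["key1"].as<std::string>() << '\n';  // Map lookup -> value1

    for (const auto & item : root["sequence"])                   // Sequence iteration
        std::cout << item.as<std::string>() << '\n';             // val1, val2
}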


@ -52,6 +52,9 @@ template <typename Value, bool float_return> using FuncQuantilesTDigest = Aggreg
template <typename Value, bool float_return> using FuncQuantileTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, true>;
template <typename Value, bool float_return> using FuncQuantileBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16, false, std::conditional_t<float_return, Float64, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16, false, std::conditional_t<float_return, Float64, void>, true>;
template <template <typename, bool> class Function>
static constexpr bool supportDecimal()
@ -156,6 +159,9 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
factory.registerFunction(NameQuantileTDigestWeighted::name, createAggregateFunctionQuantile<FuncQuantileTDigestWeighted>);
factory.registerFunction(NameQuantilesTDigestWeighted::name, createAggregateFunctionQuantile<FuncQuantilesTDigestWeighted>);
factory.registerFunction(NameQuantileBFloat16::name, createAggregateFunctionQuantile<FuncQuantileBFloat16>);
factory.registerFunction(NameQuantilesBFloat16::name, createAggregateFunctionQuantile<FuncQuantilesBFloat16>);
/// 'median' is an alias for 'quantile'
factory.registerAlias("median", NameQuantile::name);
factory.registerAlias("medianDeterministic", NameQuantileDeterministic::name);
@ -167,6 +173,7 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
factory.registerAlias("medianTimingWeighted", NameQuantileTimingWeighted::name);
factory.registerAlias("medianTDigest", NameQuantileTDigest::name);
factory.registerAlias("medianTDigestWeighted", NameQuantileTDigestWeighted::name);
factory.registerAlias("medianBFloat16", NameQuantileBFloat16::name);
}
}


@ -9,6 +9,7 @@
#include <AggregateFunctions/QuantileExactWeighted.h>
#include <AggregateFunctions/QuantileTiming.h>
#include <AggregateFunctions/QuantileTDigest.h>
#include <AggregateFunctions/QuantileBFloat16Histogram.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/QuantilesCommon.h>
@ -228,4 +229,7 @@ struct NameQuantileTDigestWeighted { static constexpr auto name = "quantileTDige
struct NameQuantilesTDigest { static constexpr auto name = "quantilesTDigest"; };
struct NameQuantilesTDigestWeighted { static constexpr auto name = "quantilesTDigestWeighted"; };
struct NameQuantileBFloat16 { static constexpr auto name = "quantileBFloat16"; };
struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16"; };
}


@ -0,0 +1,207 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <Common/HashTable/HashMap.h>
#include <common/types.h>
#include <ext/bit_cast.h>
namespace DB
{
/** `bfloat16` is a 16-bit floating point data type that is the same as the corresponding most significant 16 bits of the `float`.
* https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
*
* To calculate quantile, simply convert input value to 16 bit (convert to float, then take the most significant 16 bits),
* and calculate the histogram of these values.
*
* Hash table is the preferred way to store histogram, because the number of distinct values is small:
* ```
* SELECT uniq(bfloat)
* FROM
* (
* SELECT
* number,
* toFloat32(number) AS f,
* bitShiftRight(bitAnd(reinterpretAsUInt32(reinterpretAsFixedString(f)), 4294901760) AS cut, 16),
* reinterpretAsFloat32(reinterpretAsFixedString(cut)) AS bfloat
* FROM numbers(100000000)
* )
*
* uniq(bfloat)
* 2623
*
* ```
* (when increasing the range of values 1000 times, the number of distinct bfloat16 values increases just by 1280).
*
* Then calculate quantile from the histogram.
*
* This sketch is very simple and rough. Its relative precision is constant 1 / 256 = 0.390625%.
*/
template <typename Value>
struct QuantileBFloat16Histogram
{
using BFloat16 = UInt16;
using Weight = UInt64;
/// Make automatic memory for 16 elements to avoid allocations for small states.
/// The usage of trivial hash is ok, because we effectively take logarithm of the values and pathological cases are unlikely.
using Data = HashMapWithStackMemory<BFloat16, Weight, TrivialHash, 4>;
Data data;
void add(const Value & x)
{
add(x, 1);
}
void add(const Value & x, Weight w)
{
if (!isNaN(x))
data[toBFloat16(x)] += w;
}
void merge(const QuantileBFloat16Histogram & rhs)
{
for (const auto & pair : rhs.data)
data[pair.getKey()] += pair.getMapped();
}
void serialize(WriteBuffer & buf) const
{
data.write(buf);
}
void deserialize(ReadBuffer & buf)
{
data.read(buf);
}
Value get(Float64 level) const
{
return getImpl<Value>(level);
}
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result) const
{
getManyImpl(levels, indices, size, result);
}
Float64 getFloat(Float64 level) const
{
return getImpl<Float64>(level);
}
void getManyFloat(const Float64 * levels, const size_t * indices, size_t size, Float64 * result) const
{
getManyImpl(levels, indices, size, result);
}
private:
/// Take the most significant 16 bits of the floating point number.
BFloat16 toBFloat16(const Value & x) const
{
return ext::bit_cast<UInt32>(static_cast<Float32>(x)) >> 16;
}
/// Put the bits into most significant 16 bits of the floating point number and fill other bits with zeros.
Float32 toFloat32(const BFloat16 & x) const
{
return ext::bit_cast<Float32>(x << 16);
}
using Pair = PairNoInit<Float32, Weight>;
template <typename T>
T getImpl(Float64 level) const
{
size_t size = data.size();
if (0 == size)
return std::numeric_limits<T>::quiet_NaN();
std::unique_ptr<Pair[]> array_holder(new Pair[size]);
Pair * array = array_holder.get();
Float64 sum_weight = 0;
Pair * arr_it = array;
for (const auto & pair : data)
{
sum_weight += pair.getMapped();
*arr_it = {toFloat32(pair.getKey()), pair.getMapped()};
++arr_it;
}
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
Float64 threshold = std::ceil(sum_weight * level);
Float64 accumulated = 0;
for (const Pair * p = array; p != (array + size); ++p)
{
accumulated += p->second;
if (accumulated >= threshold)
return p->first;
}
return array[size - 1].first;
}
template <typename T>
void getManyImpl(const Float64 * levels, const size_t * indices, size_t num_levels, T * result) const
{
size_t size = data.size();
if (0 == size)
{
for (size_t i = 0; i < num_levels; ++i)
result[i] = std::numeric_limits<T>::quiet_NaN();
return;
}
std::unique_ptr<Pair[]> array_holder(new Pair[size]);
Pair * array = array_holder.get();
Float64 sum_weight = 0;
Pair * arr_it = array;
for (const auto & pair : data)
{
sum_weight += pair.getMapped();
*arr_it = {toFloat32(pair.getKey()), pair.getMapped()};
++arr_it;
}
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
size_t level_index = 0;
Float64 accumulated = 0;
Float64 threshold = std::ceil(sum_weight * levels[indices[level_index]]);
for (const Pair * p = array; p != (array + size); ++p)
{
accumulated += p->second;
while (accumulated >= threshold)
{
result[indices[level_index]] = p->first;
++level_index;
if (level_index == num_levels)
return;
threshold = std::ceil(sum_weight * levels[indices[level_index]]);
}
}
while (level_index < num_levels)
{
result[indices[level_index]] = array[size - 1].first;
++level_index;
}
}
};
}
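To see why the relative precision is the constant 1/256: bfloat16 keeps the sign, the full 8-bit exponent and 7 explicit mantissa bits, i.e. 8 significant bits, so truncation changes a value by at most one part in 2^8. Below is a standalone sketch of the same bit trick as toBFloat16/toFloat32 above, using memcpy instead of ext::bit_cast so it compiles on its own:

#include <cstdint>
#include <cstring>
#include <iostream>

// Keep only the most significant 16 bits of a float (sign, exponent, 7 mantissa bits).
uint16_t toBFloat16(float x)
{
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    return bits >> 16;
}

// Put the bits back into the high half and pad the rest with zeros.
float toFloat32(uint16_t x)
{
    uint32_t bits = static_cast<uint32_t>(x) << 16;
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

int main()
{
    float v = 3.14159f;
    // Prints 3.140625: the round-trip error is well within 1/256 of the value.
    std::cout << toFloat32(toBFloat16(v)) << '\n';
}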


@ -43,6 +43,7 @@ SRCS(
AggregateFunctionRankCorrelation.cpp
AggregateFunctionResample.cpp
AggregateFunctionRetention.cpp
AggregateFunctionSegmentLengthSum.cpp
AggregateFunctionSequenceMatch.cpp
AggregateFunctionSimpleLinearRegression.cpp
AggregateFunctionSimpleState.cpp


@ -1,309 +0,0 @@
#pragma once
#include <cstddef>
#include <cstdlib>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;
}
/** An array of (almost) unchangeable size:
* the size is specified in the constructor;
* the `resize` method removes old data, and is needed only
* so that you can first create an empty object using the default constructor,
* and then decide on the size.
*
* There is a possibility to not initialize elements by default, but create them inplace.
* Member destructors are called automatically.
*
* `sizeof` is equal to the size of one pointer.
*
* Not exception-safe.
*
* Copying is supported via assign() method. Moving empties the original object.
* That is, it is inconvenient to use this array in many cases.
*
* Designed for situations in which many arrays of the same small size are created,
* but the size is not known at compile time.
* Also gives a significant advantage in cases where it is important that `sizeof` is minimal.
* For example, if arrays are put in an open-addressing hash table with inplace storage of values (like HashMap)
*
* In this case, compared to std::vector:
* - for arrays of 1 element size - an advantage of about 2 times;
* - for arrays of 5 elements - an advantage of about 1.5 times
* (DB::Field, containing UInt64 and String, used as T);
*/
const size_t empty_auto_array_helper = 0;
template <typename T>
class AutoArray
{
public:
/// For deferred creation.
AutoArray()
{
setEmpty();
}
explicit AutoArray(size_t size_)
{
init(size_, false);
}
/** Initializes all elements with a copy constructor with the `value` parameter.
*/
AutoArray(size_t size_, const T & value)
{
init(size_, true);
for (size_t i = 0; i < size_; ++i)
{
new (place(i)) T(value);
}
}
/** `resize` removes all existing items.
*/
void resize(size_t size_, bool dont_init_elems = false)
{
uninit();
init(size_, dont_init_elems);
}
/** Move operations.
*/
AutoArray(AutoArray && src)
{
if (this == &src)
return;
setEmpty();
data_ptr = src.data_ptr;
src.setEmpty();
}
AutoArray & operator= (AutoArray && src)
{
if (this == &src)
return *this;
uninit();
data_ptr = src.data_ptr;
src.setEmpty();
return *this;
}
~AutoArray()
{
uninit();
}
size_t size() const
{
return m_size();
}
bool empty() const
{
return size() == 0;
}
void clear()
{
uninit();
setEmpty();
}
template <typename It>
void assign(It from_begin, It from_end)
{
uninit();
size_t size = from_end - from_begin;
init(size, /* dont_init_elems = */ true);
It it = from_begin;
for (size_t i = 0; i < size; ++i, ++it)
new (place(i)) T(*it);
}
void assign(const AutoArray & from)
{
assign(from.begin(), from.end());
}
/** You can read and modify elements using the [] operator
* only if items were initialized
* (that is, into the constructor was not passed DontInitElemsTag,
* or you initialized them using `place` and `placement new`).
*/
T & operator[](size_t i)
{
return elem(i);
}
const T & operator[](size_t i) const
{
return elem(i);
}
T * data()
{
return elemPtr(0);
}
const T * data() const
{
return elemPtr(0);
}
/** Get the piece of memory in which the element should be located.
* The function is intended to initialize an element,
* which has not yet been initialized
* new (arr.place(i)) T(args);
*/
char * place(size_t i)
{
return data_ptr + sizeof(T) * i;
}
using iterator = T *;
using const_iterator = const T *;
iterator begin() { return elemPtr(0); }
iterator end() { return elemPtr(size()); }
const_iterator begin() const { return elemPtr(0); }
const_iterator end() const { return elemPtr(size()); }
bool operator== (const AutoArray<T> & rhs) const
{
size_t s = size();
if (s != rhs.size())
return false;
for (size_t i = 0; i < s; ++i)
if (elem(i) != rhs.elem(i))
return false;
return true;
}
bool operator!= (const AutoArray<T> & rhs) const
{
return !(*this == rhs);
}
bool operator< (const AutoArray<T> & rhs) const
{
size_t s = size();
size_t rhs_s = rhs.size();
if (s < rhs_s)
return true;
if (s > rhs_s)
return false;
for (size_t i = 0; i < s; ++i)
{
if (elem(i) < rhs.elem(i))
return true;
if (elem(i) > rhs.elem(i))
return false;
}
return false;
}
private:
static constexpr size_t alignment = alignof(T);
/// Bytes allocated to store size of array before data. It is padded to have minimum size as alignment.
/// Padding is at left and the size is stored at right (just before the first data element).
static constexpr size_t prefix_size = std::max(sizeof(size_t), alignment);
char * data_ptr;
size_t & m_size()
{
return reinterpret_cast<size_t *>(data_ptr)[-1];
}
size_t m_size() const
{
return reinterpret_cast<const size_t *>(data_ptr)[-1];
}
T * elemPtr(size_t i)
{
return reinterpret_cast<T *>(data_ptr) + i;
}
const T * elemPtr(size_t i) const
{
return reinterpret_cast<const T *>(data_ptr) + i;
}
T & elem(size_t i)
{
return *elemPtr(i);
}
const T & elem(size_t i) const
{
return *elemPtr(i);
}
void setEmpty()
{
data_ptr = const_cast<char *>(reinterpret_cast<const char *>(&empty_auto_array_helper)) + sizeof(size_t);
}
void init(size_t new_size, bool dont_init_elems)
{
if (!new_size)
{
setEmpty();
return;
}
void * new_data = nullptr;
int res = posix_memalign(&new_data, alignment, prefix_size + new_size * sizeof(T));
if (0 != res)
throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(new_size)),
ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
data_ptr = static_cast<char *>(new_data);
data_ptr += prefix_size;
m_size() = new_size;
if (!dont_init_elems)
for (size_t i = 0; i < new_size; ++i)
new (place(i)) T();
}
void uninit()
{
size_t s = size();
if (s)
{
for (size_t i = 0; i < s; ++i)
elem(i).~T();
data_ptr -= prefix_size;
free(data_ptr);
}
}
};
}


@ -3,6 +3,7 @@ set (SRCS
ConfigProcessor.cpp
configReadClient.cpp
ConfigReloader.cpp
YAMLParser.cpp
)
add_library(clickhouse_common_config ${SRCS})
@ -15,3 +16,10 @@ target_link_libraries(clickhouse_common_config
PRIVATE
string_utils
)
if (USE_YAML_CPP)
target_link_libraries(clickhouse_common_config
PRIVATE
yaml-cpp
)
endif()


@ -1,4 +1,8 @@
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#include "ConfigProcessor.h"
#include "YAMLParser.h"
#include <sys/utsname.h>
#include <cerrno>
@ -20,10 +24,8 @@
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#define PREPROCESSED_SUFFIX "-preprocessed"
namespace fs = std::filesystem;
using namespace Poco::XML;
@ -438,8 +440,10 @@ ConfigProcessor::Files ConfigProcessor::getConfigMergeFiles(const std::string &
std::string base_name = path.getBaseName();
// Skip non-config and temporary files
if (file.isFile() && (extension == "xml" || extension == "conf") && !startsWith(base_name, "."))
files.push_back(file.path());
if (file.isFile() && (extension == "xml" || extension == "conf" || extension == "yaml" || extension == "yml") && !startsWith(base_name, "."))
{
files.push_back(file.path());
}
}
}
@ -453,12 +457,21 @@ XMLDocumentPtr ConfigProcessor::processConfig(
zkutil::ZooKeeperNodeCache * zk_node_cache,
const zkutil::EventPtr & zk_changed_event)
{
XMLDocumentPtr config;
LOG_DEBUG(log, "Processing configuration file '{}'.", path);
XMLDocumentPtr config;
if (fs::exists(path))
{
config = dom_parser.parse(path);
fs::path p(path);
if (p.extension() == ".xml")
{
config = dom_parser.parse(path);
}
else if (p.extension() == ".yaml" || p.extension() == ".yml")
{
config = YAMLParser::parse(path);
}
}
else
{
@ -493,8 +506,20 @@ XMLDocumentPtr ConfigProcessor::processConfig(
{
LOG_DEBUG(log, "Merging configuration file '{}'.", merge_file);
XMLDocumentPtr with = dom_parser.parse(merge_file);
XMLDocumentPtr with;
fs::path p(merge_file);
if (p.extension() == ".yaml" || p.extension() == ".yml")
{
with = YAMLParser::parse(merge_file);
}
else
{
with = dom_parser.parse(merge_file);
}
merge(config, with);
contributing_files.push_back(merge_file);
}
catch (Exception & e)


@ -1,5 +1,9 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#include <string>
#include <unordered_set>
#include <vector>
@ -141,3 +145,4 @@ private:
};
}


@ -0,0 +1,166 @@
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#if USE_YAML_CPP
#include "YAMLParser.h"
#include <string>
#include <cstring>
#include <vector>
#include <Poco/DOM/Document.h>
#include <Poco/DOM/DOMParser.h>
#include <Poco/DOM/DOMWriter.h>
#include <Poco/DOM/NodeList.h>
#include <Poco/DOM/Element.h>
#include <Poco/DOM/AutoPtr.h>
#include <Poco/DOM/NamedNodeMap.h>
#include <Poco/DOM/Text.h>
#include <Common/Exception.h>
#include <yaml-cpp/yaml.h> // Y_IGNORE
#include <common/logger_useful.h>
using namespace Poco::XML;
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_OPEN_FILE;
extern const int CANNOT_PARSE_YAML;
}
/// A prefix symbol in yaml key
/// We add attributes to nodes by using a prefix symbol in the key part.
/// Currently we use @ as a prefix symbol. Note, that @ is reserved
/// by YAML standard, so we need to write a key-value pair like this: "@attribute": attr_value
const char YAML_ATTRIBUTE_PREFIX = '@';
namespace
{
Poco::AutoPtr<Poco::XML::Element> createCloneNode(Poco::XML::Element & original_node)
{
Poco::AutoPtr<Poco::XML::Element> clone_node = original_node.ownerDocument()->createElement(original_node.nodeName());
original_node.parentNode()->appendChild(clone_node);
return clone_node;
}
void processNode(const YAML::Node & node, Poco::XML::Element & parent_xml_element)
{
auto * xml_document = parent_xml_element.ownerDocument();
switch (node.Type())
{
case YAML::NodeType::Scalar:
{
auto value = node.as<std::string>();
Poco::AutoPtr<Poco::XML::Text> xml_value = xml_document->createTextNode(value);
parent_xml_element.appendChild(xml_value);
break;
}
/// We process YAML Sequences as a
/// list of <key>value</key> tags with same key and different values.
/// For example, we translate this sequence
/// seq:
/// - val1
/// - val2
///
/// into this:
/// <seq>val1</seq>
/// <seq>val2</seq>
case YAML::NodeType::Sequence:
{
for (const auto & child_node : node)
if (parent_xml_element.hasChildNodes())
{
/// We want to process sequences like that:
/// seq:
/// - val1
/// - k2: val2
/// - val3
/// - k4: val4
/// - val5
/// into xml like this:
/// <seq>val1</seq>
/// <seq>
/// <k2>val2</k2>
/// </seq>
/// <seq>val3</seq>
/// <seq>
/// <k4>val4</k4>
/// </seq>
/// <seq>val5</seq>
/// So, we create a new parent node with same tag for each child node
processNode(child_node, *createCloneNode(parent_xml_element));
}
else
{
processNode(child_node, parent_xml_element);
}
break;
}
case YAML::NodeType::Map:
{
for (const auto & key_value_pair : node)
{
const auto & key_node = key_value_pair.first;
const auto & value_node = key_value_pair.second;
auto key = key_node.as<std::string>();
bool is_attribute = (key.starts_with(YAML_ATTRIBUTE_PREFIX) && value_node.IsScalar());
if (is_attribute)
{
/// we use substr(1) here to remove YAML_ATTRIBUTE_PREFIX from key
auto attribute_name = key.substr(1);
auto value = value_node.as<std::string>();
parent_xml_element.setAttribute(attribute_name, value);
}
else
{
Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
parent_xml_element.appendChild(xml_key);
processNode(value_node, *xml_key);
}
}
break;
}
case YAML::NodeType::Null: break;
case YAML::NodeType::Undefined:
{
throw Exception(ErrorCodes::CANNOT_PARSE_YAML, "YAMLParser has encountered node with undefined type and cannot continue parsing of the file");
}
}
}
}
Poco::AutoPtr<Poco::XML::Document> YAMLParser::parse(const String& path)
{
YAML::Node node_yml;
try
{
node_yml = YAML::LoadFile(path);
}
catch (const YAML::ParserException& e)
{
/// yaml-cpp cannot parse the file because its contents are incorrect
throw Exception(ErrorCodes::CANNOT_PARSE_YAML, "Unable to parse YAML configuration file {}: {}", path, e.what());
}
catch (const YAML::BadFile&)
{
/// yaml-cpp cannot open the file even though it exists
throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open YAML configuration file {}", path);
}
Poco::AutoPtr<Poco::XML::Document> xml = new Document;
Poco::AutoPtr<Poco::XML::Element> root_node = xml->createElement("yandex");
xml->appendChild(root_node);
processNode(node_yml, *root_node);
return xml;
}
}
#endif
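A hedged usage sketch of the new parser: it produces the same Poco DOM that the XML path yields, so the result can be inspected with a plain DOMWriter (the include paths below are assumptions based on the file locations in this commit):

#include <iostream>
#include <Poco/DOM/AutoPtr.h>
#include <Poco/DOM/Document.h>
#include <Poco/DOM/DOMWriter.h>
#include <Poco/XML/XMLWriter.h>
#include <Common/Config/YAMLParser.h>

int main()
{
    // Convert config.yaml into the same DOM tree the XML config path produces.
    Poco::AutoPtr<Poco::XML::Document> doc = DB::YAMLParser::parse("config.yaml");

    Poco::XML::DOMWriter writer;
    writer.setOptions(Poco::XML::XMLWriter::PRETTY_PRINT);
    writer.writeNode(std::cout, doc); // prints the <yandex>...</yandex> tree
}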


@ -0,0 +1,55 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#include <string>
#include <Poco/DOM/Document.h>
#include "Poco/DOM/AutoPtr.h"
#include <common/logger_useful.h>
#if USE_YAML_CPP
namespace DB
{
/// Real YAML parser: loads yaml file into a YAML::Node
class YAMLParserImpl
{
public:
static Poco::AutoPtr<Poco::XML::Document> parse(const String& path);
};
using YAMLParser = YAMLParserImpl;
}
#else
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_PARSE_YAML;
}
/// Fake YAML parser: throws an exception if we try to parse YAML configs in a build without yaml-cpp
class DummyYAMLParser
{
public:
static Poco::AutoPtr<Poco::XML::Document> parse(const String& path)
{
Poco::AutoPtr<Poco::XML::Document> xml = new Poco::XML::Document;
throw Exception(ErrorCodes::CANNOT_PARSE_YAML, "Unable to parse YAML configuration file {} without usage of yaml-cpp library", path);
return xml;
}
};
using YAMLParser = DummyYAMLParser;
}
#endif


@ -87,9 +87,20 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
{
Poco::Net::IPAddress ip;
/// NOTE: Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2
if (Poco::Net::IPAddress::tryParse(host, ip))
return DNSResolver::IPAddresses(1, ip);
/// NOTE:
/// - Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2
/// - Poco::Net::IPAddress::tryParse() expects a hex string for IPv6 (w/o brackets)
if (host.starts_with('['))
{
assert(host.ends_with(']'));
if (Poco::Net::IPAddress::tryParse(host.substr(1, host.size() - 2), ip))
return DNSResolver::IPAddresses(1, ip);
}
else
{
if (Poco::Net::IPAddress::tryParse(host, ip))
return DNSResolver::IPAddresses(1, ip);
}
/// Family: AF_UNSPEC
/// AI_ALL is required for checking if client is allowed to connect from an address


@ -552,6 +552,7 @@
M(582, NO_SUCH_PROJECTION_IN_TABLE) \
M(583, ILLEGAL_PROJECTION) \
M(584, PROJECTION_NOT_USED) \
M(585, CANNOT_PARSE_YAML) \
\
M(998, POSTGRESQL_CONNECTION_FAILURE) \
M(999, KEEPER_EXCEPTION) \


@ -16,3 +16,4 @@
#cmakedefine01 USE_STATS
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 USE_DATASKETCHES
#cmakedefine01 USE_YAML_CPP


@ -7,9 +7,6 @@ endif()
add_executable (sip_hash_perf sip_hash_perf.cpp)
target_link_libraries (sip_hash_perf PRIVATE clickhouse_common_io)
add_executable (auto_array auto_array.cpp)
target_link_libraries (auto_array PRIVATE clickhouse_common_io)
add_executable (small_table small_table.cpp)
target_link_libraries (small_table PRIVATE clickhouse_common_io)


@ -1,197 +0,0 @@
#include <iostream>
#include <iomanip>
#include <map>
#include <pcg_random.hpp>
#include <Core/Field.h>
#include <Common/HashTable/HashMap.h>
#include <Common/AutoArray.h>
#include <IO/WriteHelpers.h>
#include <Common/Stopwatch.h>
int main(int argc, char ** argv)
{
pcg64 rng;
{
size_t n = 10;
using T = std::string;
DB::AutoArray<T> arr(n);
for (size_t i = 0; i < arr.size(); ++i)
arr[i] = "Hello, world! " + DB::toString(i);
for (auto & elem : arr)
std::cerr << elem << std::endl;
}
std::cerr << std::endl;
{
size_t n = 10;
using T = std::string;
using Arr = DB::AutoArray<T>;
Arr arr;
arr.resize(n);
for (size_t i = 0; i < arr.size(); ++i)
arr[i] = "Hello, world! " + DB::toString(i);
for (auto & elem : arr)
std::cerr << elem << std::endl;
std::cerr << std::endl;
Arr arr2 = std::move(arr);
std::cerr << arr.size() << ", " << arr2.size() << std::endl; // NOLINT
for (auto & elem : arr2)
std::cerr << elem << std::endl;
}
std::cerr << std::endl;
{
size_t n = 10;
size_t keys = 10;
using T = std::string;
using Arr = DB::AutoArray<T>;
using Map = std::map<Arr, T>;
Map map;
for (size_t i = 0; i < keys; ++i)
{
Arr key(n);
for (size_t j = 0; j < n; ++j)
key[j] = DB::toString(rng());
map[std::move(key)] = "Hello, world! " + DB::toString(i);
}
for (const auto & kv : map)
{
std::cerr << "[";
for (size_t j = 0; j < n; ++j)
std::cerr << (j == 0 ? "" : ", ") << kv.first[j];
std::cerr << "]";
std::cerr << ":\t" << kv.second << std::endl;
}
std::cerr << std::endl;
Map map2 = std::move(map);
for (const auto & kv : map2)
{
std::cerr << "[";
for (size_t j = 0; j < n; ++j)
std::cerr << (j == 0 ? "" : ", ") << kv.first[j];
std::cerr << "]";
std::cerr << ":\t" << kv.second << std::endl;
}
}
std::cerr << std::endl;
{
size_t n = 10;
size_t keys = 10;
using T = std::string;
using Arr = DB::AutoArray<T>;
using Vec = std::vector<Arr>;
Vec vec;
for (size_t i = 0; i < keys; ++i)
{
Arr key(n);
for (size_t j = 0; j < n; ++j)
key[j] = DB::toString(rng());
vec.push_back(std::move(key));
}
for (const auto & elem : vec)
{
std::cerr << "[";
for (size_t j = 0; j < n; ++j)
std::cerr << (j == 0 ? "" : ", ") << elem[j];
std::cerr << "]" << std::endl;
}
std::cerr << std::endl;
Vec vec2 = std::move(vec);
for (const auto & elem : vec2)
{
std::cerr << "[";
for (size_t j = 0; j < n; ++j)
std::cerr << (j == 0 ? "" : ", ") << elem[j];
std::cerr << "]" << std::endl;
}
}
if (argc == 2 && !strcmp(argv[1], "1"))
{
size_t n = 5;
size_t map_size = 1000000;
using T = DB::Field;
T field = std::string("Hello, world");
using Arr = std::vector<T>;
using Map = HashMap<UInt64, Arr>;
Stopwatch watch;
Map map;
for (size_t i = 0; i < map_size; ++i)
{
Map::LookupResult it;
bool inserted;
map.emplace(rng(), it, inserted);
if (inserted)
{
new (&it->getMapped()) Arr(n);
for (size_t j = 0; j < n; ++j)
(it->getMapped())[j] = field;
}
}
std::cerr << std::fixed << std::setprecision(2)
<< "Vector: Elapsed: " << watch.elapsedSeconds()
<< " (" << map_size / watch.elapsedSeconds() << " rows/sec., "
<< "sizeof(Map::value_type) = " << sizeof(Map::value_type)
<< std::endl;
}
{
size_t n = 10000;
using Arr = DB::AutoArray<std::string>;
Arr arr1(n);
Arr arr2(n);
for (size_t i = 0; i < n; ++i)
{
arr1[i] = "Hello, world! " + DB::toString(i);
arr2[i] = "Goodbye, world! " + DB::toString(i);
}
arr2 = std::move(arr1);
arr1.resize(n); // NOLINT
std::cerr
<< "arr1.size(): " << arr1.size() << ", arr2.size(): " << arr2.size() << std::endl
<< "arr1.data(): " << arr1.data() << ", arr2.data(): " << arr2.data() << std::endl
<< "arr1[0]: " << arr1[0] << ", arr2[0]: " << arr2[0] << std::endl;
}
return 0;
}


@ -81,10 +81,41 @@ struct NetworkInterfaces
bool isLocalAddress(const Poco::Net::IPAddress & address)
{
/** 127.0.0.1 is treated as a local address unconditionally.
* ::1 is also treated as a local address unconditionally.
*
* 127.0.0.{2..255} are not treated as local addresses, because they are used in tests
* to emulate distributed queries across localhost.
*
* But 127.{0,1}.{0,1}.{0,1} are treated as local addresses,
* because they are used in Debian for localhost.
*/
if (address.isLoopback())
{
if (address.family() == Poco::Net::AddressFamily::IPv4)
{
/// The address is located in memory in big endian form (network byte order).
const unsigned char * digits = static_cast<const unsigned char *>(address.addr());
if (digits[0] == 127
&& digits[1] <= 1
&& digits[2] <= 1
&& digits[3] <= 1)
{
return true;
}
}
else if (address.family() == Poco::Net::AddressFamily::IPv6)
{
return true;
}
}
NetworkInterfaces interfaces;
return interfaces.hasAddress(address);
}
bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port)
{
return clickhouse_port == address.port() && isLocalAddress(address.host());
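The IPv4 branch depends on the address bytes being in network order (big endian); here is a standalone sketch of the predicate over a raw byte array (names illustrative):

#include <cassert>

// 127.a.b.c with a, b, c in {0, 1} counts as local (the Debian localhost range);
// digits[0] is the leading octet because the bytes are in network order.
bool isDebianLoopback(const unsigned char digits[4])
{
    return digits[0] == 127 && digits[1] <= 1 && digits[2] <= 1 && digits[3] <= 1;
}

int main()
{
    unsigned char local[4] = {127, 0, 1, 1};
    unsigned char reversed[4] = {1, 1, 0, 127}; // 1.1.0.127 must NOT be local
    assert(isDebianLoopback(local));
    assert(!isDebianLoopback(reversed));
}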


@ -28,15 +28,27 @@ std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 defa
throw Exception("Illegal address passed to function parseAddress: "
"the address begins with opening square bracket, but no closing square bracket found", ErrorCodes::BAD_ARGUMENTS);
port = find_first_symbols<':'>(closing_square_bracket + 1, end);
port = closing_square_bracket + 1;
}
else
port = find_first_symbols<':'>(begin, end);
if (port != end)
{
UInt16 port_number = parse<UInt16>(port + 1);
return { std::string(begin, port), port_number };
if (*port != ':')
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Illegal port prefix passed to function parseAddress: {}", port);
++port;
UInt16 port_number;
ReadBufferFromMemory port_buf(port, end - port);
if (!tryReadText<UInt16>(port_number, port_buf) || !port_buf.eof())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Illegal port passed to function parseAddress: {}", port);
}
return { std::string(begin, port - 1), port_number };
}
else if (default_port)
{
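Expected behavior of the tightened parsing, as an illustrative sketch rather than a test (per the code above, the bracketed IPv6 case keeps the brackets in the returned host; the header path is an assumption):

#include <cassert>
#include <Common/parseAddress.h>

int main()
{
    // Plain host:port.
    auto a = DB::parseAddress("127.0.0.1:9000", 0);
    assert(a.first == "127.0.0.1" && a.second == 9000);

    // Bracketed IPv6 literal; brackets stay in the host part.
    auto b = DB::parseAddress("[::1]:9000", 0);
    assert(b.first == "[::1]" && b.second == 9000);

    // Trailing garbage after the port ("...:9000abc") now throws BAD_ARGUMENTS
    // instead of silently parsing a numeric prefix.
}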


@ -11,9 +11,30 @@ TEST(LocalAddress, SmokeTest)
std::string address_str;
DB::readString(address_str, cmd->out);
cmd->wait();
std::cerr << "Got Address:" << address_str << std::endl;
std::cerr << "Got Address: " << address_str << std::endl;
Poco::Net::IPAddress address(address_str);
EXPECT_TRUE(DB::isLocalAddress(address));
}
TEST(LocalAddress, Localhost)
{
EXPECT_TRUE(DB::isLocalAddress(Poco::Net::IPAddress{"127.0.0.1"}));
EXPECT_TRUE(DB::isLocalAddress(Poco::Net::IPAddress{"127.0.1.1"}));
EXPECT_TRUE(DB::isLocalAddress(Poco::Net::IPAddress{"127.1.1.1"}));
EXPECT_TRUE(DB::isLocalAddress(Poco::Net::IPAddress{"127.1.0.1"}));
EXPECT_TRUE(DB::isLocalAddress(Poco::Net::IPAddress{"127.1.0.0"}));
EXPECT_TRUE(DB::isLocalAddress(Poco::Net::IPAddress{"::1"}));
/// Make sure we don't mess with the byte order.
EXPECT_FALSE(DB::isLocalAddress(Poco::Net::IPAddress{"1.0.0.127"}));
EXPECT_FALSE(DB::isLocalAddress(Poco::Net::IPAddress{"1.1.1.127"}));
EXPECT_FALSE(DB::isLocalAddress(Poco::Net::IPAddress{"0.0.0.0"}));
EXPECT_FALSE(DB::isLocalAddress(Poco::Net::IPAddress{"::"}));
EXPECT_FALSE(DB::isLocalAddress(Poco::Net::IPAddress{"::2"}));
/// See the comment in the implementation of isLocalAddress.
EXPECT_FALSE(DB::isLocalAddress(Poco::Net::IPAddress{"127.0.0.2"}));
}


@ -721,6 +721,9 @@ private:
#undef DBMS_MIN_FIELD_SIZE
using Row = std::vector<Field>;
template <> struct Field::TypeToEnum<Null> { static const Types::Which value = Types::Null; };
template <> struct Field::TypeToEnum<UInt64> { static const Types::Which value = Types::UInt64; };
template <> struct Field::TypeToEnum<UInt128> { static const Types::Which value = Types::UInt128; };


@ -452,7 +452,7 @@ namespace MySQLReplication
UInt32 number_columns;
String schema;
String table;
std::vector<Field> rows;
Row rows;
RowsEvent(std::shared_ptr<TableMapEvent> table_map_, EventHeader && header_, const RowsEventHeader & rows_header)
: EventBase(std::move(header_)), number_columns(0), table_map(table_map_)


@ -1,18 +0,0 @@
#pragma once
#include <vector>
#include <Common/AutoArray.h>
#include <Core/Field.h>
namespace DB
{
/** The data type for representing one row of the table in the RAM.
* Warning! It is preferable to store column blocks instead of single rows. See Block.h
*/
using Row = AutoArray<Field>;
}
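Core/Row.h goes away because Row is now the plain alias declared in Core/Field.h, so the bespoke AutoArray copy API disappears from call sites. A self-contained sketch of the mechanical change (the Field struct here is a stand-in for DB::Field, purely for illustration):

#include <string>
#include <utility>
#include <vector>

struct Field { std::string value; }; // stand-in for DB::Field, illustration only
using Row = std::vector<Field>;      // the new alias from Core/Field.h

int main()
{
    Row a{{"x"}, {"y"}};
    Row b;
    b = a;                // plain copy assignment replaces AutoArray::assign()
    Row c = std::move(a); // in practice moves leave the source empty, as AutoArray did
    return (b.size() == 2 && c.size() == 2) ? 0 : 1;
}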


@ -1,6 +1,5 @@
#pragma once
#include <Core/Row.h>
#include <Core/SortDescription.h>
#include <Core/SortCursor.h>


@ -193,7 +193,7 @@ void PostgreSQLBlockInputStream::insertValue(IColumn & column, std::string_view
size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions;
const auto parse_value = array_info[idx].pqxx_parser;
std::vector<std::vector<Field>> dimensions(expected_dimensions + 1);
std::vector<Row> dimensions(expected_dimensions + 1);
while (parsed.first != pqxx::array_parser::juncture::done)
{


@ -477,7 +477,7 @@ static inline void fillSignAndVersionColumnsData(Block & data, Int8 sign_value,
template <bool assert_nullable = false>
static void writeFieldsToColumn(
IColumn & column_to, const std::vector<Field> & rows_data, size_t column_index, const std::vector<bool> & mask, ColumnUInt8 * null_map_column = nullptr)
IColumn & column_to, const Row & rows_data, size_t column_index, const std::vector<bool> & mask, ColumnUInt8 * null_map_column = nullptr)
{
if (ColumnNullable * column_nullable = typeid_cast<ColumnNullable *>(&column_to))
writeFieldsToColumn<true>(column_nullable->getNestedColumn(), rows_data, column_index, mask, &column_nullable->getNullMapColumn());
@ -599,7 +599,7 @@ static void writeFieldsToColumn(
}
template <Int8 sign>
static size_t onWriteOrDeleteData(const std::vector<Field> & rows_data, Block & buffer, size_t version)
static size_t onWriteOrDeleteData(const Row & rows_data, Block & buffer, size_t version)
{
size_t prev_bytes = buffer.bytes();
for (size_t column = 0; column < buffer.columns() - 2; ++column)
@ -623,7 +623,7 @@ static inline bool differenceSortingKeys(const Tuple & row_old_data, const Tuple
return false;
}
static inline size_t onUpdateData(const std::vector<Field> & rows_data, Block & buffer, size_t version, const std::vector<size_t> & sorting_columns_index)
static inline size_t onUpdateData(const Row & rows_data, Block & buffer, size_t version, const std::vector<size_t> & sorting_columns_index)
{
if (rows_data.size() % 2 != 0)
throw Exception("LOGICAL ERROR: It is a bug.", ErrorCodes::LOGICAL_ERROR);


@ -1,6 +1,5 @@
#include <Common/quoteString.h>
#include <Common/typeid_cast.h>
#include <Core/Row.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsMiscellaneous.h>


@ -34,7 +34,7 @@ public:
FillColumnDescription & getFillDescription(size_t ind) { return description[ind].fill_description; }
private:
std::vector<Field> row;
Row row;
SortDescription description;
};


@ -1,7 +1,6 @@
#include <optional>
#include <Core/Field.h>
#include <Core/Row.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnTuple.h>


@ -12,7 +12,6 @@
#define DBMS_HASH_MAP_COUNT_COLLISIONS
*/
#include <common/types.h>
#include <Core/Row.h>
#include <IO/ReadBufferFromFile.h>
#include <Compression/CompressedReadBuffer.h>
#include <Common/HashTable/HashMap.h>
@ -27,19 +26,6 @@
* This is important, because if you run all the tests one by one, the results will be incorrect.
* (Due to the peculiarities of the work of the allocator, the first test takes advantage.)
*
* Depending on USE_AUTO_ARRAY, one of the structures is selected as the value.
* USE_AUTO_ARRAY = 0 - uses std::vector (hard-copy structure, sizeof = 24 bytes).
* USE_AUTO_ARRAY = 1 - uses AutoArray (a structure specially designed for such cases, sizeof = 8 bytes).
*
* That is, the test also allows you to compare AutoArray and std::vector.
*
* If USE_AUTO_ARRAY = 0, then HashMap confidently overtakes all.
* If USE_AUTO_ARRAY = 1, then HashMap is slightly less serious (20%) ahead of google::dense_hash_map.
*
* When using HashMap, AutoArray has a rather serious (40%) advantage over std::vector.
* And when using other hash tables, AutoArray even more seriously overtakes std::vector
* (up to three and a half times in the case of std::unordered_map and google::sparse_hash_map).
*
* HashMap, unlike google::dense_hash_map, much more depends on the quality of the hash function.
*
* PS. Measure everything yourself, otherwise I'm almost confused.
@ -49,9 +35,6 @@
* But in this test, there was something similar to the old scenario of using hash tables in the aggregation.
*/
#define USE_AUTO_ARRAY 0
struct AlternativeHash
{
size_t operator() (UInt64 x) const
@ -85,12 +68,7 @@ int main(int argc, char ** argv)
using namespace DB;
using Key = UInt64;
#if USE_AUTO_ARRAY
using Value = AutoArray<IAggregateFunction*>;
#else
using Value = std::vector<IAggregateFunction*>;
#endif
size_t n = argc < 2 ? 10000000 : std::stol(argv[1]);
//size_t m = std::stol(argv[2]);
@ -119,13 +97,8 @@ int main(int argc, char ** argv)
INIT
#ifndef USE_AUTO_ARRAY
#undef INIT
#define INIT
#endif
Row row(1);
row[0] = UInt64(0);
std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl;


@ -344,7 +344,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node
if (target.isEnum())
{
const auto & enum_type = dynamic_cast<const IDataTypeEnum &>(*target_type);
std::vector<Field> symbol_mapping;
Row symbol_mapping;
for (size_t i = 0; i < root_node->names(); i++)
{
symbol_mapping.push_back(enum_type.castToValue(root_node->nameAt(i)));


@ -12,6 +12,7 @@
#include <DataTypes/NestedUtils.h>
#include <IO/WriteHelpers.h>
namespace DB
{
@ -109,6 +110,9 @@ static bool isInPartitionKey(const std::string & column_name, const Names & part
return is_in_partition_key != partition_key_columns.end();
}
using Row = std::vector<Field>;
/// Returns true if merge result is not empty
static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc,
Row & row, const ColumnRawPtrs & raw_columns, size_t row_number)


@ -2,7 +2,7 @@
#include <Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h>
#include <Processors/Merges/Algorithms/MergedData.h>
#include <Core/Row.h>
namespace DB
{


@ -2,7 +2,6 @@
#include <DataStreams/IBlockInputStream.h>
#include <Core/Row.h>
#include <Core/Block.h>
#include <common/types.h>
#include <Core/NamesAndTypes.h>


@ -211,9 +211,12 @@ namespace
virtual void insertStringColumn(const ColumnPtr & column, const String & name) = 0;
virtual void insertUInt64Column(const ColumnPtr & column, const String & name) = 0;
virtual void insertUUIDColumn(const ColumnPtr & column, const String & name) = 0;
virtual void
insertPartitionValueColumn(size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name)
= 0;
virtual void insertPartitionValueColumn(
size_t rows,
const Row & partition_value,
const DataTypePtr & partition_value_type,
const String & name) = 0;
};
}
@ -358,8 +361,8 @@ namespace
columns.push_back(column);
}
void
insertPartitionValueColumn(size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String &) final
void insertPartitionValueColumn(
size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String &) final
{
ColumnPtr column;
if (rows)


@ -79,7 +79,7 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri
new_data_part->uuid = uuid;
new_data_part->setColumns(columns);
new_data_part->partition.value.assign(partition.value);
new_data_part->partition.value = partition.value;
new_data_part->minmax_idx = minmax_idx;
if (disk->exists(destination_path))


@ -150,7 +150,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block
if (!metadata_snapshot->hasPartitionKey()) /// Table is not partitioned.
{
result.emplace_back(Block(block), Row());
result.emplace_back(Block(block), Row{});
return result;
}


@ -1,7 +1,6 @@
#pragma once
#include <Core/Block.h>
#include <Core/Row.h>
#include <IO/WriteBufferFromFile.h>
#include <Compression/CompressedWriteBuffer.h>


@ -1,9 +1,10 @@
#pragma once
#include <Core/Row.h>
#include <common/types.h>
#include <Disks/IDisk.h>
#include <IO/WriteBuffer.h>
#include <Core/Field.h>
namespace DB
{
@ -38,7 +39,7 @@ public:
void store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const;
void store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const;
void assign(const MergeTreePartition & other) { value.assign(other.value); }
void assign(const MergeTreePartition & other) { value = other.value; }
void create(const StorageMetadataPtr & metadata_snapshot, Block block, size_t row);
};


@ -185,9 +185,10 @@ def run_single_test(args, ext, server_logs_level, client_options, case_file, std
'stderr': stderr_file,
}
# >> append to stdout and stderr, because there are also output of per test database creation
# >> append to stderr (but not stdout since it is not used there),
# because there are also output of per test database creation
if not args.database:
pattern = '{test} >> {stdout} 2>> {stderr}'
pattern = '{test} > {stdout} 2>> {stderr}'
else:
pattern = '{test} > {stdout} 2> {stderr}'


@ -126,7 +126,8 @@ class ClickHouseCluster:
"""
def __init__(self, base_path, name=None, base_config_dir=None, server_bin_path=None, client_bin_path=None,
odbc_bridge_bin_path=None, library_bridge_bin_path=None, zookeeper_config_path=None, custom_dockerd_host=None):
odbc_bridge_bin_path=None, library_bridge_bin_path=None, zookeeper_config_path=None,
custom_dockerd_host=None):
for param in list(os.environ.keys()):
print("ENV %40s %s" % (param, os.environ[param]))
self.base_dir = p.dirname(base_path)
@ -219,7 +220,9 @@ class ClickHouseCluster:
with_redis=False, with_minio=False, with_cassandra=False,
hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", tag=None,
stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None,
zookeeper_docker_compose_path=None, zookeeper_use_tmpfs=True, minio_certs_dir=None, use_keeper=True):
zookeeper_docker_compose_path=None, zookeeper_use_tmpfs=True, minio_certs_dir=None, use_keeper=True,
main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True):
"""Add an instance to the cluster.
name - the name of the instance directory and the value of the 'instance' macro in ClickHouse.
@ -280,6 +283,9 @@ class ClickHouseCluster:
ipv4_address=ipv4_address,
ipv6_address=ipv6_address,
with_installed_binary=with_installed_binary,
main_config_name=main_config_name,
users_config_name=users_config_name,
copy_common_configs=copy_common_configs,
tmpfs=tmpfs or [])
docker_compose_yml_dir = get_docker_compose_path()
@ -944,7 +950,7 @@ class ClickHouseCluster:
subprocess_check_call(self.base_zookeeper_cmd + ["start", n])
CLICKHOUSE_START_COMMAND = "clickhouse server --config-file=/etc/clickhouse-server/config.xml --log-file=/var/log/clickhouse-server/clickhouse-server.log --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log"
CLICKHOUSE_START_COMMAND = "clickhouse server --config-file=/etc/clickhouse-server/{main_config_file} --log-file=/var/log/clickhouse-server/clickhouse-server.log --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log"
CLICKHOUSE_STAY_ALIVE_COMMAND = 'bash -c "{} --daemon; tail -f /dev/null"'.format(CLICKHOUSE_START_COMMAND)
@ -1000,6 +1006,8 @@ class ClickHouseInstance:
macros, with_zookeeper, zookeeper_config_path, with_mysql, with_mysql_cluster, with_kafka, with_kerberized_kafka, with_rabbitmq, with_kerberized_hdfs,
with_mongo, with_redis, with_minio,
with_cassandra, server_bin_path, odbc_bridge_bin_path, library_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers,
clickhouse_start_command=CLICKHOUSE_START_COMMAND,
main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True,
hostname=None, env_variables=None,
image="yandex/clickhouse-integration-test", tag="latest",
stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None):
@ -1036,6 +1044,12 @@ class ClickHouseInstance:
self.with_minio = with_minio
self.with_cassandra = with_cassandra
self.main_config_name = main_config_name
self.users_config_name = users_config_name
self.copy_common_configs = copy_common_configs
self.clickhouse_start_command = clickhouse_start_command.replace("{main_config_file}", self.main_config_name)
self.path = p.join(self.cluster.instances_dir, name)
self.docker_compose_path = p.join(self.path, 'docker-compose.yml')
self.env_variables = env_variables or {}
@ -1177,7 +1191,7 @@ class ClickHouseInstance:
if not self.stay_alive:
raise Exception("clickhouse can be started again only with stay_alive=True instance")
self.exec_in_container(["bash", "-c", "{} --daemon".format(CLICKHOUSE_START_COMMAND)], user=str(os.getuid()))
self.exec_in_container(["bash", "-c", "{} --daemon".format(self.clickhouse_start_command)], user=str(os.getuid()))
# wait start
from helpers.test_tools import assert_eq_with_retry
assert_eq_with_retry(self, "select 1", "1", retry_count=int(start_wait_sec / 0.5), sleep_time=0.5)
@ -1263,7 +1277,7 @@ class ClickHouseInstance:
self.exec_in_container(["bash", "-c",
"cp /usr/share/clickhouse-odbc-bridge_fresh /usr/bin/clickhouse-odbc-bridge && chmod 777 /usr/bin/clickhouse"],
user='root')
self.exec_in_container(["bash", "-c", "{} --daemon".format(CLICKHOUSE_START_COMMAND)], user=str(os.getuid()))
self.exec_in_container(["bash", "-c", "{} --daemon".format(self.clickhouse_start_command)], user=str(os.getuid()))
from helpers.test_tools import assert_eq_with_retry
# wait start
assert_eq_with_retry(self, "select 1", "1", retry_count=retries)
@ -1404,8 +1418,10 @@ class ClickHouseInstance:
os.makedirs(instance_config_dir)
print("Copy common default production configuration from {}".format(self.base_config_dir))
shutil.copyfile(p.join(self.base_config_dir, 'config.xml'), p.join(instance_config_dir, 'config.xml'))
shutil.copyfile(p.join(self.base_config_dir, 'users.xml'), p.join(instance_config_dir, 'users.xml'))
shutil.copyfile(p.join(self.base_config_dir, self.main_config_name), p.join(instance_config_dir, self.main_config_name))
shutil.copyfile(p.join(self.base_config_dir, self.users_config_name), p.join(instance_config_dir, self.users_config_name))
print("Create directory for configuration generated in this helper")
# used by all utils with any config
@ -1423,7 +1439,9 @@ class ClickHouseInstance:
print("Copy common configuration from helpers")
# The file is named with 0_ prefix to be processed before other configuration overloads.
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), self.config_d_dir)
if self.copy_common_configs:
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), self.config_d_dir)
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_users.xml'), users_d_dir)
if len(self.custom_dictionaries_paths):
shutil.copy(p.join(HELPERS_DIR, '0_common_enable_dictionaries.xml'), self.config_d_dir)
@ -1502,11 +1520,11 @@ class ClickHouseInstance:
self._create_odbc_config_file()
odbc_ini_path = '- ' + self.odbc_ini_path
entrypoint_cmd = CLICKHOUSE_START_COMMAND
entrypoint_cmd = self.clickhouse_start_command
if self.stay_alive:
entrypoint_cmd = CLICKHOUSE_STAY_ALIVE_COMMAND
entrypoint_cmd = CLICKHOUSE_STAY_ALIVE_COMMAND.replace("{main_config_file}", self.main_config_name)
print("Entrypoint cmd: {}".format(entrypoint_cmd))
networks = app_net = ipv4_address = ipv6_address = net_aliases = net_alias1 = ""


@ -70,11 +70,11 @@ def check_args_and_update_paths(args):
if not os.path.exists(path):
raise Exception("Path {} doesn't exist".format(path))
if not os.path.exists(os.path.join(args.base_configs_dir, "config.xml")):
raise Exception("No configs.xml in {}".format(args.base_configs_dir))
if (not os.path.exists(os.path.join(args.base_configs_dir, "config.xml"))) and (not os.path.exists(os.path.join(args.base_configs_dir, "config.yaml"))):
raise Exception("No configs.xml or configs.yaml in {}".format(args.base_configs_dir))
if not os.path.exists(os.path.join(args.base_configs_dir, "users.xml")):
raise Exception("No users.xml in {}".format(args.base_configs_dir))
if (not os.path.exists(os.path.join(args.base_configs_dir, "users.xml"))) and (not os.path.exists(os.path.join(args.base_configs_dir, "users.yaml"))):
raise Exception("No users.xml or users.yaml in {}".format(args.base_configs_dir))
def docker_kill_handler_handler(signum, frame):
subprocess.check_call('docker kill $(docker ps -a -q --filter name={name} --format="{{{{.ID}}}}")'.format(name=CONTAINER_NAME), shell=True)


@ -0,0 +1,13 @@
<yandex>
<!-- Sources to read users, roles, access rights, profiles of settings, quotas. -->
<user_directories replace="replace">
<users_xml>
<!-- Path to configuration file with predefined users. -->
<path>users.xml</path>
</users_xml>
<local_directory>
<!-- Path to folder where users created by SQL commands are stored. -->
<path>access/</path>
</local_directory>
</user_directories>
</yandex>


@ -0,0 +1,23 @@
<yandex>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>
<session_timeout_ms>30000</session_timeout_ms>
<force_sync>false</force_sync>
<startup_timeout>60000</startup_timeout>
<!-- we want all logs for complex problems investigation -->
<reserved_log_items>1000000000000000</reserved_log_items>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>localhost</hostname>
<port>44444</port>
</server>
</raft_configuration>
</keeper_server>
</yandex>


@ -0,0 +1,7 @@
<yandex>
<logger>
<console>true</console>
<log remove="remove"/>
<errorlog remove="remove"/>
</logger>
</yandex>


@ -0,0 +1,8 @@
<yandex>
<logger>
<!-- Disable rotation
https://pocoproject.org/docs/Poco.FileChannel.html
-->
<size>never</size>
</logger>
</yandex>


@ -0,0 +1,9 @@
<yandex>
<macros>
<test>Hello, world!</test>
<shard>s1</shard>
<replica>r1</replica>
<default_path_test>/clickhouse/tables/{database}/{shard}/</default_path_test>
<default_name_test>table_{table}</default_name_test>
</macros>
</yandex>


@ -0,0 +1,8 @@
<yandex>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>


@ -0,0 +1,49 @@
<yandex>
<remote_servers>
<![CDATA[
You can run additional servers simply as
./clickhouse-server -- --path=9001 --tcp_port=9001
]]>
<single_remote_shard_at_port_9001>
<shard>
<replica>
<host>localhost</host>
<port>9001</port>
</replica>
</shard>
</single_remote_shard_at_port_9001>
<two_remote_shards_at_port_9001_9002>
<shard>
<replica>
<host>localhost</host>
<port>9001</port>
</replica>
</shard>
<shard>
<replica>
<host>localhost</host>
<port>9002</port>
</replica>
</shard>
</two_remote_shards_at_port_9001_9002>
<two_shards_one_local_one_remote_at_port_9001>
<shard>
<replica>
<host>localhost</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>localhost</host>
<port>9001</port>
</replica>
</shard>
</two_shards_one_local_one_remote_at_port_9001>
</remote_servers>
</yandex>


@ -0,0 +1,8 @@
<yandex>
<part_log>
<database>system</database>
<table>part_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</part_log>
</yandex>


@ -0,0 +1,8 @@
<yandex>
<path replace="replace">./</path>
<tmp_path replace="replace">./tmp/</tmp_path>
<user_files_path replace="replace">./user_files/</user_files_path>
<format_schema_path replace="replace">./format_schemas/</format_schema_path>
<access_control_path replace="replace">./access/</access_control_path>
<top_level_domains_path replace="replace">./top_level_domains/</top_level_domains_path>
</yandex>


@ -0,0 +1,10 @@
<?xml version="1.0"?>
<!-- Config for test server -->
<yandex>
<query_masking_rules>
<rule>
<regexp>TOPSECRET.TOPSECRET</regexp>
<replace>[hidden]</replace>
</rule>
</query_masking_rules>
</yandex>


@ -0,0 +1,3 @@
<yandex>
<tcp_with_proxy_port>9010</tcp_with_proxy_port>
</yandex>


@ -0,0 +1,7 @@
<yandex>
<text_log>
<database>system</database>
<table>text_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</text_log>
</yandex>


@ -0,0 +1,8 @@
<yandex>
<zookeeper>
<node index="1">
<host>localhost</host>
<port>9181</port>
</node>
</zookeeper>
</yandex>

File diff suppressed because it is too large

View File

@ -0,0 +1,40 @@
<?xml version="1.0"?>
<!-- Config that is used when the server is run without a config file. -->
<yandex>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<users>
<default>
<password></password>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default />
</quotas>
</yandex>

View File

@ -0,0 +1,8 @@
<?xml version="1.0"?>
<yandex>
<profiles>
<default>
<allow_introspection_functions>1</allow_introspection_functions>
</default>
</profiles>
</yandex>

View File

@ -0,0 +1,7 @@
<yandex>
<profiles>
<default>
<log_queries>1</log_queries>
</default>
</profiles>
</yandex>

View File

@ -0,0 +1,120 @@
<?xml version="1.0"?>
<yandex>
<!-- Profiles of settings. -->
<profiles>
<!-- Default settings. -->
<default>
<!-- Maximum memory usage for processing a single query, in bytes. -->
<max_memory_usage>10000000000</max_memory_usage>
<max_block_size>64999</max_block_size>
<!-- How to choose between replicas during distributed query processing.
random - choose a random replica from the set of replicas with the minimum number of errors
nearest_hostname - from the set of replicas with the minimum number of errors, choose the replica
with the minimum number of differing characters between its hostname and the local hostname
(Hamming distance).
in_order - the first live replica is chosen, in the specified order.
first_or_random - if the first replica has a higher number of errors, pick a random one from the replicas with the minimum number of errors.
-->
<load_balancing>random</load_balancing>
</default>
<!-- Profile that allows only read queries. -->
<readonly>
<readonly>1</readonly>
</readonly>
</profiles>
<!-- Users and ACL. -->
<users>
<!-- If the user name is not specified, the 'default' user is used. -->
<default>
<!-- The password can be specified in plaintext or as SHA256 (in hex format).
If you want to specify the password in plaintext (not recommended), place it in the 'password' element.
Example: <password>qwerty</password>.
The password can be empty.
If you want to specify SHA256, place it in the 'password_sha256_hex' element.
Example: <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
Restriction of SHA256: it is impossible to connect to ClickHouse using the MySQL JS client (as of July 2019).
If you want to specify a double SHA1, place it in the 'password_double_sha1_hex' element.
Example: <password_double_sha1_hex>e395796d6546b1b65db9d665cd43f0e858dd4303</password_double_sha1_hex>
If you want to specify a previously defined LDAP server (see 'ldap_servers' in the main config) for authentication,
place its name in the 'server' element inside the 'ldap' element.
Example: <ldap><server>my_ldap_server</server></ldap>
If you want to authenticate the user via Kerberos (assuming Kerberos is enabled, see 'kerberos' in the main config),
place a 'kerberos' element instead of the 'password' (and similar) elements.
The name part of the canonical principal name of the initiator must match the user name for authentication to succeed.
You can also place a 'realm' element inside the 'kerberos' element to further restrict authentication to only those requests
whose initiator's realm matches it.
Example: <kerberos />
Example: <kerberos><realm>EXAMPLE.COM</realm></kerberos>
How to generate a decent password:
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
The first line of the output is the password and the second is the corresponding SHA256 hash.
How to generate a double SHA1:
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-'
The first line of the output is the password and the second is the corresponding double SHA1 hash.
-->
<password></password>
<!-- List of networks with open access.
To open access from everywhere, specify:
<ip>::/0</ip>
To open access only from localhost, specify:
<ip>::1</ip>
<ip>127.0.0.1</ip>
Each element of the list has one of the following forms:
<ip> IP address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
<host> Hostname. Example: server01.yandex.ru.
To check access, a DNS query is performed, and all received addresses are compared to the peer address.
<host_regexp> Regular expression for host names. Example: ^server\d\d-\d\d-\d\.yandex\.ru$
To check access, a DNS PTR query is performed for the peer address and then the regexp is applied.
Then, for the result of the PTR query, another DNS query is performed and all received addresses are compared to the peer address.
It is strongly recommended that the regexp ends with $.
All results of DNS requests are cached until the server restarts.
-->
<networks>
<ip>::/0</ip>
</networks>
<!-- Settings profile for user. -->
<profile>default</profile>
<!-- Quota for user. -->
<quota>default</quota>
<!-- User can create other users and grant rights to them. -->
<!-- <access_management>1</access_management> -->
</default>
</users>
<!-- Quotas. -->
<quotas>
<!-- Name of quota. -->
<default>
<!-- Limits for a time interval. You can specify many intervals with different limits. -->
<interval>
<!-- Length of interval. -->
<duration>3600</duration>
<!-- No limits. Just calculate resource usage for time interval. -->
<queries>0</queries>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
<execution_time>0</execution_time>
</interval>
</default>
</quotas>
</yandex>

View File

@ -0,0 +1,40 @@
from helpers.cluster import ClickHouseCluster


def test_xml_full_conf():
    # All configs are in XML.
    cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/config.d/zookeeper.xml')

    all_confd = ['configs/config.d/access_control.xml',
                 'configs/config.d/keeper_port.xml',
                 'configs/config.d/logging_no_rotate.xml',
                 'configs/config.d/log_to_console.xml',
                 'configs/config.d/macros.xml',
                 'configs/config.d/metric_log.xml',
                 'configs/config.d/more_clusters.xml',
                 'configs/config.d/part_log.xml',
                 'configs/config.d/path.xml',
                 'configs/config.d/query_masking_rules.xml',
                 'configs/config.d/tcp_with_proxy.xml',
                 'configs/config.d/text_log.xml',
                 'configs/config.d/zookeeper.xml']

    all_userd = ['configs/users.d/allow_introspection_functions.xml',
                 'configs/users.d/log_queries.xml']

    node = cluster.add_instance('node', base_config_dir='configs', main_configs=all_confd,
                                user_configs=all_userd, with_zookeeper=False)

    try:
        cluster.start()
        # Profile settings from the base users.xml must be visible in system.settings.
        assert node.query("select value from system.settings where name = 'max_memory_usage'") == "10000000000\n"
        assert node.query("select value from system.settings where name = 'max_block_size'") == "64999\n"
    finally:
        cluster.shutdown()
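
The assertions above can also be reproduced against any live server outside the test harness; a minimal sketch, assuming the third-party clickhouse-driver package and a server on the native port 9000:

from clickhouse_driver import Client  # third-party package, an assumption

client = Client(host="localhost", port=9000)
# Mirrors the test's check: the profile value from users.xml is visible
# in system.settings for the current session.
rows = client.execute("SELECT value FROM system.settings WHERE name = 'max_memory_usage'")
assert rows == [("10000000000",)]
client.disconnect()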

View File

@ -0,0 +1,7 @@
user_directories:
users_xml:
path: users.xml
local_directory:
path: access/
"@replace": replace

View File

@ -0,0 +1,15 @@
keeper_server:
tcp_port: 9181
server_id: 1
coordination_settings:
operation_timeout_ms: 10000
session_timeout_ms: 30000
force_sync: false
startup_timeout: 60000
reserved_log_items: 1000000000000000
raft_configuration:
server:
id: 1
hostname: localhost
port: 44444

View File

@ -0,0 +1,7 @@
logger:
console: true
log:
"@remove": remove
errorlog:
"@remove": remove

View File

@ -0,0 +1,2 @@
logger:
size: never

View File

@ -0,0 +1,7 @@
macros:
test: 'Hello, world!'
shard: s1
replica: r1
default_path_test: '/clickhouse/tables/{database}/{shard}/'
default_name_test: 'table_{table}'

View File

@ -0,0 +1,6 @@
metric_log:
database: system
table: metric_log
flush_interval_milliseconds: 7500
collect_interval_milliseconds: 1000

View File

@ -0,0 +1,23 @@
remote_servers:
single_remote_shard_at_port_9001:
shard:
replica:
host: localhost
port: 9001
two_remote_shards_at_port_9001_9002:
shard:
- replica:
host: localhost
port: 9001
- replica:
host: localhost
port: 9002
two_shards_one_local_one_remote_at_port_9001:
shard:
- replica:
host: localhost
port: 9000
- replica:
host: localhost
port: 9001
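
Where the XML variant of this file repeats the <shard> element, the YAML form uses a sequence, since mapping keys cannot repeat. A minimal sketch of the resulting structure, assuming PyYAML:

import yaml  # PyYAML, an assumption

cluster = yaml.safe_load('''
shard:
  - replica: {host: localhost, port: 9000}
  - replica: {host: localhost, port: 9001}
''')
# Each sequence item maps back to one <shard> element in the XML form.
assert [s["replica"]["port"] for s in cluster["shard"]] == [9000, 9001]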

View File

@ -0,0 +1,5 @@
part_log:
database: system
table: part_log
flush_interval_milliseconds: 7500

View File

@ -0,0 +1,18 @@
path:
- ./
- "@replace": replace
tmp_path:
- ./tmp/
- "@replace": replace
user_files_path:
- ./user_files/
- "@replace": replace
format_schema_path:
- ./format_schemas/
- "@replace": replace
access_control_path:
- ./access/
- "@replace": replace
top_level_domains_path:
- ./top_level_domains/
- "@replace": replace

View File

@ -0,0 +1,4 @@
query_masking_rules:
rule:
regexp: TOPSECRET.TOPSECRET
replace: '[hidden]'

View File

@ -0,0 +1 @@
tcp_with_proxy_port: 9010

View File

@ -0,0 +1,11 @@
remote_servers:
test_cluster_with_incorrect_pw:
shard:
internal_replication: true
replica:
- host: 127.0.0.1
port: 9000
password: foo
- host: 127.0.0.2
port: 9000
password: foo

View File

@ -0,0 +1,4 @@
text_log:
database: system
table: text_log
flush_interval_milliseconds: 7500

View File

@ -0,0 +1,5 @@
zookeeper:
node:
host: localhost
port: 9181
"@index": 1

View File

@ -0,0 +1,277 @@
<?xml version="1.0"?>
<yandex>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<postgresql_port>9005</postgresql_port>
<interserver_http_port>9009</interserver_http_port>
<max_connections>4096</max_connections>
<keep_alive_timeout>3</keep_alive_timeout>
<grpc>
<enable_ssl>false</enable_ssl>
<ssl_cert_file>/path/to/ssl_cert_file</ssl_cert_file>
<ssl_key_file>/path/to/ssl_key_file</ssl_key_file>
<ssl_require_client_auth>false</ssl_require_client_auth>
<ssl_ca_cert_file>/path/to/ssl_ca_cert_file</ssl_ca_cert_file>
<compression>deflate</compression>
<compression_level>medium</compression_level>
<max_send_message_size>-1</max_send_message_size>
<max_receive_message_size>-1</max_receive_message_size>
<verbose_logs>false</verbose_logs>
</grpc>
<openSSL>
<server>
<certificateFile>/etc/clickhouse-server/server.crt</certificateFile>
<privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
<dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
<verificationMode>none</verificationMode>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
</server>
<client>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<invalidCertificateHandler>
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<max_concurrent_queries>100</max_concurrent_queries>
<max_server_memory_usage>0</max_server_memory_usage>
<max_thread_pool_size>10000</max_thread_pool_size>
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
<total_memory_profiler_step>4194304</total_memory_profiler_step>
<total_memory_tracker_sample_probability>0</total_memory_tracker_sample_probability>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mmap_cache_size>1000</mmap_cache_size>
<path>/var/lib/clickhouse/</path>
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
<ldap_servers>
</ldap_servers>
<user_directories>
<users_xml>
<path>users.xml</path>
</users_xml>
<local_directory>
<path>/var/lib/clickhouse/access/</path>
</local_directory>
</user_directories>
<default_profile>default</default_profile>
<custom_settings_prefixes></custom_settings_prefixes>
<default_database>default</default_database>
<mlock_executable>true</mlock_executable>
<remap_executable>false</remap_executable>
<remote_servers>
<test_shard_localhost>
<shard>
<replica>
<host>localhost</host>
<port>9000</port>
</replica>
</shard>
</test_shard_localhost>
<test_cluster_two_shards_localhost>
<shard>
<replica>
<host>localhost</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>localhost</host>
<port>9000</port>
</replica>
</shard>
</test_cluster_two_shards_localhost>
<test_cluster_two_shards>
<shard>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster_two_shards>
<test_cluster_two_shards_internal_replication>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster_two_shards_internal_replication>
<test_shard_localhost_secure>
<shard>
<replica>
<host>localhost</host>
<port>9440</port>
<secure>1</secure>
</replica>
</shard>
</test_shard_localhost_secure>
<test_unavailable_shard>
<shard>
<replica>
<host>localhost</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>localhost</host>
<port>1</port>
</replica>
</shard>
</test_unavailable_shard>
</remote_servers>
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
<max_session_timeout>3600</max_session_timeout>
<default_session_timeout>60</default_session_timeout>
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
<trace_log>
<database>system</database>
<table>trace_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log>
<query_thread_log>
<database>system</database>
<table>query_thread_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_thread_log>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
<asynchronous_metric_log>
<database>system</database>
<table>asynchronous_metric_log</table>
<flush_interval_milliseconds>60000</flush_interval_milliseconds>
</asynchronous_metric_log>
<opentelemetry_span_log>
<engine>
engine MergeTree
partition by toYYYYMM(finish_date)
order by (finish_date, finish_time_us, trace_id)
</engine>
<database>system</database>
<table>opentelemetry_span_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</opentelemetry_span_log>
<crash_log>
<database>system</database>
<table>crash_log</table>
<partition_by />
<flush_interval_milliseconds>1000</flush_interval_milliseconds>
</crash_log>
<top_level_domains_lists>
</top_level_domains_lists>
<dictionaries_config>*_dictionary.xml</dictionaries_config>
<distributed_ddl>
<path>/clickhouse/task_queue/ddl</path>
</distributed_ddl>
<graphite_rollup_example>
<pattern>
<regexp>click_cost</regexp>
<function>any</function>
<retention>
<age>0</age>
<precision>3600</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<default>
<function>max</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>3600</age>
<precision>300</precision>
</retention>
<retention>
<age>86400</age>
<precision>3600</precision>
</retention>
</default>
</graphite_rollup_example>
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
<query_masking_rules>
<rule>
<name>hide encrypt/decrypt arguments</name>
<regexp>((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)</regexp>
<replace>\1(???)</replace>
</rule>
</query_masking_rules>
<send_crash_reports>
<enabled>false</enabled>
<anonymize>false</anonymize>
<endpoint>https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277</endpoint>
</send_crash_reports>
</yandex>

View File

@ -0,0 +1,40 @@
<?xml version="1.0"?>
<!-- Config that is used when the server is run without a config file. -->
<yandex>
<logger>
<level>trace</level>
<console>true</console>
</logger>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<path>./</path>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mlock_executable>true</mlock_executable>
<users>
<default>
<password></password>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
<access_management>1</access_management>
</default>
</users>
<profiles>
<default/>
</profiles>
<quotas>
<default />
</quotas>
</yandex>

View File

@ -0,0 +1,3 @@
profiles:
default:
allow_introspection_functions: 1

View File

@ -0,0 +1,3 @@
profiles:
default:
log_queries: 1

View File

@ -0,0 +1,19 @@
<?xml version="1.0"?>
<yandex>
<profiles>
<default>
<max_memory_usage>10000000000</max_memory_usage>
<max_block_size>64999</max_block_size>
</default>
</profiles>
<users>
<default>
<password></password>
<networks replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
</default>
</users>
</yandex>

View File

@ -0,0 +1,43 @@
from helpers.cluster import ClickHouseCluster


def test_xml_main_conf():
    # The main configs are in XML; config.d and users.d overrides are in YAML.
    cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/config.d/zookeeper.yaml')

    all_confd = ['configs/config.d/access_control.yaml',
                 'configs/config.d/keeper_port.yaml',
                 'configs/config.d/logging_no_rotate.yaml',
                 'configs/config.d/log_to_console.yaml',
                 'configs/config.d/macros.yaml',
                 'configs/config.d/metric_log.yaml',
                 'configs/config.d/more_clusters.yaml',
                 'configs/config.d/part_log.yaml',
                 'configs/config.d/path.yaml',
                 'configs/config.d/query_masking_rules.yaml',
                 'configs/config.d/tcp_with_proxy.yaml',
                 'configs/config.d/test_cluster_with_incorrect_pw.yaml',
                 'configs/config.d/text_log.yaml',
                 'configs/config.d/zookeeper.yaml']

    all_userd = ['configs/users.d/allow_introspection_functions.yaml',
                 'configs/users.d/log_queries.yaml']

    node = cluster.add_instance('node', base_config_dir='configs', main_configs=all_confd,
                                user_configs=all_userd, with_zookeeper=False)

    try:
        cluster.start()
        # Profile settings from the XML users config must be visible in system.settings.
        assert node.query("select value from system.settings where name = 'max_memory_usage'") == "10000000000\n"
        assert node.query("select value from system.settings where name = 'max_block_size'") == "64999\n"
    finally:
        cluster.shutdown()

View File

@ -0,0 +1,6 @@
timezone: Europe/Moscow
listen_host: 0.0.0.0
custom_settings_prefixes: custom_
path: /var/lib/clickhouse/
tmp_path: /var/lib/clickhouse/tmp/
users_config: users.yaml

View File

@ -0,0 +1,7 @@
user_directories:
users_xml:
path: users.yaml
local_directory:
path: access/
"@replace": replace

View File

@ -0,0 +1,23 @@
<yandex>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>
<session_timeout_ms>30000</session_timeout_ms>
<force_sync>false</force_sync>
<startup_timeout>60000</startup_timeout>
<!-- we want all logs for investigating complex problems -->
<reserved_log_items>1000000000000000</reserved_log_items>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>localhost</hostname>
<port>44444</port>
</server>
</raft_configuration>
</keeper_server>
</yandex>

View File

@ -0,0 +1,7 @@
logger:
console: true
log:
"@remove": remove
errorlog:
"@remove": remove

View File

@ -0,0 +1,8 @@
<yandex>
<logger>
<!-- Disable rotation
https://pocoproject.org/docs/Poco.FileChannel.html
-->
<size>never</size>
</logger>
</yandex>

View File

@ -0,0 +1,7 @@
macros:
test: 'Hello, world!'
shard: s1
replica: r1
default_path_test: '/clickhouse/tables/{database}/{shard}/'
default_name_test: 'table_{table}'

View File

@ -0,0 +1,8 @@
<yandex>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>

View File

@ -0,0 +1,23 @@
remote_servers:
single_remote_shard_at_port_9001:
shard:
replica:
host: localhost
port: 9001
two_remote_shards_at_port_9001_9002:
shard:
- replica:
host: localhost
port: 9001
- replica:
host: localhost
port: 9002
two_shards_one_local_one_remote_at_port_9001:
shard:
- replica:
host: localhost
port: 9000
- replica:
host: localhost
port: 9001

View File

@ -0,0 +1,8 @@
<yandex>
<part_log>
<database>system</database>
<table>part_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</part_log>
</yandex>

Some files were not shown because too many files have changed in this diff