Merge branch 'master' into update-openssl

This commit is contained in:
Alexey Milovidov 2020-02-25 21:31:18 +03:00
commit 6e7e140d8c
34 changed files with 439 additions and 228 deletions

2
contrib/simdjson vendored

@ -1 +1 @@
Subproject commit 60916318f76432b5d04814c2af50d04ec15664ad Subproject commit 560f0742cc0895d00d78359dbdeb82064a24adb8

View File

@ -1,14 +1,13 @@
set(SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include") set(SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include")
set(SIMDJSON_SRC_DIR "${SIMDJSON_INCLUDE_DIR}/../src") set(SIMDJSON_SRC_DIR "${SIMDJSON_INCLUDE_DIR}/../src")
set(SIMDJSON_SRC set(SIMDJSON_SRC
${SIMDJSON_SRC_DIR}/document.cpp
${SIMDJSON_SRC_DIR}/error.cpp
${SIMDJSON_SRC_DIR}/implementation.cpp
${SIMDJSON_SRC_DIR}/jsonioutil.cpp ${SIMDJSON_SRC_DIR}/jsonioutil.cpp
${SIMDJSON_SRC_DIR}/jsonminifier.cpp ${SIMDJSON_SRC_DIR}/jsonminifier.cpp
${SIMDJSON_SRC_DIR}/jsonparser.cpp
${SIMDJSON_SRC_DIR}/stage1_find_marks.cpp ${SIMDJSON_SRC_DIR}/stage1_find_marks.cpp
${SIMDJSON_SRC_DIR}/stage2_build_tape.cpp ${SIMDJSON_SRC_DIR}/stage2_build_tape.cpp
${SIMDJSON_SRC_DIR}/parsedjson.cpp
${SIMDJSON_SRC_DIR}/parsedjsoniterator.cpp
${SIMDJSON_SRC_DIR}/simdjson.cpp
) )
add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_SRC}) add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_SRC})

View File

@ -224,7 +224,7 @@ void SettingsConstraints::setProfile(const String & profile_name, const Poco::Ut
for (const std::string & key : config_keys) for (const std::string & key : config_keys)
{ {
if (key == "profile" || 0 == key.compare(0, strlen("profile["), "profile[")) /// Inheritance of profiles from the current one. if (key == "profile" || key.starts_with("profile[")) /// Inheritance of profiles from the current one.
setProfile(config.getString(elem + "." + key), config); setProfile(config.getString(elem + "." + key), config);
else else
continue; continue;

View File

@ -409,7 +409,10 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
if (is_trivial) if (is_trivial)
{ {
if (required_substring.empty()) if (required_substring.empty())
{
matches.emplace_back(Match{0, 0});
return 1; return 1;
}
const UInt8 * pos; const UInt8 * pos;
if (is_case_insensitive) if (is_case_insensitive)

View File

@ -37,7 +37,7 @@ void Settings::setProfile(const String & profile_name, const Poco::Util::Abstrac
{ {
if (key == "constraints") if (key == "constraints")
continue; continue;
if (key == "profile" || 0 == key.compare(0, strlen("profile["), "profile[")) /// Inheritance of profiles from the current one. if (key == "profile" || key.starts_with("profile[")) /// Inheritance of profiles from the current one.
setProfile(config.getString(elem + "." + key), config); setProfile(config.getString(elem + "." + key), config);
else else
set(key, config.getString(elem + "." + key)); set(key, config.getString(elem + "." + key));

View File

@ -18,8 +18,8 @@ struct BlockIO
BlockIO(const BlockIO &) = default; BlockIO(const BlockIO &) = default;
~BlockIO() = default; ~BlockIO() = default;
/** process_list_entry should be destroyed after in and after out, /** process_list_entry should be destroyed after in, after out and after pipeline,
* since in and out contain pointer to objects inside process_list_entry (query-level MemoryTracker for example), * since in, out and pipeline contain pointer to objects inside process_list_entry (query-level MemoryTracker for example),
* which could be used before destroying of in and out. * which could be used before destroying of in and out.
*/ */
std::shared_ptr<ProcessListEntry> process_list_entry; std::shared_ptr<ProcessListEntry> process_list_entry;
@ -56,6 +56,7 @@ struct BlockIO
out.reset(); out.reset();
in.reset(); in.reset();
pipeline = QueryPipeline();
process_list_entry.reset(); process_list_entry.reset();
process_list_entry = rhs.process_list_entry; process_list_entry = rhs.process_list_entry;

View File

@ -36,7 +36,8 @@ void RegionsHierarchiesDataProvider::discoverFilesWithCustomHierarchies()
{ {
std::string candidate_basename = dir_it.path().getBaseName(); std::string candidate_basename = dir_it.path().getBaseName();
if ((0 == candidate_basename.compare(0, basename.size(), basename)) && (candidate_basename.size() > basename.size() + 1) if (candidate_basename.starts_with(basename)
&& (candidate_basename.size() > basename.size() + 1)
&& (candidate_basename[basename.size()] == '_')) && (candidate_basename[basename.size()] == '_'))
{ {
const std::string suffix = candidate_basename.substr(basename.size() + 1); const std::string suffix = candidate_basename.substr(basename.size() + 1);

View File

@ -63,23 +63,20 @@ struct BinaryOperationImplBase
using ResultType = ResultType_; using ResultType = ResultType_;
static const constexpr bool allow_fixed_string = false; static const constexpr bool allow_fixed_string = false;
static void NO_INLINE vector_vector(const PaddedPODArray<A> & a, const PaddedPODArray<B> & b, PaddedPODArray<ResultType> & c) static void NO_INLINE vector_vector(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t size)
{ {
size_t size = a.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
c[i] = Op::template apply<ResultType>(a[i], b[i]); c[i] = Op::template apply<ResultType>(a[i], b[i]);
} }
static void NO_INLINE vector_constant(const PaddedPODArray<A> & a, B b, PaddedPODArray<ResultType> & c) static void NO_INLINE vector_constant(const A * __restrict a, B b, ResultType * __restrict c, size_t size)
{ {
size_t size = a.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
c[i] = Op::template apply<ResultType>(a[i], b); c[i] = Op::template apply<ResultType>(a[i], b);
} }
static void NO_INLINE constant_vector(A a, const PaddedPODArray<B> & b, PaddedPODArray<ResultType> & c) static void NO_INLINE constant_vector(A a, const B * __restrict b, ResultType * __restrict c, size_t size)
{ {
size_t size = b.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
c[i] = Op::template apply<ResultType>(a, b[i]); c[i] = Op::template apply<ResultType>(a, b[i]);
} }
@ -93,25 +90,22 @@ struct BinaryOperationImplBase
template <typename Op> template <typename Op>
struct FixedStringOperationImpl struct FixedStringOperationImpl
{ {
static void NO_INLINE vector_vector(const ColumnFixedString::Chars & a, const ColumnFixedString::Chars & b, ColumnFixedString::Chars & c) static void NO_INLINE vector_vector(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt8 * __restrict c, size_t size)
{ {
size_t size = a.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
c[i] = Op::template apply<UInt8>(a[i], b[i]); c[i] = Op::template apply<UInt8>(a[i], b[i]);
} }
static void NO_INLINE vector_constant(const ColumnFixedString::Chars & a, const ColumnFixedString::Chars & b, ColumnFixedString::Chars & c) static void NO_INLINE vector_constant(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt8 * __restrict c, size_t size, size_t N)
{ {
size_t size = a.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
c[i] = Op::template apply<UInt8>(a[i], b[i % b.size()]); c[i] = Op::template apply<UInt8>(a[i], b[i % N]);
} }
static void NO_INLINE constant_vector(const ColumnFixedString::Chars & a, const ColumnFixedString::Chars & b, ColumnFixedString::Chars & c) static void NO_INLINE constant_vector(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt8 * __restrict c, size_t size, size_t N)
{ {
size_t size = b.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
c[i] = Op::template apply<UInt8>(a[i % a.size()], b[i]); c[i] = Op::template apply<UInt8>(a[i % N], b[i]);
} }
}; };
@ -812,9 +806,10 @@ public:
auto col_res = ColumnFixedString::create(col_left->getN()); auto col_res = ColumnFixedString::create(col_left->getN());
auto & out_chars = col_res->getChars(); auto & out_chars = col_res->getChars();
out_chars.resize(col_left->getN()); out_chars.resize(col_left->getN());
OpImpl::vector_vector(col_left->getChars(), OpImpl::vector_vector(col_left->getChars().data(),
col_right->getChars(), col_right->getChars().data(),
out_chars); out_chars.data(),
out_chars.size());
block.getByPosition(result).column = ColumnConst::create(std::move(col_res), block.rows()); block.getByPosition(result).column = ColumnConst::create(std::move(col_res), block.rows());
return true; return true;
} }
@ -834,26 +829,36 @@ public:
{ {
if (col_left->getN() != col_right->getN()) if (col_left->getN() != col_right->getN())
return false; return false;
auto col_res = ColumnFixedString::create(col_left->getN()); auto col_res = ColumnFixedString::create(col_left->getN());
auto & out_chars = col_res->getChars(); auto & out_chars = col_res->getChars();
out_chars.resize((is_right_column_const ? col_left->size() : col_right->size()) * col_left->getN()); out_chars.resize((is_right_column_const ? col_left->size() : col_right->size()) * col_left->getN());
if (!is_left_column_const && !is_right_column_const) if (!is_left_column_const && !is_right_column_const)
{ {
OpImpl::vector_vector(col_left->getChars(), OpImpl::vector_vector(
col_right->getChars(), col_left->getChars().data(),
out_chars); col_right->getChars().data(),
out_chars.data(),
out_chars.size());
} }
else if (is_left_column_const) else if (is_left_column_const)
{ {
OpImpl::constant_vector(col_left->getChars(), OpImpl::constant_vector(
col_right->getChars(), col_left->getChars().data(),
out_chars); col_right->getChars().data(),
out_chars.data(),
out_chars.size(),
col_left->getN());
} }
else else
{ {
OpImpl::vector_constant(col_left->getChars(), OpImpl::vector_constant(
col_right->getChars(), col_left->getChars().data(),
out_chars); col_right->getChars().data(),
out_chars.data(),
out_chars.size(),
col_left->getN());
} }
block.getByPosition(result).column = std::move(col_res); block.getByPosition(result).column = std::move(col_res);
return true; return true;
@ -867,6 +872,7 @@ public:
using LeftDataType = std::decay_t<decltype(left)>; using LeftDataType = std::decay_t<decltype(left)>;
using RightDataType = std::decay_t<decltype(right)>; using RightDataType = std::decay_t<decltype(right)>;
using ResultDataType = typename BinaryOperationTraits<Op, LeftDataType, RightDataType>::ResultDataType; using ResultDataType = typename BinaryOperationTraits<Op, LeftDataType, RightDataType>::ResultDataType;
if constexpr (!std::is_same_v<ResultDataType, InvalidType>) if constexpr (!std::is_same_v<ResultDataType, InvalidType>)
{ {
constexpr bool result_is_decimal = IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>; constexpr bool result_is_decimal = IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>;
@ -947,7 +953,7 @@ public:
scale_a, scale_b, check_decimal_overflow); scale_a, scale_b, check_decimal_overflow);
} }
else else
OpImpl::constant_vector(col_left_const->template getValue<T0>(), col_right->getData(), vec_res); OpImpl::constant_vector(col_left_const->template getValue<T0>(), col_right->getData().data(), vec_res.data(), vec_res.size());
} }
else else
return false; return false;
@ -978,9 +984,9 @@ public:
else else
{ {
if (auto col_right = checkAndGetColumn<ColVecT1>(col_right_raw)) if (auto col_right = checkAndGetColumn<ColVecT1>(col_right_raw))
OpImpl::vector_vector(col_left->getData(), col_right->getData(), vec_res); OpImpl::vector_vector(col_left->getData().data(), col_right->getData().data(), vec_res.data(), vec_res.size());
else if (auto col_right_const = checkAndGetColumnConst<ColVecT1>(col_right_raw)) else if (auto col_right_const = checkAndGetColumnConst<ColVecT1>(col_right_raw))
OpImpl::vector_constant(col_left->getData(), col_right_const->template getValue<T1>(), vec_res); OpImpl::vector_constant(col_left->getData().data(), col_right_const->template getValue<T1>(), vec_res.data(), vec_res.size());
else else
return false; return false;
} }

View File

@ -22,7 +22,6 @@ class Context;
class FunctionFactory : private boost::noncopyable, public IFactoryWithAliases<std::function<FunctionOverloadResolverImplPtr(const Context &)>> class FunctionFactory : private boost::noncopyable, public IFactoryWithAliases<std::function<FunctionOverloadResolverImplPtr(const Context &)>>
{ {
public: public:
static FunctionFactory & instance(); static FunctionFactory & instance();
template <typename Function> template <typename Function>

View File

@ -86,9 +86,9 @@ struct NumComparisonImpl
*/ */
size_t size = a.size(); size_t size = a.size();
const A * a_pos = a.data(); const A * __restrict a_pos = a.data();
const B * b_pos = b.data(); const B * __restrict b_pos = b.data();
UInt8 * c_pos = c.data(); UInt8 * __restrict c_pos = c.data();
const A * a_end = a_pos + size; const A * a_end = a_pos + size;
while (a_pos < a_end) while (a_pos < a_end)
@ -103,8 +103,8 @@ struct NumComparisonImpl
static void NO_INLINE vector_constant(const PaddedPODArray<A> & a, B b, PaddedPODArray<UInt8> & c) static void NO_INLINE vector_constant(const PaddedPODArray<A> & a, B b, PaddedPODArray<UInt8> & c)
{ {
size_t size = a.size(); size_t size = a.size();
const A * a_pos = a.data(); const A * __restrict a_pos = a.data();
UInt8 * c_pos = c.data(); UInt8 * __restrict c_pos = c.data();
const A * a_end = a_pos + size; const A * a_end = a_pos + size;
while (a_pos < a_end) while (a_pos < a_end)

View File

@ -1,26 +0,0 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsReinterpret.h>
namespace DB
{
/// Calls factory.registerFunction once for each of the fourteen FunctionReinterpretAs* types listed below.
/// The order of the calls is: unsigned integers, signed integers, floats, Date/DateTime, String/FixedString.
void registerFunctionsReinterpret(FunctionFactory & factory)
{
/// Unsigned integer result types.
factory.registerFunction<FunctionReinterpretAsUInt8>();
factory.registerFunction<FunctionReinterpretAsUInt16>();
factory.registerFunction<FunctionReinterpretAsUInt32>();
factory.registerFunction<FunctionReinterpretAsUInt64>();
/// Signed integer result types.
factory.registerFunction<FunctionReinterpretAsInt8>();
factory.registerFunction<FunctionReinterpretAsInt16>();
factory.registerFunction<FunctionReinterpretAsInt32>();
factory.registerFunction<FunctionReinterpretAsInt64>();
/// Floating-point result types.
factory.registerFunction<FunctionReinterpretAsFloat32>();
factory.registerFunction<FunctionReinterpretAsFloat64>();
/// Date and DateTime result types.
factory.registerFunction<FunctionReinterpretAsDate>();
factory.registerFunction<FunctionReinterpretAsDateTime>();
/// String and FixedString result types.
factory.registerFunction<FunctionReinterpretAsString>();
factory.registerFunction<FunctionReinterpretAsFixedString>();
}
}

View File

@ -21,7 +21,7 @@ struct DivideIntegralByConstantImpl
using ResultType = typename DivideIntegralImpl<A, B>::ResultType; using ResultType = typename DivideIntegralImpl<A, B>::ResultType;
static const constexpr bool allow_fixed_string = false; static const constexpr bool allow_fixed_string = false;
static void vector_constant(const PaddedPODArray<A> & a, B b, PaddedPODArray<ResultType> & c) static NO_INLINE void vector_constant(const A * __restrict a_pos, B b, ResultType * __restrict c_pos, size_t size)
{ {
if (unlikely(b == 0)) if (unlikely(b == 0))
throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION);
@ -31,9 +31,8 @@ struct DivideIntegralByConstantImpl
if (unlikely(is_signed_v<B> && b == -1)) if (unlikely(is_signed_v<B> && b == -1))
{ {
size_t size = a.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
c[i] = -c[i]; c_pos[i] = -a_pos[i];
return; return;
} }
@ -41,10 +40,7 @@ struct DivideIntegralByConstantImpl
libdivide::divider<A> divider(b); libdivide::divider<A> divider(b);
size_t size = a.size();
const A * a_pos = a.data();
const A * a_end = a_pos + size; const A * a_end = a_pos + size;
ResultType * c_pos = c.data();
#ifdef __SSE2__ #ifdef __SSE2__
static constexpr size_t values_per_sse_register = 16 / sizeof(A); static constexpr size_t values_per_sse_register = 16 / sizeof(A);

View File

@ -41,7 +41,7 @@ struct ModuloByConstantImpl
using ResultType = typename ModuloImpl<A, B>::ResultType; using ResultType = typename ModuloImpl<A, B>::ResultType;
static const constexpr bool allow_fixed_string = false; static const constexpr bool allow_fixed_string = false;
static void vector_constant(const PaddedPODArray<A> & a, B b, PaddedPODArray<ResultType> & c) static NO_INLINE void vector_constant(const A * __restrict src, B b, ResultType * __restrict dst, size_t size)
{ {
if (unlikely(b == 0)) if (unlikely(b == 0))
throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION);
@ -51,9 +51,8 @@ struct ModuloByConstantImpl
if (unlikely((std::is_signed_v<B> && b == -1) || b == 1)) if (unlikely((std::is_signed_v<B> && b == -1) || b == 1))
{ {
size_t size = a.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
c[i] = 0; dst[i] = 0;
return; return;
} }
@ -62,11 +61,6 @@ struct ModuloByConstantImpl
libdivide::divider<A> divider(b); libdivide::divider<A> divider(b);
/// Here we failed to make the SSE variant from libdivide give an advantage. /// Here we failed to make the SSE variant from libdivide give an advantage.
size_t size = a.size();
/// strict aliasing optimization for char like arrays
auto * __restrict src = a.data();
auto * __restrict dst = c.data();
if (b & (b - 1)) if (b & (b - 1))
{ {

View File

@ -0,0 +1,17 @@
namespace DB
{
class FunctionFactory;
void registerFunctionsReinterpretStringAs(FunctionFactory & factory);
void registerFunctionReinterpretAsString(FunctionFactory & factory);
void registerFunctionReinterpretAsFixedString(FunctionFactory & factory);
/// Registers the reinterpretAs* functions by delegating to the three per-file
/// registration routines, invoked in the order they are listed in the table.
void registerFunctionsReinterpret(FunctionFactory & factory)
{
    using Registrar = void (*)(FunctionFactory &);

    static constexpr Registrar registrars[] =
    {
        registerFunctionsReinterpretStringAs,
        registerFunctionReinterpretAsString,
        registerFunctionReinterpretAsFixedString,
    };

    for (Registrar do_register : registrars)
        do_register(factory);
}
}

View File

@ -0,0 +1,92 @@
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Columns/ColumnFixedString.h>
#include <Common/typeid_cast.h>
#include <Common/memcpySmall.h>
namespace DB
{
/// Error codes used in this translation unit; the definitions live elsewhere in the project.
namespace ErrorCodes
{
    /// Thrown by executeImpl when the result column is not a ColumnFixedString.
    extern const int ILLEGAL_COLUMN;
    /// Thrown by getReturnTypeImpl when the argument type is not fixed-size and contiguous.
    /// Declared explicitly so this file does not depend on another header declaring it.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
class FunctionReinterpretAsFixedString : public IFunction
{
public:
static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsFixedString>(); }
static constexpr auto name = "reinterpretAsFixedString";
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override { return 1; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const IDataType & type = *arguments[0];
if (type.isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
return std::make_shared<DataTypeFixedString>(type.getSizeOfValueInMemory());
throw Exception("Cannot reinterpret " + type.getName() + " as FixedString because it is not fixed size and contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
{
size_t rows = src.size();
ColumnFixedString::Chars & data_to = dst.getChars();
data_to.resize(n * rows);
ColumnFixedString::Offset offset = 0;
for (size_t i = 0; i < rows; ++i)
{
StringRef data = src.getDataAt(i);
memcpySmallAllowReadWriteOverflow15(&data_to[offset], data.data, n);
offset += n;
}
}
void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
{
size_t rows = src.size();
ColumnFixedString::Chars & data_to = dst.getChars();
data_to.resize(n * rows);
memcpy(data_to.data(), src.getRawData().data, data_to.size());
}
bool useDefaultImplementationForConstants() const override { return true; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
const IColumn & src = *block.getByPosition(arguments[0]).column;
MutableColumnPtr dst = block.getByPosition(result).type->createColumn();
if (ColumnFixedString * dst_concrete = typeid_cast<ColumnFixedString *>(dst.get()))
{
if (src.isFixedAndContiguous() && src.sizeOfValueIfFixed() == dst_concrete->getN())
executeContiguousToFixedString(src, *dst_concrete, dst_concrete->getN());
else
executeToFixedString(src, *dst_concrete, dst_concrete->getN());
}
else
throw Exception("Illegal column " + src.getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
block.getByPosition(result).column = std::move(dst);
}
};
/// Passes FunctionReinterpretAsFixedString to factory.registerFunction.
/// Called from registerFunctionsReinterpret, which forward-declares this function.
void registerFunctionReinterpretAsFixedString(FunctionFactory & factory)
{
factory.registerFunction<FunctionReinterpretAsFixedString>();
}
}

View File

@ -0,0 +1,91 @@
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Common/typeid_cast.h>
#include <Common/memcpySmall.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** Function for transforming numbers and dates to strings that contain the same set of bytes in the machine representation. */
class FunctionReinterpretAsString : public IFunction
{
public:
static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsString>(); }
static constexpr auto name = "reinterpretAsString";
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override { return 1; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const IDataType & type = *arguments[0];
if (type.isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
return std::make_shared<DataTypeString>();
throw Exception("Cannot reinterpret " + type.getName() + " as String because it is not contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void executeToString(const IColumn & src, ColumnString & dst)
{
size_t rows = src.size();
ColumnString::Chars & data_to = dst.getChars();
ColumnString::Offsets & offsets_to = dst.getOffsets();
offsets_to.resize(rows);
ColumnString::Offset offset = 0;
for (size_t i = 0; i < rows; ++i)
{
StringRef data = src.getDataAt(i);
/// Cut trailing zero bytes.
while (data.size && data.data[data.size - 1] == 0)
--data.size;
data_to.resize(offset + data.size + 1);
memcpySmallAllowReadWriteOverflow15(&data_to[offset], data.data, data.size);
offset += data.size;
data_to[offset] = 0;
++offset;
offsets_to[i] = offset;
}
}
bool useDefaultImplementationForConstants() const override { return true; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
const IColumn & src = *block.getByPosition(arguments[0]).column;
MutableColumnPtr dst = block.getByPosition(result).type->createColumn();
if (ColumnString * dst_concrete = typeid_cast<ColumnString *>(dst.get()))
executeToString(src, *dst_concrete);
else
throw Exception("Illegal column " + src.getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
block.getByPosition(result).column = std::move(dst);
}
};
/// Passes FunctionReinterpretAsString to factory.registerFunction.
/// Called from registerFunctionsReinterpret, which forward-declares this function.
void registerFunctionReinterpretAsString(FunctionFactory & factory)
{
factory.registerFunction<FunctionReinterpretAsString>();
}
}

View File

@ -1,20 +1,17 @@
#pragma once #include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h> #include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeDate.h> #include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h> #include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h> #include <Columns/ColumnFixedString.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnVector.h> #include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Common/memcpySmall.h> #include <Common/memcpySmall.h>
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionHelpers.h>
namespace DB namespace DB
@ -23,132 +20,9 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
} }
/** Functions for transforming numbers and dates to strings that contain the same set of bytes in the machine representation, and vice versa.
*/
template <typename Name>
class FunctionReinterpretAsStringImpl : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsStringImpl>(); }
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override { return 1; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const IDataType & type = *arguments[0];
if (type.isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
return std::make_shared<DataTypeString>();
throw Exception("Cannot reinterpret " + type.getName() + " as String because it is not contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void executeToString(const IColumn & src, ColumnString & dst)
{
size_t rows = src.size();
ColumnString::Chars & data_to = dst.getChars();
ColumnString::Offsets & offsets_to = dst.getOffsets();
offsets_to.resize(rows);
ColumnString::Offset offset = 0;
for (size_t i = 0; i < rows; ++i)
{
StringRef data = src.getDataAt(i);
/// Cut trailing zero bytes.
while (data.size && data.data[data.size - 1] == 0)
--data.size;
data_to.resize(offset + data.size + 1);
memcpySmallAllowReadWriteOverflow15(&data_to[offset], data.data, data.size);
offset += data.size;
data_to[offset] = 0;
++offset;
offsets_to[i] = offset;
}
}
bool useDefaultImplementationForConstants() const override { return true; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
const IColumn & src = *block.getByPosition(arguments[0]).column;
MutableColumnPtr dst = block.getByPosition(result).type->createColumn();
if (ColumnString * dst_concrete = typeid_cast<ColumnString *>(dst.get()))
executeToString(src, *dst_concrete);
else
throw Exception("Illegal column " + src.getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
block.getByPosition(result).column = std::move(dst);
}
};
template <typename Name>
class FunctionReinterpretAsFixedStringImpl : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionReinterpretAsFixedStringImpl>(); }
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override { return 1; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const IDataType & type = *arguments[0];
if (type.isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
return std::make_shared<DataTypeFixedString>(type.getSizeOfValueInMemory());
throw Exception("Cannot reinterpret " + type.getName() + " as FixedString because it is not fixed size and contiguous in memory", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
{
size_t rows = src.size();
ColumnFixedString::Chars & data_to = dst.getChars();
data_to.resize(n * rows);
ColumnFixedString::Offset offset = 0;
for (size_t i = 0; i < rows; ++i)
{
StringRef data = src.getDataAt(i);
memcpySmallAllowReadWriteOverflow15(&data_to[offset], data.data, n);
offset += n;
}
}
bool useDefaultImplementationForConstants() const override { return true; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
const IColumn & src = *block.getByPosition(arguments[0]).column;
MutableColumnPtr dst = block.getByPosition(result).type->createColumn();
if (ColumnFixedString * dst_concrete = typeid_cast<ColumnFixedString *>(dst.get()))
executeToFixedString(src, *dst_concrete, dst_concrete->getN());
else
throw Exception("Illegal column " + src.getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
block.getByPosition(result).column = std::move(dst);
}
};
template <typename ToDataType, typename Name> template <typename ToDataType, typename Name>
class FunctionReinterpretStringAs : public IFunction class FunctionReinterpretStringAs : public IFunction
{ {
@ -244,8 +118,6 @@ struct NameReinterpretAsFloat32 { static constexpr auto name = "reinterpretA
struct NameReinterpretAsFloat64 { static constexpr auto name = "reinterpretAsFloat64"; }; struct NameReinterpretAsFloat64 { static constexpr auto name = "reinterpretAsFloat64"; };
struct NameReinterpretAsDate { static constexpr auto name = "reinterpretAsDate"; }; struct NameReinterpretAsDate { static constexpr auto name = "reinterpretAsDate"; };
struct NameReinterpretAsDateTime { static constexpr auto name = "reinterpretAsDateTime"; }; struct NameReinterpretAsDateTime { static constexpr auto name = "reinterpretAsDateTime"; };
struct NameReinterpretAsString { static constexpr auto name = "reinterpretAsString"; };
struct NameReinterpretAsFixedString { static constexpr auto name = "reinterpretAsFixedString"; };
using FunctionReinterpretAsUInt8 = FunctionReinterpretStringAs<DataTypeUInt8, NameReinterpretAsUInt8>; using FunctionReinterpretAsUInt8 = FunctionReinterpretStringAs<DataTypeUInt8, NameReinterpretAsUInt8>;
using FunctionReinterpretAsUInt16 = FunctionReinterpretStringAs<DataTypeUInt16, NameReinterpretAsUInt16>; using FunctionReinterpretAsUInt16 = FunctionReinterpretStringAs<DataTypeUInt16, NameReinterpretAsUInt16>;
@ -260,8 +132,24 @@ using FunctionReinterpretAsFloat64 = FunctionReinterpretStringAs<DataTypeFloat64
using FunctionReinterpretAsDate = FunctionReinterpretStringAs<DataTypeDate, NameReinterpretAsDate>; using FunctionReinterpretAsDate = FunctionReinterpretStringAs<DataTypeDate, NameReinterpretAsDate>;
using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs<DataTypeDateTime, NameReinterpretAsDateTime>; using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs<DataTypeDateTime, NameReinterpretAsDateTime>;
using FunctionReinterpretAsString = FunctionReinterpretAsStringImpl<NameReinterpretAsString>;
using FunctionReinterpretAsFixedString = FunctionReinterpretAsFixedStringImpl<NameReinterpretAsFixedString>;
/// Calls factory.registerFunction once for each of the twelve FunctionReinterpretStringAs aliases defined above
/// (integer, floating-point, Date and DateTime result types).
void registerFunctionsReinterpretStringAs(FunctionFactory & factory)
{
/// Unsigned integer result types.
factory.registerFunction<FunctionReinterpretAsUInt8>();
factory.registerFunction<FunctionReinterpretAsUInt16>();
factory.registerFunction<FunctionReinterpretAsUInt32>();
factory.registerFunction<FunctionReinterpretAsUInt64>();
/// Signed integer result types.
factory.registerFunction<FunctionReinterpretAsInt8>();
factory.registerFunction<FunctionReinterpretAsInt16>();
factory.registerFunction<FunctionReinterpretAsInt32>();
factory.registerFunction<FunctionReinterpretAsInt64>();
/// Floating-point result types.
factory.registerFunction<FunctionReinterpretAsFloat32>();
factory.registerFunction<FunctionReinterpretAsFloat64>();
/// Date and DateTime result types.
factory.registerFunction<FunctionReinterpretAsDate>();
factory.registerFunction<FunctionReinterpretAsDateTime>();
}
} }

View File

@ -0,0 +1,17 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>
<iterations>3</iterations>
<min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
</all_of>
<any_of>
<iterations>5</iterations>
<total_time_ms>60000</total_time_ms>
</any_of>
</stop_conditions>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(number + 1)))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(0xabcd0123cdef4567)))</query>
</test>

View File

@ -21,3 +21,63 @@
0 0
0 0
0 0
-1
-2
-3
-4
-5
-6
-7
-8
-9
-10
-1
-2
-3
-4
-5
-6
-7
-8
-9
-10
-1
-2
-3
-4
-5
-6
-7
-8
-9
-10
-1
-2
-3
-4
-5
-6
-7
-8
-9
-10
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1

View File

@ -17,3 +17,9 @@ SELECT intDivOrZero( CAST(-1000, 'Int64') , CAST(1000, 'Int64') );
SELECT intDiv(-1, number) FROM numbers(1, 10); SELECT intDiv(-1, number) FROM numbers(1, 10);
SELECT intDivOrZero(-1, number) FROM numbers(1, 10); SELECT intDivOrZero(-1, number) FROM numbers(1, 10);
-- Negative divisors: the constant -1 with Int32 and Int64 dividends,
-- for both intDiv and intDivOrZero, over number = 1..10.
SELECT intDiv(toInt32(number), -1) FROM numbers(1, 10);
SELECT intDivOrZero(toInt32(number), -1) FROM numbers(1, 10);
SELECT intDiv(toInt64(number), -1) FROM numbers(1, 10);
SELECT intDivOrZero(toInt64(number), -1) FROM numbers(1, 10);
-- Per-row negative divisor: each number is divided by its own negation.
SELECT intDiv(number, -number) FROM numbers(1, 10);
SELECT intDivOrZero(number, -number) FROM numbers(1, 10);

View File

@ -0,0 +1,5 @@
-- A query that calls sleep() over a MergeTree table must fail when max_execution_time is set.
DROP TABLE IF EXISTS tab;
create table tab (A Int64) Engine=MergeTree order by tuple();
-- Fill the table with 1000 rows.
insert into tab select cityHash64(number) from numbers(1000);
-- max_block_size = 1 with sleep(0.1) is expected to run past max_execution_time=1;
-- the trailing hint requires the server to answer with error code 159.
select sum(sleep(0.1)) from tab settings max_block_size = 1, max_execution_time=1; -- { serverError 159 }
DROP TABLE IF EXISTS tab;

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
-- extractAll with an empty pattern, applied to a string written as the single escape '\0'.
SELECT extractAll('\0', '');

View File

@ -0,0 +1 @@
"hello"

View File

@ -0,0 +1,2 @@
-- JSONExtractRaw of the string field 'a' from a document whose other field 'b'
-- holds 12345678901234567890, an integer above the signed 64-bit maximum.
WITH '{"a": "hello", "b": 12345678901234567890}' AS json
SELECT JSONExtractRaw(json, 'a');

View File

@ -0,0 +1,8 @@
1
1
1 0.1 a я
2 0.2 b ą
3 0.3 c d
1 0.1 a я
2 0.2 b ą
3 0.3 c d

View File

@ -0,0 +1,14 @@
-- Exercises the odbc() table function through two DSNs: 'ClickHouse DSN (ANSI)' and 'ClickHouse DSN (Unicode)'.
-- NOTE(review): both DSNs are presumably provided by the ODBC driver's sample odbc.ini installed in the test image; confirm.

-- Smoke check: system.tables read through each DSN returns more than one row.
select count() > 1 as ok from (select * from odbc('DSN={ClickHouse DSN (ANSI)}','system','tables'));
select count() > 1 as ok from (select * from odbc('DSN={ClickHouse DSN (Unicode)}','system','tables'));
DROP DATABASE IF EXISTS test_01086;
CREATE DATABASE test_01086;
USE test_01086;
-- Round trip of an integer, a float and strings that contain non-ASCII characters.
CREATE TABLE t (x UInt8, y Float32, z String) ENGINE = Memory;
INSERT INTO t VALUES (1,0.1,'a я'),(2,0.2,'b ą'),(3,0.3,'c d');
-- Read the same table back through each DSN, ordered by x so the output is stable.
select * from odbc('DSN={ClickHouse DSN (ANSI)}','test_01086','t') ORDER BY x;
select * from odbc('DSN={ClickHouse DSN (Unicode)}','test_01086','t') ORDER BY x;
DROP DATABASE test_01086;

View File

@ -25,6 +25,7 @@ find . -name '*.so.*' -print -exec mv '{}' /output \;
if [ "performance" == "$COMBINED_OUTPUT" ] if [ "performance" == "$COMBINED_OUTPUT" ]
then then
cp -r ../dbms/tests/performance /output cp -r ../dbms/tests/performance /output
cp -r ../docker/test/performance-comparison/config /output ||:
rm /output/unit_tests_dbms ||: rm /output/unit_tests_dbms ||:
rm /output/clickhouse-odbc-bridge ||: rm /output/clickhouse-odbc-bridge ||:
fi fi

View File

@ -70,7 +70,7 @@ function configure
<metric_log remove="remove"> <metric_log remove="remove">
<table remove="remove"/> <table remove="remove"/>
</metric_log> </metric_log>
<use_uncompressed_cache>1</use_uncompressed_cache> <use_uncompressed_cache>0</use_uncompressed_cache>
<!--1 GB--> <!--1 GB-->
<uncompressed_cache_size>1000000000</uncompressed_cache_size> <uncompressed_cache_size>1000000000</uncompressed_cache_size>
</yandex> </yandex>

View File

@ -0,0 +1,14 @@
<yandex>
<logger>
<console>true</console>
</logger>
<text_log remove="remove">
<table remove="remove"/>
</text_log>
<metric_log remove="remove">
<table remove="remove"/>
</metric_log>
<use_uncompressed_cache>0</use_uncompressed_cache>
<!--1 GB-->
<uncompressed_cache_size>1000000000</uncompressed_cache_size>
</yandex>

View File

@ -0,0 +1,10 @@
<yandex>
<!-- Overrides for the "default" settings profile. -->
<profiles>
<default>
<!-- Real-time query profiler period: 10000000 ns, i.e. 10 ms. -->
<query_profiler_real_time_period_ns>10000000</query_profiler_real_time_period_ns>
<!-- NOTE(review): a period of 0 presumably turns the CPU-time profiler off; confirm. -->
<query_profiler_cpu_time_period_ns>0</query_profiler_cpu_time_period_ns>
<allow_introspection_functions>1</allow_introspection_functions>
<log_queries>1</log_queries>
</default>
</profiles>
</yandex>

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-stateless-test . # docker build -t yandex/clickhouse-stateless-test .
FROM yandex/clickhouse-deb-builder FROM yandex/clickhouse-deb-builder
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.3.20200115/clickhouse-odbc-1.1.3-Linux.tar.gz"
RUN apt-get update -y \ RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \ && env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \ apt-get install --yes --no-install-recommends \
@ -27,8 +29,16 @@ RUN apt-get update -y \
gdb \ gdb \
lsof \ lsof \
llvm-8 \ llvm-8 \
vim vim \
unixodbc \
wget
# Install the ClickHouse ODBC driver from the release tarball at ${odbc_driver_url}:
#   1. stream the tarball into a temporary directory, dropping its top-level path component;
#   2. copy the shared libraries from lib64/ into /usr/local/lib/;
#   3. register the driver (odbcinst -i -d) and the sample data sources (odbcinst -i -s -l)
#      from the template files shipped in the tarball;
#   4. remove the temporary directory so it does not stay in the image layer.
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget --quiet -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
&& odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp
ENV TZ=Europe/Moscow ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -1,6 +1,6 @@
# Начало работы # Начало работы
Если вы новичок в ClickHouse и хотите получить вживую оценить его производительность, прежде всего нужно пройти через [процесс установки](install.md). Если вы новичок в ClickHouse и хотите вживую оценить его производительность, прежде всего нужно пройти через [процесс установки](install.md).
После этого можно выбрать один из следующих вариантов: После этого можно выбрать один из следующих вариантов: