Merge branch 'master' of github.com:clickhouse/ClickHouse

Ivan Blinkov 2019-12-11 16:08:35 +03:00
commit b0c61f933d
30 changed files with 411 additions and 229 deletions

View File

@ -1,5 +1,8 @@
find_library (TERMCAP_LIBRARY termcap)
find_library (TERMCAP_LIBRARY tinfo)
if (NOT TERMCAP_LIBRARY)
find_library (TERMCAP_LIBRARY tinfo)
find_library (TERMCAP_LIBRARY ncurses)
endif()
if (NOT TERMCAP_LIBRARY)
find_library (TERMCAP_LIBRARY termcap)
endif()
message (STATUS "Using termcap: ${TERMCAP_LIBRARY}")

View File

@ -27,12 +27,35 @@ option (LINKER_NAME "Linker name or full path")
find_program (LLD_PATH NAMES "ld.lld" "lld" "lld-${COMPILER_VERSION_MAJOR}")
find_program (GOLD_PATH NAMES "ld.gold" "gold")
# We prefer LLD linker over Gold or BFD.
if (NOT LINKER_NAME)
if (LLD_PATH)
# GCC driver requires one of supported linker names like "lld".
# Clang driver simply allows full linker path.
if (COMPILER_GCC)
get_filename_component(LLD_BASENAME ${LLD_PATH} NAME)
if (LLD_BASENAME STREQUAL ld.lld)
set (LINKER_NAME "lld")
elseif (GOLD_PATH)
endif ()
else ()
set (LINKER_NAME ${LLD_PATH})
endif ()
endif ()
endif ()
if (NOT LINKER_NAME)
if (GOLD_PATH)
if (COMPILER_GCC)
get_filename_component(GOLD_BASENAME ${GOLD_PATH} NAME)
if (GOLD_BASENAME STREQUAL ld.gold)
set (LINKER_NAME "gold")
endif ()
else ()
set (LINKER_NAME ${GOLD_PATH})
endif ()
endif ()
endif ()
if (LINKER_NAME)

View File

@ -52,6 +52,7 @@ if (USE_INTERNAL_BTRIE_LIBRARY)
endif ()
if (USE_INTERNAL_ZLIB_LIBRARY)
unset (BUILD_SHARED_LIBS CACHE)
set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "")
set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "")
set (ZLIB_COMPAT 1 CACHE INTERNAL "") # also enables WITH_GZFILEOP

contrib/poco vendored (2 changed lines)

@ -1 +1 @@
Subproject commit 2b273bfe9db89429b2040c024484dee0197e48c7
Subproject commit d478f62bd93c9cd14eb343756ef73a4ae622ddf5

contrib/zlib-ng vendored (2 changed lines)

@ -1 +1 @@
Subproject commit cff0f500d9399d7cd3b9461a693d211e4b86fcc9
Subproject commit bba56a73be249514acfbc7d49aa2a68994dad8ab

View File

@ -34,6 +34,7 @@ namespace ErrorCodes
extern const int CANNOT_STATVFS;
extern const int NOT_ENOUGH_SPACE;
extern const int NOT_IMPLEMENTED;
extern const int NO_SUCH_DATA_PART;
extern const int SYSTEM_ERROR;
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;

View File

@ -34,97 +34,23 @@ struct StaticVisitor
/// F is template parameter, to allow universal reference for field, that is useful for const and non-const values.
template <typename Visitor, typename F>
typename std::decay_t<Visitor>::ResultType applyVisitor(Visitor && visitor, F && field)
auto applyVisitor(Visitor && visitor, F && field)
{
switch (field.getType())
{
case Field::Types::Null: return visitor(field.template get<Null>());
case Field::Types::UInt64: return visitor(field.template get<UInt64>());
case Field::Types::UInt128: return visitor(field.template get<UInt128>());
case Field::Types::Int64: return visitor(field.template get<Int64>());
case Field::Types::Float64: return visitor(field.template get<Float64>());
case Field::Types::String: return visitor(field.template get<String>());
case Field::Types::Array: return visitor(field.template get<Array>());
case Field::Types::Tuple: return visitor(field.template get<Tuple>());
case Field::Types::Decimal32: return visitor(field.template get<DecimalField<Decimal32>>());
case Field::Types::Decimal64: return visitor(field.template get<DecimalField<Decimal64>>());
case Field::Types::Decimal128: return visitor(field.template get<DecimalField<Decimal128>>());
case Field::Types::AggregateFunctionState: return visitor(field.template get<AggregateFunctionStateData>());
default:
throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD);
}
}
template <typename Visitor, typename F1, typename F2>
static typename std::decay_t<Visitor>::ResultType applyBinaryVisitorImpl(Visitor && visitor, F1 && field1, F2 && field2)
{
switch (field2.getType())
{
case Field::Types::Null: return visitor(field1, field2.template get<Null>());
case Field::Types::UInt64: return visitor(field1, field2.template get<UInt64>());
case Field::Types::UInt128: return visitor(field1, field2.template get<UInt128>());
case Field::Types::Int64: return visitor(field1, field2.template get<Int64>());
case Field::Types::Float64: return visitor(field1, field2.template get<Float64>());
case Field::Types::String: return visitor(field1, field2.template get<String>());
case Field::Types::Array: return visitor(field1, field2.template get<Array>());
case Field::Types::Tuple: return visitor(field1, field2.template get<Tuple>());
case Field::Types::Decimal32: return visitor(field1, field2.template get<DecimalField<Decimal32>>());
case Field::Types::Decimal64: return visitor(field1, field2.template get<DecimalField<Decimal64>>());
case Field::Types::Decimal128: return visitor(field1, field2.template get<DecimalField<Decimal128>>());
case Field::Types::AggregateFunctionState: return visitor(field1, field2.template get<AggregateFunctionStateData>());
default:
throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD);
}
return Field::dispatch(visitor, field);
}
template <typename Visitor, typename F1, typename F2>
typename std::decay_t<Visitor>::ResultType applyVisitor(Visitor && visitor, F1 && field1, F2 && field2)
auto applyVisitor(Visitor && visitor, F1 && field1, F2 && field2)
{
switch (field1.getType())
return Field::dispatch([&](auto & field1_value)
{
case Field::Types::Null:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Null>(), std::forward<F2>(field2));
case Field::Types::UInt64:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<UInt64>(), std::forward<F2>(field2));
case Field::Types::UInt128:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<UInt128>(), std::forward<F2>(field2));
case Field::Types::Int64:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Int64>(), std::forward<F2>(field2));
case Field::Types::Float64:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Float64>(), std::forward<F2>(field2));
case Field::Types::String:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<String>(), std::forward<F2>(field2));
case Field::Types::Array:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Array>(), std::forward<F2>(field2));
case Field::Types::Tuple:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<Tuple>(), std::forward<F2>(field2));
case Field::Types::Decimal32:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<DecimalField<Decimal32>>(), std::forward<F2>(field2));
case Field::Types::Decimal64:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<DecimalField<Decimal64>>(), std::forward<F2>(field2));
case Field::Types::Decimal128:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<DecimalField<Decimal128>>(), std::forward<F2>(field2));
case Field::Types::AggregateFunctionState:
return applyBinaryVisitorImpl(
std::forward<Visitor>(visitor), field1.template get<AggregateFunctionStateData>(), std::forward<F2>(field2));
default:
throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD);
}
return Field::dispatch([&](auto & field2_value)
{
return visitor(field1_value, field2_value);
},
field2);
},
field1);
}
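For readers unfamiliar with the pattern, here is a self-contained sketch of the same nested-dispatch idea using std::variant and std::visit instead of Field (an illustration under that assumption, not ClickHouse code): the binary visitor is applied by resolving the first operand in an outer dispatch and the second in an inner one, exactly as the two nested Field::dispatch calls above do.
```cpp
#include <iostream>
#include <string>
#include <typeinfo>
#include <variant>

using Value = std::variant<long, double, std::string>;

template <typename Visitor>
auto applyBinary(Visitor && visitor, const Value & a, const Value & b)
{
    // Outer dispatch resolves the concrete type of `a`, inner dispatch that of `b`.
    return std::visit([&](const auto & x)
    {
        return std::visit([&](const auto & y) { return visitor(x, y); }, b);
    }, a);
}

int main()
{
    auto describe = [](const auto & x, const auto & y)
    {
        return std::string(typeid(x).name()) + " / " + typeid(y).name();
    };
    std::cout << applyBinary(describe, Value{1L}, Value{2.5}) << '\n';
}
```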
@ -473,8 +399,14 @@ private:
public:
explicit FieldVisitorSum(const Field & rhs_) : rhs(rhs_) {}
bool operator() (UInt64 & x) const { x += get<UInt64>(rhs); return x != 0; }
bool operator() (Int64 & x) const { x += get<Int64>(rhs); return x != 0; }
// We can add all ints as unsigned regardless of their actual signedness.
bool operator() (Int64 & x) const { return this->operator()(reinterpret_cast<UInt64 &>(x)); }
bool operator() (UInt64 & x) const
{
x += rhs.reinterpret<UInt64>();
return x != 0;
}
bool operator() (Float64 & x) const { x += get<Float64>(rhs); return x != 0; }
bool operator() (Null &) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); }
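The "add all ints as unsigned" trick above rests on two's-complement arithmetic: adding the unsigned reinterpretations of two signed 64-bit values produces the same bit pattern as signed addition. A minimal standalone check of that assumption (illustrative only, not part of the diff):
```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    int64_t a = -5, b = 3;
    int64_t signed_sum = a + b;

    // Reinterpret the operands as unsigned, add, and reinterpret back,
    // mirroring what FieldVisitorSum does for Int64 via its UInt64 overload.
    uint64_t ua, ub;
    std::memcpy(&ua, &a, sizeof a);
    std::memcpy(&ub, &b, sizeof b);
    uint64_t unsigned_sum = ua + ub;

    int64_t back;
    std::memcpy(&back, &unsigned_sum, sizeof back);
    assert(back == signed_sum);  // identical bit pattern, so the reinterpret is safe
}
```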

View File

@ -295,7 +295,7 @@ namespace DB
void writeFieldText(const Field & x, WriteBuffer & buf)
{
DB::String res = applyVisitor(DB::FieldVisitorToString(), x);
DB::String res = Field::dispatch(DB::FieldVisitorToString(), x);
buf.write(res.data(), res.size());
}

View File

@ -27,7 +27,7 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
template <typename T>
template <typename T, typename SFINAE = void>
struct NearestFieldTypeImpl;
template <typename T>
@ -151,6 +151,54 @@ private:
UInt32 scale;
};
/// char may be signed or unsigned, and behave identically to signed char or unsigned char,
/// but they are always three different types.
/// signedness of char is different in Linux on x86 and Linux on ARM.
template <> struct NearestFieldTypeImpl<char> { using Type = std::conditional_t<is_signed_v<char>, Int64, UInt64>; };
template <> struct NearestFieldTypeImpl<signed char> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<unsigned char> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt16> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt32> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<DayNum> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt128> { using Type = UInt128; };
template <> struct NearestFieldTypeImpl<UUID> { using Type = UInt128; };
template <> struct NearestFieldTypeImpl<Int16> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<Int32> { using Type = Int64; };
/// long and long long are always different types that may behave identically or not.
/// This is different on Linux and Mac.
template <> struct NearestFieldTypeImpl<long> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<long long> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<unsigned long> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<unsigned long long> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Int128> { using Type = Int128; };
template <> struct NearestFieldTypeImpl<Decimal32> { using Type = DecimalField<Decimal32>; };
template <> struct NearestFieldTypeImpl<Decimal64> { using Type = DecimalField<Decimal64>; };
template <> struct NearestFieldTypeImpl<Decimal128> { using Type = DecimalField<Decimal128>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal32>> { using Type = DecimalField<Decimal32>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal64>> { using Type = DecimalField<Decimal64>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal128>> { using Type = DecimalField<Decimal128>; };
template <> struct NearestFieldTypeImpl<Float32> { using Type = Float64; };
template <> struct NearestFieldTypeImpl<Float64> { using Type = Float64; };
template <> struct NearestFieldTypeImpl<const char *> { using Type = String; };
template <> struct NearestFieldTypeImpl<String> { using Type = String; };
template <> struct NearestFieldTypeImpl<Array> { using Type = Array; };
template <> struct NearestFieldTypeImpl<Tuple> { using Type = Tuple; };
template <> struct NearestFieldTypeImpl<bool> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Null> { using Type = Null; };
template <> struct NearestFieldTypeImpl<AggregateFunctionStateData> { using Type = AggregateFunctionStateData; };
// For enum types, use the field type that corresponds to their underlying type.
template <typename T>
struct NearestFieldTypeImpl<T, std::enable_if_t<std::is_enum_v<T>>>
{
using Type = NearestFieldType<std::underlying_type_t<T>>;
};
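As a sanity check of the enum fallback just above, here is a self-contained sketch with simplified stand-in traits (the trait names mirror the diff, but everything else is an assumption made for illustration): an enum whose underlying type is int8_t resolves to the same field type as that underlying type.
```cpp
#include <cstdint>
#include <type_traits>

// Simplified stand-ins for the real ClickHouse types (illustration only).
using Int64  = int64_t;
using UInt64 = uint64_t;

template <typename T, typename SFINAE = void> struct NearestFieldTypeImpl;
template <> struct NearestFieldTypeImpl<int8_t>  { using Type = Int64; };
template <> struct NearestFieldTypeImpl<uint8_t> { using Type = UInt64; };

template <typename T>
using NearestFieldType = typename NearestFieldTypeImpl<T>::Type;

// The enum fallback from the diff: defer to the underlying integer type.
template <typename T>
struct NearestFieldTypeImpl<T, std::enable_if_t<std::is_enum_v<T>>>
{
    using Type = NearestFieldType<std::underlying_type_t<T>>;
};

enum class Priority : int8_t { Low, High };
static_assert(std::is_same_v<NearestFieldType<Priority>, Int64>,
              "an enum maps to the field type of its underlying type");

int main() {}
```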
/** 32 is enough. Round number is used for alignment and for better arithmetic inside std::vector.
* NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 40 bytes.
*/
@ -314,18 +362,24 @@ public:
bool isNull() const { return which == Types::Null; }
template <typename T> T & get()
template <typename T>
T & get();
template <typename T>
const T & get() const
{
using TWithoutRef = std::remove_reference_t<T>;
TWithoutRef * MAY_ALIAS ptr = reinterpret_cast<TWithoutRef*>(&storage);
return *ptr;
auto mutable_this = const_cast<std::decay_t<decltype(*this)> *>(this);
return mutable_this->get<T>();
}
template <typename T> const T & get() const
template <typename T>
T & reinterpret();
template <typename T>
const T & reinterpret() const
{
using TWithoutRef = std::remove_reference_t<T>;
const TWithoutRef * MAY_ALIAS ptr = reinterpret_cast<const TWithoutRef*>(&storage);
return *ptr;
auto mutable_this = const_cast<std::decay_t<decltype(*this)> *>(this);
return mutable_this->reinterpret<T>();
}
template <typename T> bool tryGet(T & result)
@ -427,6 +481,8 @@ public:
return rhs <= *this;
}
// More like bitwise equality as opposed to semantic equality:
// Null equals Null and NaN equals NaN.
bool operator== (const Field & rhs) const
{
if (which != rhs.which)
@ -435,9 +491,13 @@ public:
switch (which)
{
case Types::Null: return true;
case Types::UInt64:
case Types::Int64:
case Types::Float64: return get<UInt64>() == rhs.get<UInt64>();
case Types::UInt64: return get<UInt64>() == rhs.get<UInt64>();
case Types::Int64: return get<Int64>() == rhs.get<Int64>();
case Types::Float64:
{
// Compare as UInt64 so that NaNs compare as equal.
return reinterpret<UInt64>() == rhs.reinterpret<UInt64>();
}
case Types::String: return get<String>() == rhs.get<String>();
case Types::Array: return get<Array>() == rhs.get<Array>();
case Types::Tuple: return get<Tuple>() == rhs.get<Tuple>();
@ -457,6 +517,42 @@ public:
return !(*this == rhs);
}
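The practical effect of the bitwise comparison above is that two Fields holding the same NaN bit pattern compare equal, even though IEEE-754 says NaN != NaN. A standalone illustration of that difference (not part of the diff):
```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main()
{
    double a = std::nan("");
    double b = a;  // identical bit pattern

    // Semantic IEEE-754 comparison: NaN is not equal to anything, including itself.
    assert(!(a == b));

    // Bitwise comparison, which is what Field::operator== does via reinterpret<UInt64>().
    uint64_t ua, ub;
    std::memcpy(&ua, &a, sizeof a);
    std::memcpy(&ub, &b, sizeof b);
    assert(ua == ub);
}
```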
/// Field is template parameter, to allow universal reference for field,
/// that is useful for const and non-const values.
template <typename F, typename FieldRef>
static auto dispatch(F && f, FieldRef && field)
{
switch (field.which)
{
case Types::Null: return f(field.template get<Null>());
case Types::UInt64: return f(field.template get<UInt64>());
case Types::UInt128: return f(field.template get<UInt128>());
case Types::Int64: return f(field.template get<Int64>());
case Types::Float64: return f(field.template get<Float64>());
case Types::String: return f(field.template get<String>());
case Types::Array: return f(field.template get<Array>());
case Types::Tuple: return f(field.template get<Tuple>());
case Types::Decimal32: return f(field.template get<DecimalField<Decimal32>>());
case Types::Decimal64: return f(field.template get<DecimalField<Decimal64>>());
case Types::Decimal128: return f(field.template get<DecimalField<Decimal128>>());
case Types::AggregateFunctionState: return f(field.template get<AggregateFunctionStateData>());
case Types::Int128:
// TODO: investigate where we need Int128 Fields. There are no
// field visitors that support them, and they only arise indirectly
// in some functions that use Decimal columns: they get the
// underlying Field value with get<Int128>(). Probably should be
// switched to DecimalField, but this is a whole endeavor in itself.
throw Exception("Unexpected Int128 in Field::dispatch()", ErrorCodes::LOGICAL_ERROR);
}
// GCC 9 complains that control reaches the end, despite that we handle
// all the cases above (maybe because of throw?). Return something to
// silence it.
Null null{};
return f(null);
}
private:
std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(Types::Which),
Null, UInt64, UInt128, Int64, Int128, Float64, String, Array, Tuple,
@ -493,37 +589,6 @@ private:
}
template <typename F, typename Field> /// Field template parameter may be const or non-const Field.
static void dispatch(F && f, Field & field)
{
switch (field.which)
{
case Types::Null: f(field.template get<Null>()); return;
// gcc 7.3.0
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
case Types::UInt64: f(field.template get<UInt64>()); return;
case Types::UInt128: f(field.template get<UInt128>()); return;
case Types::Int64: f(field.template get<Int64>()); return;
case Types::Int128: f(field.template get<Int128>()); return;
case Types::Float64: f(field.template get<Float64>()); return;
#if !__clang__
#pragma GCC diagnostic pop
#endif
case Types::String: f(field.template get<String>()); return;
case Types::Array: f(field.template get<Array>()); return;
case Types::Tuple: f(field.template get<Tuple>()); return;
case Types::Decimal32: f(field.template get<DecimalField<Decimal32>>()); return;
case Types::Decimal64: f(field.template get<DecimalField<Decimal64>>()); return;
case Types::Decimal128: f(field.template get<DecimalField<Decimal128>>()); return;
case Types::AggregateFunctionState: f(field.template get<AggregateFunctionStateData>()); return;
}
}
void create(const Field & x)
{
dispatch([this] (auto & value) { createConcrete(value); }, x);
@ -621,6 +686,22 @@ template <> struct Field::EnumToType<Field::Types::Decimal64> { using Type = Dec
template <> struct Field::EnumToType<Field::Types::Decimal128> { using Type = DecimalField<Decimal128>; };
template <> struct Field::EnumToType<Field::Types::AggregateFunctionState> { using Type = AggregateFunctionStateData; };
template <typename T>
T & Field::get()
{
using ValueType = std::decay_t<T>;
//assert(TypeToEnum<NearestFieldType<ValueType>>::value == which);
ValueType * MAY_ALIAS ptr = reinterpret_cast<ValueType *>(&storage);
return *ptr;
}
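/// Note (editorial): unlike get(), reinterpret() below intentionally reads the storage as a type
/// that may differ from the one actually stored (e.g. an Int64 viewed as UInt64); this is what
/// FieldVisitorSum and the bitwise Float64 comparison in operator== rely on.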
template <typename T>
T & Field::reinterpret()
{
using ValueType = std::decay_t<T>;
ValueType * MAY_ALIAS ptr = reinterpret_cast<ValueType *>(&storage);
return *ptr;
}
template <typename T>
T get(const Field & field)
@ -651,49 +732,6 @@ template <> struct TypeName<Array> { static std::string get() { return "Array";
template <> struct TypeName<Tuple> { static std::string get() { return "Tuple"; } };
template <> struct TypeName<AggregateFunctionStateData> { static std::string get() { return "AggregateFunctionState"; } };
/// char may be signed or unsigned, and behave identically to signed char or unsigned char,
/// but they are always three different types.
/// signedness of char is different in Linux on x86 and Linux on ARM.
template <> struct NearestFieldTypeImpl<char> { using Type = std::conditional_t<is_signed_v<char>, Int64, UInt64>; };
template <> struct NearestFieldTypeImpl<signed char> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<unsigned char> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt16> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt32> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<DayNum> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt128> { using Type = UInt128; };
template <> struct NearestFieldTypeImpl<UUID> { using Type = UInt128; };
template <> struct NearestFieldTypeImpl<Int16> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<Int32> { using Type = Int64; };
/// long and long long are always different types that may behave identically or not.
/// This is different on Linux and Mac.
template <> struct NearestFieldTypeImpl<long> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<long long> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<unsigned long> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<unsigned long long> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Int128> { using Type = Int128; };
template <> struct NearestFieldTypeImpl<Decimal32> { using Type = DecimalField<Decimal32>; };
template <> struct NearestFieldTypeImpl<Decimal64> { using Type = DecimalField<Decimal64>; };
template <> struct NearestFieldTypeImpl<Decimal128> { using Type = DecimalField<Decimal128>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal32>> { using Type = DecimalField<Decimal32>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal64>> { using Type = DecimalField<Decimal64>; };
template <> struct NearestFieldTypeImpl<DecimalField<Decimal128>> { using Type = DecimalField<Decimal128>; };
template <> struct NearestFieldTypeImpl<Float32> { using Type = Float64; };
template <> struct NearestFieldTypeImpl<Float64> { using Type = Float64; };
template <> struct NearestFieldTypeImpl<const char *> { using Type = String; };
template <> struct NearestFieldTypeImpl<String> { using Type = String; };
template <> struct NearestFieldTypeImpl<Array> { using Type = Array; };
template <> struct NearestFieldTypeImpl<Tuple> { using Type = Tuple; };
template <> struct NearestFieldTypeImpl<bool> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<Null> { using Type = Null; };
template <> struct NearestFieldTypeImpl<AggregateFunctionStateData> { using Type = AggregateFunctionStateData; };
template <typename T>
decltype(auto) castToNearestFieldType(T && x)
{

View File

@ -176,7 +176,7 @@ void buildSingleAttribute(
AutoPtr<Element> null_value_element(doc->createElement("null_value"));
String null_value_str;
if (dict_attr->default_value)
null_value_str = queryToString(dict_attr->default_value);
null_value_str = getUnescapedFieldString(dict_attr->default_value->as<ASTLiteral>()->value);
AutoPtr<Text> null_value(doc->createTextNode(null_value_str));
null_value_element->appendChild(null_value);
attribute_element->appendChild(null_value_element);
@ -184,7 +184,19 @@ void buildSingleAttribute(
if (dict_attr->expression != nullptr)
{
AutoPtr<Element> expression_element(doc->createElement("expression"));
AutoPtr<Text> expression(doc->createTextNode(queryToString(dict_attr->expression)));
/// EXPRESSION PROPERTY should be expression or string
String expression_str;
if (const auto * literal = dict_attr->expression->as<ASTLiteral>();
literal && literal->value.getType() == Field::Types::String)
{
expression_str = getUnescapedFieldString(literal->value);
}
else
expression_str = queryToString(dict_attr->expression);
AutoPtr<Text> expression(doc->createTextNode(expression_str));
expression_element->appendChild(expression);
attribute_element->appendChild(expression_element);
}

View File

@ -71,12 +71,6 @@ void ZlibDeflatingWriteBuffer::nextImpl()
int rc = deflate(&zstr, Z_NO_FLUSH);
out->position() = out->buffer().end() - zstr.avail_out;
// Unpoison the result of deflate explicitly. It uses some custom SSE algo
// for computing CRC32, and it looks like msan is unable to comprehend
// it fully, so it complains about the resulting value depending on the
// uninitialized padding of the input buffer.
__msan_unpoison(out->position(), zstr.avail_out);
if (rc != Z_OK)
throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED);
}
@ -99,12 +93,6 @@ void ZlibDeflatingWriteBuffer::finish()
int rc = deflate(&zstr, Z_FINISH);
out->position() = out->buffer().end() - zstr.avail_out;
// Unpoison the result of deflate explicitly. It uses some custom SSE algo
// for computing CRC32, and it looks like msan is unable to comprehend
// it fully, so it complains about the resulting value depending on the
// uninitialized padding of the input buffer.
__msan_unpoison(out->position(), zstr.avail_out);
if (rc == Z_STREAM_END)
{
finished = true;

View File

@ -15,8 +15,6 @@
namespace DB
{
template <> struct NearestFieldTypeImpl<PartLogElement::Type> { using Type = UInt64; };
Block PartLogElement::createBlock()
{
auto event_type_datatype = std::make_shared<DataTypeEnum8>(

View File

@ -21,8 +21,6 @@
namespace DB
{
template <> struct NearestFieldTypeImpl<QueryLogElement::Type> { using Type = UInt64; };
Block QueryLogElement::createBlock()
{
auto query_status_datatype = std::make_shared<DataTypeEnum8>(

View File

@ -10,8 +10,6 @@
namespace DB
{
template <> struct NearestFieldTypeImpl<Message::Priority> { using Type = UInt64; };
Block TextLogElement::createBlock()
{
auto priority_datatype = std::make_shared<DataTypeEnum8>(

View File

@ -96,6 +96,12 @@ namespace ErrorCodes
}
namespace
{
const char * DELETE_ON_DESTROY_MARKER_PATH = "delete-on-destroy.txt";
}
MergeTreeData::MergeTreeData(
const String & database_,
const String & table_,
@ -801,6 +807,17 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
part->relative_path = part_name;
bool broken = false;
Poco::Path part_path(getFullPathOnDisk(part_disk_ptr), part_name);
Poco::Path marker_path(part_path, DELETE_ON_DESTROY_MARKER_PATH);
if (Poco::File(marker_path).exists())
{
LOG_WARNING(log, "Detaching stale part " << getFullPathOnDisk(part_disk_ptr) << part_name << ", which should have been deleted after a move. This can only happen after an unclean restart of ClickHouse that followed a move of a part while some operation was blocking removal of the stale copy.");
std::lock_guard loading_lock(mutex);
broken_parts_to_detach.push_back(part);
++suspicious_broken_parts;
return;
}
try
{
part->loadColumnsChecksumsIndexes(require_part_metadata, true);
@ -2515,7 +2532,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(
void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy)
{
auto lock = lockParts();
for (const auto & original_active_part : getDataPartsStateRange(DataPartState::Committed))
for (auto original_active_part : getDataPartsStateRange(DataPartState::Committed))
{
if (part_copy->name == original_active_part->name)
{
@ -2528,6 +2545,16 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy)
auto part_it = data_parts_indexes.insert(part_copy).first;
modifyPartState(part_it, DataPartState::Committed);
Poco::Path marker_path(Poco::Path(original_active_part->getFullPath()), DELETE_ON_DESTROY_MARKER_PATH);
try
{
Poco::File(marker_path).createFile();
}
catch (Poco::Exception & e)
{
LOG_ERROR(log, e.what() << " (while creating DeleteOnDestroy marker: " + backQuote(marker_path.toString()) + ")");
}
return;
}
}
@ -2547,7 +2574,6 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String &
return getActiveContainingPart(part_info);
}
MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartition(MergeTreeData::DataPartState state, const String & partition_id)
{
DataPartStateAndPartitionID state_with_partition{state, partition_id};
@ -2713,8 +2739,9 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String &
DataPartsVector parts;
if (moving_part)
{
parts.push_back(getActiveContainingPart(partition_id));
if (!parts.back())
auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version);
parts.push_back(getActiveContainingPart(part_info));
if (!parts.back() || parts.back()->name != part_info.getPartName())
throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART);
}
else
@ -2729,6 +2756,9 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String &
return part_ptr->disk->getName() == disk->getName();
}), parts.end());
if (parts.empty())
throw Exception("Nothing to move", ErrorCodes::NO_SUCH_DATA_PART);
if (parts.empty())
{
String no_parts_to_move_message;
@ -2741,7 +2771,7 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String &
}
if (!movePartsToSpace(parts, std::static_pointer_cast<const DiskSpace::Space>(disk)))
throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED);
throw Exception("Cannot move parts because moves are manually disabled", ErrorCodes::ABORTED);
}
@ -2757,18 +2787,21 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
DataPartsVector parts;
if (moving_part)
{
parts.push_back(getActiveContainingPart(partition_id));
if (!parts.back())
auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version);
parts.emplace_back(getActiveContainingPart(part_info));
if (!parts.back() || parts.back()->name != part_info.getPartName())
throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART);
}
else
parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
auto volume = storage_policy->getVolumeByName(name);
if (!volume)
throw Exception("Volume " + name + " does not exists on policy " + storage_policy->getName(), ErrorCodes::UNKNOWN_DISK);
if (parts.empty())
throw Exception("Nothing to move", ErrorCodes::NO_SUCH_DATA_PART);
parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr)
{
for (const auto & disk : volume->disks)
@ -2793,7 +2826,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
}
if (!movePartsToSpace(parts, std::static_pointer_cast<const DiskSpace::Space>(volume)))
throw Exception("Cannot move parts because moves are manually disabled.", ErrorCodes::ABORTED);
throw Exception("Cannot move parts because moves are manually disabled", ErrorCodes::ABORTED);
}

View File

@ -346,6 +346,11 @@ MergeTreeDataPart::~MergeTreeDataPart()
}
dir.remove(true);
if (state == State::DeleteOnDestroy)
{
LOG_TRACE(storage.log, "Removed part from old location " << path);
}
}
catch (...)
{

View File

@ -261,6 +261,7 @@ const char * auto_contributors[] {
"Okada Haruki",
"Oleg Favstov",
"Oleg Komarov",
"Oleg Matrokhin",
"Oleg Obleukhov",
"Olga Khvostikova",
"Orivej Desh",
@ -319,6 +320,7 @@ const char * auto_contributors[] {
"The-Alchemist",
"Tobias Adamson",
"Tsarkova Anastasia",
"Ubus",
"VDimir",
"Vadim",
"Vadim Plakhtinskiy",
@ -384,6 +386,7 @@ const char * auto_contributors[] {
"ap11",
"aprudaev",
"artpaul",
"asiana21",
"avasiliev",
"avsharapov",
"benamazing",

View File

@ -628,11 +628,11 @@ class ClickHouseInstance:
def http_query(self, sql, data=None):
return urllib.urlopen("http://" + self.ip_address + ":8123/?query=" + urllib.quote(sql, safe=''), data).read()
def restart_clickhouse(self, stop_start_wait_sec=5):
def restart_clickhouse(self, stop_start_wait_sec=5, kill=False):
if not self.stay_alive:
raise Exception("clickhouse can be restarted only with stay_alive=True instance")
self.exec_in_container(["bash", "-c", "pkill clickhouse"], user='root')
self.exec_in_container(["bash", "-c", "pkill {} clickhouse".format("-9" if kill else "")], user='root')
time.sleep(stop_start_wait_sec)
self.exec_in_container(["bash", "-c", "{} --daemon".format(CLICKHOUSE_START_COMMAND)], user=str(os.getuid()))

View File

@ -3,6 +3,7 @@ import pytest
import random
import re
import string
import threading
import time
from multiprocessing.dummy import Pool
from helpers.client import QueryRuntimeException
@ -15,6 +16,7 @@ node1 = cluster.add_instance('node1',
config_dir='configs',
main_configs=['configs/logs_config.xml'],
with_zookeeper=True,
stay_alive=True,
tmpfs=['/jbod1:size=40M', '/jbod2:size=40M', '/external:size=200M'],
macros={"shard": 0, "replica": 1} )
@ -22,6 +24,7 @@ node2 = cluster.add_instance('node2',
config_dir='configs',
main_configs=['configs/logs_config.xml'],
with_zookeeper=True,
stay_alive=True,
tmpfs=['/jbod1:size=40M', '/jbod2:size=40M', '/external:size=200M'],
macros={"shard": 0, "replica": 2} )
@ -1028,6 +1031,7 @@ def test_rename(start_cluster):
node1.query("DROP TABLE IF EXISTS default.renaming_table1")
node1.query("DROP TABLE IF EXISTS test.renaming_table2")
def test_freeze(start_cluster):
try:
node1.query("""
@ -1057,6 +1061,108 @@ def test_freeze(start_cluster):
node1.exec_in_container(["bash", "-c", "find /jbod1/shadow -name '*.mrk2' | grep '.*'"])
node1.exec_in_container(["bash", "-c", "find /external/shadow -name '*.mrk2' | grep '.*'"])
finally:
node1.query("DROP TABLE IF EXISTS default.freezing_table")
def test_kill_while_insert(start_cluster):
try:
name = "test_kill_while_insert"
node1.query("DROP TABLE IF EXISTS {name}".format(name=name))
node1.query("""
CREATE TABLE {name} (
s String
) ENGINE = MergeTree
ORDER BY tuple()
SETTINGS storage_policy='small_jbod_with_external'
""".format(name=name))
data = []
dates = []
for i in range(10):
data.append(get_random_string(1024 * 1024)) # 1MB value
node1.query("INSERT INTO {name} VALUES {}".format(','.join(["('" + s + "')" for s in data]), name=name))
disks = get_used_disks_for_table(node1, name)
assert set(disks) == {"jbod1"}
start_time = time.time()
long_select = threading.Thread(target=node1.query, args=("SELECT sleep(3) FROM {name}".format(name=name),))
long_select.start()
time.sleep(0.5)
node1.query("ALTER TABLE {name} MOVE PARTITION tuple() TO DISK 'external'".format(name=name))
assert time.time() - start_time < 2
node1.restart_clickhouse(kill=True)
try:
long_select.join()
except Exception:
pass  # the long SELECT is expected to fail because the server was killed
time.sleep(0.5)
assert node1.query("SELECT count() FROM {name}".format(name=name)).splitlines() == ["10"]
finally:
"""Don't drop table afterwards to not shadow assertion."""
def test_move_while_merge(start_cluster):
try:
name = "test_move_while_merge"
node1.query("""
CREATE TABLE {name} (
n Int64
) ENGINE = MergeTree
ORDER BY sleep(2)
SETTINGS storage_policy='small_jbod_with_external'
""".format(name=name))
node1.query("INSERT INTO {name} VALUES (1)".format(name=name))
node1.query("INSERT INTO {name} VALUES (2)".format(name=name))
parts = node1.query("SELECT name FROM system.parts WHERE table = '{name}' AND active = 1".format(name=name)).splitlines()
assert len(parts) == 2
def optimize():
node1.query("OPTIMIZE TABLE {name}".format(name=name))
optimize = threading.Thread(target=optimize)
optimize.start()
time.sleep(0.5)
with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MOVE PART '{part}' TO DISK 'external'".format(name=name, part=parts[0]))
exiting = False
no_exception = {}
def alter():
while not exiting:
try:
node1.query("ALTER TABLE {name} MOVE PART '{part}' TO DISK 'external'".format(name=name, part=parts[0]))
no_exception['missing'] = 'exception'
break
except QueryRuntimeException:
""""""
alter_thread = threading.Thread(target=alter)
alter_thread.start()
optimize.join()
time.sleep(0.5)
exiting = True
alter_thread.join()
assert len(no_exception) == 0
assert node1.query("SELECT count() FROM {name}".format(name=name)).splitlines() == ["2"]
finally:
node1.query("DROP TABLE IF EXISTS {name}".format(name=name))

View File

@ -3,6 +3,7 @@
1
SYSTEM RELOAD DICTIONARY
0
0
10
1
CREATE DATABASE

View File

@ -18,6 +18,7 @@ SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND
SELECT 'SYSTEM RELOAD DICTIONARY';
SYSTEM RELOAD DICTIONARY 'dict_db_01036.dict';
SELECT sleep(0.3);
SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict';
SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0));
SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict';

View File

@ -3,6 +3,7 @@
1
SYSTEM RELOAD DICTIONARY
0
0
10
1
CREATE DATABASE

View File

@ -18,6 +18,7 @@ SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name
SELECT 'SYSTEM RELOAD DICTIONARY';
SYSTEM RELOAD DICTIONARY 'foo 1234.dict';
SELECT sleep(0.3);
SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict';
SELECT dictGetUInt64('foo 1234.dict', 'val', toUInt64(0));
SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict';

View File

@ -0,0 +1,4 @@
hello
world
21844
xxx

View File

@ -0,0 +1,27 @@
DROP DATABASE IF EXISTS dictdb;
CREATE DATABASE dictdb Engine = Ordinary;
CREATE TABLE dictdb.dicttbl(key Int64, value_default String, value_expression String) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO dictdb.dicttbl VALUES (12, 'hello', '55:66:77');
CREATE DICTIONARY dictdb.dict
(
key Int64 DEFAULT -1,
value_default String DEFAULT 'world',
value_expression String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)'
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb'))
LAYOUT(FLAT())
LIFETIME(1);
SELECT dictGetString('dictdb.dict', 'value_default', toUInt64(12));
SELECT dictGetString('dictdb.dict', 'value_default', toUInt64(14));
SELECT dictGetString('dictdb.dict', 'value_expression', toUInt64(12));
SELECT dictGetString('dictdb.dict', 'value_expression', toUInt64(14));
DROP DATABASE IF EXISTS dictdb;

View File

@ -4,7 +4,7 @@ FROM ubuntu:18.04
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get -y install tzdata python llvm-6.0 llvm-6.0-dev libreadline-dev libicu-dev bsdutils llvm-8 \
&& env DEBIAN_FRONTEND=noninteractive apt-get -y install tzdata python llvm-6.0 llvm-6.0-dev libreadline-dev libicu-dev bsdutils llvm-8 gdb \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \

View File

@ -196,15 +196,16 @@ High compression levels are useful for asymmetric scenarios, like compress once,
ClickHouse supports temporary tables which have the following characteristics:
- Temporary tables disappear when the session ends, including if the connection is lost.
- A temporary table use the Memory engine only.
- A temporary table uses the Memory engine only.
- The DB can't be specified for a temporary table. It is created outside of databases.
- It is not possible to create a temporary table with a distributed DDL query on all cluster servers (by using `ON CLUSTER`): such a table exists only in the current session.
- If a temporary table has the same name as another one and a query specifies the table name without specifying the DB, the temporary table will be used.
- For distributed query processing, temporary tables used in a query are passed to remote servers.
To create a temporary table, use the following syntax:
```sql
CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name [ON CLUSTER cluster]
CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
@ -214,6 +215,8 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name [ON CLUSTER cluster]
In most cases, temporary tables are not created manually, but when using external data for a query, or for distributed `(GLOBAL) IN`. For more information, see the appropriate sections
It's possible to use tables with [ENGINE = Memory](../operations/table_engines/memory.md) instead of temporary tables.
## Distributed DDL queries (ON CLUSTER clause)
The `CREATE`, `DROP`, `ALTER`, and `RENAME` queries support distributed execution on a cluster.

View File

@ -203,9 +203,9 @@ ClickHouse использует небольшое подмножество фу
[Ivan Blinkov](https://github.com/blinkov/) is a very good person. The documentation website itself is based on technologies that do not meet the requirements of the task, and those technologies are hard to fix.
### 3.4. Add Japanese language to the documentation.
### 3.4. + Add Japanese language to the documentation.
This task will be done by [Ivan Blinkov](https://github.com/blinkov/) by the end of December 2019.
This task will be done by [Ivan Blinkov](https://github.com/blinkov/) by the end of December 2019. Done.
## 4. Network interaction.
@ -1207,9 +1207,9 @@ zhang2014.
## 22. Debts and unfinished features.
### 22.1. Fix broken timeouts when TLS is used.
### 22.1. + Fix broken timeouts when TLS is used.
Currently being looked at by [Alexander Sapin](https://github.com/alesapin), but he may delegate the task to anyone. Needed for Yandex.Cloud.
Needed for Yandex.Cloud. Done by Alexey Milovidov.
### 22.2. Remove the ability to change settings over the native protocol in readonly mode.
@ -1257,12 +1257,14 @@ zhang2014.
Originally assigned to [Ivan Lezhankin](https://github.com/abyss7), but for unknown reasons it was not done for several months.
Done by Mikhail Filimonov, Altinity.
### 22.13. Investigate why some collations do not work.
### 22.13. + Investigate why some collations do not work.
Originally assigned to [Ivan Lezhankin](https://github.com/abyss7), but in the end it was done by Alexander Sapin.
### 22.14. Investigate why StorageSet does not work for MergeTree tables under certain conditions.
Apparently done by Nikita Mikhailov; check the existing issues on this topic.
### 22.15. Normalization of commits in Kafka and idempotency of operations.

View File

@ -194,16 +194,17 @@ ENGINE = MergeTree()
ClickHouse supports temporary tables with the following characteristics:
- temporary tables disappear when the session ends, including if the connection is lost;
- Temporary tables disappear when the session ends, including if the connection is lost.
- A temporary table uses the Memory engine only.
- A database cannot be specified for a temporary table. Temporary tables are created outside of databases.
- if a temporary table has the same name as another table and a query mentions that name without specifying the database, the temporary table is used;
- during distributed query processing, the temporary tables used in a query are passed to remote servers.
- A database cannot be specified for a temporary table. It is created outside of databases.
- A temporary table cannot be created with a distributed DDL query on all cluster servers (with the `ON CLUSTER` option): such a table exists only within the current session.
- If a temporary table has the same name as another table and a query mentions that name without specifying the database, the temporary table is used.
- During distributed query processing, the temporary tables used in a query are passed to remote servers.
To create a temporary table, use the following syntax:
```sql
CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name [ON CLUSTER cluster]
CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
@ -213,6 +214,8 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name [ON CLUSTER cluster]
In most cases, temporary tables are created not manually, but when using external data for a query, or for distributed `(GLOBAL) IN`. For more information, see the appropriate sections.
Instead of temporary tables, you can use regular tables with [ENGINE = Memory](../operations/table_engines/memory.md).
## Distributed DDL queries (ON CLUSTER clause)
The `CREATE`, `DROP`, `ALTER`, and `RENAME` queries support distributed execution on a cluster.

View File

@ -158,7 +158,7 @@ def transfer_packages_dupload(ssh_key, path, repo_user, repo_url, incoming_direc
def clear_old_incoming_packages(ssh_connection, user):
for pkg in ('deb', 'rpm', 'tgz'):
for release_type in ('stable', 'testing', 'prestable'):
for release_type in ('stable', 'testing', 'prestable', 'lts'):
try:
if pkg != 'tgz':
ssh_connection.execute("rm /home/{user}/incoming/clickhouse/{pkg}/{release_type}/*".format(
@ -201,7 +201,7 @@ if __name__ == "__main__":
parser.add_argument('--deb-directory')
parser.add_argument('--rpm-directory')
parser.add_argument('--tgz-directory')
parser.add_argument('--release-type', choices=('testing', 'stable', 'prestable'), default='testing')
parser.add_argument('--release-type', choices=('testing', 'stable', 'prestable', 'lts'), default='testing')
parser.add_argument('--ssh-key-path')
parser.add_argument('--gpg-passphrase', required=True)
parser.add_argument('--gpg-sec-key-path')