Merge branch 'master' into synchronous_mutations

alesapin 2019-12-19 15:44:02 +03:00
commit 88033a4333
28 changed files with 335 additions and 327 deletions

View File

@@ -206,6 +206,13 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${C
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}")
if (COMPILER_CLANG)
# Exception unwinding doesn't work in clang release build without this option
# TODO investigate if contrib/libcxxabi is out of date
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer")
endif ()
option (ENABLE_LIBRARIES "Enable all libraries (Global default switch)" ON)
option (UNBUNDLED "Try find all libraries in system. We recommend to avoid this mode for production builds, because we cannot guarantee exact versions and variants of libraries your system has installed. This mode exists for enthusiastic developers who search for trouble. Also it is useful for maintainers of OS packages." OFF)
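A note on the flag added above: optimized clang builds omit the frame pointer by default, and both exception unwinding (per the TODO) and fast stack walking rely on the saved-frame-pointer chain. A minimal, purely illustrative sketch (not ClickHouse code), assuming x86-64 and a build with `-fno-omit-frame-pointer`:

```cpp
// Walks the frame-pointer chain. With frame pointers omitted, rbp is
// repurposed as a scratch register and [rbp] no longer holds a
// {caller rbp, return address} pair, so this kind of walk breaks.
#include <cstdio>

struct Frame
{
    Frame * caller;  // saved rbp of the calling frame
    void * ret_addr; // return address pushed by `call`
};

__attribute__((noinline)) void printBacktrace()
{
    auto * frame = static_cast<Frame *>(__builtin_frame_address(0));
    for (int depth = 0; frame && depth < 16; ++depth)
    {
        std::printf("#%d %p\n", depth, frame->ret_addr);
        frame = frame->caller;
    }
}

__attribute__((noinline)) void f() { printBacktrace(); }

int main() { f(); }
```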

View File

@@ -218,6 +218,11 @@ else()
endif()
add_subdirectory(arrow-cmake)
# The library is large - avoid bloat.
target_compile_options (${ARROW_LIBRARY} PRIVATE -g0)
target_compile_options (${THRIFT_LIBRARY} PRIVATE -g0)
target_compile_options (${PARQUET_LIBRARY} PRIVATE -g0)
endif()
endif()
@@ -320,6 +325,11 @@ if (USE_INTERNAL_AWS_S3_LIBRARY)
set (CMAKE_REQUIRED_FLAGS ${save_CMAKE_REQUIRED_FLAGS})
set (CMAKE_CMAKE_MODULE_PATH ${save_CMAKE_MODULE_PATH})
add_subdirectory(aws-s3-cmake)
# The library is large - avoid bloat.
target_compile_options (aws_s3 PRIVATE -g0)
target_compile_options (aws_s3_checksums PRIVATE -g0)
target_compile_options (libcurl PRIVATE -g0)
endif ()
if (USE_BASE64)
@@ -328,6 +338,13 @@ endif()
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
add_subdirectory (hyperscan)
# The library is large - avoid bloat.
if (USE_STATIC_LIBRARIES)
target_compile_options (hs PRIVATE -g0)
else ()
target_compile_options (hs_shared PRIVATE -g0)
endif ()
endif()
if (USE_SIMDJSON)
@@ -341,7 +358,3 @@ endif()
if (USE_FASTOPS)
add_subdirectory (fastops-cmake)
endif()
#if (USE_INTERNAL_ORC_LIBRARY)
# add_subdirectory(orc-cmake)
#endif ()

View File

@@ -1,229 +0,0 @@
# modified copy of contrib/orc/c++/src/CMakeLists.txt
set(LIBRARY_INCLUDE ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include)
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/src)
set(PROTOBUF_INCLUDE_DIR ${Protobuf_INCLUDE_DIR})
set(PROTOBUF_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE})
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
INCLUDE(CheckCXXSourceCompiles)
CHECK_CXX_SOURCE_COMPILES("
#include<fcntl.h>
#include<unistd.h>
int main(int,char*[]){
int f = open(\"/x/y\", O_RDONLY);
char buf[100];
return pread(f, buf, 100, 1000) == 0;
}"
HAS_PREAD
)
CHECK_CXX_SOURCE_COMPILES("
#include<time.h>
int main(int,char*[]){
struct tm time2020;
return !strptime(\"2020-02-02 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2020);
}"
HAS_STRPTIME
)
CHECK_CXX_SOURCE_COMPILES("
#include<string>
int main(int,char* argv[]){
return static_cast<int>(std::stoll(argv[0]));
}"
HAS_STOLL
)
CHECK_CXX_SOURCE_COMPILES("
#include<stdint.h>
#include<stdio.h>
int main(int,char*[]){
int64_t x = 1; printf(\"%lld\",x);
}"
INT64_IS_LL
)
CHECK_CXX_SOURCE_COMPILES("
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored \"-Wdeprecated\"
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored \"-Wdeprecated\"
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#pragma warning( push )
#pragma warning( disable : 4996 )
#pragma warning( pop )
#else
unknownCompiler!
#endif
int main(int, char *[]) {}"
HAS_DIAGNOSTIC_PUSH
)
CHECK_CXX_SOURCE_COMPILES("
#include<cmath>
int main(int, char *[]) {
return std::isnan(1.0f);
}"
HAS_STD_ISNAN
)
CHECK_CXX_SOURCE_COMPILES("
#include<mutex>
int main(int, char *[]) {
std::mutex test_mutex;
std::lock_guard<std::mutex> lock_mutex(test_mutex);
}"
HAS_STD_MUTEX
)
CHECK_CXX_SOURCE_COMPILES("
#include<string>
std::string func() {
std::string var = \"test\";
return std::move(var);
}
int main(int, char *[]) {}"
NEEDS_REDUNDANT_MOVE
)
INCLUDE(CheckCXXSourceRuns)
CHECK_CXX_SOURCE_RUNS("
#include<time.h>
int main(int, char *[]) {
time_t t = -14210715; // 1969-07-20 12:34:45
struct tm *ptm = gmtime(&t);
return !(ptm && ptm->tm_year == 69);
}"
HAS_PRE_1970
)
CHECK_CXX_SOURCE_RUNS("
#include<stdlib.h>
#include<time.h>
int main(int, char *[]) {
setenv(\"TZ\", \"America/Los_Angeles\", 1);
tzset();
struct tm time2037;
struct tm time2038;
strptime(\"2037-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2037);
strptime(\"2038-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2038);
return mktime(&time2038) - mktime(&time2037) != 31536000;
}"
HAS_POST_2038
)
set(CMAKE_REQUIRED_INCLUDES ${ZLIB_INCLUDE_DIR})
set(CMAKE_REQUIRED_LIBRARIES zlib)
CHECK_CXX_SOURCE_COMPILES("
#define Z_PREFIX
#include<zlib.h>
z_stream strm;
int main(int, char *[]) {
deflateReset(&strm);
}"
NEEDS_Z_PREFIX
)
configure_file (
"${LIBRARY_DIR}/Adaptor.hh.in"
"${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
)
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h ${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.cc
COMMAND ${PROTOBUF_EXECUTABLE}
-I${ClickHouse_SOURCE_DIR}/contrib/orc/proto
--cpp_out="${CMAKE_CURRENT_BINARY_DIR}"
"${ClickHouse_SOURCE_DIR}/contrib/orc/proto/orc_proto.proto"
)
set(SOURCE_FILES
"${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h
${LIBRARY_DIR}/io/InputStream.cc
${LIBRARY_DIR}/io/OutputStream.cc
${LIBRARY_DIR}/wrap/orc-proto-wrapper.cc
${LIBRARY_DIR}/Adaptor.cc
${LIBRARY_DIR}/ByteRLE.cc
${LIBRARY_DIR}/ColumnPrinter.cc
${LIBRARY_DIR}/ColumnReader.cc
${LIBRARY_DIR}/ColumnWriter.cc
${LIBRARY_DIR}/Common.cc
${LIBRARY_DIR}/Compression.cc
${LIBRARY_DIR}/Exceptions.cc
${LIBRARY_DIR}/Int128.cc
${LIBRARY_DIR}/LzoDecompressor.cc
${LIBRARY_DIR}/MemoryPool.cc
${LIBRARY_DIR}/OrcFile.cc
${LIBRARY_DIR}/Reader.cc
${LIBRARY_DIR}/RLEv1.cc
${LIBRARY_DIR}/RLEv2.cc
${LIBRARY_DIR}/RLE.cc
${LIBRARY_DIR}/Statistics.cc
${LIBRARY_DIR}/StripeStream.cc
${LIBRARY_DIR}/Timezone.cc
${LIBRARY_DIR}/TypeImpl.cc
${LIBRARY_DIR}/Vector.cc
${LIBRARY_DIR}/Writer.cc
)
if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
set(SOURCE_FILES ${SOURCE_FILES} ${LIBRARY_DIR}/OrcHdfsFile.cc)
endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
#list(TRANSFORM SOURCE_FILES PREPEND ${LIBRARY_DIR}/)
configure_file (
"${LIBRARY_INCLUDE}/orc/orc-config.hh.in"
"${CMAKE_CURRENT_BINARY_DIR}/orc/orc-config.hh"
)
add_library (orc ${SOURCE_FILES})
target_include_directories (orc
PRIVATE
${LIBRARY_INCLUDE}
${LIBRARY_DIR}
#PUBLIC
${CMAKE_CURRENT_BINARY_DIR}
PRIVATE
${PROTOBUF_INCLUDE_DIR}
${ZLIB_INCLUDE_DIR}
${SNAPPY_INCLUDE_DIR}
${LZ4_INCLUDE_DIR}
${LIBHDFSPP_INCLUDE_DIR}
)
target_link_libraries (orc PRIVATE
${Protobuf_LIBRARY}
${ZLIB_LIBRARIES}
${SNAPPY_LIBRARY}
${LZ4_LIBRARY}
${LIBHDFSPP_LIBRARIES}
)
#install(TARGETS orc DESTINATION lib)
if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
add_definitions(-DBUILD_LIBHDFSPP)
endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
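For reference, the compile and run probes in the file removed above are ordinary feature tests. The HAS_PRE_1970 check, for example, is equivalent to this standalone program (exit status 0 means pre-epoch timestamps work):

```cpp
// The HAS_PRE_1970 probe restated as a plain program: verifies that
// gmtime() handles negative (pre-1970) time_t values.
#include <ctime>

int main()
{
    time_t t = -14210715; // 1969-07-20 12:34:45 UTC
    struct tm * ptm = gmtime(&t);
    return !(ptm && ptm->tm_year == 69); // tm_year counts from 1900
}
```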

View File

@@ -14,7 +14,7 @@ template <typename T>
struct Avg
{
using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType>>;
using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType, UInt64>>;
};
template <typename T>

View File

@@ -1,46 +1,44 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
template <typename T>
template <typename T, typename Denominator>
struct AggregateFunctionAvgData
{
T sum = 0;
UInt64 count = 0;
T numerator = 0;
Denominator denominator = 0;
template <typename ResultT>
ResultT NO_SANITIZE_UNDEFINED result() const
{
if constexpr (std::is_floating_point_v<ResultT>)
if constexpr (std::numeric_limits<ResultT>::is_iec559)
return static_cast<ResultT>(sum) / count; /// allow division by zero
return static_cast<ResultT>(numerator) / denominator; /// allow division by zero
if (count == 0)
if (denominator == 0)
return static_cast<ResultT>(0);
return static_cast<ResultT>(sum / count);
return static_cast<ResultT>(numerator / denominator);
}
};
/// Calculates arithmetic mean of numbers.
template <typename T, typename Data>
class AggregateFunctionAvg final : public IAggregateFunctionDataHelper<Data, AggregateFunctionAvg<T, Data>>
template <typename T, typename Data, typename Derived>
class AggregateFunctionAvgBase : public IAggregateFunctionDataHelper<Data, Derived>
{
public:
using ResultType = std::conditional_t<IsDecimalNumber<T>, T, Float64>;
@@ -49,18 +47,13 @@ public:
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<Float64>>;
/// ctor for native types
AggregateFunctionAvg(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAvg<T, Data>>(argument_types_, {})
, scale(0)
{}
AggregateFunctionAvgBase(const DataTypes & argument_types_) : IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(0) {}
/// ctor for Decimals
AggregateFunctionAvg(const IDataType & data_type, const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAvg<T, Data>>(argument_types_, {})
, scale(getDecimalScale(data_type))
{}
String getName() const override { return "avg"; }
AggregateFunctionAvgBase(const IDataType & data_type, const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, Derived>(argument_types_, {}), scale(getDecimalScale(data_type))
{
}
DataTypePtr getReturnType() const override
{
@@ -70,29 +63,22 @@ public:
return std::make_shared<ResultDataType>();
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
this->data(place).sum += column.getData()[row_num];
++this->data(place).count;
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).sum += this->data(rhs).sum;
this->data(place).count += this->data(rhs).count;
this->data(place).numerator += this->data(rhs).numerator;
this->data(place).denominator += this->data(rhs).denominator;
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
writeBinary(this->data(place).sum, buf);
writeVarUInt(this->data(place).count, buf);
writeBinary(this->data(place).numerator, buf);
writeVarUInt(this->data(place).denominator, buf);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
{
readBinary(this->data(place).sum, buf);
readVarUInt(this->data(place).count, buf);
readBinary(this->data(place).numerator, buf);
readVarUInt(this->data(place).denominator, buf);
}
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
@@ -103,9 +89,25 @@ public:
const char * getHeaderFilePath() const override { return __FILE__; }
private:
protected:
UInt32 scale;
};
template <typename T, typename Data>
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>
{
public:
using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvg<T, Data>>::AggregateFunctionAvgBase;
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
this->data(place).numerator += column.getData()[row_num];
this->data(place).denominator += 1;
}
String getName() const override { return "avg"; }
};
}
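The essence of the refactoring above, as a self-contained sketch (simplified stand-ins, not the actual ClickHouse classes): the old {sum, count} pair becomes a generic {numerator, denominator}, so plain avg (denominator = row count) and the new avgWeighted (denominator = sum of weights) can share one base:

```cpp
#include <cstdint>
#include <limits>

// Simplified analogue of AggregateFunctionAvgData after the refactoring.
template <typename T, typename Denominator>
struct AvgData
{
    T numerator = 0;
    Denominator denominator = 0;

    template <typename ResultT>
    ResultT result() const
    {
        if constexpr (std::numeric_limits<ResultT>::is_iec559)
            return static_cast<ResultT>(numerator) / denominator; /// IEEE 754: division by zero is allowed
        if (denominator == 0)
            return static_cast<ResultT>(0);
        return static_cast<ResultT>(numerator / denominator);
    }
};

// avg accumulates {sum(x), count}; avgWeighted accumulates {sum(x * w), sum(w)}.
using PlainAvgData = AvgData<double, std::uint64_t>;
```

Note also the switch from `std::is_floating_point_v` to `std::numeric_limits<ResultT>::is_iec559`: the division-by-zero shortcut is only valid when the result type actually has IEEE 754 semantics.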

View File

@@ -0,0 +1,52 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionAvgWeighted.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include "registerAggregateFunctions.h"
namespace DB
{
namespace
{
template <typename T>
struct AvgWeighted
{
using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using Function = AggregateFunctionAvgWeighted<T, AggregateFunctionAvgData<FieldType, FieldType>>;
};
template <typename T>
using AggregateFuncAvgWeighted = typename AvgWeighted<T>::Function;
AggregateFunctionPtr createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertNoParameters(name, parameters);
assertBinary(name, argument_types);
AggregateFunctionPtr res;
const auto data_type = static_cast<const DataTypePtr>(argument_types[0]);
const auto data_type_weight = static_cast<const DataTypePtr>(argument_types[1]);
if (!data_type->equals(*data_type_weight))
throw Exception("Different types " + data_type->getName() + " and " + data_type_weight->getName() + " of arguments for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (isDecimal(data_type))
res.reset(createWithDecimalType<AggregateFuncAvgWeighted>(*data_type, *data_type, argument_types));
else
res.reset(createWithNumericType<AggregateFuncAvgWeighted>(*data_type, argument_types));
if (!res)
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
}
void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory & factory)
{
factory.registerFunction("avgWeighted", createAggregateFunctionAvgWeighted, AggregateFunctionFactory::CaseSensitive);
}
}

View File

@@ -0,0 +1,26 @@
#pragma once
#include <AggregateFunctions/AggregateFunctionAvg.h>
namespace DB
{
template <typename T, typename Data>
class AggregateFunctionAvgWeighted final : public AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>
{
public:
using AggregateFunctionAvgBase<T, Data, AggregateFunctionAvgWeighted<T, Data>>::AggregateFunctionAvgBase;
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto & values = static_cast<const ColVecType &>(*columns[0]);
const auto & weights = static_cast<const ColVecType &>(*columns[1]);
this->data(place).numerator += values.getData()[row_num] * weights.getData()[row_num];
this->data(place).denominator += weights.getData()[row_num];
}
String getName() const override { return "avgWeighted"; }
};
}
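For clarity, the semantics implemented here are avgWeighted(x, w) = sum(x * w) / sum(w). A standalone check against the values used by the new test further down in this diff (x = 1..5 with weights 5..1):

```cpp
#include <cstdio>

int main()
{
    const double x[] = {1, 2, 3, 4, 5};
    const double w[] = {5, 4, 3, 2, 1};
    double numerator = 0, denominator = 0;
    for (int i = 0; i < 5; ++i)
    {
        numerator += x[i] * w[i]; // 35
        denominator += w[i];      // 15
    }
    std::printf("%.17g\n", numerator / denominator); // 2.3333333333333335, as in the test reference
}
```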

View File

@@ -13,6 +13,7 @@ void registerAggregateFunctions()
auto & factory = AggregateFunctionFactory::instance();
registerAggregateFunctionAvg(factory);
registerAggregateFunctionAvgWeighted(factory);
registerAggregateFunctionCount(factory);
registerAggregateFunctionGroupArray(factory);
registerAggregateFunctionGroupUniqArray(factory);

View File

@@ -5,6 +5,7 @@ namespace DB
class AggregateFunctionFactory;
void registerAggregateFunctionAvg(AggregateFunctionFactory &);
void registerAggregateFunctionAvgWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionCount(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);

View File

@@ -287,10 +287,13 @@ void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid)
}
static thread_local pid_t current_tid = 0;
pid_t TaskStatsInfoGetter::getCurrentTID()
{
/// This call is always successful. - man gettid
return static_cast<pid_t>(syscall(SYS_gettid));
if (!current_tid)
current_tid = syscall(SYS_gettid); /// This call is always successful. - man gettid
return current_tid;
}
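This replaces a syscall on every call with a once-per-thread lookup, which is safe because a thread's id never changes. The pattern in isolation (one caveat the patch does not address: a value cached in the parent is stale in the child after fork()):

```cpp
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/// Memoized thread id: syscall(SYS_gettid) always succeeds (man gettid),
/// so the first result can be cached in a thread_local.
static thread_local pid_t cached_tid = 0;

pid_t getCurrentTID()
{
    if (!cached_tid)
        cached_tid = static_cast<pid_t>(syscall(SYS_gettid));
    return cached_tid;
}
```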

View File

@@ -28,6 +28,7 @@ namespace ErrorCodes
namespace
{
using NamesToTypeNames = std::unordered_map<std::string, std::string>;
/// Get value from field and convert it to string.
/// Also remove quotes from strings.
String getUnescapedFieldString(const Field & field)
@@ -112,21 +113,27 @@ void buildLayoutConfiguration(
* <range_min><name>StartDate</name></range_min>
* <range_max><name>EndDate</name></range_max>
*/
void buildRangeConfiguration(AutoPtr<Document> doc, AutoPtr<Element> root, const ASTDictionaryRange * range)
void buildRangeConfiguration(AutoPtr<Document> doc, AutoPtr<Element> root, const ASTDictionaryRange * range, const NamesToTypeNames & all_attrs)
{
// appends <key><name>value</name></key> to root
auto appendElem = [&doc, &root](const std::string & key, const std::string & value)
auto appendElem = [&doc, &root](const std::string & key, const std::string & name, const std::string & type)
{
AutoPtr<Element> element(doc->createElement(key));
AutoPtr<Element> name(doc->createElement("name"));
AutoPtr<Text> text(doc->createTextNode(value));
name->appendChild(text);
element->appendChild(name);
AutoPtr<Element> name_node(doc->createElement("name"));
AutoPtr<Text> name_text(doc->createTextNode(name));
name_node->appendChild(name_text);
element->appendChild(name_node);
AutoPtr<Element> type_node(doc->createElement("type"));
AutoPtr<Text> type_text(doc->createTextNode(type));
type_node->appendChild(type_text);
element->appendChild(type_node);
root->appendChild(element);
};
appendElem("range_min", range->min_attr_name);
appendElem("range_max", range->max_attr_name);
appendElem("range_min", range->min_attr_name, all_attrs.at(range->min_attr_name));
appendElem("range_max", range->max_attr_name, all_attrs.at(range->max_attr_name));
}
@@ -296,25 +303,25 @@ void buildPrimaryKeyConfiguration(
/**
* Transforms list of ASTDictionaryAttributeDeclarations to list of dictionary attributes
*/
std::unordered_set<std::string> buildDictionaryAttributesConfiguration(
NamesToTypeNames buildDictionaryAttributesConfiguration(
AutoPtr<Document> doc,
AutoPtr<Element> root,
const ASTExpressionList * dictionary_attributes,
const Names & key_columns)
{
const auto & children = dictionary_attributes->children;
std::unordered_set<std::string> dictionary_attributes_names;
NamesToTypeNames attributes_names_and_types;
for (size_t i = 0; i < children.size(); ++i)
{
const ASTDictionaryAttributeDeclaration * dict_attr = children[i]->as<const ASTDictionaryAttributeDeclaration>();
if (!dict_attr->type)
throw Exception("Dictionary attribute must has type", ErrorCodes::INCORRECT_DICTIONARY_DEFINITION);
dictionary_attributes_names.insert(dict_attr->name);
attributes_names_and_types.emplace(dict_attr->name, queryToString(dict_attr->type));
if (std::find(key_columns.begin(), key_columns.end(), dict_attr->name) == key_columns.end())
buildSingleAttribute(doc, root, dict_attr);
}
return dictionary_attributes_names;
return attributes_names_and_types;
}
/** Transform function with key-value arguments to configuration
@@ -404,7 +411,7 @@ void checkAST(const ASTCreateQuery & query)
/// Range can be empty
}
void checkPrimaryKey(const std::unordered_set<std::string> & all_attrs, const Names & key_attrs)
void checkPrimaryKey(const NamesToTypeNames & all_attrs, const Names & key_attrs)
{
for (const auto & key_attr : key_attrs)
if (all_attrs.count(key_attr) == 0)
@@ -438,8 +445,8 @@ DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuer
bool complex = DictionaryFactory::instance().isComplex(dictionary_layout->layout_type);
auto all_attr_names = buildDictionaryAttributesConfiguration(xml_document, structure_element, query.dictionary_attributes_list, pk_attrs);
checkPrimaryKey(all_attr_names, pk_attrs);
auto all_attr_names_and_types = buildDictionaryAttributesConfiguration(xml_document, structure_element, query.dictionary_attributes_list, pk_attrs);
checkPrimaryKey(all_attr_names_and_types, pk_attrs);
buildPrimaryKeyConfiguration(xml_document, structure_element, complex, pk_attrs, query.dictionary_attributes_list);
@@ -448,7 +455,7 @@ DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuer
buildLifetimeConfiguration(xml_document, current_dictionary, query.dictionary->lifetime);
if (query.dictionary->range)
buildRangeConfiguration(xml_document, structure_element, query.dictionary->range);
buildRangeConfiguration(xml_document, structure_element, query.dictionary->range, all_attr_names_and_types);
conf->load(xml_document);
return conf;
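The net effect on the generated configuration is that range_min and range_max now carry the attribute's type alongside its name. A minimal sketch using the same Poco DOM calls as the code above (element values are illustrative):

```cpp
#include <Poco/DOM/AutoPtr.h>
#include <Poco/DOM/DOMWriter.h>
#include <Poco/DOM/Document.h>
#include <Poco/DOM/Element.h>
#include <Poco/DOM/Text.h>
#include <iostream>

using Poco::AutoPtr;
using namespace Poco::XML;

int main()
{
    AutoPtr<Document> doc(new Document);
    AutoPtr<Element> range_min(doc->createElement("range_min"));

    AutoPtr<Element> name_node(doc->createElement("name"));
    AutoPtr<Text> name_text(doc->createTextNode("StartDate"));
    name_node->appendChild(name_text);
    range_min->appendChild(name_node);

    AutoPtr<Element> type_node(doc->createElement("type"));
    AutoPtr<Text> type_text(doc->createTextNode("Date"));
    type_node->appendChild(type_text);
    range_min->appendChild(type_node);

    doc->appendChild(range_min);
    DOMWriter().writeNode(std::cout, doc);
    // Prints (roughly): <range_min><name>StartDate</name><type>Date</type></range_min>
}
```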

View File

@@ -64,8 +64,11 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration)
EXPECT_EQ(config->getInt("dictionary.lifetime.max"), 10);
/// range
EXPECT_EQ(config->getString("dictionary.structure.range_min"), "second_column");
EXPECT_EQ(config->getString("dictionary.structure.range_max"), "third_column");
EXPECT_EQ(config->getString("dictionary.structure.range_min.name"), "second_column");
EXPECT_EQ(config->getString("dictionary.structure.range_max.name"), "third_column");
EXPECT_EQ(config->getString("dictionary.structure.range_min.type"), "UInt8");
EXPECT_EQ(config->getString("dictionary.structure.range_max.type"), "UInt8");
/// source
EXPECT_EQ(config->getString("dictionary.source.clickhouse.host"), "localhost");

View File

@@ -65,7 +65,7 @@ public:
if (arguments.size() >= 1)
{
const auto & argument = arguments[0];
if (!isInteger(argument.type) || !isColumnConst(*argument.column))
if (!isInteger(argument.type) || !argument.column || !isColumnConst(*argument.column))
throw Exception("Illegal type " + argument.type->getName() +
" of 0" +
" argument of function " + getName() +
@@ -81,13 +81,9 @@ public:
void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result, size_t input_rows_count) override
{
auto & result_col = block.getByPosition(result);
UInt32 scale = DataTypeDateTime64::default_scale;
if (const auto * dt64 = assert_cast<const DataTypeDateTime64 *>(result_col.type.get()))
{
scale = dt64->getScale();
}
const UInt32 scale = assert_cast<const DataTypeDateTime64 *>(result_col.type.get())->getScale();
result_col.column = DataTypeDateTime64(scale).createColumnConst(input_rows_count, nowSubsecond(scale));
result_col.column = result_col.type->createColumnConst(input_rows_count, nowSubsecond(scale));
}
};
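The simplification above relies on a guarantee established in getReturnType(): the result type is always DateTime64, so it can be cast unconditionally. assert_cast, roughly sketched below (not the exact ClickHouse implementation), makes that kind of assumption checked in debug builds and free in release builds:

```cpp
#include <cassert>

// Rough sketch of the assert_cast idea: a dynamic_cast-checked downcast in
// debug builds that degrades to a plain static_cast in release builds.
template <typename To, typename From>
To assert_cast(From * from)
{
#ifndef NDEBUG
    To casted = dynamic_cast<To>(from);
    assert(casted != nullptr);
    return casted;
#else
    return static_cast<To>(from);
#endif
}

// Local stand-ins for illustration only.
struct IDataType { virtual ~IDataType() = default; };
struct DataTypeDateTime64 : IDataType { unsigned scale = 3; };

int main()
{
    DataTypeDateTime64 dt64;
    const IDataType * type = &dt64;
    return assert_cast<const DataTypeDateTime64 *>(type)->scale == 3 ? 0 : 1;
}
```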

View File

@@ -9,6 +9,7 @@
#include <DataTypes/DataTypeSet.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeFunction.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/FieldToDataType.h>
@@ -37,6 +38,7 @@
#include <Interpreters/convertFieldToType.h>
#include <Interpreters/interpretSubquery.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/IdentifierSemantic.h>
namespace DB
{
@@ -392,6 +394,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
auto child_column_name = child->getColumnName();
const auto * lambda = child->as<ASTFunction>();
const auto * identifier = child->as<ASTIdentifier>();
if (lambda && lambda->name == "lambda")
{
/// If the argument is a lambda expression, just remember its approximate type.
@@ -435,6 +438,23 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
argument_types.push_back(column.type);
argument_names.push_back(column.name);
}
else if (identifier && node.name == "joinGet" && arg == 0)
{
String database_name;
String table_name;
std::tie(database_name, table_name) = IdentifierSemantic::extractDatabaseAndTable(*identifier);
if (database_name.empty())
database_name = data.context.getCurrentDatabase();
auto column_string = ColumnString::create();
column_string->insert(database_name + "." + table_name);
ColumnWithTypeAndName column(
ColumnConst::create(std::move(column_string), 1),
std::make_shared<DataTypeString>(),
getUniqueName(data.getSampleBlock(), "__joinGet"));
data.addAction(ExpressionAction::addColumn(column));
argument_types.push_back(column.type);
argument_names.push_back(column.name);
}
else
{
/// If the argument is not a lambda expression, call it recursively and find out its type.
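The new branch turns a bare table identifier in joinGet's first argument into a constant String column holding the qualified table name, falling back to the current database when none is given. The qualification rule in isolation (a hypothetical distillation, not the actual code path):

```cpp
#include <iostream>
#include <string>

// An identifier without an explicit database part gets the session's
// current database prepended, matching the "db.table" string built above.
std::string qualifyTableName(const std::string & database, const std::string & table, const std::string & current_database)
{
    return (database.empty() ? current_database : database) + "." + table;
}

int main()
{
    std::cout << qualifyTableName("", "join_any_left", "test") << '\n';      // test.join_any_left
    std::cout << qualifyTableName("test", "join_any_left_null", "") << '\n'; // test.join_any_left_null
}
```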

View File

@@ -42,6 +42,14 @@ void MarkTableIdentifiersMatcher::visit(const ASTFunction & func, ASTPtr &, Data
if (!data.aliases.count(*opt_name))
setIdentifierSpecial(ast);
}
// first argument of joinGet can be a table identifier
if (func.name == "joinGet")
{
auto & ast = func.arguments->children.at(0);
if (auto opt_name = tryGetIdentifierName(ast))
setIdentifierSpecial(ast);
}
}
}

View File

@@ -16,6 +16,8 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTIdentifier.h>
#include <cassert>
namespace DB
{
@@ -1095,10 +1097,14 @@ bool KeyCondition::mayBeTrueInParallelogram(const std::vector<Range> & parallelo
}
else if (element.function == RPNElement::FUNCTION_NOT)
{
assert(!rpn_stack.empty());
rpn_stack.back() = !rpn_stack.back();
}
else if (element.function == RPNElement::FUNCTION_AND)
{
assert(!rpn_stack.empty());
auto arg1 = rpn_stack.back();
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
@@ -1106,6 +1112,8 @@ bool KeyCondition::mayBeTrueInParallelogram(const std::vector<Range> & parallelo
}
else if (element.function == RPNElement::FUNCTION_OR)
{
assert(!rpn_stack.empty());
auto arg1 = rpn_stack.back();
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
@@ -1124,7 +1132,7 @@ bool KeyCondition::mayBeTrueInParallelogram(const std::vector<Range> & parallelo
}
if (rpn_stack.size() != 1)
throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInParallelogram", ErrorCodes::LOGICAL_ERROR);
return rpn_stack[0].can_be_true;
}
@@ -1223,6 +1231,8 @@ bool KeyCondition::alwaysUnknownOrTrue() const
}
else if (element.function == RPNElement::FUNCTION_AND)
{
assert(!rpn_stack.empty());
auto arg1 = rpn_stack.back();
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
@@ -1230,6 +1240,8 @@ bool KeyCondition::alwaysUnknownOrTrue() const
}
else if (element.function == RPNElement::FUNCTION_OR)
{
assert(!rpn_stack.empty());
auto arg1 = rpn_stack.back();
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
@@ -1239,6 +1251,9 @@
throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
}
if (rpn_stack.size() != 1)
throw Exception("Unexpected stack size in KeyCondition::alwaysUnknownOrTrue", ErrorCodes::LOGICAL_ERROR);
return rpn_stack[0];
}
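The added assert()s document a structural invariant of the RPN that KeyCondition builds: NOT needs one operand on the stack, AND/OR need two, and a well-formed expression leaves exactly one value. A toy evaluator making the same invariants explicit:

```cpp
#include <cassert>
#include <vector>

enum class Op { PushTrue, PushFalse, Not, And, Or };

// Toy boolean RPN evaluator; the asserts mirror the checks added above.
bool evaluateRPN(const std::vector<Op> & rpn)
{
    std::vector<bool> stack;
    for (Op op : rpn)
    {
        if (op == Op::PushTrue || op == Op::PushFalse)
            stack.push_back(op == Op::PushTrue);
        else if (op == Op::Not)
        {
            assert(!stack.empty());
            stack.back() = !stack.back();
        }
        else /// And, Or
        {
            assert(stack.size() >= 2);
            bool arg1 = stack.back(); stack.pop_back();
            bool arg2 = stack.back(); stack.pop_back();
            stack.push_back(op == Op::And ? (arg1 && arg2) : (arg1 || arg2));
        }
    }
    assert(stack.size() == 1); /// otherwise: "Unexpected stack size"
    return stack.back();
}

int main()
{
    /// (true AND false) OR (NOT false) -> true
    return evaluateRPN({Op::PushTrue, Op::PushFalse, Op::And, Op::PushFalse, Op::Not, Op::Or}) ? 0 : 1;
}
```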

View File

@@ -21,7 +21,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d
# Wait for it to be replaced
wait
${CLICKHOUSE_CLIENT} --user=readonly --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' &
${CLICKHOUSE_CLIENT_BINARY} --user=readonly --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' &
wait_for_query_to_start '42'
# Trying to run another query with the same query_id

View File

@@ -9,6 +9,14 @@ def [1,2] 2
abc [0] 1
--------joinGet--------
abc
def
\N
abc
def
abc
def

View File

@@ -37,6 +37,13 @@ SELECT '';
SELECT joinGet('join_any_left_null', 's', number) FROM numbers(3);
SELECT '';
-- Using identifier as the first argument
SELECT joinGet(join_any_left, 's', number) FROM numbers(3);
SELECT '';
SELECT joinGet(test.join_any_left_null, 's', number) FROM numbers(3);
SELECT '';
CREATE TABLE test.join_string_key (s String, x Array(UInt8), k UInt64) ENGINE = Join(ANY, LEFT, s);
INSERT INTO test.join_string_key VALUES ('abc', [0], 1), ('def', [1, 2], 2);
SELECT joinGet('join_string_key', 'x', 'abc'), joinGet('join_string_key', 'k', 'abc');

View File

@@ -6,7 +6,15 @@ SELECT CAST(1 as DateTime64('abc')); -- { serverError 43 } # Invalid scale param
SELECT CAST(1 as DateTime64(100)); -- { serverError 69 } # too big scale
SELECT CAST(1 as DateTime64(-1)); -- { serverError 43 } # signed scale parameter type
SELECT CAST(1 as DateTime64(3, 'qqq')); -- { serverError 1000 } # invalid timezone
SELECT toDateTime64('2019-09-16 19:20:11.234', 'abc'); -- { serverError 43 } # invalid scale
SELECT toDateTime64('2019-09-16 19:20:11.234', 100); -- { serverError 69 } # too big scale
SELECT toDateTime64('2019-09-16 19:20:11.234', 3, 'qqq'); -- { serverError 1000 } # invalid timezone
SELECT ignore(now64(gccMurmurHash())); -- { serverError 43 } # Illegal argument type
SELECT ignore(now64('abcd')); -- { serverError 43 } # Illegal argument type
SELECT ignore(now64(number)) FROM system.numbers LIMIT 10; -- { serverError 43 } # Illegal argument type
SELECT toDateTime64('2019-09-16 19:20:11', 3, 'UTC'); -- this now works OK and produces timestamp with no subsecond part
CREATE TABLE A(t DateTime64(3, 'UTC')) ENGINE = MergeTree() ORDER BY t;

View File

@@ -1,9 +1,17 @@
***date dict***
0.33
0.42
0.46
0
***datetime dict***
0.33
0.42
0.46
0
***ip trie dict***
17501
NP
***hierarchy dict***
Moscow
[3,2,1,10000]
1

View File

@@ -4,7 +4,9 @@ DROP DATABASE IF EXISTS database_for_dict;
CREATE DATABASE database_for_dict Engine = Ordinary;
CREATE TABLE database_for_dict.table_for_dict
SELECT '***date dict***';
CREATE TABLE database_for_dict.date_table
(
CountryID UInt64,
StartDate Date,
@@ -14,9 +16,9 @@ CREATE TABLE database_for_dict.table_for_dict
ENGINE = MergeTree()
ORDER BY CountryID;
INSERT INTO database_for_dict.table_for_dict VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
INSERT INTO database_for_dict.table_for_dict VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
INSERT INTO database_for_dict.table_for_dict VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46);
INSERT INTO database_for_dict.date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
INSERT INTO database_for_dict.date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
INSERT INTO database_for_dict.date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46);
CREATE DICTIONARY database_for_dict.dict1
(
@@ -26,7 +28,7 @@ CREATE DICTIONARY database_for_dict.dict1
Tax Float64
)
PRIMARY KEY CountryID
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' DB 'database_for_dict'))
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'date_table' DB 'database_for_dict'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(RANGE_HASHED())
RANGE(MIN StartDate MAX EndDate);
@@ -36,6 +38,42 @@ SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(1), toDate('201
SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(2), toDate('2019-05-29'));
SELECT dictGetFloat64('database_for_dict.dict1', 'Tax', toUInt64(2), toDate('2019-05-31'));
SELECT '***datetime dict***';
CREATE TABLE database_for_dict.datetime_table
(
CountryID UInt64,
StartDate DateTime,
EndDate DateTime,
Tax Float64
)
ENGINE = MergeTree()
ORDER BY CountryID;
INSERT INTO database_for_dict.datetime_table VALUES(1, toDateTime('2019-05-05 00:00:00'), toDateTime('2019-05-20 00:00:00'), 0.33);
INSERT INTO database_for_dict.datetime_table VALUES(1, toDateTime('2019-05-21 00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.42);
INSERT INTO database_for_dict.datetime_table VALUES(2, toDateTime('2019-05-21 00:00:00'), toDateTime('2019-05-30 00:00:00'), 0.46);
CREATE DICTIONARY database_for_dict.dict2
(
CountryID UInt64,
StartDate DateTime,
EndDate DateTime,
Tax Float64
)
PRIMARY KEY CountryID
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'datetime_table' DB 'database_for_dict'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(RANGE_HASHED())
RANGE(MIN StartDate MAX EndDate);
SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-15 00:00:00'));
SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(1), toDateTime('2019-05-29 00:00:00'));
SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-29 00:00:00'));
SELECT dictGetFloat64('database_for_dict.dict2', 'Tax', toUInt64(2), toDateTime('2019-05-31 00:00:00'));
SELECT '***ip trie dict***';
CREATE TABLE database_for_dict.table_ip_trie
(
prefix String,
@@ -61,6 +99,8 @@ LIFETIME(MIN 10 MAX 100);
SELECT dictGetUInt32('database_for_dict.dict_ip_trie', 'asn', tuple(IPv4StringToNum('202.79.32.0')));
SELECT dictGetString('database_for_dict.dict_ip_trie', 'cca2', tuple(IPv4StringToNum('202.79.32.0')));
SELECT '***hierarchy dict***';
CREATE TABLE database_for_dict.table_with_hierarchy
(
RegionID UInt64,
@@ -91,4 +131,3 @@ SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(7), toUI
SELECT dictIsIn('database_for_dict.dictionary_with_hierarchy', toUInt64(1), toUInt64(5));
DROP DATABASE IF EXISTS database_for_dict;

View File

@@ -0,0 +1,3 @@
2.3333333333333335
nan
1

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CUR_DIR/../shell_config.sh
${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, weight) FROM (SELECT t.1 AS x, t.2 AS weight FROM (SELECT arrayJoin([(1, 5), (2, 4), (3, 3), (4, 2), (5, 1)]) AS t));"
${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, weight) FROM (SELECT t.1 AS x, t.2 AS weight FROM (SELECT arrayJoin([(1, 0), (2, 0), (3, 0), (4, 0), (5, 0)]) AS t));"
echo `${CLICKHOUSE_CLIENT} --server_logs_file=/dev/null --query="SELECT avgWeighted(toDecimal64(0, 0), toFloat64(0))" 2>&1` \
| grep -c 'Code: 43. DB::Exception: .* DB::Exception:.* Different types .* of arguments for aggregate function avgWeighted'
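The "nan" line in the reference output is deliberate: with a Float64 result the implementation allows division by zero (the is_iec559 branch earlier in this diff), so a zero total weight produces an IEEE 754 quiet NaN rather than an exception:

```cpp
#include <cmath>
#include <cstdio>

int main()
{
    double numerator = 0, denominator = 0; // all weights are zero
    double avg = numerator / denominator;  // IEEE 754: 0/0 -> quiet NaN
    std::printf("%d\n", std::isnan(avg));  // 1
}
```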

View File

@@ -13,7 +13,7 @@ cd build/build_docker
ccache --show-stats ||:
ccache --zero-stats ||:
rm -f CMakeCache.txt
cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS
cmake .. -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS
ninja
ccache --show-stats ||:
mv ./dbms/programs/clickhouse* /output

View File

@@ -1,6 +1,6 @@
# HDFS {#table_engines-hdfs}
This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.htmll)via ClickHouse. This engine is similar
This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar
to the [File](file.md) and [URL](url.md) engines, but provides Hadoop-specific features.
## Usage

View File

@@ -189,7 +189,7 @@ SELECT geohashDecode('ezs42') AS res
└─────────────────────────────────┘
```
## h3IsValid {#h3IsValid}
## h3IsValid {#h3isvalid}
Checks whether an H3 index is valid.
@@ -234,7 +234,7 @@ h3GetResolution(h3index)
**Returned values**
- Grid resolution, in the range 0 to 15.
- For a non-existent identifier an arbitrary value may be returned; use [h3IsValid](#h3IsValid) to validate identifiers
- For a non-existent identifier an arbitrary value may be returned; use [h3IsValid](#h3isvalid) to validate identifiers
Type: [UInt8](../../data_types/int_uint.md).

View File

@@ -82,11 +82,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
### Constraints {#constraints}
WARNING: This feature is experimental. Correct work is not guaranteed on non-MergeTree family engines.
Along with columns descriptions constraints could be defined:
``sql
```sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec] [TTL expr1],
@@ -100,15 +98,15 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Adding large amount of constraints can negatively affect performance of big `INSERT` queries.
### TTL expression
### TTL Expression
Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../operations/table_engines/mergetree.md#table_engine-mergetree-ttl).
## Column Compression Codecs {#codecs}
### Column Compression Codecs {#codecs}
By default, ClickHouse applies to columns the compression method, defined in [server settings](../operations/server_settings/settings.md#compression). Also, you can define compression method for each individual column in the `CREATE TABLE` query.
By default, ClickHouse applies the compression method, defined in [server settings](../operations/server_settings/settings.md#compression), to columns. You can also define the compression method for each individual column in the `CREATE TABLE` query.
```
```sql
CREATE TABLE codec_example
(
dt Date CODEC(ZSTD),
@@ -121,23 +119,23 @@ ENGINE = <Engine>
...
```
If a codec is specified, the default codec doesn't apply. Codecs can be combined in a pipeline, for example, `CODEC(Delta, ZSTD)`. To select the best codecs combination for you project, pass benchmarks, similar to described in the Altinity [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) article.
If a codec is specified, the default codec doesn't apply. Codecs can be combined in a pipeline, for example, `CODEC(Delta, ZSTD)`. To select the best codec combination for you project, pass benchmarks similar to described in the Altinity [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) article.
!!!warning
You cannot decompress ClickHouse database files with external utilities, for example, `lz4`. Use the special utility, [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/dbms/programs/compressor).
!!!warning "Warning"
You can't decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/yandex/ClickHouse/tree/master/dbms/programs/compressor) utility.
Compression is supported for the table engines:
Compression is supported for the following table engines:
- [*MergeTree](../operations/table_engines/mergetree.md) family
- [*Log](../operations/table_engines/log_family.md) family
- [MergeTree](../operations/table_engines/mergetree.md) family
- [Log](../operations/table_engines/log_family.md) family
- [Set](../operations/table_engines/set.md)
- [Join](../operations/table_engines/join.md)
ClickHouse supports common purpose codecs and specialized codecs.
### Specialized codecs {#create-query-specialized-codecs}
#### Specialized Codecs {#create-query-specialized-codecs}
These codecs are designed to make compression more effective using specifities of the data. Some of this codecs don't compress data by itself, but they prepare data to be compressed better by common purpose codecs.
These codecs are designed to make compression more effective by using specific features of data. Some of these codecs don't compress data themself. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation.
Specialized codecs:
@@ -157,7 +155,7 @@ CREATE TABLE codec_example
ENGINE = MergeTree()
```
### Common purpose codecs {#create-query-common-purpose-codecs}
#### Common purpose codecs {#create-query-common-purpose-codecs}
Codecs:
@@ -166,7 +164,8 @@ Codecs:
- `LZ4HC[(level)]` — LZ4 HC (high compression) algorithm with configurable level. Default level: 9. Setting `level <= 0` applies the default level. Possible levels: [1, 12]. Recommended level range: [4, 9].
- `ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: [1, 22]. Default value: 1.
High compression levels useful for asymmetric scenarios, like compress once, decompress a lot of times. Greater levels stands for better compression and higher CPU usage.
High compression levels are useful for asymmetric scenarios, like compress once, decompress repeatedly. Higher levels mean better compression and higher CPU usage.
## Temporary Tables