Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-10 09:32:06 +00:00)

Commit cb763f03e9: Merge branch 'master' into simplify-clickhouse-test
@@ -59,7 +59,7 @@ if (NOT MAKE_STATIC_LIBRARIES)
 endif ()
 
 if (SPLIT_SHARED_LIBRARIES)
-    set (LINK_MODE SHARED)
+    set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "")
 endif ()
 
 if (USE_STATIC_LIBRARIES)
@@ -1,9 +1,8 @@
 if (OS_FREEBSD)
     find_library (EXECINFO_LIBRARY execinfo)
     find_library (ELF_LIBRARY elf)
-    message (STATUS "Using execinfo: ${EXECINFO_LIBRARY}")
-    message (STATUS "Using elf: ${ELF_LIBRARY}")
+    set (EXECINFO_LIBRARIES ${EXECINFO_LIBRARY} ${ELF_LIBRARY})
+    message (STATUS "Using execinfo: ${EXECINFO_LIBRARIES}")
 else ()
-    set (EXECINFO_LIBRARY "")
-    set (ELF_LIBRARY "")
+    set (EXECINFO_LIBRARIES "")
 endif ()
@@ -41,7 +41,7 @@ set( thriftcpp_threads_SOURCES
     ${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp
     ${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp
 )
 
-add_library(${THRIFT_LIBRARY} ${LINK_MODE} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
+add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
 set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641
 target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src PRIVATE ${Boost_INCLUDE_DIRS})
@@ -149,7 +149,7 @@ if (ARROW_WITH_ZSTD)
 endif()
 
 
-add_library(${ARROW_LIBRARY} ${LINK_MODE} ${ARROW_SRCS})
+add_library(${ARROW_LIBRARY} ${ARROW_SRCS})
 target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS})
 target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} Threads::Threads)
 if (ARROW_WITH_LZ4)
@@ -195,7 +195,7 @@ list(APPEND PARQUET_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_constants.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_types.cpp
 )
-add_library(${PARQUET_LIBRARY} ${LINK_MODE} ${PARQUET_SRCS})
+add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS})
 target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
 include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h
 target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} ${Boost_REGEX_LIBRARY})
@@ -24,7 +24,7 @@ endif ()
 
 configure_file(config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
 
-add_library(base64 ${LINK_MODE}
+add_library(base64
     ${LIBRARY_DIR}/lib/lib.c
     ${LIBRARY_DIR}/lib/codec_choose.c
     ${LIBRARY_DIR}/lib/arch/avx/codec.c
@@ -20,7 +20,7 @@ endif()
 
 macro(add_boost_lib lib_name)
     add_headers_and_sources(boost_${lib_name} ${LIBRARY_DIR}/libs/${lib_name}/src)
-    add_library(boost_${lib_name}_internal ${LINK_MODE} ${boost_${lib_name}_sources})
+    add_library(boost_${lib_name}_internal ${boost_${lib_name}_sources})
     target_include_directories(boost_${lib_name}_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
     target_compile_definitions(boost_${lib_name}_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
 endmacro()
@@ -28,6 +28,6 @@ set(SRCS
     ${BROTLI_SOURCE_DIR}/common/transform.c
 )
 
-add_library(brotli ${LINK_MODE} ${SRCS})
+add_library(brotli ${SRCS})
 
 target_include_directories(brotli PUBLIC ${BROTLI_SOURCE_DIR}/include)
@@ -1,6 +1,6 @@
 SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/cctz)
 
-add_library(cctz ${LINK_MODE}
+add_library(cctz
     ${LIBRARY_DIR}/src/civil_time_detail.cc
     ${LIBRARY_DIR}/src/time_zone_fixed.cc
     ${LIBRARY_DIR}/src/time_zone_format.cc
@@ -23,7 +23,7 @@ set(SRCS
     ${CPPKAFKA_DIR}/src/consumer.cpp
 )
 
-add_library(cppkafka ${LINK_MODE} ${SRCS})
+add_library(cppkafka ${SRCS})
 
 target_link_libraries(cppkafka PRIVATE ${RDKAFKA_LIBRARY})
 target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka)
@@ -1,4 +1,4 @@
-add_library (btrie
+add_library(btrie
     src/btrie.c
     include/btrie.h
 )
@@ -183,7 +183,7 @@ set(SRCS
 )
 
 # target
-add_library(hdfs3 STATIC ${SRCS} ${PROTO_SOURCES} ${PROTO_HEADERS})
+add_library(hdfs3 ${SRCS} ${PROTO_SOURCES} ${PROTO_HEADERS})
 
 if (USE_INTERNAL_PROTOBUF_LIBRARY)
     add_dependencies(hdfs3 protoc)
@@ -54,7 +54,7 @@ set(SRCS
     ${RDKAFKA_SOURCE_DIR}/rdgz.c
 )
 
-add_library(rdkafka ${LINK_MODE} ${SRCS})
+add_library(rdkafka ${SRCS})
 target_include_directories(rdkafka SYSTEM PUBLIC include)
 target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used.
 target_include_directories(rdkafka SYSTEM PRIVATE ${ZSTD_INCLUDE_DIR}/common) # Because wrong path to "zstd_errors.h" is used.
@@ -16,7 +16,7 @@ set(SRCS
     ${LIBXML2_SOURCE_DIR}/debugXML.c
     ${LIBXML2_SOURCE_DIR}/xpointer.c
     ${LIBXML2_SOURCE_DIR}/DOCBparser.c
-    ${LIBXML2_SOURCE_DIR}/xmlcatalog.c
+    #${LIBXML2_SOURCE_DIR}/xmlcatalog.c
     ${LIBXML2_SOURCE_DIR}/c14n.c
     ${LIBXML2_SOURCE_DIR}/xmlreader.c
     ${LIBXML2_SOURCE_DIR}/xmlstring.c
@@ -26,7 +26,7 @@ set(SRCS
     ${LIBXML2_SOURCE_DIR}/trionan.c
     ${LIBXML2_SOURCE_DIR}/pattern.c
     ${LIBXML2_SOURCE_DIR}/globals.c
-    ${LIBXML2_SOURCE_DIR}/xmllint.c
+    #${LIBXML2_SOURCE_DIR}/xmllint.c
     ${LIBXML2_SOURCE_DIR}/chvalid.c
     ${LIBXML2_SOURCE_DIR}/relaxng.c
     ${LIBXML2_SOURCE_DIR}/list.c
@@ -40,7 +40,7 @@ set(SRCS
     ${LIBXML2_SOURCE_DIR}/xmlschemas.c
     ${LIBXML2_SOURCE_DIR}/SAX2.c
     ${LIBXML2_SOURCE_DIR}/threads.c
-    ${LIBXML2_SOURCE_DIR}/runsuite.c
+    #${LIBXML2_SOURCE_DIR}/runsuite.c
     ${LIBXML2_SOURCE_DIR}/catalog.c
     ${LIBXML2_SOURCE_DIR}/uri.c
     ${LIBXML2_SOURCE_DIR}/xmlmodule.c
@@ -48,7 +48,7 @@ set(SRCS
     ${LIBXML2_SOURCE_DIR}/parserInternals.c
     ${LIBXML2_SOURCE_DIR}/xmlwriter.c
     ${LIBXML2_SOURCE_DIR}/xmlunicode.c
-    ${LIBXML2_SOURCE_DIR}/runxmlconf.c
+    #${LIBXML2_SOURCE_DIR}/runxmlconf.c
     ${LIBXML2_SOURCE_DIR}/xmlmemory.c
     ${LIBXML2_SOURCE_DIR}/nanoftp.c
     ${LIBXML2_SOURCE_DIR}/xmlschemastypes.c
@@ -56,7 +56,7 @@ set(SRCS
     ${LIBXML2_SOURCE_DIR}/nanohttp.c
     ${LIBXML2_SOURCE_DIR}/schematron.c
 )
-add_library(libxml2 STATIC ${SRCS})
+add_library(libxml2 ${SRCS})
 
 target_link_libraries(libxml2 ${ZLIB_LIBRARIES})
 
@@ -2,7 +2,7 @@ set(MARIADB_CLIENT_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/mariadb-connector-c)
 set(MARIADB_CLIENT_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/mariadb-connector-c)
 
 set(SRCS
-    ${MARIADB_CLIENT_SOURCE_DIR}/libmariadb/bmove_upp.c
+    #${MARIADB_CLIENT_SOURCE_DIR}/libmariadb/bmove_upp.c
     ${MARIADB_CLIENT_SOURCE_DIR}/libmariadb/get_password.c
     ${MARIADB_CLIENT_SOURCE_DIR}/libmariadb/ma_alloc.c
     ${MARIADB_CLIENT_SOURCE_DIR}/libmariadb/ma_array.c
@@ -58,10 +58,10 @@ if(OPENSSL_LIBRARIES)
     list(APPEND SRCS ${MARIADB_CLIENT_SOURCE_DIR}/libmariadb/secure/openssl.c)
 endif()
 
-add_library(mysqlclient STATIC ${SRCS})
+add_library(mysqlclient ${SRCS})
 
 if(OPENSSL_LIBRARIES)
-    target_link_libraries(mysqlclient ${OPENSSL_LIBRARIES})
+    target_link_libraries(mysqlclient PRIVATE ${OPENSSL_LIBRARIES})
     target_compile_definitions(mysqlclient PRIVATE -D HAVE_OPENSSL -D HAVE_TLS)
 endif()
 
@@ -10,7 +10,7 @@ foreach (src ${RE2_SOURCES_})
     list(APPEND RE2_ST_SOURCES ${RE2_SOURCE_DIR}/${src})
 endforeach ()
 
-add_library (re2_st ${RE2_ST_SOURCES})
+add_library(re2_st ${RE2_ST_SOURCES})
 target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st)
 target_include_directories (re2_st PRIVATE .)
 target_include_directories (re2_st SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${RE2_SOURCE_DIR})
@@ -23,7 +23,7 @@ ${ODBC_SOURCE_DIR}/libltdl/loaders/preopen.c
     ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/libltdl/libltdlcS.c
 )
 
-add_library(ltdl ${LINK_MODE} ${SRCS})
+add_library(ltdl ${SRCS})
 
 target_include_directories(ltdl PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/libltdl)
 target_include_directories(ltdl PUBLIC ${ODBC_SOURCE_DIR}/libltdl)
@@ -273,7 +273,7 @@ ${ODBC_SOURCE_DIR}/lst/lstSetFreeFunc.c
     ${ODBC_SOURCE_DIR}/lst/_lstVisible.c
 )
 
-add_library(unixodbc ${LINK_MODE} ${SRCS})
+add_library(unixodbc ${SRCS})
 
 target_link_libraries(unixodbc ltdl)
 
@@ -125,6 +125,6 @@ IF (ZSTD_LEGACY_SUPPORT)
         ${LIBRARY_LEGACY_DIR}/zstd_v07.h)
 ENDIF (ZSTD_LEGACY_SUPPORT)
 
-ADD_LIBRARY(zstd ${LINK_MODE} ${Sources} ${Headers})
+ADD_LIBRARY(zstd ${Sources} ${Headers})
 
 target_include_directories (zstd PUBLIC ${LIBRARY_DIR})
@@ -134,7 +134,7 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctio
 list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp)
 list (APPEND dbms_headers src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h)
 
-add_library(clickhouse_common_io ${LINK_MODE} ${clickhouse_common_io_headers} ${clickhouse_common_io_sources})
+add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources})
 
 if (OS_FREEBSD)
     target_compile_definitions (clickhouse_common_io PUBLIC CLOCK_MONOTONIC_COARSE=CLOCK_MONOTONIC_FAST)
@@ -194,8 +194,7 @@ target_link_libraries (clickhouse_common_io
         ${CITYHASH_LIBRARIES}
     PRIVATE
         ${ZLIB_LIBRARIES}
-        ${EXECINFO_LIBRARY}
-        ${ELF_LIBRARY}
+        ${EXECINFO_LIBRARIES}
     PUBLIC
         ${Boost_SYSTEM_LIBRARY}
     PRIVATE
@@ -44,7 +44,7 @@ macro(clickhouse_program_add_library name)
     set(CLICKHOUSE_${name_uc}_INCLUDE ${CLICKHOUSE_${name_uc}_INCLUDE} PARENT_SCOPE)
 
     if(NOT CLICKHOUSE_ONE_SHARED)
-        add_library(clickhouse-${name}-lib ${LINK_MODE} ${CLICKHOUSE_${name_uc}_SOURCES})
+        add_library(clickhouse-${name}-lib ${CLICKHOUSE_${name_uc}_SOURCES})
 
         set(_link ${CLICKHOUSE_${name_uc}_LINK}) # can't use ${} in if()
         if(_link)
@@ -2,7 +2,7 @@ add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP
 
 link_directories(${LLVM_LIBRARY_DIRS})
 
-add_library(clickhouse-compiler-lib ${LINK_MODE}
+add_library(clickhouse-compiler-lib
     driver.cpp
     cc1_main.cpp
     cc1as_main.cpp
@@ -46,7 +46,7 @@ LLVMSupport
 #PollyISL
 #PollyPPCG
 
-PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARY} Threads::Threads
+PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads
 ${MALLOC_LIBRARIES}
 ${GLIBC_COMPATIBILITY_LIBRARIES}
 ${MEMCPY_LIBRARIES}
@@ -1,8 +1,9 @@
 
 add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP_DEBUG)
 
 link_directories(${LLVM_LIBRARY_DIRS})
 
-add_library(clickhouse-compiler-lib ${LINK_MODE}
+add_library(clickhouse-compiler-lib
     driver.cpp
     cc1_main.cpp
     cc1as_main.cpp
@@ -46,7 +47,7 @@ ${REQUIRED_LLVM_LIBRARIES}
 #PollyISL
 #PollyPPCG
 
-PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARY} Threads::Threads
+PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads
 ${MALLOC_LIBRARIES}
 ${GLIBC_COMPATIBILITY_LIBRARIES}
 ${MEMCPY_LIBRARIES}
@@ -2,7 +2,7 @@ add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP
 
 link_directories(${LLVM_LIBRARY_DIRS})
 
-add_library(clickhouse-compiler-lib ${LINK_MODE}
+add_library(clickhouse-compiler-lib
     driver.cpp
     cc1_main.cpp
     cc1gen_reproducer_main.cpp
@@ -42,7 +42,7 @@ lldCore
 
 ${REQUIRED_LLVM_LIBRARIES}
 
-PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARY} Threads::Threads
+PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads
 ${MALLOC_LIBRARIES}
 ${GLIBC_COMPATIBILITY_LIBRARIES}
 ${MEMCPY_LIBRARIES}
|
@ -2,7 +2,7 @@ add_definitions(-Wno-error -Wno-unused-parameter -Wno-non-virtual-dtor -U_LIBCPP
|
||||
|
||||
link_directories(${LLVM_LIBRARY_DIRS})
|
||||
|
||||
add_library(clickhouse-compiler-lib ${LINK_MODE}
|
||||
add_library(clickhouse-compiler-lib
|
||||
driver.cpp
|
||||
cc1_main.cpp
|
||||
cc1as_main.cpp
|
||||
@@ -42,7 +42,7 @@ lldCore
 
 ${REQUIRED_LLVM_LIBRARIES}
 
-PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARY} Threads::Threads
+PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARIES} Threads::Threads
 ${MALLOC_LIBRARIES}
 ${GLIBC_COMPATIBILITY_LIBRARIES}
 ${MEMCPY_LIBRARIES}
@@ -298,6 +298,8 @@ std::unordered_map<std::string, std::vector<std::size_t>> getTestQueryIndexes(co
 {
     std::unordered_map<std::string, std::vector<std::size_t>> result;
     const auto & options = parsed_opts.options;
+    if (options.empty())
+        return result;
     for (size_t i = 0; i < options.size() - 1; ++i)
     {
         const auto & opt = options[i];
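The guard added above matters because options.size() is unsigned: with an empty option list, options.size() - 1 wraps around to a huge value and the loop below would run with out-of-range indexes. A minimal standalone illustration of that failure mode (illustrative code only, not part of this commit):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<int> options;               // empty: size() == 0
        std::size_t bound = options.size() - 1; // unsigned wrap-around, not -1
        std::cout << bound << '\n';             // prints 18446744073709551615 on 64-bit platforms

        // Hence the early return: without it, "i < options.size() - 1"
        // is effectively always true for an empty vector.
        if (options.empty())
            return 0;
        return 0;
    }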
@@ -9,55 +9,97 @@
 namespace DB
 {
 
 namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int BAD_ARGUMENTS;
 }
 
 namespace
 {
 
 /// Substitute return type for Date and DateTime
-class AggregateFunctionGroupUniqArrayDate : public AggregateFunctionGroupUniqArray<DataTypeDate::FieldType>
+template <typename has_limit>
+class AggregateFunctionGroupUniqArrayDate : public AggregateFunctionGroupUniqArray<DataTypeDate::FieldType, has_limit>
 {
 public:
-    AggregateFunctionGroupUniqArrayDate(const DataTypePtr & argument_type) : AggregateFunctionGroupUniqArray<DataTypeDate::FieldType>(argument_type) {}
+    AggregateFunctionGroupUniqArrayDate(const DataTypePtr & argument_type, UInt64 max_elems_ = std::numeric_limits<UInt64>::max()) : AggregateFunctionGroupUniqArray<DataTypeDate::FieldType, has_limit>(argument_type, max_elems_) {}
     DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()); }
 };
 
-class AggregateFunctionGroupUniqArrayDateTime : public AggregateFunctionGroupUniqArray<DataTypeDateTime::FieldType>
+template <typename has_limit>
+class AggregateFunctionGroupUniqArrayDateTime : public AggregateFunctionGroupUniqArray<DataTypeDateTime::FieldType, has_limit>
 {
 public:
-    AggregateFunctionGroupUniqArrayDateTime(const DataTypePtr & argument_type) : AggregateFunctionGroupUniqArray<DataTypeDateTime::FieldType>(argument_type) {}
+    AggregateFunctionGroupUniqArrayDateTime(const DataTypePtr & argument_type, UInt64 max_elems_ = std::numeric_limits<UInt64>::max()) : AggregateFunctionGroupUniqArray<DataTypeDateTime::FieldType, has_limit>(argument_type, max_elems_) {}
     DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()); }
 };
 
 
-static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type)
+template <typename has_limit, typename ... TArgs>
+static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, TArgs && ... args)
 {
     WhichDataType which(argument_type);
-    if (which.idx == TypeIndex::Date) return new AggregateFunctionGroupUniqArrayDate(argument_type);
-    else if (which.idx == TypeIndex::DateTime) return new AggregateFunctionGroupUniqArrayDateTime(argument_type);
+    if (which.idx == TypeIndex::Date) return new AggregateFunctionGroupUniqArrayDate<has_limit>(argument_type, std::forward<TArgs>(args)...);
+    else if (which.idx == TypeIndex::DateTime) return new AggregateFunctionGroupUniqArrayDateTime<has_limit>(argument_type, std::forward<TArgs>(args)...);
     else
     {
         /// Check that we can use plain version of AggreagteFunctionGroupUniqArrayGeneric
         if (argument_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
-            return new AggreagteFunctionGroupUniqArrayGeneric<true>(argument_type);
+            return new AggreagteFunctionGroupUniqArrayGeneric<true, has_limit>(argument_type, std::forward<TArgs>(args)...);
         else
-            return new AggreagteFunctionGroupUniqArrayGeneric<false>(argument_type);
+            return new AggreagteFunctionGroupUniqArrayGeneric<false, has_limit>(argument_type, std::forward<TArgs>(args)...);
     }
 }
 
+template <typename has_limit, typename ... TArgs>
+inline AggregateFunctionPtr createAggregateFunctionGroupUniqArrayImpl(const std::string & name, const DataTypePtr & argument_type, TArgs ... args)
+{
+    AggregateFunctionPtr res(createWithNumericType<AggregateFunctionGroupUniqArray, has_limit, const DataTypePtr &, TArgs...>(*argument_type, argument_type, std::forward<TArgs>(args)...));
+
+    if (!res)
+        res = AggregateFunctionPtr(createWithExtraTypes<has_limit>(argument_type, std::forward<TArgs>(args)...));
+
+    if (!res)
+        throw Exception("Illegal type " + argument_type->getName() +
+            " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+    return res;
+}
+
 AggregateFunctionPtr createAggregateFunctionGroupUniqArray(const std::string & name, const DataTypes & argument_types, const Array & parameters)
 {
-    assertNoParameters(name, parameters);
     assertUnary(name, argument_types);
 
-    AggregateFunctionPtr res(createWithNumericType<AggregateFunctionGroupUniqArray>(*argument_types[0], argument_types[0]));
+    bool limit_size = false;
+    UInt64 max_elems = std::numeric_limits<UInt64>::max();
 
-    if (!res)
-        res = AggregateFunctionPtr(createWithExtraTypes(argument_types[0]));
+    if (parameters.empty())
+    {
+        // no limit
+    }
+    else if (parameters.size() == 1)
+    {
+        auto type = parameters[0].getType();
+        if (type != Field::Types::Int64 && type != Field::Types::UInt64)
+            throw Exception("Parameter for aggregate function " + name + " should be positive number", ErrorCodes::BAD_ARGUMENTS);
 
-    if (!res)
-        throw Exception("Illegal type " + argument_types[0]->getName() +
-            " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
+            (type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
+            throw Exception("Parameter for aggregate function " + name + " should be positive number", ErrorCodes::BAD_ARGUMENTS);
 
-    return res;
+        limit_size = true;
+        max_elems = parameters[0].get<UInt64>();
+    }
+    else
+        throw Exception("Incorrect number of parameters for aggregate function " + name + ", should be 0 or 1",
+            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+    if (!limit_size)
+        return createAggregateFunctionGroupUniqArrayImpl<std::false_type>(name, argument_types[0]);
+    else
+        return createAggregateFunctionGroupUniqArrayImpl<std::true_type>(name, argument_types[0], max_elems);
 }
 
 }
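The factory now chooses between a limited and an unlimited variant at compile time: std::true_type or std::false_type is passed as the has_limit template argument and becomes the constexpr flag limit_num_elems inside the aggregate function, so the unlimited path pays no extra per-row check. A reduced sketch of that pattern (simplified stand-in types, not the real ClickHouse classes):

    #include <cstdint>
    #include <limits>
    #include <type_traits>
    #include <unordered_set>

    // Tlimit_num_elem is std::true_type or std::false_type; ::value is a
    // compile-time constant, so the branch below can be optimized away.
    template <typename T, typename Tlimit_num_elem>
    struct BoundedUniqSet
    {
        static constexpr bool limit_num_elems = Tlimit_num_elem::value;
        std::unordered_set<T> values;
        std::uint64_t max_elems = std::numeric_limits<std::uint64_t>::max();

        void add(const T & x)
        {
            if (limit_num_elems && values.size() >= max_elems)
                return;            // drop further values once the cap is reached
            values.insert(x);
        }
    };

    int main()
    {
        BoundedUniqSet<int, std::true_type> limited;
        limited.max_elems = 2;
        for (int x : {1, 2, 3, 4})
            limited.add(x);        // keeps at most 2 distinct values

        BoundedUniqSet<int, std::false_type> unlimited;
        for (int x : {1, 2, 3, 4})
            unlimited.add(x);      // no cap
    }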
@@ -36,16 +36,21 @@ struct AggregateFunctionGroupUniqArrayData
 
 
 /// Puts all values to the hash set. Returns an array of unique values. Implemented for numeric types.
-template <typename T>
+template <typename T, typename Tlimit_num_elem>
 class AggregateFunctionGroupUniqArray
-    : public IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>, AggregateFunctionGroupUniqArray<T>>
+    : public IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>, AggregateFunctionGroupUniqArray<T, Tlimit_num_elem>>
 {
+    static constexpr bool limit_num_elems = Tlimit_num_elem::value;
+    UInt64 max_elems;
+
 private:
     using State = AggregateFunctionGroupUniqArrayData<T>;
 
 public:
-    AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type)
-        : IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>, AggregateFunctionGroupUniqArray<T>>({argument_type}, {}) {}
+    AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
+        : IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayData<T>,
+            AggregateFunctionGroupUniqArray<T, Tlimit_num_elem>>({argument_type}, {}),
+        max_elems(max_elems_) {}
 
     String getName() const override { return "groupUniqArray"; }
 
@@ -56,12 +61,27 @@ public:
 
     void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
     {
+        if (limit_num_elems && this->data(place).value.size() >= max_elems)
+            return;
         this->data(place).value.insert(static_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
     }
 
     void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
     {
-        this->data(place).value.merge(this->data(rhs).value);
+        if (!limit_num_elems)
+            this->data(place).value.merge(this->data(rhs).value);
+        else
+        {
+            auto & cur_set = this->data(place).value;
+            auto & rhs_set = this->data(rhs).value;
+
+            for (auto & rhs_elem : rhs_set)
+            {
+                if (cur_set.size() >= max_elems)
+                    return;
+                cur_set.insert(rhs_elem.getValue());
+            }
+        }
     }
 
     void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
@@ -111,25 +131,43 @@ struct AggreagteFunctionGroupUniqArrayGenericData
     Set value;
 };
 
+/// Helper function for deserialize and insert for the class AggreagteFunctionGroupUniqArrayGeneric
+template <bool is_plain_column>
+static StringRef getSerializationImpl(const IColumn & column, size_t row_num, Arena & arena);
+
+template <bool is_plain_column>
+static void deserializeAndInsertImpl(StringRef str, IColumn & data_to);
+
 /** Template parameter with true value should be used for columns that store their elements in memory continuously.
   * For such columns groupUniqArray() can be implemented more efficiently (especially for small numeric arrays).
   */
-template <bool is_plain_column = false>
+template <bool is_plain_column = false, typename Tlimit_num_elem = std::false_type>
 class AggreagteFunctionGroupUniqArrayGeneric
-    : public IAggregateFunctionDataHelper<AggreagteFunctionGroupUniqArrayGenericData, AggreagteFunctionGroupUniqArrayGeneric<is_plain_column>>
+    : public IAggregateFunctionDataHelper<AggreagteFunctionGroupUniqArrayGenericData, AggreagteFunctionGroupUniqArrayGeneric<is_plain_column, Tlimit_num_elem>>
 {
     DataTypePtr & input_data_type;
 
+    static constexpr bool limit_num_elems = Tlimit_num_elem::value;
+    UInt64 max_elems;
+
     using State = AggreagteFunctionGroupUniqArrayGenericData;
 
-    static StringRef getSerialization(const IColumn & column, size_t row_num, Arena & arena);
+    static StringRef getSerialization(const IColumn & column, size_t row_num, Arena & arena)
+    {
+        return getSerializationImpl<is_plain_column>(column, row_num, arena);
+    }
 
-    static void deserializeAndInsert(StringRef str, IColumn & data_to);
+    static void deserializeAndInsert(StringRef str, IColumn & data_to)
+    {
+        return deserializeAndInsertImpl<is_plain_column>(str, data_to);
+    }
 
 public:
-    AggreagteFunctionGroupUniqArrayGeneric(const DataTypePtr & input_data_type)
-        : IAggregateFunctionDataHelper<AggreagteFunctionGroupUniqArrayGenericData, AggreagteFunctionGroupUniqArrayGeneric<is_plain_column>>({input_data_type}, {})
-        , input_data_type(this->argument_types[0]) {}
+    AggreagteFunctionGroupUniqArrayGeneric(const DataTypePtr & input_data_type, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
+        : IAggregateFunctionDataHelper<AggreagteFunctionGroupUniqArrayGenericData, AggreagteFunctionGroupUniqArrayGeneric<is_plain_column, Tlimit_num_elem>>({input_data_type}, {})
+        , input_data_type(this->argument_types[0])
+        , max_elems(max_elems_) {}
 
     String getName() const override { return "groupUniqArray"; }
 
@@ -174,7 +212,10 @@ public:
         bool inserted;
         State::Set::iterator it;
 
+        if (limit_num_elems && set.size() >= max_elems)
+            return;
+
         StringRef str_serialized = getSerialization(*columns[0], row_num, *arena);
 
         set.emplace(str_serialized, it, inserted);
 
         if constexpr (!is_plain_column)
@@ -198,6 +239,8 @@ public:
         State::Set::iterator it;
         for (auto & rhs_elem : rhs_set)
         {
+            if (limit_num_elems && cur_set.size() >= max_elems)
+                return ;
             cur_set.emplace(rhs_elem.getValue(), it, inserted);
             if (inserted)
             {
@@ -229,31 +272,30 @@ public:
 
 
 template <>
-inline StringRef AggreagteFunctionGroupUniqArrayGeneric<false>::getSerialization(const IColumn & column, size_t row_num, Arena & arena)
+inline StringRef getSerializationImpl<false>(const IColumn & column, size_t row_num, Arena & arena)
 {
     const char * begin = nullptr;
     return column.serializeValueIntoArena(row_num, arena, begin);
 }
 
 template <>
-inline StringRef AggreagteFunctionGroupUniqArrayGeneric<true>::getSerialization(const IColumn & column, size_t row_num, Arena &)
+inline StringRef getSerializationImpl<true>(const IColumn & column, size_t row_num, Arena &)
 {
     return column.getDataAt(row_num);
 }
 
 template <>
-inline void AggreagteFunctionGroupUniqArrayGeneric<false>::deserializeAndInsert(StringRef str, IColumn & data_to)
+inline void deserializeAndInsertImpl<false>(StringRef str, IColumn & data_to)
 {
     data_to.deserializeAndInsertFromArena(str.data);
 }
 
 template <>
-inline void AggreagteFunctionGroupUniqArrayGeneric<true>::deserializeAndInsert(StringRef str, IColumn & data_to)
+inline void deserializeAndInsertImpl<true>(StringRef str, IColumn & data_to)
 {
     data_to.insertData(str.data, str.size);
 }
 
 
-#undef AGGREGATE_FUNCTION_GROUP_ARRAY_UNIQ_MAX_SIZE
 
 }
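The previous explicit member specializations of getSerialization and deserializeAndInsert are replaced by free function templates that the members forward to, presumably because a member of a class template can only be explicitly specialized for a fully specified class; with the new Tlimit_num_elem parameter that would mean one specialization per (is_plain_column, limit) combination. Forwarding to a helper templated only on is_plain_column keeps two specializations in total. A stripped-down sketch of the forwarding pattern (hypothetical names, not the ClickHouse types):

    #include <iostream>
    #include <string>
    #include <type_traits>

    // Free function template: can be fully specialized on the single flag.
    template <bool is_plain_column>
    std::string serializeImpl(const std::string & value);

    template <>
    std::string serializeImpl<false>(const std::string & value) { return "arena:" + value; }

    template <>
    std::string serializeImpl<true>(const std::string & value) { return "plain:" + value; }

    // Class template with a second parameter; its member simply forwards, so no
    // member specialization per (is_plain_column, Tlimit_num_elem) pair is needed.
    template <bool is_plain_column, typename Tlimit_num_elem>
    struct Aggregator
    {
        static std::string serialize(const std::string & value)
        {
            return serializeImpl<is_plain_column>(value);
        }
    };

    int main()
    {
        std::cout << Aggregator<true, std::true_type>::serialize("x") << '\n';   // plain:x
        std::cout << Aggregator<false, std::false_type>::serialize("x") << '\n'; // arena:x
    }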
@@ -19,7 +19,7 @@ list(REMOVE_ITEM clickhouse_aggregate_functions_headers
     FactoryHelpers.h
 )
 
-add_library(clickhouse_aggregate_functions ${LINK_MODE} ${clickhouse_aggregate_functions_sources})
+add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_sources})
 target_link_libraries(clickhouse_aggregate_functions PRIVATE dbms PUBLIC ${CITYHASH_LIBRARIES})
 target_include_directories (clickhouse_aggregate_functions BEFORE PRIVATE ${COMMON_INCLUDE_DIR})
 
@@ -1,7 +1,7 @@
 # TODO: make separate lib datastream, block, ...
 #include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
 #add_headers_and_sources(clickhouse_client .)
-#add_library(clickhouse_client ${LINK_MODE} ${clickhouse_client_headers} ${clickhouse_client_sources})
+#add_library(clickhouse_client ${clickhouse_client_headers} ${clickhouse_client_sources})
 #target_link_libraries (clickhouse_client clickhouse_common_io ${Poco_Net_LIBRARY})
 #target_include_directories (clickhouse_client PRIVATE ${DBMS_INCLUDE_DIR})
 
@@ -2,7 +2,7 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
 
 add_headers_and_sources(clickhouse_common_config .)
 
-add_library(clickhouse_common_config ${LINK_MODE} ${clickhouse_common_config_headers} ${clickhouse_common_config_sources})
+add_library(clickhouse_common_config ${clickhouse_common_config_headers} ${clickhouse_common_config_sources})
 
 target_link_libraries(clickhouse_common_config PUBLIC common PRIVATE clickhouse_common_zookeeper string_utils PUBLIC ${Poco_XML_LIBRARY} ${Poco_Util_LIBRARY} Threads::Threads)
 target_include_directories(clickhouse_common_config PUBLIC ${DBMS_INCLUDE_DIR})
@@ -5,5 +5,5 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
 
 add_headers_and_sources(clickhouse_common_stringutils .)
 
-add_library(string_utils ${LINK_MODE} ${clickhouse_common_stringutils_headers} ${clickhouse_common_stringutils_sources})
+add_library(string_utils ${clickhouse_common_stringutils_headers} ${clickhouse_common_stringutils_sources})
 target_include_directories (string_utils PRIVATE ${DBMS_INCLUDE_DIR})
@@ -2,7 +2,7 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
 
 add_headers_and_sources(clickhouse_common_zookeeper .)
 
-add_library(clickhouse_common_zookeeper ${LINK_MODE} ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources})
+add_library(clickhouse_common_zookeeper ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources})
 
 target_link_libraries (clickhouse_common_zookeeper PUBLIC clickhouse_common_io common PRIVATE string_utils PUBLIC ${Poco_Util_LIBRARY} Threads::Threads)
 target_include_directories(clickhouse_common_zookeeper PUBLIC ${DBMS_INCLUDE_DIR})
@@ -1,6 +1,6 @@
 include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
 add_headers_and_sources(clickhouse_compression .)
-add_library(clickhouse_compression ${LINK_MODE} ${clickhouse_compression_headers} ${clickhouse_compression_sources})
+add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources})
 target_link_libraries(clickhouse_compression PRIVATE clickhouse_parsers clickhouse_common_io ${ZSTD_LIBRARY} ${LZ4_LIBRARY} ${CITYHASH_LIBRARIES})
 target_include_directories(clickhouse_compression PUBLIC ${DBMS_INCLUDE_DIR})
 target_include_directories(clickhouse_compression SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
@@ -153,7 +153,8 @@ struct Settings
     \
     M(SettingBool, add_http_cors_header, false, "Write add http CORS header.") \
     \
-    M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow and TSKV formats).") \
+    M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).") \
+    M(SettingBool, input_format_with_names_use_header, false, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.") \
     M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \
     M(SettingBool, input_format_defaults_for_omitted_fields, false, "For input data calculate default expressions for omitted fields (it works for JSONEachRow format).") \
     \
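Settings such as input_format_with_names_use_header are declared through an X-macro list: each M(type, name, default, description) entry is expanded several times with different definitions of M to generate the member, its default value and its documentation. A toy illustration of the mechanism (not the actual ClickHouse macro set):

    #include <iostream>

    // The single source of truth: one M(...) entry per setting.
    #define APPLY_FOR_SETTINGS(M) \
        M(bool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data.") \
        M(bool, input_format_with_names_use_header, false, "Use the header of *WithNames formats to map columns by name.")

    struct Settings
    {
        // Expansion 1: declare a member with its default value.
    #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) TYPE NAME = DEFAULT;
        APPLY_FOR_SETTINGS(DECLARE)
    #undef DECLARE
    };

    int main()
    {
        Settings settings;
        settings.input_format_with_names_use_header = true;

        // Expansion 2: reuse the same list to print documentation.
    #define PRINT_DOC(TYPE, NAME, DEFAULT, DESCRIPTION) std::cout << #NAME << ": " << DESCRIPTION << '\n';
        APPLY_FOR_SETTINGS(PRINT_DOC)
    #undef PRINT_DOC
    }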
@@ -14,7 +14,7 @@ add_headers_and_sources(clickhouse_dictionaries ${CMAKE_CURRENT_BINARY_DIR}/gene
 list(REMOVE_ITEM clickhouse_dictionaries_sources DictionaryFactory.cpp DictionarySourceFactory.cpp DictionaryStructure.cpp)
 list(REMOVE_ITEM clickhouse_dictionaries_headers DictionaryFactory.h DictionarySourceFactory.h DictionaryStructure.h)
 
-add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources})
+add_library(clickhouse_dictionaries ${clickhouse_dictionaries_sources})
 target_link_libraries(clickhouse_dictionaries PRIVATE dbms clickhouse_common_io ${BTRIE_LIBRARIES} PUBLIC Threads::Threads)
 
 if(Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY)
@@ -1,5 +1,5 @@
 include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
 add_headers_and_sources(clickhouse_dictionaries_embedded .)
 add_headers_and_sources(clickhouse_dictionaries_embedded GeodataProviders)
-add_library(clickhouse_dictionaries_embedded ${LINK_MODE} ${clickhouse_dictionaries_embedded_sources})
+add_library(clickhouse_dictionaries_embedded ${clickhouse_dictionaries_embedded_sources})
 target_link_libraries(clickhouse_dictionaries_embedded PRIVATE clickhouse_common_io ${MYSQLXX_LIBRARY})
@@ -19,17 +19,7 @@ namespace ErrorCodes
 }
 
 
-CSVRowInputStream::CSVRowInputStream(ReadBuffer & istr_, const Block & header_, bool with_names_, const FormatSettings & format_settings)
-    : istr(istr_), header(header_), with_names(with_names_), format_settings(format_settings)
-{
-    size_t num_columns = header.columns();
-    data_types.resize(num_columns);
-    for (size_t i = 0; i < num_columns; ++i)
-        data_types[i] = header.safeGetByPosition(i).type;
-}
-
-
-static void skipEndOfLine(ReadBuffer & istr)
+static inline void skipEndOfLine(ReadBuffer & istr)
 {
     /// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic)
 
|
||||
}
|
||||
|
||||
|
||||
static void skipDelimiter(ReadBuffer & istr, const char delimiter, bool is_last_column)
|
||||
static inline void skipDelimiter(ReadBuffer & istr, const char delimiter, bool is_last_column)
|
||||
{
|
||||
if (is_last_column)
|
||||
{
|
||||
@ -99,38 +89,149 @@ static void skipRow(ReadBuffer & istr, const FormatSettings::CSV & settings, siz
|
||||
}
|
||||
|
||||
|
||||
CSVRowInputStream::CSVRowInputStream(ReadBuffer & istr_, const Block & header_, bool with_names_, const FormatSettings & format_settings)
|
||||
: istr(istr_), header(header_), with_names(with_names_), format_settings(format_settings)
|
||||
{
|
||||
const auto num_columns = header.columns();
|
||||
|
||||
data_types.resize(num_columns);
|
||||
column_indexes_by_names.reserve(num_columns);
|
||||
|
||||
for (size_t i = 0; i < num_columns; ++i)
|
||||
{
|
||||
const auto & column_info = header.getByPosition(i);
|
||||
|
||||
data_types[i] = column_info.type;
|
||||
column_indexes_by_names.emplace(column_info.name, i);
|
||||
}
|
||||
|
||||
column_indexes_for_input_fields.reserve(num_columns);
|
||||
read_columns.assign(num_columns, false);
|
||||
}
|
||||
|
||||
|
||||
void CSVRowInputStream::setupAllColumnsByTableSchema()
|
||||
{
|
||||
read_columns.assign(header.columns(), true);
|
||||
column_indexes_for_input_fields.resize(header.columns());
|
||||
|
||||
for (size_t i = 0; i < column_indexes_for_input_fields.size(); ++i)
|
||||
{
|
||||
column_indexes_for_input_fields[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void CSVRowInputStream::addInputColumn(const String & column_name)
|
||||
{
|
||||
const auto column_it = column_indexes_by_names.find(column_name);
|
||||
if (column_it == column_indexes_by_names.end())
|
||||
{
|
||||
if (format_settings.skip_unknown_fields)
|
||||
{
|
||||
column_indexes_for_input_fields.push_back(std::nullopt);
|
||||
return;
|
||||
}
|
||||
|
||||
throw Exception(
|
||||
"Unknown field found in CSV header: '" + column_name + "' " +
|
||||
"at position " + std::to_string(column_indexes_for_input_fields.size()) +
|
||||
"\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed",
|
||||
ErrorCodes::INCORRECT_DATA
|
||||
);
|
||||
}
|
||||
|
||||
const auto column_index = column_it->second;
|
||||
|
||||
if (read_columns[column_index])
|
||||
throw Exception("Duplicate field found while parsing CSV header: " + column_name, ErrorCodes::INCORRECT_DATA);
|
||||
|
||||
read_columns[column_index] = true;
|
||||
column_indexes_for_input_fields.emplace_back(column_index);
|
||||
}
|
||||
|
||||
|
||||
void CSVRowInputStream::fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension)
|
||||
{
|
||||
/// It is safe to memorize this on the first run - the format guarantees this does not change
|
||||
if (unlikely(row_num == 1))
|
||||
{
|
||||
columns_to_fill_with_default_values.clear();
|
||||
for (size_t index = 0; index < read_columns.size(); ++index)
|
||||
if (read_columns[index] == 0)
|
||||
columns_to_fill_with_default_values.push_back(index);
|
||||
}
|
||||
|
||||
for (const auto column_index : columns_to_fill_with_default_values)
|
||||
data_types[column_index]->insertDefaultInto(*columns[column_index]);
|
||||
|
||||
row_read_extension.read_columns = read_columns;
|
||||
}
|
||||
|
||||
|
||||
void CSVRowInputStream::readPrefix()
|
||||
{
|
||||
/// In this format, we assume, that if first string field contain BOM as value, it will be written in quotes,
|
||||
/// so BOM at beginning of stream cannot be confused with BOM in first string value, and it is safe to skip it.
|
||||
skipBOMIfExists(istr);
|
||||
|
||||
size_t num_columns = data_types.size();
|
||||
String tmp;
|
||||
|
||||
if (with_names)
|
||||
skipRow(istr, format_settings.csv, num_columns);
|
||||
{
|
||||
if (format_settings.with_names_use_header)
|
||||
{
|
||||
String column_name;
|
||||
do
|
||||
{
|
||||
skipWhitespacesAndTabs(istr);
|
||||
readCSVString(column_name, istr, format_settings.csv);
|
||||
skipWhitespacesAndTabs(istr);
|
||||
|
||||
addInputColumn(column_name);
|
||||
}
|
||||
while (checkChar(format_settings.csv.delimiter, istr));
|
||||
|
||||
skipDelimiter(istr, format_settings.csv.delimiter, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
setupAllColumnsByTableSchema();
|
||||
skipRow(istr, format_settings.csv, column_indexes_for_input_fields.size());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
setupAllColumnsByTableSchema();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool CSVRowInputStream::read(MutableColumns & columns, RowReadExtension &)
|
||||
bool CSVRowInputStream::read(MutableColumns & columns, RowReadExtension & ext)
|
||||
{
|
||||
if (istr.eof())
|
||||
return false;
|
||||
|
||||
updateDiagnosticInfo();
|
||||
|
||||
size_t size = data_types.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
String tmp;
|
||||
for (size_t input_position = 0; input_position < column_indexes_for_input_fields.size(); ++input_position)
|
||||
{
|
||||
skipWhitespacesAndTabs(istr);
|
||||
data_types[i]->deserializeAsTextCSV(*columns[i], istr, format_settings);
|
||||
skipWhitespacesAndTabs(istr);
|
||||
const auto & column_index = column_indexes_for_input_fields[input_position];
|
||||
if (column_index)
|
||||
{
|
||||
skipWhitespacesAndTabs(istr);
|
||||
data_types[*column_index]->deserializeAsTextCSV(*columns[*column_index], istr, format_settings);
|
||||
skipWhitespacesAndTabs(istr);
|
||||
}
|
||||
else
|
||||
{
|
||||
readCSVString(tmp, istr, format_settings.csv);
|
||||
}
|
||||
|
||||
skipDelimiter(istr, format_settings.csv.delimiter, i + 1 == size);
|
||||
skipDelimiter(istr, format_settings.csv.delimiter, input_position + 1 == column_indexes_for_input_fields.size());
|
||||
}
|
||||
|
||||
fillUnreadColumnsWithDefaults(columns, ext);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
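With input_format_with_names_use_header enabled, readPrefix maps each header name to its position in the table schema: known names become column indexes, unknown names become std::nullopt (their fields are later read into a throwaway string), and schema columns missing from the header are filled with defaults by fillUnreadColumnsWithDefaults. A condensed, self-contained sketch of that mapping step (illustrative containers, not the real stream classes):

    #include <cstddef>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <unordered_map>
    #include <vector>

    int main()
    {
        // Table schema: column name -> position, as the constructor builds it.
        std::unordered_map<std::string, std::size_t> column_indexes_by_names{
            {"id", 0}, {"name", 1}, {"value", 2}};

        // Header row actually present in the incoming CSV file.
        std::vector<std::string> csv_header{"name", "unknown_col", "id"};

        std::vector<std::optional<std::size_t>> column_indexes_for_input_fields;
        std::vector<bool> read_columns(column_indexes_by_names.size(), false);

        for (const auto & column_name : csv_header)
        {
            auto it = column_indexes_by_names.find(column_name);
            if (it == column_indexes_by_names.end())
                column_indexes_for_input_fields.push_back(std::nullopt); // value read and discarded
            else
            {
                read_columns[it->second] = true;
                column_indexes_for_input_fields.emplace_back(it->second);
            }
        }

        // Schema columns never mentioned in the header ("value" here) get defaults on every row.
        for (std::size_t i = 0; i < read_columns.size(); ++i)
            if (!read_columns[i])
                std::cout << "fill column " << i << " with default values\n";
    }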
@@ -202,86 +303,101 @@ bool OPTIMIZE(1) CSVRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumn
 {
     const char delimiter = format_settings.csv.delimiter;
 
-    size_t size = data_types.size();
-    for (size_t i = 0; i < size; ++i)
+    for (size_t input_position = 0; input_position < column_indexes_for_input_fields.size(); ++input_position)
     {
-        if (i == 0 && istr.eof())
+        if (input_position == 0 && istr.eof())
         {
             out << "<End of stream>\n";
             return false;
         }
 
-        out << "Column " << i << ", " << std::string((i < 10 ? 2 : i < 100 ? 1 : 0), ' ')
-            << "name: " << header.safeGetByPosition(i).name << ", " << std::string(max_length_of_column_name - header.safeGetByPosition(i).name.size(), ' ')
-            << "type: " << data_types[i]->getName() << ", " << std::string(max_length_of_data_type_name - data_types[i]->getName().size(), ' ');
-
-        BufferBase::Position prev_position = istr.position();
-        BufferBase::Position curr_position = istr.position();
-        std::exception_ptr exception;
-
-        try
+        if (column_indexes_for_input_fields[input_position].has_value())
         {
-            skipWhitespacesAndTabs(istr);
-            prev_position = istr.position();
-            data_types[i]->deserializeAsTextCSV(*columns[i], istr, format_settings);
-            curr_position = istr.position();
-            skipWhitespacesAndTabs(istr);
-        }
-        catch (...)
-        {
-            exception = std::current_exception();
-        }
+            const auto & column_index = *column_indexes_for_input_fields[input_position];
+            const auto & current_column_type = data_types[column_index];
 
-        if (curr_position < prev_position)
-            throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR);
+            out << "Column " << input_position << ", " << std::string((input_position < 10 ? 2 : input_position < 100 ? 1 : 0), ' ')
+                << "name: " << header.safeGetByPosition(column_index).name << ", " << std::string(max_length_of_column_name - header.safeGetByPosition(column_index).name.size(), ' ')
+                << "type: " << current_column_type->getName() << ", " << std::string(max_length_of_data_type_name - current_column_type->getName().size(), ' ');
 
-        if (isNumber(data_types[i]) || isDateOrDateTime(data_types[i]))
-        {
-            /// An empty string instead of a value.
-            if (curr_position == prev_position)
+            BufferBase::Position prev_position = istr.position();
+            BufferBase::Position curr_position = istr.position();
+            std::exception_ptr exception;
+
+            try
             {
-                out << "ERROR: text ";
-                verbosePrintString(prev_position, std::min(prev_position + 10, istr.buffer().end()), out);
-                out << " is not like " << data_types[i]->getName() << "\n";
-                return false;
+                skipWhitespacesAndTabs(istr);
+                prev_position = istr.position();
+                current_column_type->deserializeAsTextCSV(*columns[column_index], istr, format_settings);
+                curr_position = istr.position();
+                skipWhitespacesAndTabs(istr);
             }
-        }
-
-        out << "parsed text: ";
-        verbosePrintString(prev_position, curr_position, out);
-
-        if (exception)
-        {
-            if (data_types[i]->getName() == "DateTime")
-                out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
-            else if (data_types[i]->getName() == "Date")
-                out << "ERROR: Date must be in YYYY-MM-DD format.\n";
-            else
-                out << "ERROR\n";
-            return false;
-        }
-
-        out << "\n";
-
-        if (data_types[i]->haveMaximumSizeOfValue())
-        {
-            if (*curr_position != '\n' && *curr_position != '\r' && *curr_position != delimiter)
+            catch (...)
             {
-                out << "ERROR: garbage after " << data_types[i]->getName() << ": ";
-                verbosePrintString(curr_position, std::min(curr_position + 10, istr.buffer().end()), out);
-                out << "\n";
+                exception = std::current_exception();
+            }
 
-                if (data_types[i]->getName() == "DateTime")
+            if (curr_position < prev_position)
+                throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR);
+
+            if (isNumber(current_column_type) || isDateOrDateTime(current_column_type))
+            {
+                /// An empty string instead of a value.
+                if (curr_position == prev_position)
+                {
+                    out << "ERROR: text ";
+                    verbosePrintString(prev_position, std::min(prev_position + 10, istr.buffer().end()), out);
+                    out << " is not like " << current_column_type->getName() << "\n";
+                    return false;
+                }
+            }
+
+            out << "parsed text: ";
+            verbosePrintString(prev_position, curr_position, out);
+
+            if (exception)
+            {
+                if (current_column_type->getName() == "DateTime")
                     out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
-                else if (data_types[i]->getName() == "Date")
+                else if (current_column_type->getName() == "Date")
                     out << "ERROR: Date must be in YYYY-MM-DD format.\n";
+                else
+                    out << "ERROR\n";
+                return false;
+            }
 
+            out << "\n";
+
+            if (current_column_type->haveMaximumSizeOfValue())
+            {
+                if (*curr_position != '\n' && *curr_position != '\r' && *curr_position != delimiter)
+                {
+                    out << "ERROR: garbage after " << current_column_type->getName() << ": ";
+                    verbosePrintString(curr_position, std::min(curr_position + 10, istr.buffer().end()), out);
+                    out << "\n";
+
+                    if (current_column_type->getName() == "DateTime")
+                        out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
+                    else if (current_column_type->getName() == "Date")
+                        out << "ERROR: Date must be in YYYY-MM-DD format.\n";
+
                     return false;
+                }
             }
         }
+        else
+        {
+            static const String skipped_column_str = "<SKIPPED COLUMN>";
+            out << "Column " << input_position << ", " << std::string((input_position < 10 ? 2 : input_position < 100 ? 1 : 0), ' ')
+                << "name: " << skipped_column_str << ", " << std::string(max_length_of_column_name - skipped_column_str.length(), ' ')
+                << "type: " << skipped_column_str << ", " << std::string(max_length_of_data_type_name - skipped_column_str.length(), ' ');
+
+            String tmp;
+            readCSVString(tmp, istr, format_settings.csv);
+        }
 
         /// Delimiters
-        if (i + 1 == size)
+        if (input_position + 1 == column_indexes_for_input_fields.size())
         {
             if (istr.eof())
                 return false;
@@ -1,5 +1,8 @@
 #pragma once
 
+#include <optional>
+#include <unordered_map>
+
 #include <Core/Block.h>
 #include <Formats/IRowInputStream.h>
 #include <Formats/FormatSettings.h>
@@ -21,7 +24,7 @@ public:
       */
     CSVRowInputStream(ReadBuffer & istr_, const Block & header_, bool with_names_, const FormatSettings & format_settings);
 
-    bool read(MutableColumns & columns, RowReadExtension &) override;
+    bool read(MutableColumns & columns, RowReadExtension & ext) override;
     void readPrefix() override;
     bool allowSyncAfterError() const override { return true; }
     void syncAfterError() override;
@@ -36,6 +39,19 @@ private:
 
     const FormatSettings format_settings;
 
+    using IndexesMap = std::unordered_map<String, size_t>;
+    IndexesMap column_indexes_by_names;
+
+    using OptionalIndexes = std::vector<std::optional<size_t>>;
+    OptionalIndexes column_indexes_for_input_fields;
+
+    std::vector<UInt8> read_columns;
+    std::vector<size_t> columns_to_fill_with_default_values;
+
+    void addInputColumn(const String & column_name);
+    void setupAllColumnsByTableSchema();
+    void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext);
+
     /// For convenient diagnostics in case of an error.
 
     size_t row_num = 0;
@@ -40,6 +40,7 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu
     format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
     format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
     format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
+    format_settings.with_names_use_header = settings.input_format_with_names_use_header;
    format_settings.skip_unknown_fields = settings.input_format_skip_unknown_fields;
     format_settings.import_nested_json = settings.input_format_import_nested_json;
     format_settings.date_time_input_format = settings.date_time_input_format;
@@ -48,6 +48,7 @@ struct FormatSettings
     Values values;
 
     bool skip_unknown_fields = false;
+    bool with_names_use_header = false;
     bool write_statistics = true;
     bool import_nested_json = false;
 
@@ -1,3 +1,5 @@
+#include <string>
+
 #include <Core/Defines.h>
 
 #include <IO/ReadHelpers.h>
@@ -20,46 +22,14 @@ namespace ErrorCodes
 }
 
 
-TabSeparatedRowInputStream::TabSeparatedRowInputStream(
-    ReadBuffer & istr_, const Block & header_, bool with_names_, bool with_types_, const FormatSettings & format_settings)
-    : istr(istr_), header(header_), with_names(with_names_), with_types(with_types_), format_settings(format_settings)
+static void skipTSVRow(ReadBuffer & istr, const size_t num_columns)
 {
-    size_t num_columns = header.columns();
-    data_types.resize(num_columns);
+    NullSink null_sink;
 
     for (size_t i = 0; i < num_columns; ++i)
-        data_types[i] = header.safeGetByPosition(i).type;
-}
-
-
-void TabSeparatedRowInputStream::readPrefix()
-{
-    size_t num_columns = header.columns();
-    String tmp;
-
-    if (with_names || with_types)
     {
-        /// In this format, we assume that column name or type cannot contain BOM,
-        /// so, if format has header,
-        /// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it.
-        skipBOMIfExists(istr);
-    }
-
-    if (with_names)
-    {
-        for (size_t i = 0; i < num_columns; ++i)
-        {
-            readEscapedString(tmp, istr);
-            assertChar(i == num_columns - 1 ? '\n' : '\t', istr);
-        }
-    }
-
-    if (with_types)
-    {
-        for (size_t i = 0; i < num_columns; ++i)
-        {
-            readEscapedString(tmp, istr);
-            assertChar(i == num_columns - 1 ? '\n' : '\t', istr);
-        }
+        readEscapedStringInto(null_sink, istr);
+        assertChar(i == num_columns - 1 ? '\n' : '\t', istr);
     }
 }
 
@@ -77,34 +47,165 @@ static void checkForCarriageReturn(ReadBuffer & istr)
 }
 
 
-bool TabSeparatedRowInputStream::read(MutableColumns & columns, RowReadExtension &)
+TabSeparatedRowInputStream::TabSeparatedRowInputStream(
+    ReadBuffer & istr_, const Block & header_, bool with_names_, bool with_types_, const FormatSettings & format_settings)
+    : istr(istr_), header(header_), with_names(with_names_), with_types(with_types_), format_settings(format_settings)
 {
+    const auto num_columns = header.columns();
+
+    data_types.resize(num_columns);
+    column_indexes_by_names.reserve(num_columns);
+
+    for (size_t i = 0; i < num_columns; ++i)
+    {
+        const auto & column_info = header.getByPosition(i);
+
+        data_types[i] = column_info.type;
+        column_indexes_by_names.emplace(column_info.name, i);
+    }
+
+    column_indexes_for_input_fields.reserve(num_columns);
+    read_columns.assign(num_columns, false);
+}
+
+
+void TabSeparatedRowInputStream::setupAllColumnsByTableSchema()
+{
+    read_columns.assign(header.columns(), true);
+    column_indexes_for_input_fields.resize(header.columns());
+
+    for (size_t i = 0; i < column_indexes_for_input_fields.size(); ++i)
+        column_indexes_for_input_fields[i] = i;
+}
+
+
+void TabSeparatedRowInputStream::addInputColumn(const String & column_name)
+{
+    const auto column_it = column_indexes_by_names.find(column_name);
+    if (column_it == column_indexes_by_names.end())
+    {
+        if (format_settings.skip_unknown_fields)
+        {
+            column_indexes_for_input_fields.push_back(std::nullopt);
+            return;
+        }
+
+        throw Exception(
+            "Unknown field found in TSV header: '" + column_name + "' " +
+            "at position " + std::to_string(column_indexes_for_input_fields.size()) +
+            "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed",
+            ErrorCodes::INCORRECT_DATA
+        );
+    }
+
+    const auto column_index = column_it->second;
+
+    if (read_columns[column_index])
+        throw Exception("Duplicate field found while parsing TSV header: " + column_name, ErrorCodes::INCORRECT_DATA);
+
+    read_columns[column_index] = true;
+    column_indexes_for_input_fields.emplace_back(column_index);
+}
+
+
+void TabSeparatedRowInputStream::fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension)
+{
+    /// It is safe to memorize this on the first run - the format guarantees this does not change
+    if (unlikely(row_num == 1))
+    {
+        columns_to_fill_with_default_values.clear();
+        for (size_t index = 0; index < read_columns.size(); ++index)
+            if (read_columns[index] == 0)
+                columns_to_fill_with_default_values.push_back(index);
+    }
+
+    for (const auto column_index : columns_to_fill_with_default_values)
+        data_types[column_index]->insertDefaultInto(*columns[column_index]);
+
+    row_read_extension.read_columns = read_columns;
+}
+
+
+void TabSeparatedRowInputStream::readPrefix()
+{
+    if (with_names || with_types)
+    {
+        /// In this format, we assume that column name or type cannot contain BOM,
+        /// so, if format has header,
+        /// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it.
+        skipBOMIfExists(istr);
+    }
+
+    if (with_names)
+    {
+        if (format_settings.with_names_use_header)
+        {
+            String column_name;
+            do
+            {
+                readEscapedString(column_name, istr);
+                addInputColumn(column_name);
+            }
+            while (checkChar('\t', istr));
+
+            if (!istr.eof())
+            {
+                checkForCarriageReturn(istr);
+                assertChar('\n', istr);
+            }
+        }
+        else
+        {
+            setupAllColumnsByTableSchema();
+            skipTSVRow(istr, column_indexes_for_input_fields.size());
+        }
+    }
+    else
+        setupAllColumnsByTableSchema();
+
+    if (with_types)
+    {
+        skipTSVRow(istr, column_indexes_for_input_fields.size());
+    }
+}
+
+
+bool TabSeparatedRowInputStream::read(MutableColumns & columns, RowReadExtension & ext)
+{
     if (istr.eof())
         return false;
 
     updateDiagnosticInfo();
 
-    size_t size = data_types.size();
-
-    for (size_t i = 0; i < size; ++i)
+    for (size_t input_position = 0; input_position < column_indexes_for_input_fields.size(); ++input_position)
     {
-        data_types[i]->deserializeAsTextEscaped(*columns[i], istr, format_settings);
-
-        /// skip separators
-        if (i + 1 == size)
+        const auto & column_index = column_indexes_for_input_fields[input_position];
+        if (column_index)
         {
-            if (!istr.eof())
-            {
-                if (unlikely(row_num == 1))
-                    checkForCarriageReturn(istr);
-
-                assertChar('\n', istr);
-            }
+            data_types[*column_index]->deserializeAsTextEscaped(*columns[*column_index], istr, format_settings);
+        }
+        else
+        {
+            NullSink null_sink;
+            readEscapedStringInto(null_sink, istr);
+        }
+
+        /// skip separators
+        if (input_position + 1 < column_indexes_for_input_fields.size())
+        {
+            assertChar('\t', istr);
+        }
+        else if (!istr.eof())
+        {
+            if (unlikely(row_num == 1))
+                checkForCarriageReturn(istr);
+
+            assertChar('\n', istr);
         }
     }
 
+    fillUnreadColumnsWithDefaults(columns, ext);
+
     return true;
 }
 
@ -135,7 +236,7 @@ String TabSeparatedRowInputStream::getDiagnosticInfo()
if (header.safeGetByPosition(i).type->getName().size() > max_length_of_data_type_name)
max_length_of_data_type_name = header.safeGetByPosition(i).type->getName().size();

/// Roll back the cursor to the beginning of the previous or current line and pars all over again. But now we derive detailed information.
/// Roll back the cursor to the beginning of the previous or current line and parse all over again. But now we derive detailed information.

if (pos_of_prev_row)
{
@ -173,83 +274,98 @@ String TabSeparatedRowInputStream::getDiagnosticInfo()
bool OPTIMIZE(1) TabSeparatedRowInputStream::parseRowAndPrintDiagnosticInfo(
MutableColumns & columns, WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name)
{
size_t size = data_types.size();
for (size_t i = 0; i < size; ++i)
for (size_t input_position = 0; input_position < column_indexes_for_input_fields.size(); ++input_position)
{
if (i == 0 && istr.eof())
if (input_position == 0 && istr.eof())
{
out << "<End of stream>\n";
return false;
}

out << "Column " << i << ", " << std::string((i < 10 ? 2 : i < 100 ? 1 : 0), ' ')
<< "name: " << header.safeGetByPosition(i).name << ", " << std::string(max_length_of_column_name - header.safeGetByPosition(i).name.size(), ' ')
<< "type: " << data_types[i]->getName() << ", " << std::string(max_length_of_data_type_name - data_types[i]->getName().size(), ' ');

auto prev_position = istr.position();
std::exception_ptr exception;

try
if (column_indexes_for_input_fields[input_position].has_value())
{
data_types[i]->deserializeAsTextEscaped(*columns[i], istr, format_settings);
}
catch (...)
{
exception = std::current_exception();
}
const auto & column_index = *column_indexes_for_input_fields[input_position];
const auto & current_column_type = data_types[column_index];

auto curr_position = istr.position();
out << "Column " << input_position << ", " << std::string((input_position < 10 ? 2 : input_position < 100 ? 1 : 0), ' ')
<< "name: " << header.safeGetByPosition(column_index).name << ", " << std::string(max_length_of_column_name - header.safeGetByPosition(column_index).name.size(), ' ')
<< "type: " << current_column_type->getName() << ", " << std::string(max_length_of_data_type_name - current_column_type->getName().size(), ' ');

if (curr_position < prev_position)
throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR);
auto prev_position = istr.position();
std::exception_ptr exception;

if (isNumber(data_types[i]) || isDateOrDateTime(data_types[i]))
{
/// An empty string instead of a value.
if (curr_position == prev_position)
try
{
out << "ERROR: text ";
verbosePrintString(prev_position, std::min(prev_position + 10, istr.buffer().end()), out);
out << " is not like " << data_types[i]->getName() << "\n";
return false;
current_column_type->deserializeAsTextEscaped(*columns[column_index], istr, format_settings);
}
}

out << "parsed text: ";
verbosePrintString(prev_position, curr_position, out);

if (exception)
{
if (data_types[i]->getName() == "DateTime")
out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
else if (data_types[i]->getName() == "Date")
out << "ERROR: Date must be in YYYY-MM-DD format.\n";
else
out << "ERROR\n";
return false;
}

out << "\n";

if (data_types[i]->haveMaximumSizeOfValue())
{
if (*curr_position != '\n' && *curr_position != '\t')
catch (...)
{
out << "ERROR: garbage after " << data_types[i]->getName() << ": ";
verbosePrintString(curr_position, std::min(curr_position + 10, istr.buffer().end()), out);
out << "\n";
exception = std::current_exception();
}

if (data_types[i]->getName() == "DateTime")
auto curr_position = istr.position();

if (curr_position < prev_position)
throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR);

if (isNumber(current_column_type) || isDateOrDateTime(current_column_type))
{
/// An empty string instead of a value.
if (curr_position == prev_position)
{
out << "ERROR: text ";
verbosePrintString(prev_position, std::min(prev_position + 10, istr.buffer().end()), out);
out << " is not like " << current_column_type->getName() << "\n";
return false;
}
}

out << "parsed text: ";
verbosePrintString(prev_position, curr_position, out);

if (exception)
{
if (current_column_type->getName() == "DateTime")
out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
else if (data_types[i]->getName() == "Date")
else if (current_column_type->getName() == "Date")
out << "ERROR: Date must be in YYYY-MM-DD format.\n";

else
out << "ERROR\n";
return false;
}

out << "\n";

if (current_column_type->haveMaximumSizeOfValue())
{
if (*curr_position != '\n' && *curr_position != '\t')
{
out << "ERROR: garbage after " << current_column_type->getName() << ": ";
verbosePrintString(curr_position, std::min(curr_position + 10, istr.buffer().end()), out);
out << "\n";

if (current_column_type->getName() == "DateTime")
out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.\n";
else if (current_column_type->getName() == "Date")
out << "ERROR: Date must be in YYYY-MM-DD format.\n";

return false;
}
}
}
else
{
static const String skipped_column_str = "<SKIPPED COLUMN>";
out << "Column " << input_position << ", " << std::string((input_position < 10 ? 2 : input_position < 100 ? 1 : 0), ' ')
<< "name: " << skipped_column_str << ", " << std::string(max_length_of_column_name - skipped_column_str.length(), ' ')
<< "type: " << skipped_column_str << ", " << std::string(max_length_of_data_type_name - skipped_column_str.length(), ' ');

NullSink null_sink;
readEscapedStringInto(null_sink, istr);
}

/// Delimiters
if (i + 1 == size)
if (input_position + 1 == column_indexes_for_input_fields.size())
{
if (!istr.eof())
{

@ -1,5 +1,8 @@
#pragma once

#include <optional>
#include <unordered_map>

#include <Core/Block.h>
#include <Formats/FormatSettings.h>
#include <Formats/IRowInputStream.h>
@ -22,7 +25,7 @@ public:
TabSeparatedRowInputStream(
ReadBuffer & istr_, const Block & header_, bool with_names_, bool with_types_, const FormatSettings & format_settings);

bool read(MutableColumns & columns, RowReadExtension &) override;
bool read(MutableColumns & columns, RowReadExtension & ext) override;
void readPrefix() override;
bool allowSyncAfterError() const override { return true; }
void syncAfterError() override;
@ -37,6 +40,19 @@ private:
const FormatSettings format_settings;
DataTypes data_types;

using IndexesMap = std::unordered_map<String, size_t>;
IndexesMap column_indexes_by_names;

using OptionalIndexes = std::vector<std::optional<size_t>>;
OptionalIndexes column_indexes_for_input_fields;

std::vector<UInt8> read_columns;
std::vector<size_t> columns_to_fill_with_default_values;

void addInputColumn(const String & column_name);
void setupAllColumnsByTableSchema();
void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext);

/// For convenient diagnostics in case of an error.

size_t row_num = 0;

@ -7,7 +7,7 @@ add_headers_and_sources(clickhouse_functions .)
list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp)
list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h FunctionHelpers.h)

add_library(clickhouse_functions ${LINK_MODE} ${clickhouse_functions_sources})
add_library(clickhouse_functions ${clickhouse_functions_sources})

target_link_libraries(clickhouse_functions
PUBLIC

@ -36,7 +36,6 @@
#include <AggregateFunctions/parseAggregateFunctionParameters.h>

#include <Storages/StorageDistributed.h>
#include <Storages/StorageMemory.h>
#include <Storages/StorageJoin.h>

#include <DataStreams/copyData.h>

@ -1,6 +1,6 @@
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(clickhouse_parsers .)
add_library(clickhouse_parsers ${LINK_MODE} ${clickhouse_parsers_headers} ${clickhouse_parsers_sources})
add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources})
target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io)
target_include_directories(clickhouse_parsers PUBLIC ${DBMS_INCLUDE_DIR})

@ -1,7 +1,7 @@
if(USE_RDKAFKA)
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(clickhouse_storage_kafka .)
add_library(clickhouse_storage_kafka ${LINK_MODE} ${clickhouse_storage_kafka_sources})
add_library(clickhouse_storage_kafka ${clickhouse_storage_kafka_sources})
target_link_libraries(clickhouse_storage_kafka PRIVATE clickhouse_common_io ${CPPKAFKA_LIBRARY} ${RDKAFKA_LIBRARY})
if(NOT USE_INTERNAL_RDKAFKA_LIBRARY)
target_include_directories(clickhouse_storage_kafka SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR})

@ -11,5 +11,5 @@ configure_file (StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD})
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(storages_system .)
list (APPEND storages_system_sources ${CONFIG_BUILD})
add_library(clickhouse_storages_system ${LINK_MODE} ${storages_system_headers} ${storages_system_sources})
add_library(clickhouse_storages_system ${storages_system_headers} ${storages_system_sources})
target_link_libraries(clickhouse_storages_system PRIVATE dbms common string_utils clickhouse_common_zookeeper)

@ -4,5 +4,5 @@ add_headers_and_sources(clickhouse_table_functions .)
list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp TableFunctionFactory.cpp)
list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h TableFunctionFactory.h)

add_library(clickhouse_table_functions ${LINK_MODE} ${clickhouse_table_functions_sources})
add_library(clickhouse_table_functions ${clickhouse_table_functions_sources})
target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_storages_system dbms ${Poco_Foundation_LIBRARY})

@ -19,6 +19,8 @@ from errno import ESRCH
import termcolor
from random import random
import commands
from multiprocessing import Pool
from contextlib import closing


MESSAGES_TO_RETRY = [
@ -86,23 +88,161 @@ def get_server_pid(server_tcp_port):
    except Exception as ex:
        return None

def colored(text, args, color=None, on_color=None, attrs=None):
    if sys.stdout.isatty() or args.force_color:
        return termcolor.colored(text, color, on_color, attrs)
    else:
        return text


SERVER_DIED = False
exit_code = 0


#def run_tests_array(all_tests, suite, suite_dir, suite_tmp_dir, run_total):
def run_tests_array(all_tests_with_params):
    all_tests, suite, suite_dir, suite_tmp_dir, run_total = all_tests_with_params
    global exit_code
    global SERVER_DIED

    OP_SQUARE_BRACKET = colored("[", args, attrs=['bold'])
    CL_SQUARE_BRACKET = colored("]", args, attrs=['bold'])

    MSG_FAIL = OP_SQUARE_BRACKET + colored(" FAIL ", args, "red", attrs=['bold']) + CL_SQUARE_BRACKET
    MSG_UNKNOWN = OP_SQUARE_BRACKET + colored(" UNKNOWN ", args, "yellow", attrs=['bold']) + CL_SQUARE_BRACKET
    MSG_OK = OP_SQUARE_BRACKET + colored(" OK ", args, "green", attrs=['bold']) + CL_SQUARE_BRACKET
    MSG_SKIPPED = OP_SQUARE_BRACKET + colored(" SKIPPED ", args, "cyan", attrs=['bold']) + CL_SQUARE_BRACKET

    passed_total = 0
    skipped_total = 0
    failures_total = 0
    failures = 0
    failures_chain = 0

    if len(all_tests):
        print("\nRunning {} {} tests.".format(len(all_tests), suite) + "\n")

    for case in all_tests:
        if SERVER_DIED:
            break

        case_file = os.path.join(suite_dir, case)
        (name, ext) = os.path.splitext(case)

        try:
            sys.stdout.write("{0:72}".format(name + ": "))
            if run_total == 1:
                sys.stdout.flush()

            if args.skip and any(s in name for s in args.skip):
                print(MSG_SKIPPED + " - skip")
                skipped_total += 1
            elif not args.zookeeper and 'zookeeper' in name:
                print(MSG_SKIPPED + " - no zookeeper")
                skipped_total += 1
            elif not args.shard and 'shard' in name:
                print(MSG_SKIPPED + " - no shard")
                skipped_total += 1
            elif not args.no_long and 'long' in name:
                print(MSG_SKIPPED + " - no long")
                skipped_total += 1
            else:
                disabled_file = os.path.join(suite_dir, name) + '.disabled'

                if os.path.exists(disabled_file) and not args.disabled:
                    message = open(disabled_file, 'r').read()
                    print(MSG_SKIPPED + " - " + message)
                else:

                    if args.testname:
                        clickhouse_proc = Popen(shlex.split(args.client_with_database), stdin=PIPE, stdout=PIPE, stderr=PIPE)
                        clickhouse_proc.communicate("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite))

                    reference_file = os.path.join(suite_dir, name) + '.reference'
                    stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout'
                    stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr'

                    proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)
                    if proc.returncode is None:
                        try:
                            proc.kill()
                        except OSError as e:
                            if e.errno != ESRCH:
                                raise

                        failures += 1
                        print("{0} - Timeout!".format(MSG_FAIL))
                    else:
                        counter = 1
                        while proc.returncode != 0 and need_retry(stderr):
                            proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)
                            sleep(2**counter)
                            counter += 1
                            if counter > 6:
                                break

                        if proc.returncode != 0:
                            failures += 1
                            failures_chain += 1
                            print("{0} - return code {1}".format(MSG_FAIL, proc.returncode))

                            if stderr:
                                print(stderr.encode('utf-8'))

                            if args.stop and ('Connection refused' in stderr or 'Attempt to read after eof' in stderr) and not 'Received exception from server' in stderr:
                                SERVER_DIED = True

                        elif stderr:
                            failures += 1
                            failures_chain += 1
                            print("{0} - having stderror:\n{1}".format(MSG_FAIL, stderr.encode('utf-8')))
                        elif 'Exception' in stdout:
                            failures += 1
                            failures_chain += 1
                            print("{0} - having exception:\n{1}".format(MSG_FAIL, stdout.encode('utf-8')))
                        elif not os.path.isfile(reference_file):
                            print("{0} - no reference file".format(MSG_UNKNOWN))
                        else:
                            result_is_different = subprocess.call(['diff', '-q', reference_file, stdout_file], stdout = PIPE)

                            if result_is_different:
                                diff = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()[0]
                                failures += 1
                                print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, diff))
                            else:
                                passed_total += 1
                                failures_chain = 0
                                print(MSG_OK)
                                if os.path.exists(stdout_file):
                                    os.remove(stdout_file)
                                if os.path.exists(stderr_file):
                                    os.remove(stderr_file)
        except KeyboardInterrupt as e:
            print(colored("Break tests execution", args, "red"))
            raise e
        except:
            import traceback
            exc_type, exc_value, tb = sys.exc_info()
            failures += 1
            print("{0} - Test internal error: {1}\n{2}\n{3}".format(MSG_FAIL, exc_type.__name__, exc_value, "\n".join(traceback.format_tb(tb, 10))))

        if failures_chain >= 20:
            break

    failures_total = failures_total + failures

    if failures_total > 0:
        print(colored("\nHaving {failures_total} errors! {passed_total} tests passed. {skipped_total} tests skipped.".format(passed_total = passed_total, skipped_total = skipped_total, failures_total = failures_total), args, "red", attrs=["bold"]))
        exit_code = 1
    else:
        print(colored("\n{passed_total} tests passed. {skipped_total} tests skipped.".format(passed_total = passed_total, skipped_total = skipped_total), args, "green", attrs=["bold"]))

server_logs_level = "warning"

def main(args):

    SERVER_DIED = False

    def colored(text, color=None, on_color=None, attrs=None):
        if sys.stdout.isatty() or args.force_color:
            return termcolor.colored(text, color, on_color, attrs)
        else:
            return text

    OP_SQUARE_BRACKET = colored("[", attrs=['bold'])
    CL_SQUARE_BRACKET = colored("]", attrs=['bold'])

    MSG_FAIL = OP_SQUARE_BRACKET + colored(" FAIL ", "red", attrs=['bold']) + CL_SQUARE_BRACKET
    MSG_UNKNOWN = OP_SQUARE_BRACKET + colored(" UNKNOWN ", "yellow", attrs=['bold']) + CL_SQUARE_BRACKET
    MSG_OK = OP_SQUARE_BRACKET + colored(" OK ", "green", attrs=['bold']) + CL_SQUARE_BRACKET
    MSG_SKIPPED = OP_SQUARE_BRACKET + colored(" SKIPPED ", "cyan", attrs=['bold']) + CL_SQUARE_BRACKET
    global SERVER_DIED
    global exit_code
    global server_logs_level

    def is_data_present():
        clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
@ -112,7 +252,6 @@ def main(args):

        return stdout.startswith('1')


    base_dir = os.path.abspath(args.queries)
    tmp_dir = os.path.abspath(args.tmp)

@ -127,7 +266,6 @@ def main(args):

    # Force to print server warnings in stderr
    # Shell scripts could change logging level
    server_logs_level = "warning"
    os.environ.setdefault("CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL", server_logs_level)

    if args.zookeeper is None:
@ -147,10 +285,6 @@ def main(args):
    else:
        args.shard = False

    passed_total = 0
    skipped_total = 0
    failures_total = 0

    clickhouse_proc_create = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
    clickhouse_proc_create.communicate("CREATE DATABASE IF NOT EXISTS " + args.database)
    if args.database != "test":
@ -192,9 +326,7 @@ def main(args):
            suite = suite_re_obj.group(1)
            if os.path.isdir(suite_dir):

                failures = 0
                failures_chain = 0
                if 'stateful' in suite and not is_data_present():
                if 'stateful' in suite and not args.no_stateful and not is_data_present():
                    print("Won't run stateful tests because test data wasn't loaded.")
                    continue
                if 'stateless' in suite and args.no_stateless:
@ -222,150 +354,48 @@ def main(args):
                    except ValueError:
                        return 99997

                run_n, run_total = args.parallel.split('/')
                run_n = float(run_n)
                run_total = float(run_total)
                all_tests = os.listdir(suite_dir)
                all_tests = filter(lambda case: is_test_from_dir(suite_dir, case), all_tests)
                all_tests = sorted(filter(lambda case: re.search(args.test, case) if args.test else True, all_tests), key=key_func)

                run_n, run_total = args.parallel.split('/')
                run_n = float(run_n)
                run_total = float(run_total)
                tests_n = len(all_tests)
                start = int(tests_n / run_total * (run_n - 1))
                if start > 0:
                    start = start + 1
                end = int(tests_n / run_total * (run_n))
                all_tests = all_tests[start : end]
                if run_total > tests_n:
                    run_total = tests_n
                if run_n > run_total:
                    continue

                print("\nRunning {} {} tests.".format(tests_n, suite) + (" {} .. {} ".format(start, end) if run_total > 1 else "") + "\n")
                jobs = args.jobs
                if jobs > run_total:
                    run_total = jobs

                for case in all_tests:
                    if SERVER_DIED:
                        break
                all_tests_array = []
                for n in range(1, 1 + int(run_total)):
                    start = int(tests_n / run_total * (n - 1))
                    end = int(tests_n / run_total * n)
                    all_tests_array.append([all_tests[start : end], suite, suite_dir, suite_tmp_dir, run_total])

                    case_file = os.path.join(suite_dir, case)
                    (name, ext) = os.path.splitext(case)

                    try:
                        sys.stdout.write("{0:72}".format(name + ": "))
                        if run_total == 1:
                            sys.stdout.flush()

                        if args.skip and any(s in name for s in args.skip):
                            print(MSG_SKIPPED + " - skip")
                            skipped_total += 1
                        elif not args.zookeeper and 'zookeeper' in name:
                            print(MSG_SKIPPED + " - no zookeeper")
                            skipped_total += 1
                        elif not args.shard and 'shard' in name:
                            print(MSG_SKIPPED + " - no shard")
                            skipped_total += 1
                        elif not args.no_long and 'long' in name:
                            print(MSG_SKIPPED + " - no long")
                            skipped_total += 1
                        else:
                            disabled_file = os.path.join(suite_dir, name) + '.disabled'

                            if os.path.exists(disabled_file) and not args.disabled:
                                message = open(disabled_file, 'r').read()
                                print(MSG_SKIPPED + " - " + message)
                            else:

                                if args.testname:
                                    clickhouse_proc = Popen(shlex.split(args.client_with_database), stdin=PIPE, stdout=PIPE, stderr=PIPE)
                                    clickhouse_proc.communicate("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite))

                                reference_file = os.path.join(suite_dir, name) + '.reference'
                                stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout'
                                stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr'

                                proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)
                                if proc.returncode is None:
                                    try:
                                        proc.kill()
                                    except OSError as e:
                                        if e.errno != ESRCH:
                                            raise

                                    failures += 1
                                    print("{0} - Timeout!".format(MSG_FAIL))
                                else:
                                    counter = 1
                                    while proc.returncode != 0 and need_retry(stderr):
                                        proc, stdout, stderr = run_single_test(args, ext, server_logs_level, case_file, stdout_file, stderr_file)
                                        sleep(2**counter)
                                        counter += 1
                                        if counter > 6:
                                            break

                                    if proc.returncode != 0:
                                        failures += 1
                                        failures_chain += 1
                                        print("{0} - return code {1}".format(MSG_FAIL, proc.returncode))

                                        if stderr:
                                            print(stderr.encode('utf-8'))

                                        if args.stop and ('Connection refused' in stderr or 'Attempt to read after eof' in stderr) and not 'Received exception from server' in stderr:
                                            SERVER_DIED = True

                                    elif stderr:
                                        failures += 1
                                        failures_chain += 1
                                        print("{0} - having stderror:\n{1}".format(MSG_FAIL, stderr.encode('utf-8')))
                                    elif 'Exception' in stdout:
                                        failures += 1
                                        failures_chain += 1
                                        print("{0} - having exception:\n{1}".format(MSG_FAIL, stdout.encode('utf-8')))
                                    elif not os.path.isfile(reference_file):
                                        print("{0} - no reference file".format(MSG_UNKNOWN))
                                    else:
                                        result_is_different = subprocess.call(['diff', '-q', reference_file, stdout_file], stdout = PIPE)

                                        if result_is_different:
                                            diff = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()[0]

                                            failures += 1
                                            print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, diff))
                                        else:
                                            passed_total += 1
                                            failures_chain = 0
                                            print(MSG_OK)
                                            if os.path.exists(stdout_file):
                                                os.remove(stdout_file)
                                            if os.path.exists(stderr_file):
                                                os.remove(stderr_file)
                    except KeyboardInterrupt as e:
                        print(colored("Break tests execution", "red"))
                        raise e
                    except:
                        import traceback
                        exc_type, exc_value, tb = sys.exc_info()
                        failures += 1
                        print("{0} - Test internal error: {1}\n{2}\n{3}".format(MSG_FAIL, exc_type.__name__, exc_value, "\n".join(traceback.format_tb(tb, 10))))

                    if failures_chain >= 20:
                        break

                failures_total = failures_total + failures

    exit_code = 0
    if failures_total > 0:
        print(colored("\nHaving {failures_total} errors! {passed_total} tests passed. {skipped_total} tests skipped.".format(passed_total = passed_total, skipped_total = skipped_total, failures_total = failures_total), "red", attrs=["bold"]))
        exit_code = 1
    else:
        print(colored("\n{passed_total} tests passed. {skipped_total} tests skipped.".format(passed_total = passed_total, skipped_total = skipped_total), "green", attrs=["bold"]))
                if jobs > 1:
                    with closing(Pool(processes=jobs)) as pool:
                        pool.map(run_tests_array, all_tests_array)
                        pool.terminate()
                else:
                    run_tests_array(all_tests_array[int(run_n)-1])

    if args.hung_check:
        processlist = get_processlist(args.client_with_database)
        if processlist:
            server_pid = get_server_pid(os.getenv("CLICKHOUSE_PORT_TCP", '9000'))
            print(colored("\nFound hung queries in processlist:", "red", attrs=["bold"]))
            print(colored("\nFound hung queries in processlist:", args, "red", attrs=["bold"]))
            print(processlist)
            if server_pid:
                print("\nStacktraces of all threads:")
                print(get_stacktraces(server_pid))
            exit_code = 1
        else:
            print(colored("\nNo queries hung.", "green", attrs=["bold"]))
            print(colored("\nNo queries hung.", args, "green", attrs=["bold"]))

    sys.exit(exit_code)

@ -400,7 +430,8 @@ if __name__ == '__main__':
    parser.add_argument('--hung-check', action='store_true', default=False)
    parser.add_argument('--force-color', action='store_true', default=False)
    parser.add_argument('--database', default='test', help='Default database for tests')
    parser.add_argument('--parallel', default='1/1', help='Parralel test run number/total')
    parser.add_argument('--parallel', default='1/1', help='One parallel test run number/total')
    parser.add_argument('-j', '--jobs', default=1, help='Run all tests in parallel', type=int)

    parser.add_argument('--no-stateless', action='store_true', help='Disable all stateless tests')
    parser.add_argument('--no-stateful', action='store_true', help='Disable all stateful tests')

@ -18,3 +18,23 @@
1000
1000
1000
10
10
10
10
10
10
10
10
10
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000

@ -6,5 +6,7 @@ INSERT INTO test.group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM

SELECT length(groupUniqArray(v)) FROM test.group_uniq_str GROUP BY id ORDER BY id;
SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', 'test', 'group_uniq_str') GROUP BY id ORDER BY id;
SELECT length(groupUniqArray(10)(v)) FROM test.group_uniq_str GROUP BY id ORDER BY id;
SELECT length(groupUniqArray(10000)(v)) FROM test.group_uniq_str GROUP BY id ORDER BY id;

DROP TABLE IF EXISTS test.group_uniq_str;

@ -18,3 +18,23 @@
20001
20001
20001
10
10
10
10
10
10
10
10
10
10
20001
20001
20001
20001
20001
20001
20001
20001
20001
20001

@ -5,5 +5,8 @@ CREATE TABLE test.group_uniq_arr_int ENGINE = Memory AS

SELECT length(groupUniqArray(v)) FROM test.group_uniq_arr_int GROUP BY id ORDER BY id;
SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', 'test', 'group_uniq_arr_int') GROUP BY id ORDER BY id;
SELECT length(groupUniqArray(10)(v)) FROM test.group_uniq_arr_int GROUP BY id ORDER BY id;
SELECT length(groupUniqArray(100000)(v)) FROM test.group_uniq_arr_int GROUP BY id ORDER BY id;


DROP TABLE IF EXISTS test.group_uniq_arr_int;

@ -1 +1,5 @@
['2016-12-16'] ['2016-12-16 12:00:00']
2 2 3 3
1 1 1 1
3 3
1 1

@ -1,5 +1,8 @@
DROP TABLE IF EXISTS grop_uniq_array_date;
CREATE TABLE grop_uniq_array_date (d Date, dt DateTime) ENGINE = Memory;
INSERT INTO grop_uniq_array_date VALUES (toDate('2016-12-16'), toDateTime('2016-12-16 12:00:00')) (toDate('2016-12-16'), toDateTime('2016-12-16 12:00:00'));
CREATE TABLE grop_uniq_array_date (d Date, dt DateTime, id Integer) ENGINE = Memory;
INSERT INTO grop_uniq_array_date VALUES (toDate('2016-12-16'), toDateTime('2016-12-16 12:00:00'), 1) (toDate('2016-12-16'), toDateTime('2016-12-16 12:00:00'), 1);
SELECT groupUniqArray(d), groupUniqArray(dt) FROM grop_uniq_array_date;
INSERT INTO grop_uniq_array_date VALUES (toDate('2016-12-17'), toDateTime('2016-12-17 12:00:00'), 1), (toDate('2016-12-18'), toDateTime('2016-12-18 12:00:00'), 1), (toDate('2016-12-16'), toDateTime('2016-12-16 12:00:00'), 2);
SELECT length(groupUniqArray(2)(d)), length(groupUniqArray(2)(dt)), length(groupUniqArray(d)), length(groupUniqArray(dt)) FROM grop_uniq_array_date GROUP BY id ORDER BY id;
SELECT length(groupUniqArray(10000)(d)), length(groupUniqArray(10000)(dt)) FROM grop_uniq_array_date GROUP BY id ORDER BY id;
DROP TABLE IF EXISTS grop_uniq_array_date;

@ -0,0 +1,21 @@
2019-04-18 42 Line1
2019-04-18 42 Line2
2019-04-18 42 Line3
2019-04-18 42 Line4
2019-04-18 42 Line5
2019-04-18 42 Line6
2019-04-18 42 Line7
2019-04-18 42 Line8
2019-04-18 42 Line9
2019-04-18 1 Line10
2019-04-18 2 Line11
2019-04-18 1 Line12
2019-04-18 2 Line13
2019-04-18 1
2019-04-18 2
0000-00-00 1 Line16
0000-00-00 2 Line17
2019-04-18 0 Line18
2019-04-18 0 Line19
0000-00-00 0
0000-00-00 0

dbms/tests/queries/0_stateless/00937_test_use_header_csv.sh (new executable file, 36 lines)
@ -0,0 +1,36 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh

$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.csv"
$CLICKHOUSE_CLIENT --query="CREATE TABLE test.csv (d Date, u UInt8, str String) ENGINE = TinyLog"

INSERT_QUERY='$CLICKHOUSE_CLIENT --query="INSERT INTO test.csv FORMAT CSVWithNames"'
USE_HEADER='--input_format_with_names_use_header=1'
SKIP_UNKNOWN='--input_format_skip_unknown_fields=1'

# Simple check for parsing
echo -ne 'd,u,str\n2019-04-18,42,Line1\n2019-04-18,42,Line2' | eval $INSERT_QUERY
echo -ne 'd,u,str\n2019-04-18,42,Line3\n2019-04-18,42,Line4' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'd,u,str\n2019-04-18,42,Line5\n2019-04-18,42,Line6' | eval $INSERT_QUERY $USE_HEADER $SKIP_UNKNOWN

# Random order of fields
echo -ne 'u,d,str\n42,2019-04-18,Line7\n' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'u,str,d\n42,Line8,2019-04-18\n' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'str,u,d\nLine9,42,2019-04-18\n' | eval $INSERT_QUERY $USE_HEADER

# Excessive fields
echo -ne 'd,u,str,more,unknown,fields\n2019-04-18,1,Line10,,,\n2019-04-18,2,Line11,,,\n' \
    | eval $INSERT_QUERY $USE_HEADER $SKIP_UNKNOWN
echo -ne 'd,unknown,str,more,u,fields\n2019-04-18,blahblah,Line12,,1,\n2019-04-18,,Line13,blahblah,2,\n' \
    | eval $INSERT_QUERY $USE_HEADER $SKIP_UNKNOWN

# Missing fields (defaults)
echo -ne 'd,u\n2019-04-18,1\n2019-04-18,2\n' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'str,u\nLine16,1\nLine17,2\n' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'd,str\n2019-04-18,Line18\n2019-04-18,Line19\n'| eval $INSERT_QUERY $USE_HEADER
echo -ne 'unknown\n\n\n' | eval $INSERT_QUERY $USE_HEADER $SKIP_UNKNOWN

$CLICKHOUSE_CLIENT --query="SELECT * FROM test.csv"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.csv"
@ -0,0 +1,21 @@
2019-04-18 42 Line1
2019-04-18 42 Line2
2019-04-18 42 Line3
2019-04-18 42 Line4
2019-04-18 42 Line5
2019-04-18 42 Line6
2019-04-18 42 Line7
2019-04-18 42 Line8
2019-04-18 42 Line9
2019-04-18 1 Line10
2019-04-18 2 Line11
2019-04-18 1 Line12
2019-04-18 2 Line13
2019-04-18 1
2019-04-18 2
0000-00-00 1 Line16
0000-00-00 2 Line17
2019-04-18 0 Line18
2019-04-18 0 Line19
0000-00-00 0
0000-00-00 0
dbms/tests/queries/0_stateless/00937_test_use_header_tsv.sh (new executable file, 36 lines)
@ -0,0 +1,36 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh

$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.tsv"
$CLICKHOUSE_CLIENT --query="CREATE TABLE test.tsv (d Date, u UInt8, str String) ENGINE = TinyLog"

INSERT_QUERY='$CLICKHOUSE_CLIENT --query="INSERT INTO test.tsv FORMAT TSVWithNames"'
USE_HEADER='--input_format_with_names_use_header=1'
SKIP_UNKNOWN='--input_format_skip_unknown_fields=1'

# Simple check for parsing
echo -ne 'd\tu\tstr\n2019-04-18\t42\tLine1\n2019-04-18\t42\tLine2' | eval $INSERT_QUERY
echo -ne 'd\tu\tstr\n2019-04-18\t42\tLine3\n2019-04-18\t42\tLine4' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'd\tu\tstr\n2019-04-18\t42\tLine5\n2019-04-18\t42\tLine6' | eval $INSERT_QUERY $USE_HEADER $SKIP_UNKNOWN

# Random order of fields
echo -ne 'u\td\tstr\n42\t2019-04-18\tLine7\n' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'u\tstr\td\n42\tLine8\t2019-04-18\n' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'str\tu\td\nLine9\t42\t2019-04-18\n' | eval $INSERT_QUERY $USE_HEADER

# Excessive fields
echo -ne 'd\tu\tstr\tmore\tunknown\tfields\n2019-04-18\t1\tLine10\t\t\t\n2019-04-18\t2\tLine11\t\t\t\n' \
    | eval $INSERT_QUERY $USE_HEADER $SKIP_UNKNOWN
echo -ne 'd\tunknown\tstr\tmore\tu\tfields\n2019-04-18\tblahblah\tLine12\t\t1\t\n2019-04-18\t\tLine13\tblahblah\t2\t\n' \
    | eval $INSERT_QUERY $USE_HEADER $SKIP_UNKNOWN

# Missing fields (defaults)
echo -ne 'd\tu\n2019-04-18\t1\n2019-04-18\t2\n' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'str\tu\nLine16\t1\nLine17\t2\n' | eval $INSERT_QUERY $USE_HEADER
echo -ne 'd\tstr\n2019-04-18\tLine18\n2019-04-18\tLine19\n'| eval $INSERT_QUERY $USE_HEADER
echo -ne 'unknown\n\n\n' | eval $INSERT_QUERY $USE_HEADER $SKIP_UNKNOWN

$CLICKHOUSE_CLIENT --query="SELECT * FROM test.tsv"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.tsv"
docker/test/compatibility/centos/Dockerfile (new file, 4 lines)
@ -0,0 +1,4 @@
FROM centos:5

CMD /bin/sh -c "/clickhouse server --config /config/config.xml > /var/log/clickhouse-server/stderr.log 2>&1 & \
    sleep 5 && /clickhouse client --query \"select 'OK'\" 2> /var/log/clickhouse-server/clientstderr.log || echo 'FAIL'"
docker/test/compatibility/ubuntu/Dockerfile (new file, 4 lines)
@ -0,0 +1,4 @@
FROM ubuntu:12.04

CMD /bin/sh -c "/clickhouse server --config /config/config.xml > /var/log/clickhouse-server/stderr.log 2>&1 & \
    sleep 5 && /clickhouse client --query \"select 'OK'\" 2> /var/log/clickhouse-server/clientstderr.log || echo 'FAIL'"
@ -18,7 +18,9 @@ RUN apt-get update -y \
    openssl \
    netcat-openbsd \
    telnet \
    llvm-8
    llvm-8 \
    brotli


COPY ./stress /stress
COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml

@ -11,7 +11,7 @@ import time
def run_perf_test(cmd, xmls_path, output_folder):
    output_path = os.path.join(output_folder, "perf_stress_run.txt")
    f = open(output_path, 'w')
    p = Popen("{} --skip-tags=long --r {}".format(cmd, xmls_path), shell=True, stdout=f, stderr=f)
    p = Popen("{} --skip-tags=long --recursive --input-files {}".format(cmd, xmls_path), shell=True, stdout=f, stderr=f)
    return p

def run_func_test(cmd, output_prefix, num_processes):
@ -55,7 +55,7 @@ if __name__ == "__main__":
    while True:
        retcodes = []
        for p in func_pipes:
            if p.poll():
            if p.poll() is not None:
                retcodes.append(p.returncode)
        if len(retcodes) == len(func_pipes):
            break

@ -86,7 +86,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key
```


## Data Processing
## Data Processing {#data-processing}

When data are inserted into a table, they are saved as-is. Clickhouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.

@ -369,10 +369,13 @@ Optional parameters:
- The default value for substituting in empty positions.
- The length of the resulting array. This allows you to receive arrays of the same size for all the aggregate keys. When using this parameter, the default value must be specified.

## groupUniqArray(x)
## groupUniqArray(x), groupUniqArray(max_size)(x)

Creates an array from different argument values. Memory consumption is the same as for the `uniqExact` function.

The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements.
For example, `groupUniqArray (1) (x)` is equivalent to `[any (x)]`.
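
As an illustration only (not part of this changeset), a minimal sketch of the capped form; the table and column names (`group_uniq_demo`, `id`, `v`) are hypothetical:

```sql
-- groupUniqArray(2)(v) keeps at most 2 distinct values per id,
-- while the plain groupUniqArray(v) keeps them all.
SELECT
    id,
    groupUniqArray(2)(v) AS capped,
    groupUniqArray(v)    AS full
FROM group_uniq_demo
GROUP BY id
ORDER BY id;
```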

## quantile(level)(x)

Approximates the `level` quantile. `level` is a constant, a floating-point number from 0 to 1.

@ -1,9 +1,9 @@

# Dictionary

The `Dictionary` engine displays the dictionary data as a ClickHouse table.
`Dictionary` 引擎将字典数据展示为一个ClickHouse的表。

As an example, consider a dictionary of `products` with the following configuration:
例如,考虑使用一个具有以下配置的 `products` 字典:

```xml
<dictionaries>
@ -36,7 +36,7 @@ As an example, consider a dictionary of `products` with the following configurat
</dictionaries>
```

Query the dictionary data:
查询字典中的数据:

``` sql
select name, type, key, attribute.names, attribute.types, bytes_allocated, element_count,source from system.dictionaries where name = 'products';
@ -60,17 +60,17 @@ WHERE name = 'products'
└──────────┴──────┴────────┴─────────────────┴─────────────────┴─────────────────┴───────────────┴─────────────────┘
```

You can use the [dictGet*](../../query_language/functions/ext_dict_functions.md#ext_dict_functions) function to get the dictionary data in this format.
你可以使用 [dictGet*](../../query_language/functions/ext_dict_functions.md#ext_dict_functions) 函数来获取这种格式的字典数据。

This view isn't helpful when you need to get raw data, or when performing a `JOIN` operation. For these cases, you can use the `Dictionary` engine, which displays the dictionary data in a table.
当你需要获取原始数据,或者是想要使用 `JOIN` 操作的时候,这种视图并没有什么帮助。对于这些情况,你可以使用 `Dictionary` 引擎,它可以将字典数据展示在表中。

Syntax:
语法:

```
CREATE TABLE %table_name% (%fields%) engine = Dictionary(%dictionary_name%)`
```

Usage example:
示例:

``` sql
create table products (product_id UInt64, title String) Engine = Dictionary(products);
@ -89,7 +89,7 @@ Ok.
0 rows in set. Elapsed: 0.004 sec.
```

Take a look at what's in the table.
看一看表中的内容。

``` sql
select * from products limit 1;
@ -108,4 +108,4 @@ LIMIT 1
```


[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/dictionary/) <!--hide-->
[来源文章](https://clickhouse.yandex/docs/en/operations/table_engines/dictionary/) <!--hide-->

@ -87,7 +87,7 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa

<details markdown="1"><summary>已弃用的建表方法</summary>

!!! 注意
!!! attention "注意"
    不要在新版项目中使用该方法,可能的话,请将旧项目切换到上述方法。

```

@ -1,12 +1,12 @@
# ReplacingMergeTree

The engine differs from [MergeTree](mergetree.md) in that it removes duplicate entries with the same primary key value.
该引擎和[MergeTree](mergetree.md)的不同之处在于它会删除具有相同主键的重复项。

Data deduplication occurs only during a merge. Merging occurs in the background at an unknown time, so you can't plan for it. Some of the data may remain unprocessed. Although you can run an unscheduled merge using the `OPTIMIZE` query, don't count on using it, because the `OPTIMIZE` query will read and write a large amount of data.
数据的去重只会在合并的过程中出现。合并会在未知的时间在后台进行,因此你无法预先作出计划。有一些数据可能仍未被处理。尽管你可以调用 `OPTIMIZE` 语句发起计划外的合并,但请不要指望使用它,因为 `OPTIMIZE` 语句会引发对大量数据的读和写。

Thus, `ReplacingMergeTree` is suitable for clearing out duplicate data in the background in order to save space, but it doesn't guarantee the absence of duplicates.
因此,`ReplacingMergeTree` 适用于在后台清除重复的数据以节省空间,但是它不保证没有重复的数据出现。

## Creating a Table
## 建表

```sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -21,24 +21,24 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...]
```

For a description of request parameters, see [request description](../../query_language/create.md).
请求参数的描述,参考[请求参数](../../query_language/create.md)。

**ReplacingMergeTree Parameters**

- `ver` — column with version. Type `UInt*`, `Date` or `DateTime`. Optional parameter.
- `ver` — 版本列。类型为 `UInt*`, `Date` 或 `DateTime`。可选参数。

When merging, `ReplacingMergeTree` from all the rows with the same primary key leaves only one:
- Last in the selection, if `ver` not set.
- With the maximum version, if `ver` specified.
合并的时候,`ReplacingMergeTree` 从所有具有相同主键的行中选择一行留下:
- 如果 `ver` 列未指定,选择最后一条。
- 如果 `ver` 列已指定,选择 `ver` 值最大的版本。
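
A minimal sketch of how the `ver` column drives deduplication, using a hypothetical table name (`replacing_demo`); it is illustrative only and not part of this commit:

```sql
CREATE TABLE replacing_demo
(
    key UInt64,
    value String,
    ver UInt32
) ENGINE = ReplacingMergeTree(ver)
ORDER BY key;

INSERT INTO replacing_demo VALUES (1, 'old', 1), (1, 'new', 2);

-- Force a merge so the deduplication is visible immediately.
OPTIMIZE TABLE replacing_demo FINAL;
SELECT * FROM replacing_demo;   -- only the row with the highest ver, (1, 'new', 2), remains
```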

**Query clauses**
**子句**

When creating a `ReplacingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
创建 `ReplacingMergeTree` 表时,需要与创建 `MergeTree` 表时相同的[子句](mergetree.md)。

<details markdown="1"><summary>Deprecated Method for Creating a Table</summary>
<details markdown="1"><summary>已弃用的建表方法</summary>

!!! attention
    Do not use this method in new projects and, if possible, switch the old projects to the method described above.
!!! attention "注意"
    不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。

```sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -49,10 +49,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE [=] ReplacingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, [ver])
```

All of the parameters excepting `ver` have the same meaning as in `MergeTree`.
除了 `ver` 的所有参数都与 `MergeTree` 中的含义相同。


- `ver` - column with the version. Optional parameter. For a description, see the text above.
- `ver` - 版本列。可选参数,有关说明,请参阅上文。
</details>

[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/replacingmergetree/) <!--hide-->
[来源文章](https://clickhouse.yandex/docs/en/operations/table_engines/replacingmergetree/) <!--hide-->

@ -1,11 +1,11 @@

# SummingMergeTree

The engine inherits from [MergeTree](mergetree.md). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key with one row which contains summarized values for the columns with the numeric data type. If the primary key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.
该引擎继承自 [MergeTree](mergetree.md)。区别在于,当合并 `SummingMergeTree` 表的数据片段时,ClickHouse 会把所有具有相同主键的行合并为一行,该行包含了被合并的行中具有数值数据类型的列的汇总值。如果主键的组合方式使得单个键值对应于大量的行,则可以显著的减少存储空间并加快数据查询的速度。

We recommend to use the engine together with `MergeTree`. Store complete data in `MergeTree` table, and use `SummingMergeTree` for aggregated data storing, for example, when preparing reports. Such an approach will prevent you from losing valuable data due to an incorrectly composed primary key.
我们推荐将该引擎和 `MergeTree` 一起使用。例如,在准备做报告的时候,将完整的数据存储在 `MergeTree` 表中,并且使用 `SummingMergeTree` 来存储聚合数据。这种方法可以使你避免因为使用不正确的主键组合方式而丢失有价值的数据。

## Creating a Table
## 建表

```
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -20,23 +20,23 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...]
```

For a description of request parameters, see [request description](../../query_language/create.md).
请求参数的描述,参考 [请求描述](../../query_language/create.md)。

**Parameters of SummingMergeTree**
**SummingMergeTree 的参数**

- `columns` - a tuple with the names of columns where values will be summarized. Optional parameter.
The columns must be of a numeric type and must not be in the primary key.
- `columns` - 包含了将要被汇总的列的列名的元组。可选参数。
所选的列必须是数值类型,并且不可位于主键中。

If `columns` not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key.
如果没有指定 `columns`,ClickHouse 会把所有不在主键中的数值类型的列都进行汇总。

**Query clauses**
**子句**

When creating a `SummingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
创建 `SummingMergeTree` 表时,需要与创建 `MergeTree` 表时相同的[子句](mergetree.md)。

<details markdown="1"><summary>Deprecated Method for Creating a Table</summary>
<details markdown="1"><summary>已弃用的建表方法</summary>

!!! attention
    Do not use this method in new projects and, if possible, switch the old projects to the method described above.
!!! attention "注意"
    不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。

```
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -47,14 +47,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE [=] SummingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, [columns])
```

All of the parameters excepting `columns` have the same meaning as in `MergeTree`.
除 `columns` 外的所有参数都与 `MergeTree` 中的含义相同。

- `columns` — tuple with names of columns values of which will be summarized. Optional parameter. For a description, see the text above.
- `columns` — 包含将要被汇总的列的列名的元组。可选参数。有关说明,请参阅上文。
</details>

## Usage Example
## 用法示例

Consider the following table:
考虑如下的表:

```sql
CREATE TABLE summtt
@ -66,13 +66,13 @@ ENGINE = SummingMergeTree()
ORDER BY key
```

Insert data to it:
向其中插入数据:

```
:) INSERT INTO summtt Values(1,1),(1,2),(2,1)
```

ClickHouse may sum all the rows not completely ([see below](#data-processing)), so we use an aggregate function `sum` and `GROUP BY` clause in the query.
ClickHouse可能不会完整的汇总所有行([见下文](#data-processing)),因此我们在查询中使用了聚合函数 `sum` 和 `GROUP BY` 子句。

```sql
SELECT key, sum(value) FROM summtt GROUP BY key
@ -86,38 +86,38 @@ SELECT key, sum(value) FROM summtt GROUP BY key
```


## Data Processing
## 数据处理 {#data-processing}

When data are inserted into a table, they are saved as-is. Clickhouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.
当数据被插入到表中时,他们将被原样保存。ClickHouse 定期合并插入的数据片段,并在这个时候对所有具有相同主键的行中的列进行汇总,将这些行替换为包含汇总数据的一行记录。

ClickHouse can merge the data parts so that different resulting parts of data cat consist rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../query_language/agg_functions/reference.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.
ClickHouse 会按片段合并数据,以至于不同的数据片段中会包含具有相同主键的行,即单个汇总片段将会是不完整的。因此,聚合函数 [sum()](../../query_language/agg_functions/reference.md#agg_function-sum) 和 `GROUP BY` 子句应该在(`SELECT`)查询语句中被使用,如上文中的例子所述。

### Common rules for summation
### 汇总的通用规则

The values in the columns with the numeric data type are summarized. The set of columns is defined by the parameter `columns`.
列中数值类型的值会被汇总。这些列的集合在参数 `columns` 中被定义。

If the values were 0 in all of the columns for summation, the row is deleted.
如果用于汇总的所有列中的值均为0,则该行会被删除。

If column is not in the primary key and is not summarized, an arbitrary value is selected from the existing ones.
如果列不在主键中且无法被汇总,则会在现有的值中任选一个。

The values are not summarized for columns in the primary key.
主键所在的列中的值不会被汇总。

### The Summation in the AggregateFunction Columns
### AggregateFunction 列中的汇总

For columns of [AggregateFunction type](../../data_types/nested_data_structures/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](aggregatingmergetree.md) engine aggregating according to the function.
对于 [AggregateFunction 类型](../../data_types/nested_data_structures/aggregatefunction.md)的列,ClickHouse 根据对应函数表现为 [AggregatingMergeTree](aggregatingmergetree.md) 引擎的聚合。

### Nested Structures
### 嵌套结构

Table can have nested data structures that are processed in a special way.
表中可以具有以特殊方式处理的嵌套数据结构。

If the name of a nested table ends with `Map` and it contains at least two columns that meet the following criteria:
如果嵌套表的名称以 `Map` 结尾,并且包含至少两个符合以下条件的列:

- the first column is numeric `(*Int*, Date, DateTime)`, let's call it `key`,
- the other columns are arithmetic `(*Int*, Float32/64)`, let's call it `(values...)`,
- 第一列是数值类型 `(*Int*, Date, DateTime)`,我们称之为 `key`,
- 其他的列是可计算的 `(*Int*, Float32/64)`,我们称之为 `(values...)`,

then this nested table is interpreted as a mapping of `key => (values...)`, and when merging its rows, the elements of two data sets are merged by `key` with a summation of the corresponding `(values...)`.
然后这个嵌套表会被解释为一个 `key => (values...)` 的映射,当合并它们的行时,两个数据集中的元素会被根据 `key` 合并为相应的 `(values...)` 的汇总值。

Examples:
示例:

```
[(1, 100)] + [(2, 150)] -> [(1, 100), (2, 150)]
@ -126,8 +126,8 @@ Examples:
[(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
```

When requesting data, use the [sumMap(key, value)](../../query_language/agg_functions/reference.md) function for aggregation of `Map`.
请求数据时,使用 [sumMap(key, value)](../../query_language/agg_functions/reference.md) 函数来对 `Map` 进行聚合。

For nested data structure, you do not need to specify its columns in the tuple of columns for summation.
对于嵌套数据结构,你无需在列的元组中指定列以进行汇总。
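
An illustrative sketch of the `Map` convention described above, with hypothetical names (`visits_map`, `StatusMap`); it is not part of this changeset:

```sql
CREATE TABLE visits_map
(
    date Date,
    StatusMap Nested(Status UInt16, Requests UInt64)
) ENGINE = SummingMergeTree()
ORDER BY date;

-- Nested columns are inserted as parallel arrays.
INSERT INTO visits_map VALUES ('2019-04-18', [200, 404], [10, 1]), ('2019-04-18', [200, 500], [5, 2]);

-- sumMap merges the two maps by key: 200 -> 15, 404 -> 1, 500 -> 2.
SELECT date, sumMap(StatusMap.Status, StatusMap.Requests) FROM visits_map GROUP BY date;
```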

[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/summingmergetree/) <!--hide-->
[来源文章](https://clickhouse.yandex/docs/en/operations/table_engines/summingmergetree/) <!--hide-->

@ -1,7 +1,7 @@
set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/include/common/config_common.h)
configure_file (${CMAKE_CURRENT_SOURCE_DIR}/include/common/config_common.h.in ${CONFIG_COMMON})

add_library (apple_rt
add_library(apple_rt
    src/apple_rt.cpp
    include/port/clock.h
)
@ -10,7 +10,7 @@ if (DEFINED APPLE_HAVE_CLOCK_GETTIME)
    target_compile_definitions(apple_rt PUBLIC -DAPPLE_HAVE_CLOCK_GETTIME=${APPLE_HAVE_CLOCK_GETTIME})
endif ()

add_library (common ${LINK_MODE}
add_library(common
    src/DateLUT.cpp
    src/DateLUTImpl.cpp
    src/preciseExp10.c
@ -91,9 +91,13 @@ target_include_directories (common BEFORE PRIVATE ${CCTZ_INCLUDE_DIR})
target_include_directories (common PUBLIC ${COMMON_INCLUDE_DIR})

if (NOT USE_INTERNAL_BOOST_LIBRARY)
    target_include_directories (common BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
    target_include_directories (common SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
endif ()

if(NOT USE_INTERNAL_POCO_LIBRARY)
    target_include_directories (common SYSTEM BEFORE PUBLIC ${Poco_Foundation_INCLUDE_DIR})
endif()

target_link_libraries (common
    PUBLIC
    ${Poco_Foundation_LIBRARY}

@ -1,4 +1,4 @@
add_library (daemon ${LINK_MODE}
add_library (daemon
    src/BaseDaemon.cpp
    src/GraphiteWriter.cpp
    src/ExtendedLogChannel.cpp
@ -22,4 +22,4 @@ endif ()

target_include_directories (daemon PUBLIC include)

target_link_libraries (daemon PRIVATE clickhouse_common_io clickhouse_common_config common ${Poco_Net_LIBRARY} ${Poco_Util_LIBRARY} ${EXECINFO_LIBRARY} ${ELF_LIBRARY})
target_link_libraries (daemon PRIVATE clickhouse_common_io clickhouse_common_config common ${Poco_Net_LIBRARY} ${Poco_Util_LIBRARY} ${EXECINFO_LIBRARIES})

@ -1 +1 @@
add_library (memcpy memcpy.c)
add_library(memcpy memcpy.c)

@ -1,4 +1,4 @@
add_library (mysqlxx ${LINK_MODE}
add_library (mysqlxx
    src/Connection.cpp
    src/Exception.cpp
    src/Query.cpp

@ -1,17 +1,17 @@
option (ENABLE_MYSQL "Enable MySQL" ${OS_LINUX})
if (OS_LINUX)
    option (USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient library instead of bundled" ${NOT_UNBUNDLED})
else ()
    option (USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient library instead of bundled" OFF)
endif ()
option(ENABLE_MYSQL "Enable MySQL" 1)
if(ENABLE_MYSQL)
    if(OS_LINUX)
        option(USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient library instead of bundled" ${NOT_UNBUNDLED})
    else()
        option(USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient library instead of bundled" OFF)
    endif()

if (USE_INTERNAL_MYSQL_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mariadb-connector-c/README.md")
    message (WARNING "submodule contrib/mariadb-connector-c is missing. to fix try run: \n git submodule update --init --recursive")
    set (USE_INTERNAL_MYSQL_LIBRARY 0)
endif ()
    if(USE_INTERNAL_MYSQL_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mariadb-connector-c/README.md")
        message(WARNING "submodule contrib/mariadb-connector-c is missing. to fix try run: \n git submodule update --init --recursive")
        set(USE_INTERNAL_MYSQL_LIBRARY 0)
    endif()


if (ENABLE_MYSQL)
    if (USE_INTERNAL_MYSQL_LIBRARY)
        set (MYSQLCLIENT_LIBRARIES mysqlclient)
        set (USE_MYSQL 1)

@ -124,7 +124,7 @@ if __name__ == "__main__":
            args.dataset_name, 'partitions', os.path.basename(file_path))
    elif args.s3_path is not None:
        s3_path = os.path.join(
            args.dataset_name, s3_path, os.path.base_name(file_path))
            args.dataset_name, args.s3_path, os.path.basename(file_path))
    else:
        raise Exception("Don't know s3-path to upload")

@ -447,6 +447,8 @@ clickhouse-client
<li>Follow official <a
       href="https://twitter.com/ClickHouseDB"
       rel="external nofollow" target="_blank">Twitter account</a>.</li>
<li>Open <a href="https://github.com/yandex/ClickHouse/issues/new/choose"
       rel="external nofollow" target="_blank">GitHub issue</a> if you have a bug report or feature request.</li>
<li>Or email Yandex ClickHouse team directly at
       <a id="feedback_email" href="">turn on JavaScript to see email address</a>.
       You can also <a href="https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/" target="_blank" rel="external nofollow">fill this form</a> to meet us in person.</li>