mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-10-16 05:20:51 +00:00
Merge branch 'master' into custom_week_functions
Merge larst code from master
This commit is contained in:
commit
f58f6a4d6b
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -79,3 +79,6 @@
|
||||
[submodule "contrib/hyperscan"]
|
||||
path = contrib/hyperscan
|
||||
url = https://github.com/ClickHouse-Extras/hyperscan.git
|
||||
[submodule "contrib/simdjson"]
|
||||
path = contrib/simdjson
|
||||
url = https://github.com/lemire/simdjson.git
|
||||
|
@ -1,6 +1,15 @@
|
||||
project(ClickHouse)
|
||||
cmake_minimum_required(VERSION 3.3)
|
||||
cmake_policy(SET CMP0023 NEW)
|
||||
|
||||
foreach(policy
|
||||
CMP0023
|
||||
CMP0074 # CMake 3.12
|
||||
)
|
||||
if(POLICY ${policy})
|
||||
cmake_policy(SET ${policy} NEW)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/")
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json
|
||||
set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so
|
||||
@ -318,6 +327,7 @@ include (cmake/find_consistent-hashing.cmake)
|
||||
include (cmake/find_base64.cmake)
|
||||
include (cmake/find_hyperscan.cmake)
|
||||
include (cmake/find_lfalloc.cmake)
|
||||
include (cmake/find_simdjson.cmake)
|
||||
find_contrib_lib(cityhash)
|
||||
find_contrib_lib(farmhash)
|
||||
find_contrib_lib(metrohash)
|
||||
|
@ -12,7 +12,6 @@ ClickHouse is an open-source column-oriented database management system that all
|
||||
* You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
|
||||
|
||||
## Upcoming Events
|
||||
* [ClickHouse Community Meetup in Limassol](https://www.facebook.com/events/386638262181785/) on May 7.
|
||||
* ClickHouse at [Percona Live 2019](https://www.percona.com/live/19/other-open-source-databases-track) in Austin on May 28-30.
|
||||
* [ClickHouse Community Meetup in Beijing](https://www.huodongxing.com/event/2483759276200) on June 8.
|
||||
* [ClickHouse Community Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20.
|
||||
|
@ -1,6 +1,9 @@
|
||||
option(ENABLE_ICU "Enable ICU" ON)
|
||||
|
||||
if(ENABLE_ICU)
|
||||
if (APPLE)
|
||||
set(ICU_ROOT "/usr/local/opt/icu4c" CACHE STRING "")
|
||||
endif()
|
||||
find_package(ICU COMPONENTS i18n uc data) # TODO: remove Modules/FindICU.cmake after cmake 3.7
|
||||
#set (ICU_LIBRARIES ${ICU_I18N_LIBRARY} ${ICU_UC_LIBRARY} ${ICU_DATA_LIBRARY} CACHE STRING "")
|
||||
if(ICU_FOUND)
|
||||
|
@ -1,4 +1,4 @@
|
||||
if (NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE AND NOT OS_FREEBSD)
|
||||
if (NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE AND NOT OS_FREEBSD AND NOT APPLE)
|
||||
option (ENABLE_LFALLOC "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED})
|
||||
endif ()
|
||||
|
||||
|
14
cmake/find_simdjson.cmake
Normal file
14
cmake/find_simdjson.cmake
Normal file
@ -0,0 +1,14 @@
|
||||
# Detects whether the bundled simdjson submodule can be used.
#
# Outputs (only when simdjson is usable):
#   USE_SIMDJSON          - cache option, ON by default
#   SIMDJSON_INCLUDE_DIR  - path to the simdjson public headers
#   SIMDJSON_LIBRARY      - name of the library target built in contrib/simdjson-cmake
#
# On every early-exit path USE_SIMDJSON is explicitly set to 0. Without this, a value
# of ON left in the cache by an earlier successful configure would still be visible to
# later `if (USE_SIMDJSON)` checks even though the option() call below was never reached.

if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson/jsonparser.h")
    message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init --recursive")
    # A normal variable takes precedence over a stale cache entry of the same name.
    set (USE_SIMDJSON 0)
    return()
endif ()

if (NOT HAVE_AVX2)
    message (WARNING "submodule contrib/simdjson requires AVX2 support")
    set (USE_SIMDJSON 0)
    return()
endif ()

option (USE_SIMDJSON "Use simdjson" ON)

set (SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include")
set (SIMDJSON_LIBRARY "simdjson")
|
6
contrib/CMakeLists.txt
vendored
6
contrib/CMakeLists.txt
vendored
@ -227,7 +227,7 @@ if (USE_INTERNAL_POCO_LIBRARY)
|
||||
set (ENABLE_TESTS 0)
|
||||
set (POCO_ENABLE_TESTS 0)
|
||||
set (CMAKE_DISABLE_FIND_PACKAGE_ZLIB 1)
|
||||
if (MSVC)
|
||||
if (MSVC OR NOT USE_POCO_DATAODBC)
|
||||
set (ENABLE_DATA_ODBC 0 CACHE INTERNAL "") # TODO (build fail)
|
||||
endif ()
|
||||
add_subdirectory (poco)
|
||||
@ -313,3 +313,7 @@ endif()
|
||||
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
|
||||
add_subdirectory (hyperscan)
|
||||
endif()
|
||||
|
||||
if (USE_SIMDJSON)
|
||||
add_subdirectory (simdjson-cmake)
|
||||
endif()
|
||||
|
1
contrib/simdjson
vendored
Submodule
1
contrib/simdjson
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 681cd3369860f4eada49a387cbff93030f759c95
|
26
contrib/simdjson-cmake/CMakeLists.txt
Normal file
26
contrib/simdjson-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,26 @@
|
||||
# Builds the bundled simdjson sources as the library target named by ${SIMDJSON_LIBRARY}.
#
# Inputs (set by cmake/find_simdjson.cmake):
#   SIMDJSON_LIBRARY      - name of the target to create
#   SIMDJSON_INCLUDE_DIR  - simdjson public include directory; sources are taken from ../src
#   MAKE_STATIC_LIBRARIES - selects STATIC vs SHARED
#   HAVE_AVX2             - must be true; the sources are compiled with AVX2/BMI/PCLMUL flags

if (NOT HAVE_AVX2)
    message (FATAL_ERROR "No AVX2 support")
endif ()

# Fail with a readable message instead of a confusing add_library() error
# if this directory is added without the find module having run.
if (NOT SIMDJSON_LIBRARY OR NOT SIMDJSON_INCLUDE_DIR)
    message (FATAL_ERROR "SIMDJSON_LIBRARY and SIMDJSON_INCLUDE_DIR must be set (see cmake/find_simdjson.cmake)")
endif ()

if (MAKE_STATIC_LIBRARIES)
    set (SIMDJSON_LIB_TYPE STATIC)
    message (STATUS "Building static library ${SIMDJSON_LIBRARY}")
else ()
    set (SIMDJSON_LIB_TYPE SHARED)
    message (STATUS "Building dynamic library ${SIMDJSON_LIBRARY}")
endif ()

set (SIMDJSON_SRC_DIR "${SIMDJSON_INCLUDE_DIR}/../src")

# Paths are quoted so that a source tree located under a directory
# containing spaces or semicolons is still passed as single arguments.
set (SIMDJSON_SRC
    "${SIMDJSON_SRC_DIR}/jsonioutil.cpp"
    "${SIMDJSON_SRC_DIR}/jsonminifier.cpp"
    "${SIMDJSON_SRC_DIR}/jsonparser.cpp"
    "${SIMDJSON_SRC_DIR}/stage1_find_marks.cpp"
    "${SIMDJSON_SRC_DIR}/stage2_build_tape.cpp"
    "${SIMDJSON_SRC_DIR}/parsedjson.cpp"
    "${SIMDJSON_SRC_DIR}/parsedjsoniterator.cpp"
)

add_library (${SIMDJSON_LIBRARY} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC})

# PUBLIC: the headers are needed both to build the library and by every target that links it,
# so the include path is propagated as a usage requirement instead of being repeated by consumers.
target_include_directories (${SIMDJSON_LIBRARY} PUBLIC "${SIMDJSON_INCLUDE_DIR}")

# Instruction-set flags are only required to compile the library's own sources.
target_compile_options (${SIMDJSON_LIBRARY} PRIVATE -mavx2 -mbmi -mbmi2 -mpclmul)
|
@ -1,7 +1,6 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/OptimizedRegularExpression.h>
|
||||
|
||||
|
||||
#define MIN_LENGTH_FOR_STRSTR 3
|
||||
#define MAX_SUBPATTERNS 5
|
||||
|
||||
@ -211,20 +210,18 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
{
|
||||
if (!has_alternative_on_depth_0)
|
||||
{
|
||||
/** We choose the non-alternative substring of the maximum length, among the prefixes,
|
||||
* or a non-alternative substring of maximum length.
|
||||
*/
|
||||
/// We choose the non-alternative substring of the maximum length for first search.
|
||||
|
||||
/// Tuning for typical usage domain
|
||||
auto tuning_strings_condition = [](const std::string & str)
|
||||
{
|
||||
return str != "://" && str != "http://" && str != "www" && str != "Windows ";
|
||||
};
|
||||
size_t max_length = 0;
|
||||
Substrings::const_iterator candidate_it = trivial_substrings.begin();
|
||||
for (Substrings::const_iterator it = trivial_substrings.begin(); it != trivial_substrings.end(); ++it)
|
||||
{
|
||||
if (((it->second == 0 && candidate_it->second != 0)
|
||||
|| ((it->second == 0) == (candidate_it->second == 0) && it->first.size() > max_length))
|
||||
/// Tuning for typical usage domain
|
||||
&& (it->first.size() > strlen("://") || strncmp(it->first.data(), "://", strlen("://")))
|
||||
&& (it->first.size() > strlen("http://") || strncmp(it->first.data(), "http", strlen("http")))
|
||||
&& (it->first.size() > strlen("www.") || strncmp(it->first.data(), "www", strlen("www")))
|
||||
&& (it->first.size() > strlen("Windows ") || strncmp(it->first.data(), "Windows ", strlen("Windows "))))
|
||||
if (it->first.size() > max_length && tuning_strings_condition(it->first))
|
||||
{
|
||||
max_length = it->first.size();
|
||||
candidate_it = it;
|
||||
|
@ -122,6 +122,9 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
|
||||
|
||||
LockHolder res(new LockHolderImpl(shared_from_this(), it_group, it_client));
|
||||
|
||||
/// Wait for a notification until our group becomes the first one in the queue.
|
||||
it_group->cv.wait(lock, [&] () { return it_group == queue.begin(); });
|
||||
|
||||
/// Insert myself (weak_ptr to the holder) to threads set to implement recursive lock
|
||||
thread_to_holder.emplace(this_thread_id, res);
|
||||
res->thread_id = this_thread_id;
|
||||
@ -130,17 +133,6 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
|
||||
query_id_to_holder.emplace(query_id, res);
|
||||
res->query_id = query_id;
|
||||
|
||||
/// We are first, we should not wait anything
|
||||
/// If we are not the first client in the group, a notification could be already sent
|
||||
if (it_group == queue.begin())
|
||||
{
|
||||
finalize_metrics();
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Wait a notification
|
||||
it_group->cv.wait(lock, [&] () { return it_group == queue.begin(); });
|
||||
|
||||
finalize_metrics();
|
||||
return res;
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
@ -25,6 +25,7 @@
|
||||
#cmakedefine01 USE_BROTLI
|
||||
#cmakedefine01 USE_SSL
|
||||
#cmakedefine01 USE_HYPERSCAN
|
||||
#cmakedefine01 USE_SIMDJSON
|
||||
#cmakedefine01 USE_LFALLOC
|
||||
#cmakedefine01 USE_LFALLOC_RANDOM_HINT
|
||||
|
||||
|
@ -123,3 +123,7 @@
|
||||
#else
|
||||
#define OPTIMIZE(x)
|
||||
#endif
|
||||
|
||||
/// This number is only used for compatibility between distributed versions.
|
||||
/// It could be any magic number.
|
||||
#define DBMS_DISTRIBUTED_SENDS_MAGIC_NUMBER 0xCAFECABE
|
||||
|
@ -109,5 +109,4 @@ void Settings::addProgramOptions(boost::program_options::options_description & o
|
||||
Settings::getDescription(index).data)));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
#include <DataStreams/AggregatingSortedBlockInputStream.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -22,7 +24,7 @@ AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream(
|
||||
ColumnWithTypeAndName & column = header.safeGetByPosition(i);
|
||||
|
||||
/// We leave only states of aggregate functions.
|
||||
if (!startsWith(column.type->getName(), "AggregateFunction"))
|
||||
if (!dynamic_cast<const DataTypeAggregateFunction *>(column.type.get()) && !dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(column.type->getCustomName()))
|
||||
{
|
||||
column_numbers_not_to_aggregate.push_back(i);
|
||||
continue;
|
||||
@ -40,7 +42,17 @@ AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream(
|
||||
continue;
|
||||
}
|
||||
|
||||
column_numbers_to_aggregate.push_back(i);
|
||||
if (auto simple_aggr = dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(column.type->getCustomName()))
|
||||
{
|
||||
// simple aggregate function
|
||||
SimpleAggregateDescription desc{simple_aggr->getFunction(), i};
|
||||
columns_to_simple_aggregate.emplace_back(std::move(desc));
|
||||
}
|
||||
else
|
||||
{
|
||||
// standard aggregate function
|
||||
column_numbers_to_aggregate.push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -91,7 +103,11 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
|
||||
|
||||
/// if there are enough rows accumulated and the last one is calculated completely
|
||||
if (key_differs && merged_rows >= max_block_size)
|
||||
{
|
||||
/// Write the simple aggregation result for the previous group.
|
||||
insertSimpleAggregationResult(merged_columns);
|
||||
return;
|
||||
}
|
||||
|
||||
queue.pop();
|
||||
|
||||
@ -110,6 +126,14 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
|
||||
for (auto & column_to_aggregate : columns_to_aggregate)
|
||||
column_to_aggregate->insertDefault();
|
||||
|
||||
/// Write the simple aggregation result for the previous group.
|
||||
if (merged_rows > 0)
|
||||
insertSimpleAggregationResult(merged_columns);
|
||||
|
||||
/// Reset simple aggregation states for next row
|
||||
for (auto & desc : columns_to_simple_aggregate)
|
||||
desc.createState();
|
||||
|
||||
++merged_rows;
|
||||
}
|
||||
|
||||
@ -127,6 +151,9 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
|
||||
}
|
||||
}
|
||||
|
||||
/// Write the simple aggregation result for the previous group.
|
||||
insertSimpleAggregationResult(merged_columns);
|
||||
|
||||
finished = true;
|
||||
}
|
||||
|
||||
@ -138,6 +165,21 @@ void AggregatingSortedBlockInputStream::addRow(SortCursor & cursor)
|
||||
size_t j = column_numbers_to_aggregate[i];
|
||||
columns_to_aggregate[i]->insertMergeFrom(*cursor->all_columns[j], cursor->pos);
|
||||
}
|
||||
|
||||
for (auto & desc : columns_to_simple_aggregate)
|
||||
{
|
||||
auto & col = cursor->all_columns[desc.column_number];
|
||||
desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
/// For every SimpleAggregateFunction column: append the value held in its aggregation state
/// to the matching output column, then destroy that state.
void AggregatingSortedBlockInputStream::insertSimpleAggregationResult(MutableColumns & merged_columns)
{
    const size_t descriptions_count = columns_to_simple_aggregate.size();

    for (size_t idx = 0; idx < descriptions_count; ++idx)
    {
        auto & description = columns_to_simple_aggregate[idx];
        auto & destination = *merged_columns[description.column_number];

        description.function->insertResultInto(description.state.data(), destination);
        description.destroyState();
    }
}
|
||||
|
||||
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <DataStreams/MergingSortedBlockInputStream.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Columns/ColumnAggregateFunction.h>
|
||||
#include <Common/AlignedBuffer.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -38,10 +39,13 @@ private:
|
||||
/// Read finished.
|
||||
bool finished = false;
|
||||
|
||||
struct SimpleAggregateDescription;
|
||||
|
||||
/// Columns with which numbers should be aggregated.
|
||||
ColumnNumbers column_numbers_to_aggregate;
|
||||
ColumnNumbers column_numbers_not_to_aggregate;
|
||||
std::vector<ColumnAggregateFunction *> columns_to_aggregate;
|
||||
std::vector<SimpleAggregateDescription> columns_to_simple_aggregate;
|
||||
|
||||
RowRef current_key; /// The current primary key.
|
||||
RowRef next_key; /// The primary key of the next row.
|
||||
@ -54,6 +58,53 @@ private:
|
||||
/** Extract all states of aggregate functions and merge them with the current group.
|
||||
*/
|
||||
void addRow(SortCursor & cursor);
|
||||
|
||||
/** Insert all values of current row for simple aggregate functions
|
||||
*/
|
||||
void insertSimpleAggregationResult(MutableColumns & merged_columns);
|
||||
|
||||
/// Stores information for aggregation of SimpleAggregateFunction columns
/// One instance describes one column: the aggregate function, the column index it applies to,
/// and a buffer holding a single aggregation state that is created and destroyed explicitly.
|
||||
struct SimpleAggregateDescription
|
||||
{
|
||||
/// An aggregate function 'anyLast', 'sum'...
|
||||
AggregateFunctionPtr function;
|
||||
/// Cached result of function->getAddressOfAddFunction(), set in the two-argument constructor.
IAggregateFunction::AddFunc add_function;
|
||||
/// Index of the described column (used to index cursor columns and merged columns).
size_t column_number;
|
||||
/// Storage for one aggregation state, sized and aligned from function->sizeOfData() / alignOfData().
AlignedBuffer state;
|
||||
/// True between a successful createState() and the matching destroyState().
bool created = false;
|
||||
|
||||
/// Binds the description to an aggregate function and a column index and reserves the state buffer.
/// The state itself is not constructed here; call createState() before adding values.
SimpleAggregateDescription(const AggregateFunctionPtr & function_, const size_t column_number_) : function(function_), column_number(column_number_)
|
||||
{
|
||||
add_function = function->getAddressOfAddFunction();
|
||||
state.reset(function->sizeOfData(), function->alignOfData());
|
||||
}
|
||||
|
||||
/// Constructs the aggregation state inside the buffer. Does nothing if a state already exists.
void createState()
|
||||
{
|
||||
if (created)
|
||||
return;
|
||||
function->create(state.data());
|
||||
created = true;
|
||||
}
|
||||
|
||||
/// Destroys the aggregation state if one exists. Does nothing otherwise, so it is safe to call twice.
void destroyState()
|
||||
{
|
||||
if (!created)
|
||||
return;
|
||||
function->destroy(state.data());
|
||||
created = false;
|
||||
}
|
||||
|
||||
/// Explicitly destroy aggregation state if the stream is terminated
|
||||
~SimpleAggregateDescription()
|
||||
{
|
||||
destroyState();
|
||||
}
|
||||
|
||||
/// The default constructor leaves add_function and column_number without an initial value.
SimpleAggregateDescription() = default;
|
||||
/// NOTE(review): the defaulted move copies `created` without clearing it in the source object.
/// If a description is ever moved while created == true, the moved-from object's destructor will
/// still call destroyState(). Whether that is safe depends on the moved-from behaviour of
/// AggregateFunctionPtr and AlignedBuffer, which is not visible here - TODO confirm.
SimpleAggregateDescription(SimpleAggregateDescription &&) = default;
|
||||
/// Copying is forbidden: a copy would presumably refer to or duplicate a state owned by the original - verify.
SimpleAggregateDescription(const SimpleAggregateDescription &) = delete;
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <cstddef>
|
||||
#include <Core/Types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -10,21 +12,21 @@ class WriteBuffer;
|
||||
struct FormatSettings;
|
||||
class IColumn;
|
||||
|
||||
/** Further refinment of the properties of data type.
|
||||
*
|
||||
* Contains methods for serialization/deserialization.
|
||||
* Implementations of this interface represent a data type domain (example: IPv4)
|
||||
* which is a refinement of the exsitgin type with a name and specific text
|
||||
* representation.
|
||||
*
|
||||
* IDataTypeDomain is totally immutable object. You can always share them.
|
||||
/** Allow to customize an existing data type and set a different name and/or text serialization/deserialization methods.
|
||||
* See use in IPv4 and IPv6 data types, and also in SimpleAggregateFunction.
|
||||
*/
|
||||
class IDataTypeDomain
|
||||
class IDataTypeCustomName
|
||||
{
|
||||
public:
|
||||
virtual ~IDataTypeDomain() {}
|
||||
virtual ~IDataTypeCustomName() {}
|
||||
|
||||
virtual const char* getName() const = 0;
|
||||
virtual String getName() const = 0;
|
||||
};
|
||||
|
||||
class IDataTypeCustomTextSerialization
|
||||
{
|
||||
public:
|
||||
virtual ~IDataTypeCustomTextSerialization() {}
|
||||
|
||||
/** Text serialization for displaying on a terminal or saving into a text file, and the like.
|
||||
* Without escaping or quoting.
|
||||
@ -56,4 +58,31 @@ public:
|
||||
virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const = 0;
|
||||
};
|
||||
|
||||
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
|
||||
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
|
||||
|
||||
/** Describe a data type customization
|
||||
*/
|
||||
struct DataTypeCustomDesc
|
||||
{
|
||||
DataTypeCustomNamePtr name;
|
||||
DataTypeCustomTextSerializationPtr text_serialization;
|
||||
|
||||
DataTypeCustomDesc(DataTypeCustomNamePtr name_, DataTypeCustomTextSerializationPtr text_serialization_)
|
||||
: name(std::move(name_)), text_serialization(std::move(text_serialization_)) {}
|
||||
};
|
||||
|
||||
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
|
||||
|
||||
/** A simple implementation of IDataTypeCustomName
|
||||
*/
|
||||
class DataTypeCustomFixedName : public IDataTypeCustomName
|
||||
{
|
||||
private:
|
||||
String name;
|
||||
public:
|
||||
DataTypeCustomFixedName(String name_) : name(name_) {}
|
||||
String getName() const override { return name; }
|
||||
};
|
||||
|
||||
} // namespace DB
|
@ -1,9 +1,9 @@
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/formatIPv6.h>
|
||||
#include <DataTypes/DataTypeDomainWithSimpleSerialization.h>
|
||||
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/IDataTypeDomain.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/FunctionsCoding.h>
|
||||
|
||||
@ -20,20 +20,15 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
class DataTypeDomainIPv4 : public DataTypeDomainWithSimpleSerialization
|
||||
class DataTypeCustomIPv4Serialization : public DataTypeCustomSimpleTextSerialization
|
||||
{
|
||||
public:
|
||||
const char * getName() const override
|
||||
{
|
||||
return "IPv4";
|
||||
}
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
|
||||
{
|
||||
const auto col = checkAndGetColumn<ColumnUInt32>(&column);
|
||||
if (!col)
|
||||
{
|
||||
throw Exception(String(getName()) + " domain can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception("IPv4 type can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
|
||||
@ -48,7 +43,7 @@ public:
|
||||
ColumnUInt32 * col = typeid_cast<ColumnUInt32 *>(&column);
|
||||
if (!col)
|
||||
{
|
||||
throw Exception(String(getName()) + " domain can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception("IPv4 type can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
|
||||
@ -63,20 +58,16 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class DataTypeDomainIPv6 : public DataTypeDomainWithSimpleSerialization
|
||||
class DataTypeCustomIPv6Serialization : public DataTypeCustomSimpleTextSerialization
|
||||
{
|
||||
public:
|
||||
const char * getName() const override
|
||||
{
|
||||
return "IPv6";
|
||||
}
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
|
||||
{
|
||||
const auto col = checkAndGetColumn<ColumnFixedString>(&column);
|
||||
if (!col)
|
||||
{
|
||||
throw Exception(String(getName()) + " domain can only serialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception("IPv6 type domain can only serialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
|
||||
@ -91,7 +82,7 @@ public:
|
||||
ColumnFixedString * col = typeid_cast<ColumnFixedString *>(&column);
|
||||
if (!col)
|
||||
{
|
||||
throw Exception(String(getName()) + " domain can only deserialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception("IPv6 type domain can only deserialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
|
||||
@ -100,7 +91,7 @@ public:
|
||||
std::string ipv6_value(IPV6_BINARY_LENGTH, '\0');
|
||||
if (!parseIPv6(buffer, reinterpret_cast<unsigned char *>(ipv6_value.data())))
|
||||
{
|
||||
throw Exception(String("Invalid ") + getName() + " value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
|
||||
throw Exception("Invalid IPv6 value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
|
||||
}
|
||||
|
||||
col->insertString(ipv6_value);
|
||||
@ -111,8 +102,17 @@ public:
|
||||
|
||||
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
|
||||
{
|
||||
factory.registerDataTypeDomain("UInt32", std::make_unique<DataTypeDomainIPv4>());
|
||||
factory.registerDataTypeDomain("FixedString(16)", std::make_unique<DataTypeDomainIPv6>());
|
||||
factory.registerSimpleDataTypeCustom("IPv4", []
|
||||
{
|
||||
return std::make_pair(DataTypeFactory::instance().get("UInt32"),
|
||||
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv4"), std::make_unique<DataTypeCustomIPv4Serialization>()));
|
||||
});
|
||||
|
||||
factory.registerSimpleDataTypeCustom("IPv6", []
|
||||
{
|
||||
return std::make_pair(DataTypeFactory::instance().get("FixedString(16)"),
|
||||
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv6"), std::make_unique<DataTypeCustomIPv6Serialization>()));
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace DB
|
137
dbms/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp
Normal file
137
dbms/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp
Normal file
@ -0,0 +1,137 @@
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <Columns/ColumnAggregateFunction.h>
|
||||
|
||||
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SYNTAX_ERROR;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
static const std::vector<String> supported_functions{"any", "anyLast", "min", "max", "sum"};
|
||||
|
||||
|
||||
/// Builds the textual type name:
///   SimpleAggregateFunction(<function>[(<param>, ...)], <argument type>, ...)
/// The parenthesized parameter list is emitted only when there is at least one parameter.
String DataTypeCustomSimpleAggregateFunction::getName() const
{
    std::stringstream out;
    out << "SimpleAggregateFunction(" << function->getName();

    const size_t parameters_count = parameters.size();
    if (parameters_count != 0)
    {
        out << "(";
        for (size_t pos = 0; pos != parameters_count; ++pos)
        {
            /// Separator goes before every parameter except the first one.
            if (pos != 0)
                out << ", ";
            out << applyVisitor(DB::FieldVisitorToString(), parameters[pos]);
        }
        out << ")";
    }

    for (const auto & type : argument_types)
        out << ", " << type->getName();

    out << ")";
    return out.str();
}
|
||||
|
||||
|
||||
static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
|
||||
{
|
||||
String function_name;
|
||||
AggregateFunctionPtr function;
|
||||
DataTypes argument_types;
|
||||
Array params_row;
|
||||
|
||||
if (!arguments || arguments->children.empty())
|
||||
throw Exception("Data type SimpleAggregateFunction requires parameters: "
|
||||
"name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (const ASTFunction * parametric = arguments->children[0]->as<ASTFunction>())
|
||||
{
|
||||
if (parametric->parameters)
|
||||
throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR);
|
||||
function_name = parametric->name;
|
||||
|
||||
const ASTs & parameters = parametric->arguments->as<ASTExpressionList &>().children;
|
||||
params_row.resize(parameters.size());
|
||||
|
||||
for (size_t i = 0; i < parameters.size(); ++i)
|
||||
{
|
||||
const ASTLiteral * lit = parameters[i]->as<ASTLiteral>();
|
||||
if (!lit)
|
||||
throw Exception("Parameters to aggregate functions must be literals",
|
||||
ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS);
|
||||
|
||||
params_row[i] = lit->value;
|
||||
}
|
||||
}
|
||||
else if (auto opt_name = getIdentifierName(arguments->children[0]))
|
||||
{
|
||||
function_name = *opt_name;
|
||||
}
|
||||
else if (arguments->children[0]->as<ASTLiteral>())
|
||||
{
|
||||
throw Exception("Aggregate function name for data type SimpleAggregateFunction must be passed as identifier (without quotes) or function",
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
else
|
||||
throw Exception("Unexpected AST element passed as aggregate function name for data type SimpleAggregateFunction. Must be identifier or function.",
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
for (size_t i = 1; i < arguments->children.size(); ++i)
|
||||
argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]));
|
||||
|
||||
if (function_name.empty())
|
||||
throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row);
|
||||
|
||||
// check function
|
||||
if (std::find(std::begin(supported_functions), std::end(supported_functions), function->getName()) == std::end(supported_functions))
|
||||
{
|
||||
throw Exception("Unsupported aggregate function " + function->getName() + ", supported functions are " + boost::algorithm::join(supported_functions, ","),
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
|
||||
DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName());
|
||||
|
||||
if (!function->getReturnType()->equals(*removeLowCardinality(storage_type)))
|
||||
{
|
||||
throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getReturnType()->getName() + " and column storage type " + storage_type->getName(),
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
|
||||
DataTypeCustomNamePtr custom_name = std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, argument_types, params_row);
|
||||
|
||||
return std::make_pair(storage_type, std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
|
||||
}
|
||||
|
||||
/// Registers the custom data type under the name "SimpleAggregateFunction" in the given factory,
/// using create() as the callback that builds the type from its AST arguments.
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory)
|
||||
{
|
||||
factory.registerDataTypeCustom("SimpleAggregateFunction", create);
|
||||
}
|
||||
|
||||
}
|
42
dbms/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h
Normal file
42
dbms/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h
Normal file
@ -0,0 +1,42 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard
|
||||
* data type but when rows are merged, an aggregation function is applied.
|
||||
*
|
||||
* The aggregation function is limited to simple functions whose merge state is the final result:
|
||||
* any, anyLast, min, max, sum
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* SimpleAggregateFunction(sum, Nullable(Float64))
|
||||
* SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String)))
|
||||
* SimpleAggregateFunction(anyLast, IPv4)
|
||||
*
|
||||
* Technically, a standard IDataType is instantiated and customized with IDataTypeCustomName and DataTypeCustomDesc.
|
||||
*/
|
||||
|
||||
class DataTypeCustomSimpleAggregateFunction : public IDataTypeCustomName
|
||||
{
|
||||
private:
|
||||
const AggregateFunctionPtr function;
|
||||
const DataTypes argument_types;
|
||||
const Array parameters;
|
||||
|
||||
public:
|
||||
DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_)
|
||||
: function(function_), argument_types(argument_types_), parameters(parameters_) {}
|
||||
|
||||
const AggregateFunctionPtr getFunction() const { return function; }
|
||||
String getName() const override;
|
||||
};
|
||||
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
#include <DataTypes/DataTypeDomainWithSimpleSerialization.h>
|
||||
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
|
||||
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
@ -9,7 +9,7 @@ namespace
|
||||
{
|
||||
using namespace DB;
|
||||
|
||||
static String serializeToString(const DataTypeDomainWithSimpleSerialization & domain, const IColumn & column, size_t row_num, const FormatSettings & settings)
|
||||
static String serializeToString(const DataTypeCustomSimpleTextSerialization & domain, const IColumn & column, size_t row_num, const FormatSettings & settings)
|
||||
{
|
||||
WriteBufferFromOwnString buffer;
|
||||
domain.serializeText(column, row_num, buffer, settings);
|
||||
@ -17,7 +17,7 @@ static String serializeToString(const DataTypeDomainWithSimpleSerialization & do
|
||||
return buffer.str();
|
||||
}
|
||||
|
||||
static void deserializeFromString(const DataTypeDomainWithSimpleSerialization & domain, IColumn & column, const String & s, const FormatSettings & settings)
|
||||
static void deserializeFromString(const DataTypeCustomSimpleTextSerialization & domain, IColumn & column, const String & s, const FormatSettings & settings)
|
||||
{
|
||||
ReadBufferFromString istr(s);
|
||||
domain.deserializeText(column, istr, settings);
|
||||
@ -28,59 +28,59 @@ static void deserializeFromString(const DataTypeDomainWithSimpleSerialization &
|
||||
namespace DB
|
||||
{
|
||||
|
||||
DataTypeDomainWithSimpleSerialization::~DataTypeDomainWithSimpleSerialization()
|
||||
DataTypeCustomSimpleTextSerialization::~DataTypeCustomSimpleTextSerialization()
|
||||
{
|
||||
}
|
||||
|
||||
void DataTypeDomainWithSimpleSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
void DataTypeCustomSimpleTextSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeEscapedString(serializeToString(*this, column, row_num, settings), ostr);
|
||||
}
|
||||
|
||||
void DataTypeDomainWithSimpleSerialization::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
void DataTypeCustomSimpleTextSerialization::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
String str;
|
||||
readEscapedString(str, istr);
|
||||
deserializeFromString(*this, column, str, settings);
|
||||
}
|
||||
|
||||
void DataTypeDomainWithSimpleSerialization::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
void DataTypeCustomSimpleTextSerialization::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeQuotedString(serializeToString(*this, column, row_num, settings), ostr);
|
||||
}
|
||||
|
||||
void DataTypeDomainWithSimpleSerialization::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
void DataTypeCustomSimpleTextSerialization::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
String str;
|
||||
readQuotedString(str, istr);
|
||||
deserializeFromString(*this, column, str, settings);
|
||||
}
|
||||
|
||||
void DataTypeDomainWithSimpleSerialization::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
void DataTypeCustomSimpleTextSerialization::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeCSVString(serializeToString(*this, column, row_num, settings), ostr);
|
||||
}
|
||||
|
||||
void DataTypeDomainWithSimpleSerialization::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
void DataTypeCustomSimpleTextSerialization::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
String str;
|
||||
readCSVString(str, istr, settings.csv);
|
||||
deserializeFromString(*this, column, str, settings);
|
||||
}
|
||||
|
||||
void DataTypeDomainWithSimpleSerialization::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
void DataTypeCustomSimpleTextSerialization::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeJSONString(serializeToString(*this, column, row_num, settings), ostr, settings);
|
||||
}
|
||||
|
||||
void DataTypeDomainWithSimpleSerialization::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
void DataTypeCustomSimpleTextSerialization::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
String str;
|
||||
readJSONString(str, istr);
|
||||
deserializeFromString(*this, column, str, settings);
|
||||
}
|
||||
|
||||
void DataTypeDomainWithSimpleSerialization::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
void DataTypeCustomSimpleTextSerialization::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeXMLString(serializeToString(*this, column, row_num, settings), ostr);
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/IDataTypeDomain.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -10,12 +10,12 @@ class WriteBuffer;
|
||||
struct FormatSettings;
|
||||
class IColumn;
|
||||
|
||||
/** Simple DataTypeDomain that uses serializeText/deserializeText
|
||||
/** Simple IDataTypeCustomTextSerialization that uses serializeText/deserializeText
|
||||
* for all serialization and deserialization. */
|
||||
class DataTypeDomainWithSimpleSerialization : public IDataTypeDomain
|
||||
class DataTypeCustomSimpleTextSerialization : public IDataTypeCustomTextSerialization
|
||||
{
|
||||
public:
|
||||
virtual ~DataTypeDomainWithSimpleSerialization() override;
|
||||
virtual ~DataTypeCustomSimpleTextSerialization() override;
|
||||
|
||||
// Methods that subclasses must override in order to get full serialization/deserialization support.
|
||||
virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0;
|
@ -1,5 +1,5 @@
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/IDataTypeDomain.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
@ -115,19 +115,23 @@ void DataTypeFactory::registerSimpleDataType(const String & name, SimpleCreator
|
||||
}, case_sensitiveness);
|
||||
}
|
||||
|
||||
void DataTypeFactory::registerDataTypeDomain(const String & type_name, DataTypeDomainPtr domain, CaseSensitiveness case_sensitiveness)
|
||||
void DataTypeFactory::registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness)
|
||||
{
|
||||
all_domains.reserve(all_domains.size() + 1);
|
||||
|
||||
auto data_type = get(type_name);
|
||||
setDataTypeDomain(*data_type, *domain);
|
||||
|
||||
registerDataType(domain->getName(), [data_type](const ASTPtr & /*ast*/)
|
||||
registerDataType(family_name, [creator](const ASTPtr & ast)
|
||||
{
|
||||
return data_type;
|
||||
}, case_sensitiveness);
|
||||
auto res = creator(ast);
|
||||
res.first->setCustomization(std::move(res.second));
|
||||
|
||||
all_domains.emplace_back(std::move(domain));
|
||||
return res.first;
|
||||
}, case_sensitiveness);
|
||||
}
|
||||
|
||||
void DataTypeFactory::registerSimpleDataTypeCustom(const String &name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness)
|
||||
{
|
||||
registerDataTypeCustom(name, [creator](const ASTPtr & /*ast*/)
|
||||
{
|
||||
return creator();
|
||||
}, case_sensitiveness);
|
||||
}
|
||||
|
||||
const DataTypeFactory::Creator& DataTypeFactory::findCreatorByName(const String & family_name) const
|
||||
@ -153,11 +157,6 @@ const DataTypeFactory::Creator& DataTypeFactory::findCreatorByName(const String
|
||||
throw Exception("Unknown data type family: " + family_name, ErrorCodes::UNKNOWN_TYPE);
|
||||
}
|
||||
|
||||
void DataTypeFactory::setDataTypeDomain(const IDataType & data_type, const IDataTypeDomain & domain)
|
||||
{
|
||||
data_type.setDomain(&domain);
|
||||
}
|
||||
|
||||
void registerDataTypeNumbers(DataTypeFactory & factory);
|
||||
void registerDataTypeDecimal(DataTypeFactory & factory);
|
||||
void registerDataTypeDate(DataTypeFactory & factory);
|
||||
@ -175,6 +174,7 @@ void registerDataTypeNested(DataTypeFactory & factory);
|
||||
void registerDataTypeInterval(DataTypeFactory & factory);
|
||||
void registerDataTypeLowCardinality(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
|
||||
|
||||
|
||||
DataTypeFactory::DataTypeFactory()
|
||||
@ -196,6 +196,7 @@ DataTypeFactory::DataTypeFactory()
|
||||
registerDataTypeInterval(*this);
|
||||
registerDataTypeLowCardinality(*this);
|
||||
registerDataTypeDomainIPv4AndIPv6(*this);
|
||||
registerDataTypeDomainSimpleAggregateFunction(*this);
|
||||
}
|
||||
|
||||
DataTypeFactory::~DataTypeFactory()
|
||||
|
@ -17,9 +17,6 @@ namespace DB
|
||||
class IDataType;
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
|
||||
class IDataTypeDomain;
|
||||
using DataTypeDomainPtr = std::unique_ptr<const IDataTypeDomain>;
|
||||
|
||||
|
||||
/** Creates a data type by name of data type family and parameters.
|
||||
*/
|
||||
@ -28,6 +25,8 @@ class DataTypeFactory final : public ext::singleton<DataTypeFactory>, public IFa
|
||||
private:
|
||||
using SimpleCreator = std::function<DataTypePtr()>;
|
||||
using DataTypesDictionary = std::unordered_map<String, Creator>;
|
||||
using CreatorWithCustom = std::function<std::pair<DataTypePtr,DataTypeCustomDescPtr>(const ASTPtr & parameters)>;
|
||||
using SimpleCreatorWithCustom = std::function<std::pair<DataTypePtr,DataTypeCustomDescPtr>()>;
|
||||
|
||||
public:
|
||||
DataTypePtr get(const String & full_name) const;
|
||||
@ -40,11 +39,13 @@ public:
|
||||
/// Register a simple data type that has no parameters.
|
||||
void registerSimpleDataType(const String & name, SimpleCreator creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
|
||||
|
||||
// Register a domain - a refinement of existing type.
|
||||
void registerDataTypeDomain(const String & type_name, DataTypeDomainPtr domain, CaseSensitiveness case_sensitiveness = CaseSensitive);
|
||||
/// Register a customized type family
|
||||
void registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
|
||||
|
||||
/// Register a simple customized data type
|
||||
void registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
|
||||
|
||||
private:
|
||||
static void setDataTypeDomain(const IDataType & data_type, const IDataTypeDomain & domain);
|
||||
const Creator& findCreatorByName(const String & family_name) const;
|
||||
|
||||
private:
|
||||
@ -53,9 +54,6 @@ private:
|
||||
/// Case insensitive data types will be additionally added here with lowercased name.
|
||||
DataTypesDictionary case_insensitive_data_types;
|
||||
|
||||
// All domains are owned by factory and shared amongst DataType instances.
|
||||
std::vector<DataTypeDomainPtr> all_domains;
|
||||
|
||||
DataTypeFactory();
|
||||
~DataTypeFactory() override;
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/IDataTypeDomain.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
|
||||
|
||||
@ -23,8 +23,7 @@ namespace ErrorCodes
|
||||
extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
|
||||
}
|
||||
|
||||
IDataType::IDataType()
|
||||
: domain(nullptr)
|
||||
IDataType::IDataType() : custom_name(nullptr), custom_text_serialization(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
@ -34,9 +33,9 @@ IDataType::~IDataType()
|
||||
|
||||
String IDataType::getName() const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_name)
|
||||
{
|
||||
return domain->getName();
|
||||
return custom_name->getName();
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -142,9 +141,9 @@ void IDataType::insertDefaultInto(IColumn & column) const
|
||||
|
||||
void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->serializeTextEscaped(column, row_num, ostr, settings);
|
||||
custom_text_serialization->serializeTextEscaped(column, row_num, ostr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -154,9 +153,9 @@ void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, W
|
||||
|
||||
void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->deserializeTextEscaped(column, istr, settings);
|
||||
custom_text_serialization->deserializeTextEscaped(column, istr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -166,9 +165,9 @@ void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, co
|
||||
|
||||
void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->serializeTextQuoted(column, row_num, ostr, settings);
|
||||
custom_text_serialization->serializeTextQuoted(column, row_num, ostr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -178,9 +177,9 @@ void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, Wr
|
||||
|
||||
void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->deserializeTextQuoted(column, istr, settings);
|
||||
custom_text_serialization->deserializeTextQuoted(column, istr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -190,9 +189,9 @@ void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, con
|
||||
|
||||
void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->serializeTextCSV(column, row_num, ostr, settings);
|
||||
custom_text_serialization->serializeTextCSV(column, row_num, ostr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -202,9 +201,9 @@ void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, Write
|
||||
|
||||
void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->deserializeTextCSV(column, istr, settings);
|
||||
custom_text_serialization->deserializeTextCSV(column, istr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -214,9 +213,9 @@ void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const
|
||||
|
||||
void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->serializeText(column, row_num, ostr, settings);
|
||||
custom_text_serialization->serializeText(column, row_num, ostr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -226,9 +225,9 @@ void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuf
|
||||
|
||||
void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->serializeTextJSON(column, row_num, ostr, settings);
|
||||
custom_text_serialization->serializeTextJSON(column, row_num, ostr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -238,9 +237,9 @@ void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, Writ
|
||||
|
||||
void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->deserializeTextJSON(column, istr, settings);
|
||||
custom_text_serialization->deserializeTextJSON(column, istr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -250,9 +249,9 @@ void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const
|
||||
|
||||
void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
if (domain)
|
||||
if (custom_text_serialization)
|
||||
{
|
||||
domain->serializeTextXML(column, row_num, ostr, settings);
|
||||
custom_text_serialization->serializeTextXML(column, row_num, ostr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -260,13 +259,14 @@ void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, Write
|
||||
}
|
||||
}
|
||||
|
||||
void IDataType::setDomain(const IDataTypeDomain* const new_domain) const
|
||||
void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
|
||||
{
|
||||
if (domain != nullptr)
|
||||
{
|
||||
throw Exception("Type " + getName() + " already has a domain.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
domain = new_domain;
|
||||
/// replace only if not null
|
||||
if (custom_desc_->name)
|
||||
custom_name = std::move(custom_desc_->name);
|
||||
|
||||
if (custom_desc_->text_serialization)
|
||||
custom_text_serialization = std::move(custom_desc_->text_serialization);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Common/COW.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <Core/Field.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -12,7 +13,6 @@ namespace DB
|
||||
class ReadBuffer;
|
||||
class WriteBuffer;
|
||||
|
||||
class IDataTypeDomain;
|
||||
class IDataType;
|
||||
struct FormatSettings;
|
||||
|
||||
@ -459,18 +459,19 @@ public:
|
||||
|
||||
private:
|
||||
friend class DataTypeFactory;
|
||||
/** Sets domain on existing DataType, can be considered as second phase
|
||||
* of construction explicitly done by DataTypeFactory.
|
||||
* Will throw an exception if domain is already set.
|
||||
/** Customize this DataType
|
||||
*/
|
||||
void setDomain(const IDataTypeDomain* newDomain) const;
|
||||
void setCustomization(DataTypeCustomDescPtr custom_desc_) const;
|
||||
|
||||
private:
|
||||
/** This is mutable to allow setting domain on `const IDataType` post construction,
|
||||
* simplifying creation of domains for all types, without them even knowing
|
||||
* of domain existence.
|
||||
/** This is mutable to allow setting custom name and serialization on `const IDataType` post construction.
|
||||
*/
|
||||
mutable IDataTypeDomain const* domain;
|
||||
mutable DataTypeCustomNamePtr custom_name;
|
||||
mutable DataTypeCustomTextSerializationPtr custom_text_serialization;
|
||||
|
||||
public:
|
||||
const IDataTypeCustomName * getCustomName() const { return custom_name.get(); }
|
||||
const IDataTypeCustomTextSerialization * getCustomTextSerialization() const { return custom_text_serialization.get(); }
|
||||
};
|
||||
|
||||
|
||||
|
@ -69,3 +69,8 @@ if (USE_HYPERSCAN)
|
||||
target_link_libraries (clickhouse_functions PRIVATE ${HYPERSCAN_LIBRARY})
|
||||
target_include_directories (clickhouse_functions SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR})
|
||||
endif ()
|
||||
|
||||
if (USE_SIMDJSON)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY})
|
||||
target_include_directories(clickhouse_functions PRIVATE ${SIMDJSON_INCLUDE_DIR})
|
||||
endif ()
|
||||
|
@ -9,7 +9,7 @@ using StoragePtr = std::shared_ptr<IStorage>;
|
||||
class Join;
|
||||
using JoinPtr = std::shared_ptr<Join>;
|
||||
|
||||
class FunctionJoinGet final : public IFunction, public std::enable_shared_from_this<FunctionJoinGet>
|
||||
class FunctionJoinGet final : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "joinGet";
|
||||
|
@ -406,7 +406,12 @@ private:
|
||||
{
|
||||
const ColumnAggregateFunction * columns[2];
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
columns[i] = typeid_cast<const ColumnAggregateFunction *>(block.getByPosition(arguments[i]).column.get());
|
||||
{
|
||||
if (auto argument_column_const = typeid_cast<const ColumnConst *>(block.getByPosition(arguments[i]).column.get()))
|
||||
columns[i] = typeid_cast<const ColumnAggregateFunction *>(argument_column_const->getDataColumnPtr().get());
|
||||
else
|
||||
columns[i] = typeid_cast<const ColumnAggregateFunction *>(block.getByPosition(arguments[i]).column.get());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
@ -511,7 +516,12 @@ private:
|
||||
{
|
||||
const ColumnAggregateFunction * columns[2];
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
columns[i] = typeid_cast<const ColumnAggregateFunction *>(block.getByPosition(arguments[i]).column.get());
|
||||
{
|
||||
if (auto argument_column_const = typeid_cast<const ColumnConst *>(block.getByPosition(arguments[i]).column.get()))
|
||||
columns[i] = typeid_cast<const ColumnAggregateFunction *>(argument_column_const->getDataColumnPtr().get());
|
||||
else
|
||||
columns[i] = typeid_cast<const ColumnAggregateFunction *>(block.getByPosition(arguments[i]).column.get());
|
||||
}
|
||||
|
||||
auto col_to = ColumnAggregateFunction::create(columns[0]->getAggregateFunction());
|
||||
|
||||
|
378
dbms/src/Functions/FunctionsJSON.cpp
Normal file
378
dbms/src/Functions/FunctionsJSON.cpp
Normal file
@ -0,0 +1,378 @@
|
||||
#include <Functions/FunctionsJSON.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_SIMDJSON
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
template <typename T>
|
||||
class JSONNullableImplBase
|
||||
{
|
||||
public:
|
||||
static DataTypePtr getType() { return std::make_shared<DataTypeNullable>(std::make_shared<T>()); }
|
||||
|
||||
static Field getDefault() { return {}; }
|
||||
};
|
||||
|
||||
/// jsonHas: yields 1 for whatever element the iterator was positioned on.
class JSONHasImpl : public JSONNullableImplBase<DataTypeUInt8>
{
public:
    static constexpr auto name{"jsonHas"};

    /// The iterator itself is not inspected; being called at all means the element exists.
    static Field getValue(ParsedJson::iterator & /*pjh*/)
    {
        return {1};
    }
};
|
||||
|
||||
/// jsonLength: number of elements of a JSON array or number of members of a JSON object.
class JSONLengthImpl : public JSONNullableImplBase<DataTypeUInt64>
{
public:
    static constexpr auto name{"jsonLength"};

    /// Returns the default (empty) Field when the current element is neither an object nor an array.
    /// The iterator is moved into the container while counting.
    static Field getValue(ParsedJson::iterator & pjh)
    {
        if (!pjh.is_object_or_array())
            return getDefault();

        /// Must be remembered before descending: after down() the iterator points at a child.
        const bool is_object = pjh.get_type() == '{';

        size_t size = 0;

        if (pjh.down())
        {
            size += 1;

            while (pjh.next())
                size += 1;
        }

        /// NOTE(review): simdjson's iterator walks an object's keys and values as siblings
        /// of the same scope, so every member was counted twice above - confirm against the
        /// pinned simdjson revision.
        if (is_object)
            size /= 2;

        return {size};
    }
};
|
||||
|
||||
/// jsonType: maps the type tag of the current JSON element to a readable type name.
class JSONTypeImpl : public JSONNullableImplBase<DataTypeString>
{
public:
    static constexpr auto name{"jsonType"};

    /// Both boolean tags ('t' and 'f') map to "Bool"; any unrecognized tag maps to "Unknown".
    static Field getValue(ParsedJson::iterator & pjh)
    {
        switch (pjh.get_type())
        {
            case '{':
                return "Object";
            case '[':
                return "Array";
            case '"':
                return "String";
            case 'd':
                return "Float64";
            case 'l':
                return "Int64";
            case 't':
            case 'f':
                return "Bool";
            case 'n':
                return "Null";
            default:
                return "Unknown";
        }
    }
};
|
||||
|
||||
class JSONExtractImpl
|
||||
{
|
||||
public:
|
||||
static constexpr auto name{"jsonExtract"};
|
||||
|
||||
static DataTypePtr getType(const DataTypePtr & type)
|
||||
{
|
||||
WhichDataType which{type};
|
||||
|
||||
if (which.isNativeUInt() || which.isNativeInt() || which.isFloat() || which.isEnum() || which.isDateOrDateTime()
|
||||
|| which.isStringOrFixedString() || which.isInterval())
|
||||
return std::make_shared<DataTypeNullable>(type);
|
||||
|
||||
if (which.isArray())
|
||||
{
|
||||
auto array_type = static_cast<const DataTypeArray *>(type.get());
|
||||
|
||||
return std::make_shared<DataTypeArray>(getType(array_type->getNestedType()));
|
||||
}
|
||||
|
||||
if (which.isTuple())
|
||||
{
|
||||
auto tuple_type = static_cast<const DataTypeTuple *>(type.get());
|
||||
|
||||
DataTypes types;
|
||||
types.reserve(tuple_type->getElements().size());
|
||||
|
||||
for (const DataTypePtr & element : tuple_type->getElements())
|
||||
{
|
||||
types.push_back(getType(element));
|
||||
}
|
||||
|
||||
return std::make_shared<DataTypeTuple>(std::move(types));
|
||||
}
|
||||
|
||||
throw Exception{"Unsupported return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
static Field getDefault(const DataTypePtr & type)
|
||||
{
|
||||
WhichDataType which{type};
|
||||
|
||||
if (which.isNativeUInt() || which.isNativeInt() || which.isFloat() || which.isEnum() || which.isDateOrDateTime()
|
||||
|| which.isStringOrFixedString() || which.isInterval())
|
||||
return {};
|
||||
|
||||
if (which.isArray())
|
||||
return {Array{}};
|
||||
|
||||
if (which.isTuple())
|
||||
{
|
||||
auto tuple_type = static_cast<const DataTypeTuple *>(type.get());
|
||||
|
||||
Tuple tuple;
|
||||
tuple.toUnderType().reserve(tuple_type->getElements().size());
|
||||
|
||||
for (const DataTypePtr & element : tuple_type->getElements())
|
||||
tuple.toUnderType().push_back(getDefault(element));
|
||||
|
||||
return {tuple};
|
||||
}
|
||||
|
||||
// should not reach
|
||||
throw Exception{"Unsupported return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
static Field getValue(ParsedJson::iterator & pjh, const DataTypePtr & type)
|
||||
{
|
||||
WhichDataType which{type};
|
||||
|
||||
if (which.isNativeUInt() || which.isNativeInt() || which.isEnum() || which.isDateOrDateTime() || which.isInterval())
|
||||
{
|
||||
if (pjh.is_integer())
|
||||
return {pjh.get_integer()};
|
||||
else
|
||||
return getDefault(type);
|
||||
}
|
||||
|
||||
if (which.isFloat())
|
||||
{
|
||||
if (pjh.is_integer())
|
||||
return {static_cast<double>(pjh.get_integer())};
|
||||
else if (pjh.is_double())
|
||||
return {pjh.get_double()};
|
||||
else
|
||||
return getDefault(type);
|
||||
}
|
||||
|
||||
if (which.isStringOrFixedString())
|
||||
{
|
||||
if (pjh.is_string())
|
||||
return {String{pjh.get_string()}};
|
||||
else
|
||||
return getDefault(type);
|
||||
}
|
||||
|
||||
if (which.isArray())
|
||||
{
|
||||
if (!pjh.is_object_or_array())
|
||||
return getDefault(type);
|
||||
|
||||
auto array_type = static_cast<const DataTypeArray *>(type.get());
|
||||
|
||||
Array array;
|
||||
|
||||
bool first = true;
|
||||
|
||||
while (first ? pjh.down() : pjh.next())
|
||||
{
|
||||
first = false;
|
||||
|
||||
ParsedJson::iterator pjh1{pjh};
|
||||
|
||||
array.push_back(getValue(pjh1, array_type->getNestedType()));
|
||||
}
|
||||
|
||||
return {array};
|
||||
}
|
||||
|
||||
if (which.isTuple())
|
||||
{
|
||||
if (!pjh.is_object_or_array())
|
||||
return getDefault(type);
|
||||
|
||||
auto tuple_type = static_cast<const DataTypeTuple *>(type.get());
|
||||
|
||||
Tuple tuple;
|
||||
tuple.toUnderType().reserve(tuple_type->getElements().size());
|
||||
|
||||
bool valid = true;
|
||||
bool first = true;
|
||||
|
||||
for (const DataTypePtr & element : tuple_type->getElements())
|
||||
{
|
||||
if (valid)
|
||||
{
|
||||
valid &= first ? pjh.down() : pjh.next();
|
||||
first = false;
|
||||
|
||||
ParsedJson::iterator pjh1{pjh};
|
||||
|
||||
tuple.toUnderType().push_back(getValue(pjh1, element));
|
||||
}
|
||||
else
|
||||
tuple.toUnderType().push_back(getDefault(element));
|
||||
}
|
||||
|
||||
return {tuple};
|
||||
}
|
||||
|
||||
// should not reach
|
||||
throw Exception{"Unsupported return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
};
|
||||
|
||||
/// jsonExtractUInt: integer value of the current element, with result type Nullable(UInt64).
class JSONExtractUIntImpl : public JSONNullableImplBase<DataTypeUInt64>
{
public:
    static constexpr auto name{"jsonExtractUInt"};

    /// Non-integer elements yield the default (empty) Field.
    static Field getValue(ParsedJson::iterator & pjh)
    {
        if (!pjh.is_integer())
            return getDefault();

        return {pjh.get_integer()};
    }
};
|
||||
|
||||
/// jsonExtractInt: integer value of the current element, with result type Nullable(Int64).
class JSONExtractIntImpl : public JSONNullableImplBase<DataTypeInt64>
{
public:
    static constexpr auto name{"jsonExtractInt"};

    /// Non-integer elements yield the default (empty) Field.
    static Field getValue(ParsedJson::iterator & pjh)
    {
        if (!pjh.is_integer())
            return getDefault();

        return {pjh.get_integer()};
    }
};
|
||||
|
||||
/// jsonExtractFloat: numeric value of the current element, with result type Nullable(Float64).
class JSONExtractFloatImpl : public JSONNullableImplBase<DataTypeFloat64>
{
public:
    static constexpr auto name{"jsonExtractFloat"};

    /// Doubles are returned as is. Integer JSON numbers are converted to double,
    /// matching the Float branch of JSONExtractImpl::getValue.
    /// Any other element yields the default (empty) Field.
    static Field getValue(ParsedJson::iterator & pjh)
    {
        if (pjh.is_double())
            return {pjh.get_double()};
        else if (pjh.is_integer())
            return {static_cast<double>(pjh.get_integer())};
        else
            return getDefault();
    }
};
|
||||
|
||||
/// jsonExtractBool: 1 for a JSON `true`, 0 for a JSON `false`, with result type Nullable(UInt8).
class JSONExtractBoolImpl : public JSONNullableImplBase<DataTypeUInt8>
{
public:
    static constexpr auto name{"jsonExtractBool"};

    /// Elements with any other type tag yield the default (empty) Field.
    static Field getValue(ParsedJson::iterator & pjh)
    {
        switch (pjh.get_type())
        {
            case 't':
                return {1};
            case 'f':
                return {0};
            default:
                return getDefault();
        }
    }
};
|
||||
|
||||
// class JSONExtractRawImpl: public JSONNullableImplBase<DataTypeString>
|
||||
// {
|
||||
// public:
|
||||
// static constexpr auto name {"jsonExtractRaw"};
|
||||
|
||||
// static Field getValue(ParsedJson::iterator & pjh)
|
||||
// {
|
||||
// //
|
||||
// }
|
||||
// };
|
||||
|
||||
/// jsonExtractString: string value of the current element, with result type Nullable(String).
class JSONExtractStringImpl : public JSONNullableImplBase<DataTypeString>
{
public:
    static constexpr auto name{"jsonExtractString"};

    /// Non-string elements yield the default (empty) Field.
    static Field getValue(ParsedJson::iterator & pjh)
    {
        if (!pjh.is_string())
            return getDefault();

        return {String{pjh.get_string()}};
    }
};
|
||||
|
||||
}
|
||||
#else
|
||||
namespace DB
|
||||
{
|
||||
/// Name-only stand-ins used when the build has no simdjson (USE_SIMDJSON is off).
struct JSONHasImpl
{
    static constexpr auto name{"jsonHas"};
};

struct JSONLengthImpl
{
    static constexpr auto name{"jsonLength"};
};

struct JSONTypeImpl
{
    static constexpr auto name{"jsonType"};
};

struct JSONExtractImpl
{
    static constexpr auto name{"jsonExtract"};
};

struct JSONExtractUIntImpl
{
    static constexpr auto name{"jsonExtractUInt"};
};

struct JSONExtractIntImpl
{
    static constexpr auto name{"jsonExtractInt"};
};

struct JSONExtractFloatImpl
{
    static constexpr auto name{"jsonExtractFloat"};
};

struct JSONExtractBoolImpl
{
    static constexpr auto name{"jsonExtractBool"};
};

//struct JSONExtractRawImpl { static constexpr auto name {"jsonExtractRaw"}; };

struct JSONExtractStringImpl
{
    static constexpr auto name{"jsonExtractString"};
};
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Registers the JSON functions in the factory.
/// When built with simdjson and the CPU reports AVX2 support, the real implementations are
/// registered; otherwise the FunctionJSONDummy variants are registered under the same names.
void registerFunctionsJSON(FunctionFactory & factory)
{
#if USE_SIMDJSON
    const bool has_avx2 = __builtin_cpu_supports("avx2");

    if (has_avx2)
    {
        /// The second template argument is true only for jsonExtract.
        factory.registerFunction<FunctionJSONBase<JSONHasImpl, false>>();
        factory.registerFunction<FunctionJSONBase<JSONLengthImpl, false>>();
        factory.registerFunction<FunctionJSONBase<JSONTypeImpl, false>>();
        factory.registerFunction<FunctionJSONBase<JSONExtractImpl, true>>();
        factory.registerFunction<FunctionJSONBase<JSONExtractUIntImpl, false>>();
        factory.registerFunction<FunctionJSONBase<JSONExtractIntImpl, false>>();
        factory.registerFunction<FunctionJSONBase<JSONExtractFloatImpl, false>>();
        factory.registerFunction<FunctionJSONBase<JSONExtractBoolImpl, false>>();
        /// jsonExtractRaw (JSONExtractRawImpl) is not implemented yet and therefore not registered.
        factory.registerFunction<FunctionJSONBase<JSONExtractStringImpl, false>>();
        return;
    }
#endif

    /// Fallback path: dummy variants.
    factory.registerFunction<FunctionJSONDummy<JSONHasImpl>>();
    factory.registerFunction<FunctionJSONDummy<JSONLengthImpl>>();
    factory.registerFunction<FunctionJSONDummy<JSONTypeImpl>>();
    factory.registerFunction<FunctionJSONDummy<JSONExtractImpl>>();
    factory.registerFunction<FunctionJSONDummy<JSONExtractUIntImpl>>();
    factory.registerFunction<FunctionJSONDummy<JSONExtractIntImpl>>();
    factory.registerFunction<FunctionJSONDummy<JSONExtractFloatImpl>>();
    factory.registerFunction<FunctionJSONDummy<JSONExtractBoolImpl>>();
    /// jsonExtractRaw (JSONExtractRawImpl) is not implemented yet and therefore not registered.
    factory.registerFunction<FunctionJSONDummy<JSONExtractStringImpl>>();
}
|
||||
|
||||
}
|
243
dbms/src/Functions/FunctionsJSON.h
Normal file
243
dbms/src/Functions/FunctionsJSON.h
Normal file
@ -0,0 +1,243 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_SIMDJSON
|
||||
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wold-style-cast"
|
||||
#pragma clang diagnostic ignored "-Wnewline-eof"
|
||||
#endif
|
||||
|
||||
#include <simdjson/jsonparser.h>
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
template <typename Impl, bool ExtraArg>
|
||||
class FunctionJSONBase : public IFunction
|
||||
{
|
||||
private:
|
||||
enum class Action
|
||||
{
|
||||
key = 1,
|
||||
index = 2,
|
||||
};
|
||||
|
||||
mutable std::vector<Action> actions;
|
||||
mutable DataTypePtr virtual_type;
|
||||
|
||||
bool tryMove(ParsedJson::iterator & pjh, Action action, const Field & accessor)
|
||||
{
|
||||
switch (action)
|
||||
{
|
||||
case Action::key:
|
||||
if (!pjh.is_object() || !pjh.move_to_key(accessor.get<String>().data()))
|
||||
return false;
|
||||
|
||||
break;
|
||||
case Action::index:
|
||||
if (!pjh.is_object_or_array() || !pjh.down())
|
||||
return false;
|
||||
|
||||
int steps = accessor.get<Int64>();
|
||||
|
||||
if (steps > 0)
|
||||
steps -= 1;
|
||||
else if (steps < 0)
|
||||
{
|
||||
steps += 1;
|
||||
|
||||
ParsedJson::iterator pjh1{pjh};
|
||||
|
||||
while (pjh1.next())
|
||||
steps += 1;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
for (const auto i : ext::range(0, steps))
|
||||
{
|
||||
(void)i;
|
||||
|
||||
if (!pjh.next())
|
||||
return false;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
static constexpr auto name = Impl::name;
|
||||
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionJSONBase>(); }
|
||||
|
||||
String getName() const override { return Impl::name; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if constexpr (ExtraArg)
|
||||
{
|
||||
if (arguments.size() < 2)
|
||||
throw Exception{"Function " + getName() + " requires at least two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
auto col_type_const = typeid_cast<const ColumnConst *>(arguments[1].column.get());
|
||||
|
||||
if (!col_type_const)
|
||||
throw Exception{"Illegal non-const column " + arguments[1].column->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN};
|
||||
|
||||
virtual_type = DataTypeFactory::instance().get(col_type_const->getValue<String>());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (arguments.size() < 1)
|
||||
throw Exception{"Function " + getName() + " requires at least one arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
}
|
||||
|
||||
if (!isString(arguments[0].type))
|
||||
throw Exception{"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
actions.reserve(arguments.size() - 1 - ExtraArg);
|
||||
|
||||
for (const auto i : ext::range(1 + ExtraArg, arguments.size()))
|
||||
{
|
||||
if (isString(arguments[i].type))
|
||||
actions.push_back(Action::key);
|
||||
else if (isInteger(arguments[i].type))
|
||||
actions.push_back(Action::index);
|
||||
else
|
||||
throw Exception{"Illegal type " + arguments[i].type->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
if constexpr (ExtraArg)
|
||||
return Impl::getType(virtual_type);
|
||||
else
|
||||
return Impl::getType();
|
||||
}
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result_pos, size_t input_rows_count) override
|
||||
{
|
||||
MutableColumnPtr to{block.getByPosition(result_pos).type->createColumn()};
|
||||
to->reserve(input_rows_count);
|
||||
|
||||
const ColumnPtr & arg_json = block.getByPosition(arguments[0]).column;
|
||||
|
||||
auto col_json_const = typeid_cast<const ColumnConst *>(arg_json.get());
|
||||
|
||||
auto col_json_string
|
||||
= typeid_cast<const ColumnString *>(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get());
|
||||
|
||||
if (!col_json_string)
|
||||
throw Exception{"Illegal column " + arg_json->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
|
||||
|
||||
const ColumnString::Chars & chars = col_json_string->getChars();
|
||||
const ColumnString::Offsets & offsets = col_json_string->getOffsets();
|
||||
|
||||
size_t max_size = 1;
|
||||
|
||||
for (const auto i : ext::range(0, input_rows_count))
|
||||
if (max_size < offsets[i] - offsets[i - 1] - 1)
|
||||
max_size = offsets[i] - offsets[i - 1] - 1;
|
||||
|
||||
ParsedJson pj;
|
||||
if (!pj.allocateCapacity(max_size))
|
||||
throw Exception{"Can not allocate memory for " + std::to_string(max_size) + " units when parsing JSON",
|
||||
ErrorCodes::CANNOT_ALLOCATE_MEMORY};
|
||||
|
||||
for (const auto i : ext::range(0, input_rows_count))
|
||||
{
|
||||
bool ok = json_parse(&chars[offsets[i - 1]], offsets[i] - offsets[i - 1] - 1, pj) == 0;
|
||||
|
||||
ParsedJson::iterator pjh{pj};
|
||||
|
||||
for (const auto j : ext::range(0, actions.size()))
|
||||
{
|
||||
if (!ok)
|
||||
break;
|
||||
|
||||
ok = tryMove(pjh, actions[j], (*block.getByPosition(arguments[j + 1 + ExtraArg]).column)[i]);
|
||||
}
|
||||
|
||||
if (ok)
|
||||
{
|
||||
if constexpr (ExtraArg)
|
||||
to->insert(Impl::getValue(pjh, virtual_type));
|
||||
else
|
||||
to->insert(Impl::getValue(pjh));
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (ExtraArg)
|
||||
to->insert(Impl::getDefault(virtual_type));
|
||||
else
|
||||
to->insert(Impl::getDefault());
|
||||
}
|
||||
}
|
||||
|
||||
block.getByPosition(result_pos).column = std::move(to);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
template <typename Impl>
|
||||
class FunctionJSONDummy : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Impl::name;
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionJSONDummy>(); }
|
||||
|
||||
String getName() const override { return Impl::name; }
|
||||
bool isVariadic() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override
|
||||
{
|
||||
throw Exception{"Function " + getName() + " is not supported without AVX2", ErrorCodes::NOT_IMPLEMENTED};
|
||||
}
|
||||
|
||||
void executeImpl(Block &, const ColumnNumbers &, size_t, size_t) override
|
||||
{
|
||||
throw Exception{"Function " + getName() + " is not supported without AVX2", ErrorCodes::NOT_IMPLEMENTED};
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -449,44 +449,27 @@ struct NameMultiSearchFirstPositionCaseInsensitiveUTF8
|
||||
using FunctionPosition = FunctionsStringSearch<PositionImpl<PositionCaseSensitiveASCII>, NamePosition>;
|
||||
using FunctionPositionUTF8 = FunctionsStringSearch<PositionImpl<PositionCaseSensitiveUTF8>, NamePositionUTF8>;
|
||||
using FunctionPositionCaseInsensitive = FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveASCII>, NamePositionCaseInsensitive>;
|
||||
using FunctionPositionCaseInsensitiveUTF8
|
||||
= FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveUTF8>, NamePositionCaseInsensitiveUTF8>;
|
||||
using FunctionPositionCaseInsensitiveUTF8 = FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveUTF8>, NamePositionCaseInsensitiveUTF8>;
|
||||
|
||||
using FunctionMultiSearchAllPositions
|
||||
= FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveASCII>, NameMultiSearchAllPositions>;
|
||||
using FunctionMultiSearchAllPositionsUTF8
|
||||
= FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAllPositionsUTF8>;
|
||||
using FunctionMultiSearchAllPositionsCaseInsensitive
|
||||
= FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAllPositionsCaseInsensitive>;
|
||||
using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringPosition<
|
||||
MultiSearchAllPositionsImpl<PositionCaseInsensitiveUTF8>,
|
||||
NameMultiSearchAllPositionsCaseInsensitiveUTF8>;
|
||||
using FunctionMultiSearchAllPositions = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveASCII>, NameMultiSearchAllPositions>;
|
||||
using FunctionMultiSearchAllPositionsUTF8 = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAllPositionsUTF8>;
|
||||
using FunctionMultiSearchAllPositionsCaseInsensitive = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAllPositionsCaseInsensitive>;
|
||||
using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchAllPositionsCaseInsensitiveUTF8>;
|
||||
|
||||
using FunctionMultiSearch = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveASCII>, NameMultiSearchAny>;
|
||||
using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAnyUTF8>;
|
||||
using FunctionMultiSearchCaseInsensitive
|
||||
= FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAnyCaseInsensitive>;
|
||||
using FunctionMultiSearchCaseInsensitiveUTF8
|
||||
= FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchAnyCaseInsensitiveUTF8>;
|
||||
using FunctionMultiSearchCaseInsensitive = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAnyCaseInsensitive>;
|
||||
using FunctionMultiSearchCaseInsensitiveUTF8 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchAnyCaseInsensitiveUTF8>;
|
||||
|
||||
using FunctionMultiSearchFirstIndex
|
||||
= FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstIndex>;
|
||||
using FunctionMultiSearchFirstIndexUTF8
|
||||
= FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstIndexUTF8>;
|
||||
using FunctionMultiSearchFirstIndexCaseInsensitive
|
||||
= FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstIndexCaseInsensitive>;
|
||||
using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8
|
||||
= FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchFirstIndexCaseInsensitiveUTF8>;
|
||||
using FunctionMultiSearchFirstIndex = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstIndex>;
|
||||
using FunctionMultiSearchFirstIndexUTF8 = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstIndexUTF8>;
|
||||
using FunctionMultiSearchFirstIndexCaseInsensitive = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstIndexCaseInsensitive>;
|
||||
using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8 = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchFirstIndexCaseInsensitiveUTF8>;
|
||||
|
||||
using FunctionMultiSearchFirstPosition
|
||||
= FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstPosition>;
|
||||
using FunctionMultiSearchFirstPositionUTF8
|
||||
= FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstPositionUTF8>;
|
||||
using FunctionMultiSearchFirstPositionCaseInsensitive
|
||||
= FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstPositionCaseInsensitive>;
|
||||
using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch<
|
||||
MultiSearchFirstPositionImpl<PositionCaseInsensitiveUTF8>,
|
||||
NameMultiSearchFirstPositionCaseInsensitiveUTF8>;
|
||||
using FunctionMultiSearchFirstPosition = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstPosition>;
|
||||
using FunctionMultiSearchFirstPositionUTF8 = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstPositionUTF8>;
|
||||
using FunctionMultiSearchFirstPositionCaseInsensitive = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstPositionCaseInsensitive>;
|
||||
using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchFirstPositionCaseInsensitiveUTF8>;
|
||||
|
||||
|
||||
void registerFunctionsStringSearch(FunctionFactory & factory)
|
||||
|
@ -164,43 +164,46 @@ struct NgramDistanceImpl
|
||||
return num;
|
||||
}
|
||||
|
||||
template <bool SaveNgrams>
|
||||
static ALWAYS_INLINE inline size_t calculateNeedleStats(
|
||||
const char * data,
|
||||
const size_t size,
|
||||
NgramStats & ngram_stats,
|
||||
[[maybe_unused]] UInt16 * ngram_storage,
|
||||
size_t (*read_code_points)(CodePoint *, const char *&, const char *),
|
||||
UInt16 (*hash_functor)(const CodePoint *))
|
||||
{
|
||||
// To prevent size_t overflow below.
|
||||
if (size < N)
|
||||
return 0;
|
||||
|
||||
const char * start = data;
|
||||
const char * end = data + size;
|
||||
CodePoint cp[simultaneously_codepoints_num] = {};
|
||||
|
||||
/// read_code_points returns the position of cp where it stopped reading codepoints.
|
||||
size_t found = read_code_points(cp, start, end);
|
||||
/// We need to start for the first time here, because first N - 1 codepoints mean nothing.
|
||||
size_t i = N - 1;
|
||||
/// Initialize with this value because for the first time `found` does not initialize first N - 1 codepoints.
|
||||
size_t len = -N + 1;
|
||||
size_t len = 0;
|
||||
do
|
||||
{
|
||||
len += found - N + 1;
|
||||
for (; i + N <= found; ++i)
|
||||
++ngram_stats[hash_functor(cp + i)];
|
||||
{
|
||||
++len;
|
||||
UInt16 hash = hash_functor(cp + i);
|
||||
if constexpr (SaveNgrams)
|
||||
*ngram_storage++ = hash;
|
||||
++ngram_stats[hash];
|
||||
}
|
||||
i = 0;
|
||||
} while (start < end && (found = read_code_points(cp, start, end)));
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
template <bool ReuseStats>
|
||||
static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric(
|
||||
const char * data,
|
||||
const size_t size,
|
||||
NgramStats & ngram_stats,
|
||||
size_t & distance,
|
||||
[[maybe_unused]] UInt16 * ngram_storage,
|
||||
size_t (*read_code_points)(CodePoint *, const char *&, const char *),
|
||||
UInt16 (*hash_functor)(const CodePoint *))
|
||||
{
|
||||
@ -209,18 +212,6 @@ struct NgramDistanceImpl
|
||||
const char * end = data + size;
|
||||
CodePoint cp[simultaneously_codepoints_num] = {};
|
||||
|
||||
/// allocation tricks, most strings are relatively small
|
||||
static constexpr size_t small_buffer_size = 256;
|
||||
std::unique_ptr<UInt16[]> big_buffer;
|
||||
UInt16 small_buffer[small_buffer_size];
|
||||
UInt16 * ngram_storage = small_buffer;
|
||||
|
||||
if (size > small_buffer_size)
|
||||
{
|
||||
ngram_storage = new UInt16[size];
|
||||
big_buffer.reset(ngram_storage);
|
||||
}
|
||||
|
||||
/// read_code_points returns the position of cp where it stopped reading codepoints.
|
||||
size_t found = read_code_points(cp, start, end);
|
||||
/// We need to start for the first time here, because first N - 1 codepoints mean nothing.
|
||||
@ -235,21 +226,25 @@ struct NgramDistanceImpl
|
||||
--distance;
|
||||
else
|
||||
++distance;
|
||||
|
||||
ngram_storage[ngram_cnt++] = hash;
|
||||
if constexpr (ReuseStats)
|
||||
ngram_storage[ngram_cnt] = hash;
|
||||
++ngram_cnt;
|
||||
--ngram_stats[hash];
|
||||
}
|
||||
iter = 0;
|
||||
} while (start < end && (found = read_code_points(cp, start, end)));
|
||||
|
||||
/// Return the state of hash map to its initial.
|
||||
for (size_t i = 0; i < ngram_cnt; ++i)
|
||||
++ngram_stats[ngram_storage[i]];
|
||||
if constexpr (ReuseStats)
|
||||
{
|
||||
for (size_t i = 0; i < ngram_cnt; ++i)
|
||||
++ngram_stats[ngram_storage[i]];
|
||||
}
|
||||
return ngram_cnt;
|
||||
}
|
||||
|
||||
template <class Callback, class... Args>
|
||||
static inline size_t dispatchSearcher(Callback callback, Args &&... args)
|
||||
static inline auto dispatchSearcher(Callback callback, Args &&... args)
|
||||
{
|
||||
if constexpr (!UTF8)
|
||||
return callback(std::forward<Args>(args)..., readASCIICodePoints, ASCIIHash);
|
||||
@ -259,8 +254,7 @@ struct NgramDistanceImpl
|
||||
|
||||
static void constant_constant(std::string data, std::string needle, Float32 & res)
|
||||
{
|
||||
NgramStats common_stats;
|
||||
memset(common_stats, 0, sizeof(common_stats));
|
||||
NgramStats common_stats = {};
|
||||
|
||||
/// We use unsafe versions of getting ngrams, so I decided to use padded strings.
|
||||
const size_t needle_size = needle.size();
|
||||
@ -268,11 +262,11 @@ struct NgramDistanceImpl
|
||||
needle.resize(needle_size + default_padding);
|
||||
data.resize(data_size + default_padding);
|
||||
|
||||
size_t second_size = dispatchSearcher(calculateNeedleStats, needle.data(), needle_size, common_stats);
|
||||
size_t second_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats, nullptr);
|
||||
size_t distance = second_size;
|
||||
if (data_size <= max_string_size)
|
||||
{
|
||||
size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric, data.data(), data_size, common_stats, distance);
|
||||
size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric<false>, data.data(), data_size, common_stats, distance, nullptr);
|
||||
res = distance * 1.f / std::max(first_size + second_size, size_t(1));
|
||||
}
|
||||
else
|
||||
@ -281,18 +275,89 @@ struct NgramDistanceImpl
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_vector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
PaddedPODArray<Float32> & res)
|
||||
{
|
||||
const size_t haystack_offsets_size = haystack_offsets.size();
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needle_offset = 0;
|
||||
|
||||
NgramStats common_stats = {};
|
||||
|
||||
/// The main motivation is to not allocate more on stack because we have already allocated a lot (128Kb).
|
||||
/// And we can reuse these storages in one thread because we care only about what was written to first places.
|
||||
std::unique_ptr<UInt16[]> needle_ngram_storage(new UInt16[max_string_size]);
|
||||
std::unique_ptr<UInt16[]> haystack_ngram_storage(new UInt16[max_string_size]);
|
||||
|
||||
for (size_t i = 0; i < haystack_offsets_size; ++i)
|
||||
{
|
||||
const char * haystack = reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]);
|
||||
const size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
const char * needle = reinterpret_cast<const char *>(&needle_data[prev_needle_offset]);
|
||||
const size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
|
||||
|
||||
if (needle_size <= max_string_size && haystack_size <= max_string_size)
|
||||
{
|
||||
/// Get needle stats.
|
||||
const size_t needle_stats_size = dispatchSearcher(
|
||||
calculateNeedleStats<true>,
|
||||
needle,
|
||||
needle_size,
|
||||
common_stats,
|
||||
needle_ngram_storage.get());
|
||||
|
||||
size_t distance = needle_stats_size;
|
||||
|
||||
/// Combine with haystack stats, return to initial needle stats.
|
||||
const size_t haystack_stats_size = dispatchSearcher(
|
||||
calculateHaystackStatsAndMetric<true>,
|
||||
haystack,
|
||||
haystack_size,
|
||||
common_stats,
|
||||
distance,
|
||||
haystack_ngram_storage.get());
|
||||
|
||||
/// Return to zero array stats.
|
||||
for (size_t j = 0; j < needle_stats_size; ++j)
|
||||
--common_stats[needle_ngram_storage[j]];
|
||||
|
||||
/// For now, common stats is a zero array.
|
||||
res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1));
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Strings are too big, we are assuming they are not the same. This is done because of limiting number
|
||||
/// of bigrams added and not allocating too much memory.
|
||||
res[i] = 1.f;
|
||||
}
|
||||
|
||||
prev_needle_offset = needle_offsets[i];
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars & data, const ColumnString::Offsets & offsets, std::string needle, PaddedPODArray<Float32> & res)
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
std::string needle,
|
||||
PaddedPODArray<Float32> & res)
|
||||
{
|
||||
/// zeroing our map
|
||||
NgramStats common_stats;
|
||||
memset(common_stats, 0, sizeof(common_stats));
|
||||
NgramStats common_stats = {};
|
||||
|
||||
/// The main motivation is to not allocate more on stack because we have already allocated a lot (128Kb).
|
||||
/// And we can reuse these storages in one thread because we care only about what was written to first places.
|
||||
std::unique_ptr<UInt16[]> ngram_storage(new UInt16[max_string_size]);
|
||||
|
||||
/// We use unsafe versions of getting ngrams, so I decided to use padded_data even in needle case.
|
||||
const size_t needle_size = needle.size();
|
||||
needle.resize(needle_size + default_padding);
|
||||
|
||||
const size_t needle_stats_size = dispatchSearcher(calculateNeedleStats, needle.data(), needle_size, common_stats);
|
||||
const size_t needle_stats_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats, nullptr);
|
||||
|
||||
size_t distance = needle_stats_size;
|
||||
size_t prev_offset = 0;
|
||||
@ -303,7 +368,11 @@ struct NgramDistanceImpl
|
||||
if (haystack_size <= max_string_size)
|
||||
{
|
||||
size_t haystack_stats_size = dispatchSearcher(
|
||||
calculateHaystackStatsAndMetric, reinterpret_cast<const char *>(haystack), haystack_size, common_stats, distance);
|
||||
calculateHaystackStatsAndMetric<true>,
|
||||
reinterpret_cast<const char *>(haystack),
|
||||
haystack_size, common_stats,
|
||||
distance,
|
||||
ngram_storage.get());
|
||||
res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1));
|
||||
}
|
||||
else
|
||||
@ -339,11 +408,9 @@ struct NameNgramDistanceUTF8CaseInsensitive
|
||||
};
|
||||
|
||||
using FunctionNgramDistance = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, false>, NameNgramDistance>;
|
||||
using FunctionNgramDistanceCaseInsensitive
|
||||
= FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, true>, NameNgramDistanceCaseInsensitive>;
|
||||
using FunctionNgramDistanceCaseInsensitive = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, true>, NameNgramDistanceCaseInsensitive>;
|
||||
using FunctionNgramDistanceUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, false>, NameNgramDistanceUTF8>;
|
||||
using FunctionNgramDistanceCaseInsensitiveUTF8
|
||||
= FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, true>, NameNgramDistanceUTF8CaseInsensitive>;
|
||||
using FunctionNgramDistanceCaseInsensitiveUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, true>, NameNgramDistanceUTF8CaseInsensitive>;
|
||||
|
||||
void registerFunctionsStringSimilarity(FunctionFactory & factory)
|
||||
{
|
||||
|
@ -62,10 +62,7 @@ public:
|
||||
const ColumnConst * col_haystack_const = typeid_cast<const ColumnConst *>(&*column_haystack);
|
||||
const ColumnConst * col_needle_const = typeid_cast<const ColumnConst *>(&*column_needle);
|
||||
|
||||
if (!col_needle_const)
|
||||
throw Exception("Second argument of function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
if (col_haystack_const)
|
||||
if (col_haystack_const && col_needle_const)
|
||||
{
|
||||
ResultType res{};
|
||||
const String & needle = col_needle_const->getValue<String>();
|
||||
@ -88,8 +85,9 @@ public:
|
||||
vec_res.resize(column_haystack->size());
|
||||
|
||||
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
|
||||
const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(&*column_needle);
|
||||
|
||||
if (col_haystack_vector)
|
||||
if (col_haystack_vector && col_needle_const)
|
||||
{
|
||||
const String & needle = col_needle_const->getValue<String>();
|
||||
if (needle.size() > Impl::max_string_size)
|
||||
@ -101,6 +99,27 @@ public:
|
||||
}
|
||||
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res);
|
||||
}
|
||||
else if (col_haystack_vector && col_needle_vector)
|
||||
{
|
||||
Impl::vector_vector(
|
||||
col_haystack_vector->getChars(),
|
||||
col_haystack_vector->getOffsets(),
|
||||
col_needle_vector->getChars(),
|
||||
col_needle_vector->getOffsets(),
|
||||
vec_res);
|
||||
}
|
||||
else if (col_haystack_const && col_needle_vector)
|
||||
{
|
||||
const String & needle = col_haystack_const->getValue<String>();
|
||||
if (needle.size() > Impl::max_string_size)
|
||||
{
|
||||
throw Exception(
|
||||
"String size of needle is too big for function " + getName() + ". Should be at most "
|
||||
+ std::to_string(Impl::max_string_size),
|
||||
ErrorCodes::TOO_LARGE_STRING_SIZE);
|
||||
}
|
||||
Impl::vector_constant(col_needle_vector->getChars(), col_needle_vector->getOffsets(), needle, vec_res);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
|
@ -135,9 +135,10 @@ namespace MultiRegexps
|
||||
for (const StringRef ref : str_patterns)
|
||||
{
|
||||
ptrns.push_back(ref.data);
|
||||
flags.push_back(HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_SINGLEMATCH);
|
||||
flags.push_back(HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8);
|
||||
if constexpr (CompileForEditDistance)
|
||||
{
|
||||
flags.back() &= ~HS_FLAG_UTF8;
|
||||
ext_exprs.emplace_back();
|
||||
ext_exprs.back().flags = HS_EXT_FLAG_EDIT_DISTANCE;
|
||||
ext_exprs.back().edit_distance = edit_distance.value();
|
||||
|
51
dbms/src/Functions/ignoreExceptNull.cpp
Normal file
51
dbms/src/Functions/ignoreExceptNull.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** ignoreExceptNull(...) is a function that takes any arguments, and always returns 0 except Null.
|
||||
*/
|
||||
class FunctionIgnoreExceptNull : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "ignoreExceptNull";
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionIgnoreExceptNull>();
|
||||
}
|
||||
|
||||
bool isVariadic() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override
|
||||
{
|
||||
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(0));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void registerFunctionIgnoreExceptNull(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionIgnoreExceptNull>();
|
||||
}
|
||||
|
||||
}
|
@ -73,11 +73,6 @@ public:
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
|
||||
{
|
||||
/// Second argument must be ColumnSet.
|
||||
@ -89,7 +84,7 @@ public:
|
||||
|
||||
Block block_of_key_columns;
|
||||
|
||||
/// First argument may be tuple or single column.
|
||||
/// First argument may be a tuple or a single column.
|
||||
const ColumnWithTypeAndName & left_arg = block.getByPosition(arguments[0]);
|
||||
const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(left_arg.column.get());
|
||||
const ColumnConst * const_tuple = checkAndGetColumnConst<ColumnTuple>(left_arg.column.get());
|
||||
|
@ -40,6 +40,7 @@ void registerFunctionsMath(FunctionFactory &);
|
||||
void registerFunctionsGeo(FunctionFactory &);
|
||||
void registerFunctionsNull(FunctionFactory &);
|
||||
void registerFunctionsFindCluster(FunctionFactory &);
|
||||
void registerFunctionsJSON(FunctionFactory &);
|
||||
void registerFunctionTransform(FunctionFactory &);
|
||||
|
||||
#if USE_ICU
|
||||
@ -82,6 +83,7 @@ void registerFunctions()
|
||||
registerFunctionsGeo(factory);
|
||||
registerFunctionsNull(factory);
|
||||
registerFunctionsFindCluster(factory);
|
||||
registerFunctionsJSON(factory);
|
||||
registerFunctionTransform(factory);
|
||||
|
||||
#if USE_ICU
|
||||
|
@ -19,6 +19,7 @@ void registerFunctionSleep(FunctionFactory &);
|
||||
void registerFunctionSleepEachRow(FunctionFactory &);
|
||||
void registerFunctionMaterialize(FunctionFactory &);
|
||||
void registerFunctionIgnore(FunctionFactory &);
|
||||
void registerFunctionIgnoreExceptNull(FunctionFactory &);
|
||||
void registerFunctionIndexHint(FunctionFactory &);
|
||||
void registerFunctionIdentity(FunctionFactory &);
|
||||
void registerFunctionArrayJoin(FunctionFactory &);
|
||||
@ -62,6 +63,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
|
||||
registerFunctionSleepEachRow(factory);
|
||||
registerFunctionMaterialize(factory);
|
||||
registerFunctionIgnore(factory);
|
||||
registerFunctionIgnoreExceptNull(factory);
|
||||
registerFunctionIndexHint(factory);
|
||||
registerFunctionIdentity(factory);
|
||||
registerFunctionArrayJoin(factory);
|
||||
|
@ -1,3 +1,5 @@
|
||||
#if defined(__linux__) || defined(__FreeBSD__)
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wsign-compare"
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
@ -69,3 +71,5 @@ TEST(ReadBufferAIOTest, TestReadAfterAIO)
|
||||
EXPECT_EQ(read_after_eof_big, data.length());
|
||||
EXPECT_TRUE(testbuf.eof());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -328,10 +328,10 @@ void ActionsVisitor::visit(const ASTPtr & ast)
|
||||
if (!only_consts)
|
||||
{
|
||||
/// We are in the part of the tree that we are not going to compute. You just need to define types.
|
||||
/// Do not subquery and create sets. We treat "IN" as "ignore" function.
|
||||
/// Do not subquery and create sets. We treat "IN" as "ignoreExceptNull" function.
|
||||
|
||||
actions_stack.addAction(ExpressionAction::applyFunction(
|
||||
FunctionFactory::instance().get("ignore", context),
|
||||
FunctionFactory::instance().get("ignoreExceptNull", context),
|
||||
{ node->arguments->children.at(0)->getColumnName() },
|
||||
getColumnName()));
|
||||
}
|
||||
|
@ -191,12 +191,12 @@ void AsynchronousMetrics::update()
|
||||
"Cannot get replica delay for table: " + backQuoteIfNeed(db.first) + "." + backQuoteIfNeed(iterator->name()));
|
||||
}
|
||||
|
||||
calculateMax(max_part_count_for_partition, table_replicated_merge_tree->getData().getMaxPartsCountForPartition());
|
||||
calculateMax(max_part_count_for_partition, table_replicated_merge_tree->getMaxPartsCountForPartition());
|
||||
}
|
||||
|
||||
if (table_merge_tree)
|
||||
{
|
||||
calculateMax(max_part_count_for_partition, table_merge_tree->getData().getMaxPartsCountForPartition());
|
||||
calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountForPartition());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -702,9 +702,8 @@ void Context::checkDatabaseAccessRightsImpl(const std::string & database_name) c
|
||||
throw Exception("Access denied to database " + database_name + " for user " + client_info.current_user , ErrorCodes::DATABASE_ACCESS_DENIED);
|
||||
}
|
||||
|
||||
void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
|
||||
void Context::addDependencyUnsafe(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
|
||||
{
|
||||
auto lock = getLock();
|
||||
checkDatabaseAccessRightsImpl(from.first);
|
||||
checkDatabaseAccessRightsImpl(where.first);
|
||||
shared->view_dependencies[from].insert(where);
|
||||
@ -715,9 +714,14 @@ void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAnd
|
||||
table->updateDependencies();
|
||||
}
|
||||
|
||||
void Context::removeDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
|
||||
void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
|
||||
{
|
||||
auto lock = getLock();
|
||||
addDependencyUnsafe(from, where);
|
||||
}
|
||||
|
||||
void Context::removeDependencyUnsafe(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
|
||||
{
|
||||
checkDatabaseAccessRightsImpl(from.first);
|
||||
checkDatabaseAccessRightsImpl(where.first);
|
||||
shared->view_dependencies[from].erase(where);
|
||||
@ -728,6 +732,12 @@ void Context::removeDependency(const DatabaseAndTableName & from, const Database
|
||||
table->updateDependencies();
|
||||
}
|
||||
|
||||
void Context::removeDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
|
||||
{
|
||||
auto lock = getLock();
|
||||
removeDependencyUnsafe(from, where);
|
||||
}
|
||||
|
||||
Dependencies Context::getDependencies(const String & database_name, const String & table_name) const
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
@ -215,6 +215,10 @@ public:
|
||||
void removeDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where);
|
||||
Dependencies getDependencies(const String & database_name, const String & table_name) const;
|
||||
|
||||
/// Functions where we can lock the context manually
|
||||
void addDependencyUnsafe(const DatabaseAndTableName & from, const DatabaseAndTableName & where);
|
||||
void removeDependencyUnsafe(const DatabaseAndTableName & from, const DatabaseAndTableName & where);
|
||||
|
||||
/// Checking the existence of the table/database. Database can be empty - in this case the current database is used.
|
||||
bool isTableExist(const String & database_name, const String & table_name) const;
|
||||
bool isDatabaseExist(const String & database_name) const;
|
||||
|
@ -513,13 +513,14 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only
|
||||
columns_added_by_join_list));
|
||||
}
|
||||
|
||||
static void appendRequiredColumns(NameSet & required_columns, const Block & sample, const AnalyzedJoin & analyzed_join)
|
||||
static void appendRequiredColumns(
|
||||
NameSet & required_columns, const Block & sample, const Names & key_names_right, const JoinedColumnsList & columns_added_by_join)
|
||||
{
|
||||
for (auto & column : analyzed_join.key_names_right)
|
||||
for (auto & column : key_names_right)
|
||||
if (!sample.has(column))
|
||||
required_columns.insert(column);
|
||||
|
||||
for (auto & column : analyzed_join.columns_from_joined_table)
|
||||
for (auto & column : columns_added_by_join)
|
||||
if (!sample.has(column.name_and_type.name))
|
||||
required_columns.insert(column.name_and_type.name);
|
||||
}
|
||||
@ -606,7 +607,8 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
|
||||
Names action_columns = joined_block_actions->getRequiredColumns();
|
||||
NameSet required_columns(action_columns.begin(), action_columns.end());
|
||||
|
||||
appendRequiredColumns(required_columns, joined_block_actions->getSampleBlock(), analyzed_join);
|
||||
appendRequiredColumns(
|
||||
required_columns, joined_block_actions->getSampleBlock(), analyzed_join.key_names_right, columns_added_by_join);
|
||||
|
||||
Names original_columns = analyzed_join.getOriginalColumnNames(required_columns);
|
||||
|
||||
|
@ -30,6 +30,8 @@
|
||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ParserSelectQuery.h>
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
@ -43,8 +45,7 @@
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
|
||||
#include <TableFunctions/ITableFunction.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
@ -590,13 +591,11 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
|
||||
|
||||
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
|
||||
if (settings.optimize_move_to_prewhere && query.where() && !query.prewhere() && !query.final())
|
||||
MergeTreeWhereOptimizer{query_info, context, merge_tree.getData(), query_analyzer->getRequiredSourceColumns(), log};
|
||||
MergeTreeWhereOptimizer{query_info, context, merge_tree, query_analyzer->getRequiredSourceColumns(), log};
|
||||
};
|
||||
|
||||
if (const StorageMergeTree * merge_tree = dynamic_cast<const StorageMergeTree *>(storage.get()))
|
||||
optimize_prewhere(*merge_tree);
|
||||
else if (const StorageReplicatedMergeTree * replicated_merge_tree = dynamic_cast<const StorageReplicatedMergeTree *>(storage.get()))
|
||||
optimize_prewhere(*replicated_merge_tree);
|
||||
if (const MergeTreeData * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get()))
|
||||
optimize_prewhere(*merge_tree_data);
|
||||
}
|
||||
|
||||
AnalysisResult expressions;
|
||||
|
@ -377,11 +377,11 @@ namespace
|
||||
template <typename Map, typename KeyGetter>
|
||||
struct Inserter<ASTTableJoin::Strictness::Any, Map, KeyGetter>
|
||||
{
|
||||
static ALWAYS_INLINE void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
|
||||
static ALWAYS_INLINE void insert(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
|
||||
{
|
||||
auto emplace_result = key_getter.emplaceKey(map, i, pool);
|
||||
|
||||
if (emplace_result.isInserted() || emplace_result.getMapped().overwrite)
|
||||
if (emplace_result.isInserted() || join.anyTakeLastRow())
|
||||
new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i);
|
||||
}
|
||||
};
|
||||
@ -659,7 +659,7 @@ void addFoundRow(const typename Map::mapped_type & mapped, AddedColumns & added,
|
||||
|
||||
if constexpr (STRICTNESS == ASTTableJoin::Strictness::All)
|
||||
{
|
||||
for (auto current = &static_cast<const typename Map::mapped_type::Base_t &>(mapped); current != nullptr; current = current->next)
|
||||
for (auto current = &static_cast<const typename Map::mapped_type::Base &>(mapped); current != nullptr; current = current->next)
|
||||
{
|
||||
added.appendFromBlock(*current->block, current->row_num);
|
||||
++current_offset;
|
||||
@ -1078,10 +1078,7 @@ void Join::joinGet(Block & block, const String & column_name) const
|
||||
|
||||
if (kind == ASTTableJoin::Kind::Left && strictness == ASTTableJoin::Strictness::Any)
|
||||
{
|
||||
if (any_take_last_row)
|
||||
joinGetImpl(block, column_name, std::get<MapsAnyOverwrite>(maps));
|
||||
else
|
||||
joinGetImpl(block, column_name, std::get<MapsAny>(maps));
|
||||
joinGetImpl(block, column_name, std::get<MapsAny>(maps));
|
||||
}
|
||||
else
|
||||
throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::LOGICAL_ERROR);
|
||||
@ -1156,7 +1153,7 @@ struct AdderNonJoined<ASTTableJoin::Strictness::All, Mapped>
|
||||
{
|
||||
static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right)
|
||||
{
|
||||
for (auto current = &static_cast<const typename Mapped::Base_t &>(mapped); current != nullptr; current = current->next)
|
||||
for (auto current = &static_cast<const typename Mapped::Base &>(mapped); current != nullptr; current = current->next)
|
||||
{
|
||||
for (size_t j = 0; j < columns_right.size(); ++j)
|
||||
columns_right[j]->insertFrom(*current->block->getByPosition(j).column.get(), current->row_num);
|
||||
|
@ -25,6 +25,43 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace JoinStuff
|
||||
{
|
||||
|
||||
/// Base class with optional flag attached that's needed to implement RIGHT and FULL JOINs.
|
||||
template <typename T, bool with_used>
|
||||
struct WithFlags;
|
||||
|
||||
template <typename T>
|
||||
struct WithFlags<T, true> : T
|
||||
{
|
||||
using Base = T;
|
||||
using T::T;
|
||||
|
||||
mutable std::atomic<bool> used {};
|
||||
void setUsed() const { used.store(true, std::memory_order_relaxed); } /// Could be set simultaneously from different threads.
|
||||
bool getUsed() const { return used; }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct WithFlags<T, false> : T
|
||||
{
|
||||
using Base = T;
|
||||
using T::T;
|
||||
|
||||
void setUsed() const {}
|
||||
bool getUsed() const { return true; }
|
||||
};
|
||||
|
||||
using MappedAny = WithFlags<RowRef, false>;
|
||||
using MappedAll = WithFlags<RowRefList, false>;
|
||||
using MappedAnyFull = WithFlags<RowRef, true>;
|
||||
using MappedAllFull = WithFlags<RowRefList, true>;
|
||||
using MappedAsof = WithFlags<AsofRowRefs, false>;
|
||||
|
||||
}
|
||||
|
||||
/** Data structure for implementation of JOIN.
|
||||
* It is just a hash table: keys -> rows of joined ("right") table.
|
||||
* Additionally, CROSS JOIN is supported: instead of hash table, it use just set of blocks without keys.
|
||||
@ -132,36 +169,7 @@ public:
|
||||
|
||||
ASTTableJoin::Kind getKind() const { return kind; }
|
||||
AsofRowRefs::Type getAsofType() const { return *asof_type; }
|
||||
|
||||
/** Depending on template parameter, adds or doesn't add a flag, that element was used (row was joined).
|
||||
* Depending on template parameter, decide whether to overwrite existing values when encountering the same key again
|
||||
* with_used is for implementation of RIGHT and FULL JOINs.
|
||||
* overwrite is for implementation of StorageJoin with overwrite setting enabled
|
||||
* NOTE: It is possible to store the flag in one bit of pointer to block or row_num. It seems not reasonable, because memory saving is minimal.
|
||||
*/
|
||||
template <bool with_used, bool overwrite_, typename Base>
|
||||
struct WithFlags;
|
||||
|
||||
template <bool overwrite_, typename Base>
|
||||
struct WithFlags<true, overwrite_, Base> : Base
|
||||
{
|
||||
static constexpr bool overwrite = overwrite_;
|
||||
mutable std::atomic<bool> used {};
|
||||
using Base::Base;
|
||||
using Base_t = Base;
|
||||
void setUsed() const { used.store(true, std::memory_order_relaxed); } /// Could be set simultaneously from different threads.
|
||||
bool getUsed() const { return used; }
|
||||
};
|
||||
|
||||
template <bool overwrite_, typename Base>
|
||||
struct WithFlags<false, overwrite_, Base> : Base
|
||||
{
|
||||
static constexpr bool overwrite = overwrite_;
|
||||
using Base::Base;
|
||||
using Base_t = Base;
|
||||
void setUsed() const {}
|
||||
bool getUsed() const { return true; }
|
||||
};
|
||||
bool anyTakeLastRow() const { return any_take_last_row; }
|
||||
|
||||
/// Different types of keys for maps.
|
||||
#define APPLY_FOR_JOIN_VARIANTS(M) \
|
||||
@ -257,13 +265,11 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
using MapsAny = MapsTemplate<WithFlags<false, false, RowRef>>;
|
||||
using MapsAnyOverwrite = MapsTemplate<WithFlags<false, true, RowRef>>;
|
||||
using MapsAll = MapsTemplate<WithFlags<false, false, RowRefList>>;
|
||||
using MapsAnyFull = MapsTemplate<WithFlags<true, false, RowRef>>;
|
||||
using MapsAnyFullOverwrite = MapsTemplate<WithFlags<true, true, RowRef>>;
|
||||
using MapsAllFull = MapsTemplate<WithFlags<true, false, RowRefList>>;
|
||||
using MapsAsof = MapsTemplate<WithFlags<false, false, AsofRowRefs>>;
|
||||
using MapsAny = MapsTemplate<JoinStuff::MappedAny>;
|
||||
using MapsAll = MapsTemplate<JoinStuff::MappedAll>;
|
||||
using MapsAnyFull = MapsTemplate<JoinStuff::MappedAnyFull>;
|
||||
using MapsAllFull = MapsTemplate<JoinStuff::MappedAllFull>;
|
||||
using MapsAsof = MapsTemplate<JoinStuff::MappedAsof>;
|
||||
|
||||
template <ASTTableJoin::Kind KIND>
|
||||
struct KindTrait
|
||||
@ -276,13 +282,14 @@ public:
|
||||
static constexpr bool fill_right = static_in_v<KIND, ASTTableJoin::Kind::Right, ASTTableJoin::Kind::Full>;
|
||||
};
|
||||
|
||||
template <bool fill_right, typename ASTTableJoin::Strictness, bool overwrite>
|
||||
template <bool fill_right, typename ASTTableJoin::Strictness>
|
||||
struct MapGetterImpl;
|
||||
|
||||
template <ASTTableJoin::Kind kind, ASTTableJoin::Strictness strictness, bool overwrite>
|
||||
using Map = typename MapGetterImpl<KindTrait<kind>::fill_right, strictness, overwrite>::Map;
|
||||
template <ASTTableJoin::Kind kind, ASTTableJoin::Strictness strictness>
|
||||
using Map = typename MapGetterImpl<KindTrait<kind>::fill_right, strictness>::Map;
|
||||
|
||||
static constexpr std::array<ASTTableJoin::Strictness, 3> STRICTNESSES = {ASTTableJoin::Strictness::Any, ASTTableJoin::Strictness::All, ASTTableJoin::Strictness::Asof};
|
||||
static constexpr std::array<ASTTableJoin::Strictness, 3> STRICTNESSES
|
||||
= {ASTTableJoin::Strictness::Any, ASTTableJoin::Strictness::All, ASTTableJoin::Strictness::Asof};
|
||||
static constexpr std::array<ASTTableJoin::Kind, 4> KINDS
|
||||
= {ASTTableJoin::Kind::Left, ASTTableJoin::Kind::Inner, ASTTableJoin::Kind::Full, ASTTableJoin::Kind::Right};
|
||||
|
||||
@ -298,12 +305,12 @@ public:
|
||||
if (kind == KINDS[i] && strictness == ASTTableJoin::Strictness::Any)
|
||||
{
|
||||
if constexpr (std::is_same_v<Func, MapInitTag>)
|
||||
maps = Map<KINDS[i], ASTTableJoin::Strictness::Any, true>();
|
||||
maps = Map<KINDS[i], ASTTableJoin::Strictness::Any>();
|
||||
else
|
||||
func(
|
||||
std::integral_constant<ASTTableJoin::Kind, KINDS[i]>(),
|
||||
std::integral_constant<ASTTableJoin::Strictness, ASTTableJoin::Strictness::Any>(),
|
||||
std::get<Map<KINDS[i], ASTTableJoin::Strictness::Any, true>>(maps));
|
||||
std::get<Map<KINDS[i], ASTTableJoin::Strictness::Any>>(maps));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -320,12 +327,12 @@ public:
|
||||
if (kind == KINDS[i] && strictness == STRICTNESSES[j])
|
||||
{
|
||||
if constexpr (std::is_same_v<Func, MapInitTag>)
|
||||
maps = Map<KINDS[i], STRICTNESSES[j], false>();
|
||||
maps = Map<KINDS[i], STRICTNESSES[j]>();
|
||||
else
|
||||
func(
|
||||
std::integral_constant<ASTTableJoin::Kind, KINDS[i]>(),
|
||||
std::integral_constant<ASTTableJoin::Strictness, STRICTNESSES[j]>(),
|
||||
std::get<Map<KINDS[i], STRICTNESSES[j], false>>(maps));
|
||||
std::get<Map<KINDS[i], STRICTNESSES[j]>>(maps));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -359,7 +366,7 @@ private:
|
||||
*/
|
||||
BlocksList blocks;
|
||||
|
||||
std::variant<MapsAny, MapsAnyOverwrite, MapsAll, MapsAnyFull, MapsAnyFullOverwrite, MapsAllFull, MapsAsof> maps;
|
||||
std::variant<MapsAny, MapsAll, MapsAnyFull, MapsAllFull, MapsAsof> maps;
|
||||
|
||||
/// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows.
|
||||
Arena pool;
|
||||
@ -421,32 +428,32 @@ private:
|
||||
using JoinPtr = std::shared_ptr<Join>;
|
||||
using Joins = std::vector<JoinPtr>;
|
||||
|
||||
template <bool overwrite_>
|
||||
struct Join::MapGetterImpl<false, ASTTableJoin::Strictness::Any, overwrite_>
|
||||
template <>
|
||||
struct Join::MapGetterImpl<false, ASTTableJoin::Strictness::Any>
|
||||
{
|
||||
using Map = std::conditional_t<overwrite_, MapsAnyOverwrite, MapsAny>;
|
||||
};
|
||||
|
||||
template <bool overwrite_>
|
||||
struct Join::MapGetterImpl<true, ASTTableJoin::Strictness::Any, overwrite_>
|
||||
{
|
||||
using Map = std::conditional_t<overwrite_, MapsAnyFullOverwrite, MapsAnyFull>;
|
||||
using Map = MapsAny;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Join::MapGetterImpl<false, ASTTableJoin::Strictness::All, false>
|
||||
struct Join::MapGetterImpl<true, ASTTableJoin::Strictness::Any>
|
||||
{
|
||||
using Map = MapsAnyFull;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Join::MapGetterImpl<false, ASTTableJoin::Strictness::All>
|
||||
{
|
||||
using Map = MapsAll;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Join::MapGetterImpl<true, ASTTableJoin::Strictness::All, false>
|
||||
struct Join::MapGetterImpl<true, ASTTableJoin::Strictness::All>
|
||||
{
|
||||
using Map = MapsAllFull;
|
||||
};
|
||||
|
||||
template <bool fill_right>
|
||||
struct Join::MapGetterImpl<fill_right, ASTTableJoin::Strictness::Asof, false>
|
||||
struct Join::MapGetterImpl<fill_right, ASTTableJoin::Strictness::Asof>
|
||||
{
|
||||
using Map = MapsAsof;
|
||||
};
|
||||
|
@ -1,8 +1,7 @@
|
||||
#include <Interpreters/MutationsInterpreter.h>
|
||||
#include <Interpreters/SyntaxAnalyzer.h>
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <DataStreams/FilterBlockInputStream.h>
|
||||
#include <DataStreams/ExpressionBlockInputStream.h>
|
||||
#include <DataStreams/CreatingSetsBlockInputStream.h>
|
||||
@ -86,12 +85,8 @@ bool MutationsInterpreter::isStorageTouchedByMutations() const
|
||||
|
||||
static NameSet getKeyColumns(const StoragePtr & storage)
|
||||
{
|
||||
const MergeTreeData * merge_tree_data = nullptr;
|
||||
if (auto merge_tree = dynamic_cast<StorageMergeTree *>(storage.get()))
|
||||
merge_tree_data = &merge_tree->getData();
|
||||
else if (auto replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
|
||||
merge_tree_data = &replicated_merge_tree->getData();
|
||||
else
|
||||
const MergeTreeData * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get());
|
||||
if (!merge_tree_data)
|
||||
return {};
|
||||
|
||||
NameSet key_columns;
|
||||
|
@ -220,10 +220,11 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
|
||||
|
||||
ReadBufferFromFile in{file_path};
|
||||
|
||||
Settings insert_settings;
|
||||
std::string insert_query;
|
||||
readStringBinary(insert_query, in);
|
||||
readQueryAndSettings(in, insert_settings, insert_query);
|
||||
|
||||
RemoteBlockOutputStream remote{*connection, insert_query};
|
||||
RemoteBlockOutputStream remote{*connection, insert_query, &insert_settings};
|
||||
|
||||
remote.writePrefix();
|
||||
remote.writePrepared(in);
|
||||
@ -240,20 +241,39 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
|
||||
LOG_TRACE(log, "Finished processing `" << file_path << '`');
|
||||
}
|
||||
|
||||
void StorageDistributedDirectoryMonitor::readQueryAndSettings(
|
||||
ReadBuffer & in, Settings & insert_settings, std::string & insert_query) const
|
||||
{
|
||||
UInt64 magic_number_or_query_size;
|
||||
|
||||
readVarUInt(magic_number_or_query_size, in);
|
||||
|
||||
if (magic_number_or_query_size == UInt64(DBMS_DISTRIBUTED_SENDS_MAGIC_NUMBER))
|
||||
{
|
||||
insert_settings.deserialize(in);
|
||||
readVarUInt(magic_number_or_query_size, in);
|
||||
}
|
||||
insert_query.resize(magic_number_or_query_size);
|
||||
in.readStrict(insert_query.data(), magic_number_or_query_size);
|
||||
}
|
||||
|
||||
struct StorageDistributedDirectoryMonitor::BatchHeader
|
||||
{
|
||||
Settings settings;
|
||||
String query;
|
||||
Block sample_block;
|
||||
|
||||
BatchHeader(String query_, Block sample_block_)
|
||||
: query(std::move(query_))
|
||||
BatchHeader(Settings settings_, String query_, Block sample_block_)
|
||||
: settings(std::move(settings_))
|
||||
, query(std::move(query_))
|
||||
, sample_block(std::move(sample_block_))
|
||||
{
|
||||
}
|
||||
|
||||
bool operator==(const BatchHeader & other) const
|
||||
{
|
||||
return query == other.query && blocksHaveEqualStructure(sample_block, other.sample_block);
|
||||
return settings == other.settings && query == other.query &&
|
||||
blocksHaveEqualStructure(sample_block, other.sample_block);
|
||||
}
|
||||
|
||||
struct Hash
|
||||
@ -320,6 +340,7 @@ struct StorageDistributedDirectoryMonitor::Batch
|
||||
bool batch_broken = false;
|
||||
try
|
||||
{
|
||||
Settings insert_settings;
|
||||
String insert_query;
|
||||
std::unique_ptr<RemoteBlockOutputStream> remote;
|
||||
bool first = true;
|
||||
@ -335,12 +356,12 @@ struct StorageDistributedDirectoryMonitor::Batch
|
||||
}
|
||||
|
||||
ReadBufferFromFile in(file_path->second);
|
||||
readStringBinary(insert_query, in); /// NOTE: all files must have the same insert_query
|
||||
parent.readQueryAndSettings(in, insert_settings, insert_query);
|
||||
|
||||
if (first)
|
||||
{
|
||||
first = false;
|
||||
remote = std::make_unique<RemoteBlockOutputStream>(*connection, insert_query);
|
||||
remote = std::make_unique<RemoteBlockOutputStream>(*connection, insert_query, &insert_settings);
|
||||
remote->writePrefix();
|
||||
}
|
||||
|
||||
@ -436,12 +457,13 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
|
||||
size_t total_rows = 0;
|
||||
size_t total_bytes = 0;
|
||||
Block sample_block;
|
||||
Settings insert_settings;
|
||||
String insert_query;
|
||||
try
|
||||
{
|
||||
/// Determine metadata of the current file and check if it is not broken.
|
||||
ReadBufferFromFile in{file_path};
|
||||
readStringBinary(insert_query, in);
|
||||
readQueryAndSettings(in, insert_settings, insert_query);
|
||||
|
||||
CompressedReadBuffer decompressing_in(in);
|
||||
NativeBlockInputStream block_in(decompressing_in, ClickHouseRevision::get());
|
||||
@ -468,7 +490,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
|
||||
throw;
|
||||
}
|
||||
|
||||
BatchHeader batch_header(std::move(insert_query), std::move(sample_block));
|
||||
BatchHeader batch_header(std::move(insert_settings), std::move(insert_query), std::move(sample_block));
|
||||
Batch & batch = header_to_batch.try_emplace(batch_header, *this, files).first->second;
|
||||
|
||||
batch.file_indices.push_back(file_idx);
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -57,6 +58,9 @@ private:
|
||||
std::condition_variable cond;
|
||||
Logger * log;
|
||||
ThreadFromGlobalPool thread{&StorageDistributedDirectoryMonitor::run, this};
|
||||
|
||||
/// Read insert query and insert settings for backward compatible.
|
||||
void readQueryAndSettings(ReadBuffer & in, Settings & insert_settings, std::string & insert_query) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -59,10 +59,10 @@ namespace ErrorCodes
|
||||
|
||||
|
||||
DistributedBlockOutputStream::DistributedBlockOutputStream(
|
||||
StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_,
|
||||
const Settings & settings_, bool insert_sync_, UInt64 insert_timeout_)
|
||||
: storage(storage), query_ast(query_ast), query_string(queryToString(query_ast)),
|
||||
cluster(cluster_), settings(settings_), insert_sync(insert_sync_),
|
||||
const Context & context_, StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_,
|
||||
bool insert_sync_, UInt64 insert_timeout_)
|
||||
: context(context_), storage(storage), query_ast(query_ast), query_string(queryToString(query_ast)),
|
||||
cluster(cluster_), insert_sync(insert_sync_),
|
||||
insert_timeout(insert_timeout_), log(&Logger::get("DistributedBlockOutputStream"))
|
||||
{
|
||||
}
|
||||
@ -249,7 +249,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
|
||||
throw Exception("There are several writing job for an automatically replicated shard", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// TODO: it make sense to rewrite skip_unavailable_shards and max_parallel_replicas here
|
||||
auto connections = shard_info.pool->getMany(&settings, PoolMode::GET_ONE);
|
||||
auto connections = shard_info.pool->getMany(&context.getSettingsRef(), PoolMode::GET_ONE);
|
||||
if (connections.empty() || connections.front().isNull())
|
||||
throw Exception("Expected exactly one connection for shard " + toString(job.shard_index), ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
@ -263,7 +263,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
|
||||
if (!connection_pool)
|
||||
throw Exception("Connection pool for replica " + replica.readableString() + " does not exist", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
job.connection_entry = connection_pool->get(&settings);
|
||||
job.connection_entry = connection_pool->get(&context.getSettingsRef());
|
||||
if (job.connection_entry.isNull())
|
||||
throw Exception("Got empty connection for replica" + replica.readableString(), ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
@ -271,7 +271,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
|
||||
if (throttler)
|
||||
job.connection_entry->setThrottler(throttler);
|
||||
|
||||
job.stream = std::make_shared<RemoteBlockOutputStream>(*job.connection_entry, query_string, &settings);
|
||||
job.stream = std::make_shared<RemoteBlockOutputStream>(*job.connection_entry, query_string, &context.getSettingsRef());
|
||||
job.stream->writePrefix();
|
||||
}
|
||||
|
||||
@ -283,8 +283,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
|
||||
if (!job.stream)
|
||||
{
|
||||
/// Forward user settings
|
||||
job.local_context = std::make_unique<Context>(storage.global_context);
|
||||
job.local_context->setSettings(settings);
|
||||
job.local_context = std::make_unique<Context>(context);
|
||||
|
||||
InterpreterInsertQuery interp(query_ast, *job.local_context);
|
||||
job.stream = interp.execute().out;
|
||||
@ -304,6 +303,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
|
||||
|
||||
void DistributedBlockOutputStream::writeSync(const Block & block)
|
||||
{
|
||||
const Settings & settings = context.getSettingsRef();
|
||||
const auto & shards_info = cluster->getShardsInfo();
|
||||
size_t num_shards = shards_info.size();
|
||||
|
||||
@ -504,7 +504,7 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const siz
|
||||
void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_t repeats)
|
||||
{
|
||||
/// Async insert does not support settings forwarding yet whereas sync one supports
|
||||
InterpreterInsertQuery interp(query_ast, storage.global_context);
|
||||
InterpreterInsertQuery interp(query_ast, context);
|
||||
|
||||
auto block_io = interp.execute();
|
||||
block_io.out->writePrefix();
|
||||
@ -553,6 +553,8 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std::
|
||||
CompressedWriteBuffer compress{out};
|
||||
NativeBlockOutputStream stream{compress, ClickHouseRevision::get(), block.cloneEmpty()};
|
||||
|
||||
writeVarUInt(UInt64(DBMS_DISTRIBUTED_SENDS_MAGIC_NUMBER), out);
|
||||
context.getSettingsRef().serialize(out);
|
||||
writeStringBinary(query_string, out);
|
||||
|
||||
stream.writePrefix();
|
||||
|
@ -35,8 +35,8 @@ class StorageDistributed;
|
||||
class DistributedBlockOutputStream : public IBlockOutputStream
|
||||
{
|
||||
public:
|
||||
DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_,
|
||||
const Settings & settings_, bool insert_sync_, UInt64 insert_timeout_);
|
||||
DistributedBlockOutputStream(const Context & context_, StorageDistributed & storage, const ASTPtr & query_ast,
|
||||
const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_);
|
||||
|
||||
Block getHeader() const override;
|
||||
void write(const Block & block) override;
|
||||
@ -78,11 +78,11 @@ private:
|
||||
std::string getCurrentStateDescription();
|
||||
|
||||
private:
|
||||
const Context & context;
|
||||
StorageDistributed & storage;
|
||||
ASTPtr query_ast;
|
||||
String query_string;
|
||||
ClusterPtr cluster;
|
||||
const Settings & settings;
|
||||
size_t inserted_blocks = 0;
|
||||
size_t inserted_rows = 0;
|
||||
|
||||
|
@ -23,6 +23,16 @@ namespace ErrorCodes
|
||||
extern const int EMPTY_LIST_OF_COLUMNS_PASSED;
|
||||
}
|
||||
|
||||
const ColumnsDescription & ITableDeclaration::getColumns() const
|
||||
{
|
||||
return columns;
|
||||
}
|
||||
|
||||
const IndicesDescription & ITableDeclaration::getIndices() const
|
||||
{
|
||||
return indices;
|
||||
}
|
||||
|
||||
|
||||
void ITableDeclaration::setColumns(ColumnsDescription columns_)
|
||||
{
|
||||
|
@ -13,11 +13,11 @@ namespace DB
|
||||
class ITableDeclaration
|
||||
{
|
||||
public:
|
||||
virtual const ColumnsDescription & getColumns() const { return columns; }
|
||||
virtual void setColumns(ColumnsDescription columns_);
|
||||
const ColumnsDescription & getColumns() const;
|
||||
void setColumns(ColumnsDescription columns_);
|
||||
|
||||
virtual const IndicesDescription & getIndices() const { return indices; }
|
||||
virtual void setIndices(IndicesDescription indices_);
|
||||
const IndicesDescription & getIndices() const;
|
||||
void setIndices(IndicesDescription indices_);
|
||||
|
||||
/// NOTE: These methods should include virtual columns, but should NOT include ALIAS columns
|
||||
/// (they are treated separately).
|
||||
|
@ -84,30 +84,30 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPart()
|
||||
MergeTreeReadTask & current_task, MergeTreeRangeReader & current_reader)
|
||||
{
|
||||
if (!current_task.size_predictor)
|
||||
return current_max_block_size_rows;
|
||||
return static_cast<size_t>(current_max_block_size_rows);
|
||||
|
||||
/// Calculates number of rows will be read using preferred_block_size_bytes.
|
||||
/// Can't be less than avg_index_granularity.
|
||||
UInt64 rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes);
|
||||
size_t rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes);
|
||||
if (!rows_to_read)
|
||||
return rows_to_read;
|
||||
UInt64 total_row_in_current_granule = current_reader.numRowsInCurrentGranule();
|
||||
rows_to_read = std::max<UInt64>(total_row_in_current_granule, rows_to_read);
|
||||
auto total_row_in_current_granule = current_reader.numRowsInCurrentGranule();
|
||||
rows_to_read = std::max(total_row_in_current_granule, rows_to_read);
|
||||
|
||||
if (current_preferred_max_column_in_block_size_bytes)
|
||||
{
|
||||
/// Calculates number of rows will be read using preferred_max_column_in_block_size_bytes.
|
||||
UInt64 rows_to_read_for_max_size_column
|
||||
auto rows_to_read_for_max_size_column
|
||||
= current_task.size_predictor->estimateNumRowsForMaxSizeColumn(current_preferred_max_column_in_block_size_bytes);
|
||||
double filtration_ratio = std::max(min_filtration_ratio, 1.0 - current_task.size_predictor->filtered_rows_ratio);
|
||||
auto rows_to_read_for_max_size_column_with_filtration
|
||||
= static_cast<UInt64>(rows_to_read_for_max_size_column / filtration_ratio);
|
||||
= static_cast<size_t>(rows_to_read_for_max_size_column / filtration_ratio);
|
||||
|
||||
/// If preferred_max_column_in_block_size_bytes is used, number of rows to read can be less than current_index_granularity.
|
||||
rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration);
|
||||
}
|
||||
|
||||
UInt64 unread_rows_in_current_granule = current_reader.numPendingRowsInCurrentGranule();
|
||||
auto unread_rows_in_current_granule = current_reader.numPendingRowsInCurrentGranule();
|
||||
if (unread_rows_in_current_granule >= rows_to_read)
|
||||
return rows_to_read;
|
||||
|
||||
|
@ -14,7 +14,7 @@ Block MergeTreeBlockOutputStream::getHeader() const
|
||||
|
||||
void MergeTreeBlockOutputStream::write(const Block & block)
|
||||
{
|
||||
storage.data.delayInsertOrThrowIfNeeded();
|
||||
storage.delayInsertOrThrowIfNeeded();
|
||||
|
||||
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block);
|
||||
for (auto & current_block : part_blocks)
|
||||
@ -22,7 +22,7 @@ void MergeTreeBlockOutputStream::write(const Block & block)
|
||||
Stopwatch watch;
|
||||
|
||||
MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block);
|
||||
storage.data.renameTempPartAndAdd(part, &storage.increment);
|
||||
storage.renameTempPartAndAdd(part, &storage.increment);
|
||||
|
||||
PartLog::addNewPart(storage.global_context, part, watch.elapsed());
|
||||
|
||||
|
@ -116,7 +116,7 @@ MergeTreeData::MergeTreeData(
|
||||
database_name(database_), table_name(table_),
|
||||
full_path(full_path_),
|
||||
broken_part_callback(broken_part_callback_),
|
||||
log_name(database_name + "." + table_name), log(&Logger::get(log_name + " (Data)")),
|
||||
log_name(database_name + "." + table_name), log(&Logger::get(log_name)),
|
||||
data_parts_by_info(data_parts_indexes.get<TagByInfo>()),
|
||||
data_parts_by_state_and_info(data_parts_indexes.get<TagByStateAndInfo>())
|
||||
{
|
||||
@ -730,7 +730,7 @@ String MergeTreeData::MergingParams::getModeName() const
|
||||
}
|
||||
|
||||
|
||||
Int64 MergeTreeData::getMaxBlockNumber()
|
||||
Int64 MergeTreeData::getMaxBlockNumber() const
|
||||
{
|
||||
auto lock = lockParts();
|
||||
|
||||
@ -2665,7 +2665,7 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const A
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const
|
||||
bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &) const
|
||||
{
|
||||
/// Make sure that the left side of the IN operator contain part of the key.
|
||||
/// If there is a tuple on the left side of the IN operator, at least one item of the tuple
|
||||
@ -2694,18 +2694,12 @@ bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) con
|
||||
}
|
||||
}
|
||||
|
||||
MergeTreeData * MergeTreeData::checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const
|
||||
MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const
|
||||
{
|
||||
MergeTreeData * src_data;
|
||||
if (auto storage_merge_tree = dynamic_cast<StorageMergeTree *>(source_table.get()))
|
||||
src_data = &storage_merge_tree->data;
|
||||
else if (auto storage_replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(source_table.get()))
|
||||
src_data = &storage_replicated_merge_tree->data;
|
||||
else
|
||||
{
|
||||
throw Exception("Table " + table_name + " supports attachPartitionFrom only for MergeTree or ReplicatedMergeTree engines."
|
||||
MergeTreeData * src_data = dynamic_cast<MergeTreeData *>(source_table.get());
|
||||
if (!src_data)
|
||||
throw Exception("Table " + table_name + " supports attachPartitionFrom only for MergeTree family of table engines."
|
||||
" Got " + source_table->getName(), ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
if (getColumns().getAllPhysical().sizeOfDifference(src_data->getColumns().getAllPhysical()))
|
||||
throw Exception("Tables have different structure", ErrorCodes::INCOMPATIBLE_COLUMNS);
|
||||
@ -2724,7 +2718,7 @@ MergeTreeData * MergeTreeData::checkStructureAndGetMergeTreeData(const StoragePt
|
||||
if (format_version != src_data->format_version)
|
||||
throw Exception("Tables have different format_version", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
return src_data;
|
||||
return *src_data;
|
||||
}
|
||||
|
||||
MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPart(const MergeTreeData::DataPartPtr & src_part,
|
||||
|
@ -3,10 +3,11 @@
|
||||
#include <Common/SimpleIncrement.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Storages/ITableDeclaration.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndices.h>
|
||||
#include <Storages/MergeTree/MergeTreePartInfo.h>
|
||||
#include <Storages/MergeTree/MergeTreeSettings.h>
|
||||
#include <Storages/MergeTree/MergeTreeMutationStatus.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
@ -89,7 +90,7 @@ namespace ErrorCodes
|
||||
/// - MergeTreeDataWriter
|
||||
/// - MergeTreeDataMergerMutator
|
||||
|
||||
class MergeTreeData : public ITableDeclaration
|
||||
class MergeTreeData : public IStorage
|
||||
{
|
||||
public:
|
||||
/// Function to call if the part is suspected to contain corrupt data.
|
||||
@ -344,12 +345,21 @@ public:
|
||||
bool attach,
|
||||
BrokenPartCallback broken_part_callback_ = [](const String &){});
|
||||
|
||||
/// Load the set of data parts from disk. Call once - immediately after the object is created.
|
||||
void loadDataParts(bool skip_sanity_checks);
|
||||
ASTPtr getPartitionKeyAST() const override { return partition_by_ast; }
|
||||
ASTPtr getSortingKeyAST() const override { return sorting_key_expr_ast; }
|
||||
ASTPtr getPrimaryKeyAST() const override { return primary_key_expr_ast; }
|
||||
ASTPtr getSamplingKeyAST() const override { return sample_by_ast; }
|
||||
|
||||
bool supportsPrewhere() const { return true; }
|
||||
Names getColumnsRequiredForPartitionKey() const override { return (partition_key_expr ? partition_key_expr->getRequiredColumns() : Names{}); }
|
||||
Names getColumnsRequiredForSortingKey() const override { return sorting_key_expr->getRequiredColumns(); }
|
||||
Names getColumnsRequiredForPrimaryKey() const override { return primary_key_expr->getRequiredColumns(); }
|
||||
Names getColumnsRequiredForSampling() const override { return columns_required_for_sampling; }
|
||||
Names getColumnsRequiredForFinal() const override { return sorting_key_expr->getRequiredColumns(); }
|
||||
|
||||
bool supportsFinal() const
|
||||
bool supportsPrewhere() const override { return true; }
|
||||
bool supportsSampling() const override { return sample_by_ast != nullptr; }
|
||||
|
||||
bool supportsFinal() const override
|
||||
{
|
||||
return merging_params.mode == MergingParams::Collapsing
|
||||
|| merging_params.mode == MergingParams::Summing
|
||||
@ -358,9 +368,7 @@ public:
|
||||
|| merging_params.mode == MergingParams::VersionedCollapsing;
|
||||
}
|
||||
|
||||
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const;
|
||||
|
||||
Int64 getMaxBlockNumber();
|
||||
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &) const override;
|
||||
|
||||
NameAndTypePair getColumn(const String & column_name) const override
|
||||
{
|
||||
@ -385,14 +393,17 @@ public:
|
||||
|| column_name == "_sample_factor";
|
||||
}
|
||||
|
||||
String getDatabaseName() const { return database_name; }
|
||||
String getDatabaseName() const override { return database_name; }
|
||||
String getTableName() const override { return table_name; }
|
||||
|
||||
String getTableName() const { return table_name; }
|
||||
/// Load the set of data parts from disk. Call once - immediately after the object is created.
|
||||
void loadDataParts(bool skip_sanity_checks);
|
||||
|
||||
String getFullPath() const { return full_path; }
|
||||
|
||||
String getLogName() const { return log_name; }
|
||||
|
||||
Int64 getMaxBlockNumber() const;
|
||||
|
||||
/// Returns a copy of the list so that the caller shouldn't worry about locks.
|
||||
DataParts getDataParts(const DataPartStates & affordable_states) const;
|
||||
/// Returns sorted list of the parts with specified states
|
||||
@ -539,23 +550,11 @@ public:
|
||||
*/
|
||||
static ASTPtr extractKeyExpressionList(const ASTPtr & node);
|
||||
|
||||
Names getColumnsRequiredForPartitionKey() const { return (partition_key_expr ? partition_key_expr->getRequiredColumns() : Names{}); }
|
||||
|
||||
bool hasSortingKey() const { return !sorting_key_columns.empty(); }
|
||||
bool hasPrimaryKey() const { return !primary_key_columns.empty(); }
|
||||
bool hasSkipIndices() const { return !skip_indices.empty(); }
|
||||
bool hasTableTTL() const { return ttl_table_ast != nullptr; }
|
||||
|
||||
ASTPtr getSortingKeyAST() const { return sorting_key_expr_ast; }
|
||||
ASTPtr getPrimaryKeyAST() const { return primary_key_expr_ast; }
|
||||
|
||||
Names getColumnsRequiredForSortingKey() const { return sorting_key_expr->getRequiredColumns(); }
|
||||
Names getColumnsRequiredForPrimaryKey() const { return primary_key_expr->getRequiredColumns(); }
|
||||
|
||||
bool supportsSampling() const { return sample_by_ast != nullptr; }
|
||||
ASTPtr getSamplingExpression() const { return sample_by_ast; }
|
||||
Names getColumnsRequiredForSampling() const { return columns_required_for_sampling; }
|
||||
|
||||
/// Check that the part is not broken and calculate the checksums for it if they are not present.
|
||||
MutableDataPartPtr loadPartAndFixMetadata(const String & relative_path);
|
||||
|
||||
@ -592,11 +591,13 @@ public:
|
||||
/// Extracts MergeTreeData of other *MergeTree* storage
|
||||
/// and checks that their structure suitable for ALTER TABLE ATTACH PARTITION FROM
|
||||
/// Tables structure should be locked.
|
||||
MergeTreeData * checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const;
|
||||
MergeTreeData & checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const;
|
||||
|
||||
MergeTreeData::MutableDataPartPtr cloneAndLoadDataPart(const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix,
|
||||
const MergeTreePartInfo & dst_part_info);
|
||||
|
||||
virtual std::vector<MergeTreeMutationStatus> getMutationsStatus() const = 0;
|
||||
|
||||
MergeTreeDataFormatVersion format_version;
|
||||
|
||||
Context global_context;
|
||||
@ -655,13 +656,12 @@ public:
|
||||
/// For generating names of temporary parts during insertion.
|
||||
SimpleIncrement insert_increment;
|
||||
|
||||
private:
|
||||
protected:
|
||||
friend struct MergeTreeDataPart;
|
||||
friend class StorageMergeTree;
|
||||
friend class StorageReplicatedMergeTree;
|
||||
friend class MergeTreeDataMergerMutator;
|
||||
friend class ReplicatedMergeTreeAlterThread;
|
||||
friend struct ReplicatedMergeTreeTableMetadata;
|
||||
friend class StorageReplicatedMergeTree;
|
||||
|
||||
ASTPtr partition_by_ast;
|
||||
ASTPtr order_by_ast;
|
||||
|
@ -449,7 +449,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
|
||||
throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
ASTPtr args = std::make_shared<ASTExpressionList>();
|
||||
args->children.push_back(data.getSamplingExpression());
|
||||
args->children.push_back(data.getSamplingKeyAST());
|
||||
args->children.push_back(std::make_shared<ASTLiteral>(lower));
|
||||
|
||||
lower_function = std::make_shared<ASTFunction>();
|
||||
@ -466,7 +466,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
|
||||
throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
ASTPtr args = std::make_shared<ASTExpressionList>();
|
||||
args->children.push_back(data.getSamplingExpression());
|
||||
args->children.push_back(data.getSamplingKeyAST());
|
||||
args->children.push_back(std::make_shared<ASTLiteral>(upper));
|
||||
|
||||
upper_function = std::make_shared<ASTFunction>();
|
||||
|
@ -36,7 +36,7 @@ void ReplicatedMergeTreeAlterThread::run()
|
||||
try
|
||||
{
|
||||
/** We have a description of columns in ZooKeeper, common for all replicas (Example: /clickhouse/tables/02-06/visits/columns),
|
||||
* as well as a description of columns in local file with metadata (storage.data.getColumnsList()).
|
||||
* as well as a description of columns in local file with metadata (storage.getColumnsList()).
|
||||
*
|
||||
* If these descriptions are different - you need to do ALTER.
|
||||
*
|
||||
@ -83,7 +83,7 @@ void ReplicatedMergeTreeAlterThread::run()
|
||||
|
||||
const String & metadata_str = metadata_znode.contents;
|
||||
auto metadata_in_zk = ReplicatedMergeTreeTableMetadata::parse(metadata_str);
|
||||
auto metadata_diff = ReplicatedMergeTreeTableMetadata(storage.data).checkAndFindDiff(metadata_in_zk, /* allow_alter = */ true);
|
||||
auto metadata_diff = ReplicatedMergeTreeTableMetadata(storage).checkAndFindDiff(metadata_in_zk, /* allow_alter = */ true);
|
||||
|
||||
/// If you need to lock table structure, then suspend merges.
|
||||
ActionLock merge_blocker = storage.merger_mutator.actions_blocker.cancel();
|
||||
@ -123,7 +123,7 @@ void ReplicatedMergeTreeAlterThread::run()
|
||||
}
|
||||
|
||||
/// You need to get a list of parts under table lock to avoid race condition with merge.
|
||||
parts = storage.data.getDataParts();
|
||||
parts = storage.getDataParts();
|
||||
|
||||
storage.columns_version = columns_version;
|
||||
storage.metadata_version = metadata_version;
|
||||
@ -140,7 +140,7 @@ void ReplicatedMergeTreeAlterThread::run()
|
||||
int changed_parts = 0;
|
||||
|
||||
if (!changed_columns_version)
|
||||
parts = storage.data.getDataParts();
|
||||
parts = storage.getDataParts();
|
||||
|
||||
const auto columns_for_parts = storage.getColumns().getAllPhysical();
|
||||
const auto indices_for_parts = storage.getIndices();
|
||||
@ -150,7 +150,7 @@ void ReplicatedMergeTreeAlterThread::run()
|
||||
/// Update the part and write result to temporary files.
|
||||
/// TODO: You can skip checking for too large changes if ZooKeeper has, for example,
|
||||
/// node /flags/force_alter.
|
||||
auto transaction = storage.data.alterDataPart(part, columns_for_parts, indices_for_parts.indices, false);
|
||||
auto transaction = storage.alterDataPart(part, columns_for_parts, indices_for_parts.indices, false);
|
||||
if (!transaction)
|
||||
continue;
|
||||
|
||||
@ -160,7 +160,7 @@ void ReplicatedMergeTreeAlterThread::run()
|
||||
}
|
||||
|
||||
/// Columns sizes could be quietly changed in case of MODIFY/ADD COLUMN
|
||||
storage.data.recalculateColumnSizes();
|
||||
storage.recalculateColumnSizes();
|
||||
|
||||
if (changed_columns_version)
|
||||
{
|
||||
|
@ -35,7 +35,7 @@ namespace ErrorCodes
|
||||
ReplicatedMergeTreeBlockOutputStream::ReplicatedMergeTreeBlockOutputStream(
|
||||
StorageReplicatedMergeTree & storage_, size_t quorum_, size_t quorum_timeout_ms_, size_t max_parts_per_block, bool deduplicate_)
|
||||
: storage(storage_), quorum(quorum_), quorum_timeout_ms(quorum_timeout_ms_), max_parts_per_block(max_parts_per_block), deduplicate(deduplicate_),
|
||||
log(&Logger::get(storage.data.getLogName() + " (Replicated OutputStream)"))
|
||||
log(&Logger::get(storage.getLogName() + " (Replicated OutputStream)"))
|
||||
{
|
||||
/// The quorum value `1` has the same meaning as if it is disabled.
|
||||
if (quorum == 1)
|
||||
@ -109,7 +109,7 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block)
|
||||
last_block_is_duplicate = false;
|
||||
|
||||
/// TODO Is it possible to not lock the table structure here?
|
||||
storage.data.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event);
|
||||
storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event);
|
||||
|
||||
auto zookeeper = storage.getZooKeeper();
|
||||
assertSessionIsNotExpired(zookeeper);
|
||||
@ -297,8 +297,8 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo
|
||||
quorum_info.host_node_version));
|
||||
}
|
||||
|
||||
MergeTreeData::Transaction transaction(storage.data); /// If you can not add a part to ZK, we'll remove it back from the working set.
|
||||
storage.data.renameTempPartAndAdd(part, nullptr, &transaction);
|
||||
MergeTreeData::Transaction transaction(storage); /// If you can not add a part to ZK, we'll remove it back from the working set.
|
||||
storage.renameTempPartAndAdd(part, nullptr, &transaction);
|
||||
|
||||
Coordination::Responses responses;
|
||||
int32_t multi_code = zookeeper->tryMultiNoThrow(ops, responses); /// 1 RTT
|
||||
@ -414,7 +414,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo
|
||||
|
||||
void ReplicatedMergeTreeBlockOutputStream::writePrefix()
|
||||
{
|
||||
storage.data.throwInsertIfNeeded();
|
||||
storage.throwInsertIfNeeded();
|
||||
}
|
||||
|
||||
|
||||
|
@ -27,8 +27,8 @@ ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplic
|
||||
|
||||
void ReplicatedMergeTreeCleanupThread::run()
|
||||
{
|
||||
const auto CLEANUP_SLEEP_MS = storage.data.settings.cleanup_delay_period * 1000
|
||||
+ std::uniform_int_distribution<UInt64>(0, storage.data.settings.cleanup_delay_period_random_add * 1000)(rng);
|
||||
const auto CLEANUP_SLEEP_MS = storage.settings.cleanup_delay_period * 1000
|
||||
+ std::uniform_int_distribution<UInt64>(0, storage.settings.cleanup_delay_period_random_add * 1000)(rng);
|
||||
|
||||
try
|
||||
{
|
||||
@ -57,7 +57,7 @@ void ReplicatedMergeTreeCleanupThread::iterate()
|
||||
{
|
||||
/// TODO: Implement tryLockStructureForShare.
|
||||
auto lock = storage.lockStructureForShare(false, "");
|
||||
storage.data.clearOldTemporaryDirectories();
|
||||
storage.clearOldTemporaryDirectories();
|
||||
}
|
||||
|
||||
/// This is loose condition: no problem if we actually had lost leadership at this moment
|
||||
@ -82,7 +82,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
|
||||
int children_count = stat.numChildren;
|
||||
|
||||
/// We will wait for 1.1 times more records to accumulate than necessary.
|
||||
if (static_cast<double>(children_count) < storage.data.settings.min_replicated_logs_to_keep * 1.1)
|
||||
if (static_cast<double>(children_count) < storage.settings.min_replicated_logs_to_keep * 1.1)
|
||||
return;
|
||||
|
||||
Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas", &stat);
|
||||
@ -100,8 +100,8 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
|
||||
std::sort(entries.begin(), entries.end());
|
||||
|
||||
String min_saved_record_log_str = entries[
|
||||
entries.size() > storage.data.settings.max_replicated_logs_to_keep.value
|
||||
? entries.size() - storage.data.settings.max_replicated_logs_to_keep.value
|
||||
entries.size() > storage.settings.max_replicated_logs_to_keep.value
|
||||
? entries.size() - storage.settings.max_replicated_logs_to_keep.value
|
||||
: 0];
|
||||
|
||||
/// Replicas that were marked is_lost but are active.
|
||||
@ -203,7 +203,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
|
||||
min_saved_log_pointer = std::min(min_saved_log_pointer, min_log_pointer_lost_candidate);
|
||||
|
||||
/// We will not touch the last `min_replicated_logs_to_keep` records.
|
||||
entries.erase(entries.end() - std::min<UInt64>(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end());
|
||||
entries.erase(entries.end() - std::min<UInt64>(entries.size(), storage.settings.min_replicated_logs_to_keep.value), entries.end());
|
||||
/// We will not touch records that are no less than `min_saved_log_pointer`.
|
||||
entries.erase(std::lower_bound(entries.begin(), entries.end(), "log-" + padIndex(min_saved_log_pointer)), entries.end());
|
||||
|
||||
@ -294,12 +294,12 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
|
||||
|
||||
/// Use ZooKeeper's first node (last according to time) timestamp as "current" time.
|
||||
Int64 current_time = timed_blocks.front().ctime;
|
||||
Int64 time_threshold = std::max(static_cast<Int64>(0), current_time - static_cast<Int64>(1000 * storage.data.settings.replicated_deduplication_window_seconds));
|
||||
Int64 time_threshold = std::max(static_cast<Int64>(0), current_time - static_cast<Int64>(1000 * storage.settings.replicated_deduplication_window_seconds));
|
||||
|
||||
/// Virtual node, all nodes that are "greater" than this one will be deleted
|
||||
NodeWithStat block_threshold{{}, time_threshold};
|
||||
|
||||
size_t current_deduplication_window = std::min<size_t>(timed_blocks.size(), storage.data.settings.replicated_deduplication_window.value);
|
||||
size_t current_deduplication_window = std::min<size_t>(timed_blocks.size(), storage.settings.replicated_deduplication_window.value);
|
||||
auto first_outdated_block_fixed_threshold = timed_blocks.begin() + current_deduplication_window;
|
||||
auto first_outdated_block_time_threshold = std::upper_bound(timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime);
|
||||
auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold);
|
||||
@ -392,10 +392,10 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper &
|
||||
|
||||
void ReplicatedMergeTreeCleanupThread::clearOldMutations()
|
||||
{
|
||||
if (!storage.data.settings.finished_mutations_to_keep)
|
||||
if (!storage.settings.finished_mutations_to_keep)
|
||||
return;
|
||||
|
||||
if (storage.queue.countFinishedMutations() <= storage.data.settings.finished_mutations_to_keep)
|
||||
if (storage.queue.countFinishedMutations() <= storage.settings.finished_mutations_to_keep)
|
||||
{
|
||||
/// Not strictly necessary, but helps to avoid unnecessary ZooKeeper requests.
|
||||
/// If even this replica hasn't finished enough mutations yet, then we don't need to clean anything.
|
||||
@ -422,10 +422,10 @@ void ReplicatedMergeTreeCleanupThread::clearOldMutations()
|
||||
|
||||
/// Do not remove entries that are greater than `min_pointer` (they are not done yet).
|
||||
entries.erase(std::upper_bound(entries.begin(), entries.end(), padIndex(min_pointer)), entries.end());
|
||||
/// Do not remove last `storage.data.settings.finished_mutations_to_keep` entries.
|
||||
if (entries.size() <= storage.data.settings.finished_mutations_to_keep)
|
||||
/// Do not remove last `storage.settings.finished_mutations_to_keep` entries.
|
||||
if (entries.size() <= storage.settings.finished_mutations_to_keep)
|
||||
return;
|
||||
entries.erase(entries.end() - storage.data.settings.finished_mutations_to_keep, entries.end());
|
||||
entries.erase(entries.end() - storage.settings.finished_mutations_to_keep, entries.end());
|
||||
|
||||
if (entries.empty())
|
||||
return;
|
||||
|
@ -90,7 +90,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
|
||||
}
|
||||
|
||||
/// If the part is not in ZooKeeper, we'll check if it's at least somewhere.
|
||||
auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.data.format_version);
|
||||
auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version);
|
||||
|
||||
/** The logic is as follows:
|
||||
* - if some live or inactive replica has such a part, or a part covering it
|
||||
@ -126,7 +126,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
|
||||
Strings parts = zookeeper->getChildren(storage.zookeeper_path + "/replicas/" + replica + "/parts");
|
||||
for (const String & part_on_replica : parts)
|
||||
{
|
||||
auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica, storage.data.format_version);
|
||||
auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica, storage.format_version);
|
||||
|
||||
if (part_on_replica_info.contains(part_info))
|
||||
{
|
||||
@ -189,9 +189,9 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
|
||||
/// If the part is still in the PreCommitted -> Committed transition, it is not lost
|
||||
/// and there is no need to go searching for it on other replicas. To definitely find the needed part
|
||||
/// if it exists (or a part containing it) we first search among the PreCommitted parts.
|
||||
auto part = storage.data.getPartIfExists(part_name, {MergeTreeDataPartState::PreCommitted});
|
||||
auto part = storage.getPartIfExists(part_name, {MergeTreeDataPartState::PreCommitted});
|
||||
if (!part)
|
||||
part = storage.data.getActiveContainingPart(part_name);
|
||||
part = storage.getActiveContainingPart(part_name);
|
||||
|
||||
/// We do not have this or a covering part.
|
||||
if (!part)
|
||||
@ -235,8 +235,8 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
|
||||
checkDataPart(
|
||||
part,
|
||||
true,
|
||||
storage.data.primary_key_data_types,
|
||||
storage.data.skip_indices,
|
||||
storage.primary_key_data_types,
|
||||
storage.skip_indices,
|
||||
[this] { return need_stop.load(); });
|
||||
|
||||
if (need_stop)
|
||||
@ -259,7 +259,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
|
||||
storage.removePartAndEnqueueFetch(part_name);
|
||||
|
||||
/// Delete part locally.
|
||||
storage.data.forgetPartAndMoveToDetached(part, "broken_");
|
||||
storage.forgetPartAndMoveToDetached(part, "broken_");
|
||||
}
|
||||
}
|
||||
else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < time(nullptr))
|
||||
@ -270,7 +270,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
|
||||
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
|
||||
|
||||
LOG_ERROR(log, "Unexpected part " << part_name << " in filesystem. Removing.");
|
||||
storage.data.forgetPartAndMoveToDetached(part, "unexpected_");
|
||||
storage.forgetPartAndMoveToDetached(part, "unexpected_");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -20,7 +20,7 @@ namespace ErrorCodes
|
||||
|
||||
ReplicatedMergeTreeQueue::ReplicatedMergeTreeQueue(StorageReplicatedMergeTree & storage_)
|
||||
: storage(storage_)
|
||||
, format_version(storage.data.format_version)
|
||||
, format_version(storage.format_version)
|
||||
, current_parts(format_version)
|
||||
, virtual_parts(format_version)
|
||||
{}
|
||||
@ -62,14 +62,14 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper)
|
||||
Strings children = zookeeper->getChildren(queue_path);
|
||||
|
||||
auto to_remove_it = std::remove_if(
|
||||
children.begin(), children.end(), [&](const String & path)
|
||||
{
|
||||
return already_loaded_paths.count(path);
|
||||
});
|
||||
children.begin(), children.end(), [&](const String & path)
|
||||
{
|
||||
return already_loaded_paths.count(path);
|
||||
});
|
||||
|
||||
LOG_DEBUG(log,
|
||||
"Having " << (to_remove_it - children.begin()) << " queue entries to load, "
|
||||
<< (children.end() - to_remove_it) << " entries already loaded.");
|
||||
"Having " << (to_remove_it - children.begin()) << " queue entries to load, "
|
||||
<< (children.end() - to_remove_it) << " entries already loaded.");
|
||||
children.erase(to_remove_it, children.end());
|
||||
|
||||
std::sort(children.begin(), children.end());
|
||||
|
@ -44,11 +44,11 @@ ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(Storage
|
||||
, log(&Logger::get(log_name))
|
||||
, active_node_identifier(generateActiveNodeIdentifier())
|
||||
{
|
||||
check_period_ms = storage.data.settings.zookeeper_session_expiration_check_period.totalSeconds() * 1000;
|
||||
check_period_ms = storage.settings.zookeeper_session_expiration_check_period.totalSeconds() * 1000;
|
||||
|
||||
/// Periodicity of checking lag of replica.
|
||||
if (check_period_ms > static_cast<Int64>(storage.data.settings.check_delay_period) * 1000)
|
||||
check_period_ms = storage.data.settings.check_delay_period * 1000;
|
||||
if (check_period_ms > static_cast<Int64>(storage.settings.check_delay_period) * 1000)
|
||||
check_period_ms = storage.settings.check_delay_period * 1000;
|
||||
|
||||
task = storage.global_context.getSchedulePool().createTask(log_name, [this]{ run(); });
|
||||
}
|
||||
@ -121,7 +121,7 @@ void ReplicatedMergeTreeRestartingThread::run()
|
||||
}
|
||||
|
||||
time_t current_time = time(nullptr);
|
||||
if (current_time >= prev_time_of_check_delay + static_cast<time_t>(storage.data.settings.check_delay_period))
|
||||
if (current_time >= prev_time_of_check_delay + static_cast<time_t>(storage.settings.check_delay_period))
|
||||
{
|
||||
/// Find out lag of replicas.
|
||||
time_t absolute_delay = 0;
|
||||
@ -136,10 +136,10 @@ void ReplicatedMergeTreeRestartingThread::run()
|
||||
|
||||
/// We give up leadership if the relative lag is greater than threshold.
|
||||
if (storage.is_leader
|
||||
&& relative_delay > static_cast<time_t>(storage.data.settings.min_relative_delay_to_yield_leadership))
|
||||
&& relative_delay > static_cast<time_t>(storage.settings.min_relative_delay_to_yield_leadership))
|
||||
{
|
||||
LOG_INFO(log, "Relative replica delay (" << relative_delay << " seconds) is bigger than threshold ("
|
||||
<< storage.data.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership.");
|
||||
<< storage.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership.");
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::ReplicaYieldLeadership);
|
||||
|
||||
@ -181,7 +181,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup()
|
||||
|
||||
updateQuorumIfWeHavePart();
|
||||
|
||||
if (storage.data.settings.replicated_can_become_leader)
|
||||
if (storage.settings.replicated_can_become_leader)
|
||||
storage.enterLeaderElection();
|
||||
else
|
||||
LOG_INFO(log, "Will not enter leader election because replicated_can_become_leader=0");
|
||||
@ -239,13 +239,13 @@ void ReplicatedMergeTreeRestartingThread::removeFailedQuorumParts()
|
||||
|
||||
for (auto part_name : failed_parts)
|
||||
{
|
||||
auto part = storage.data.getPartIfExists(
|
||||
auto part = storage.getPartIfExists(
|
||||
part_name, {MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated});
|
||||
|
||||
if (part)
|
||||
{
|
||||
LOG_DEBUG(log, "Found part " << part_name << " with failed quorum. Moving to detached. This shouldn't happen often.");
|
||||
storage.data.forgetPartAndMoveToDetached(part, "noquorum_");
|
||||
storage.forgetPartAndMoveToDetached(part, "noquorum_");
|
||||
storage.queue.removeFromVirtualParts(part->info);
|
||||
}
|
||||
}
|
||||
|
@ -32,9 +32,9 @@ public:
|
||||
|
||||
bool supportsIndexForIn() const override { return true; }
|
||||
|
||||
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & /* query_context */) const override
|
||||
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const override
|
||||
{
|
||||
return part->storage.mayBenefitFromIndexForIn(left_in_operand);
|
||||
return part->storage.mayBenefitFromIndexForIn(left_in_operand, query_context);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
@ -333,7 +333,8 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const Context & c
|
||||
|
||||
/// DistributedBlockOutputStream will not own cluster, but will own ConnectionPools of the cluster
|
||||
return std::make_shared<DistributedBlockOutputStream>(
|
||||
*this, createInsertToRemoteTableQuery(remote_database, remote_table, getSampleBlock()), cluster, settings, insert_sync, timeout);
|
||||
context, *this, createInsertToRemoteTableQuery(remote_database, remote_table, getSampleBlock()), cluster,
|
||||
insert_sync, timeout);
|
||||
}
|
||||
|
||||
|
||||
|
@ -338,7 +338,7 @@ private:
|
||||
throw Exception("ASOF join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
else
|
||||
for (auto current = &static_cast<const typename Map::mapped_type::Base_t &>(it->getSecond()); current != nullptr;
|
||||
for (auto current = &static_cast<const typename Map::mapped_type::Base &>(it->getSecond()); current != nullptr;
|
||||
current = current->next)
|
||||
{
|
||||
for (size_t j = 0; j < columns.size(); ++j)
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/InterpreterCreateQuery.h>
|
||||
#include <Interpreters/InterpreterDropQuery.h>
|
||||
#include <Interpreters/InterpreterRenameQuery.h>
|
||||
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
||||
#include <Interpreters/AddDefaultDatabaseVisitor.h>
|
||||
|
||||
@ -26,6 +27,10 @@ namespace ErrorCodes
|
||||
extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW;
|
||||
}
|
||||
|
||||
/// Returns the name of the inner target table of a materialized view:
/// the given view name prefixed with ".inner.".
static inline String generateInnerTableName(const String & table_name)
{
    String inner_table_name(".inner.");
    inner_table_name += table_name;
    return inner_table_name;
}
|
||||
|
||||
static void extractDependentTable(ASTSelectQuery & query, String & select_database_name, String & select_table_name)
|
||||
{
|
||||
@ -128,7 +133,7 @@ StorageMaterializedView::StorageMaterializedView(
|
||||
else
|
||||
{
|
||||
target_database_name = database_name;
|
||||
target_table_name = ".inner." + table_name;
|
||||
target_table_name = generateInnerTableName(table_name);
|
||||
has_inner_table = true;
|
||||
}
|
||||
|
||||
@ -265,6 +270,53 @@ void StorageMaterializedView::mutate(const MutationCommands & commands, const Co
|
||||
getTargetTable()->mutate(commands, context);
|
||||
}
|
||||
|
||||
/// Renames `table_original_name` to `new_table_name` inside `database_name`
/// by building a RENAME query and running it through InterpreterRenameQuery.
/// Does nothing if the context cannot find the original table.
static void executeRenameQuery(Context & global_context, const String & database_name, const String & table_original_name, const String & new_table_name)
{
    /// No such table - nothing to rename.
    if (!global_context.tryGetTable(database_name, table_original_name))
        return;

    /// Source and destination stay in the same database; only the table name changes.
    ASTRenameQuery::Element element;
    element.from.database = database_name;
    element.from.table = table_original_name;
    element.to.database = database_name;
    element.to.table = new_table_name;

    auto rename_query = std::make_shared<ASTRenameQuery>();
    rename_query->elements.push_back(element);

    InterpreterRenameQuery(rename_query, global_context).execute();
}
|
||||
|
||||
|
||||
void StorageMaterializedView::rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & new_table_name)
|
||||
{
|
||||
if (has_inner_table && tryGetTargetTable())
|
||||
{
|
||||
String new_target_table_name = generateInnerTableName(new_table_name);
|
||||
executeRenameQuery(global_context, target_database_name, target_table_name, new_target_table_name);
|
||||
target_table_name = new_target_table_name;
|
||||
}
|
||||
|
||||
auto lock = global_context.getLock();
|
||||
|
||||
global_context.removeDependencyUnsafe(
|
||||
DatabaseAndTableName(select_database_name, select_table_name),
|
||||
DatabaseAndTableName(database_name, table_name));
|
||||
|
||||
table_name = new_table_name;
|
||||
|
||||
global_context.addDependencyUnsafe(
|
||||
DatabaseAndTableName(select_database_name, select_table_name),
|
||||
DatabaseAndTableName(database_name, table_name));
|
||||
}
|
||||
|
||||
void StorageMaterializedView::shutdown()
|
||||
{
|
||||
/// Make sure the dependency is removed after DETACH TABLE
|
||||
|
@ -39,6 +39,8 @@ public:
|
||||
|
||||
void mutate(const MutationCommands & commands, const Context & context) override;
|
||||
|
||||
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override;
|
||||
|
||||
void shutdown() override;
|
||||
|
||||
void checkTableCanBeDropped() const override;
|
||||
|
@ -60,28 +60,28 @@ StorageMergeTree::StorageMergeTree(
|
||||
const ASTPtr & primary_key_ast_,
|
||||
const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
|
||||
const ASTPtr & ttl_table_ast_,
|
||||
const MergeTreeData::MergingParams & merging_params_,
|
||||
const MergingParams & merging_params_,
|
||||
const MergeTreeSettings & settings_,
|
||||
bool has_force_restore_data_flag)
|
||||
: path(path_), database_name(database_name_), table_name(table_name_), full_path(path + escapeForFileName(table_name) + '/'),
|
||||
global_context(context_), background_pool(context_.getBackgroundPool()),
|
||||
data(database_name, table_name,
|
||||
full_path, columns_, indices_,
|
||||
context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
|
||||
sample_by_ast_, ttl_table_ast_, merging_params_,
|
||||
settings_, false, attach),
|
||||
reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()),
|
||||
log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)"))
|
||||
: MergeTreeData(database_name_, table_name_,
|
||||
path_ + escapeForFileName(table_name_) + '/',
|
||||
columns_, indices_,
|
||||
context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
|
||||
sample_by_ast_, ttl_table_ast_, merging_params_,
|
||||
settings_, false, attach),
|
||||
path(path_),
|
||||
background_pool(context_.getBackgroundPool()),
|
||||
reader(*this), writer(*this), merger_mutator(*this, global_context.getBackgroundPool())
|
||||
{
|
||||
if (path_.empty())
|
||||
throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME);
|
||||
if (path.empty())
|
||||
throw Exception("MergeTree require data path", ErrorCodes::INCORRECT_FILE_NAME);
|
||||
|
||||
data.loadDataParts(has_force_restore_data_flag);
|
||||
loadDataParts(has_force_restore_data_flag);
|
||||
|
||||
if (!attach && !data.getDataParts().empty())
|
||||
if (!attach && !getDataParts().empty())
|
||||
throw Exception("Data directory for table already containing data parts - probably it was unclean DROP table or manual intervention. You must either clear directory by hand or use ATTACH TABLE instead of CREATE TABLE if you need to use that parts.", ErrorCodes::INCORRECT_DATA);
|
||||
|
||||
increment.set(data.getMaxBlockNumber());
|
||||
increment.set(getMaxBlockNumber());
|
||||
|
||||
loadMutations();
|
||||
}
|
||||
@ -89,11 +89,11 @@ StorageMergeTree::StorageMergeTree(
|
||||
|
||||
void StorageMergeTree::startup()
|
||||
{
|
||||
data.clearOldPartsFromFilesystem();
|
||||
clearOldPartsFromFilesystem();
|
||||
|
||||
/// Temporary directories contain incomplete results of merges (after forced restart)
|
||||
/// and don't allow to reinitialize them, so delete each of them immediately
|
||||
data.clearOldTemporaryDirectories(0);
|
||||
clearOldTemporaryDirectories(0);
|
||||
|
||||
/// NOTE background task will also do the above cleanups periodically.
|
||||
time_after_previous_cleanup.restart();
|
||||
@ -135,16 +135,16 @@ BlockOutputStreamPtr StorageMergeTree::write(const ASTPtr & /*query*/, const Con
|
||||
|
||||
void StorageMergeTree::checkTableCanBeDropped() const
|
||||
{
|
||||
const_cast<MergeTreeData &>(getData()).recalculateColumnSizes();
|
||||
global_context.checkTableCanBeDropped(database_name, table_name, getData().getTotalActiveSizeInBytes());
|
||||
const_cast<StorageMergeTree &>(*this).recalculateColumnSizes();
|
||||
global_context.checkTableCanBeDropped(database_name, table_name, getTotalActiveSizeInBytes());
|
||||
}
|
||||
|
||||
void StorageMergeTree::checkPartitionCanBeDropped(const ASTPtr & partition)
|
||||
{
|
||||
const_cast<MergeTreeData &>(getData()).recalculateColumnSizes();
|
||||
const_cast<StorageMergeTree &>(*this).recalculateColumnSizes();
|
||||
|
||||
const String partition_id = data.getPartitionIDFromQuery(partition, global_context);
|
||||
auto parts_to_remove = data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
|
||||
const String partition_id = getPartitionIDFromQuery(partition, global_context);
|
||||
auto parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
|
||||
|
||||
UInt64 partition_size = 0;
|
||||
|
||||
@ -158,7 +158,7 @@ void StorageMergeTree::checkPartitionCanBeDropped(const ASTPtr & partition)
|
||||
void StorageMergeTree::drop()
|
||||
{
|
||||
shutdown();
|
||||
data.dropAllData();
|
||||
dropAllData();
|
||||
}
|
||||
|
||||
void StorageMergeTree::truncate(const ASTPtr &, const Context &)
|
||||
@ -170,20 +170,20 @@ void StorageMergeTree::truncate(const ASTPtr &, const Context &)
|
||||
|
||||
/// NOTE: It's assumed that this method is called under lockForAlter.
|
||||
|
||||
auto parts_to_remove = data.getDataPartsVector();
|
||||
data.removePartsFromWorkingSet(parts_to_remove, true);
|
||||
auto parts_to_remove = getDataPartsVector();
|
||||
removePartsFromWorkingSet(parts_to_remove, true);
|
||||
|
||||
LOG_INFO(log, "Removed " << parts_to_remove.size() << " parts.");
|
||||
}
|
||||
|
||||
data.clearOldPartsFromFilesystem();
|
||||
clearOldPartsFromFilesystem();
|
||||
}
|
||||
|
||||
void StorageMergeTree::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name)
|
||||
{
|
||||
std::string new_full_path = new_path_to_db + escapeForFileName(new_table_name) + '/';
|
||||
|
||||
data.setPath(new_full_path);
|
||||
setPath(new_full_path);
|
||||
|
||||
path = new_path_to_db;
|
||||
table_name = new_table_name;
|
||||
@ -216,21 +216,21 @@ void StorageMergeTree::alter(
|
||||
|
||||
lockNewDataStructureExclusively(table_lock_holder, context.getCurrentQueryId());
|
||||
|
||||
data.checkAlter(params, context);
|
||||
checkAlter(params, context);
|
||||
|
||||
auto new_columns = data.getColumns();
|
||||
auto new_indices = data.getIndices();
|
||||
ASTPtr new_order_by_ast = data.order_by_ast;
|
||||
ASTPtr new_primary_key_ast = data.primary_key_ast;
|
||||
ASTPtr new_ttl_table_ast = data.ttl_table_ast;
|
||||
auto new_columns = getColumns();
|
||||
auto new_indices = getIndices();
|
||||
ASTPtr new_order_by_ast = order_by_ast;
|
||||
ASTPtr new_primary_key_ast = primary_key_ast;
|
||||
ASTPtr new_ttl_table_ast = ttl_table_ast;
|
||||
params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast);
|
||||
|
||||
auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated});
|
||||
auto parts = getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated});
|
||||
auto columns_for_parts = new_columns.getAllPhysical();
|
||||
std::vector<MergeTreeData::AlterDataPartTransactionPtr> transactions;
|
||||
for (const MergeTreeData::DataPartPtr & part : parts)
|
||||
std::vector<AlterDataPartTransactionPtr> transactions;
|
||||
for (const DataPartPtr & part : parts)
|
||||
{
|
||||
if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false))
|
||||
if (auto transaction = alterDataPart(part, columns_for_parts, new_indices.indices, false))
|
||||
transactions.push_back(std::move(transaction));
|
||||
}
|
||||
|
||||
@ -240,28 +240,28 @@ void StorageMergeTree::alter(
|
||||
{
|
||||
auto & storage_ast = ast.as<ASTStorage &>();
|
||||
|
||||
if (new_order_by_ast.get() != data.order_by_ast.get())
|
||||
if (new_order_by_ast.get() != order_by_ast.get())
|
||||
storage_ast.set(storage_ast.order_by, new_order_by_ast);
|
||||
|
||||
if (new_primary_key_ast.get() != data.primary_key_ast.get())
|
||||
if (new_primary_key_ast.get() != primary_key_ast.get())
|
||||
storage_ast.set(storage_ast.primary_key, new_primary_key_ast);
|
||||
|
||||
if (new_ttl_table_ast.get() != data.ttl_table_ast.get())
|
||||
if (new_ttl_table_ast.get() != ttl_table_ast.get())
|
||||
storage_ast.set(storage_ast.ttl_table, new_ttl_table_ast);
|
||||
};
|
||||
|
||||
context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier);
|
||||
|
||||
/// Reinitialize primary key because primary key column types might have changed.
|
||||
data.setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices);
|
||||
setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices);
|
||||
|
||||
data.setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast);
|
||||
setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast);
|
||||
|
||||
for (auto & transaction : transactions)
|
||||
transaction->commit();
|
||||
|
||||
/// Columns sizes could be changed
|
||||
data.recalculateColumnSizes();
|
||||
recalculateColumnSizes();
|
||||
}
|
||||
|
||||
|
||||
@ -341,7 +341,7 @@ public:
|
||||
|
||||
void StorageMergeTree::mutate(const MutationCommands & commands, const Context &)
|
||||
{
|
||||
MergeTreeMutationEntry entry(commands, full_path, data.insert_increment.get());
|
||||
MergeTreeMutationEntry entry(commands, full_path, insert_increment.get());
|
||||
String file_name;
|
||||
{
|
||||
std::lock_guard lock(currently_merging_mutex);
|
||||
@ -362,7 +362,7 @@ std::vector<MergeTreeMutationStatus> StorageMergeTree::getMutationsStatus() cons
|
||||
std::lock_guard lock(currently_merging_mutex);
|
||||
|
||||
std::vector<Int64> part_data_versions;
|
||||
auto data_parts = data.getDataPartsVector();
|
||||
auto data_parts = getDataPartsVector();
|
||||
part_data_versions.reserve(data_parts.size());
|
||||
for (const auto & part : data_parts)
|
||||
part_data_versions.push_back(part->info.getDataVersion());
|
||||
@ -471,7 +471,7 @@ bool StorageMergeTree::merge(
|
||||
{
|
||||
std::lock_guard lock(currently_merging_mutex);
|
||||
|
||||
auto can_merge = [this, &lock] (const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String *)
|
||||
auto can_merge = [this, &lock] (const DataPartPtr & left, const DataPartPtr & right, String *)
|
||||
{
|
||||
return !currently_merging.count(left) && !currently_merging.count(right)
|
||||
&& getCurrentMutationVersion(left, lock) == getCurrentMutationVersion(right, lock);
|
||||
@ -503,7 +503,7 @@ bool StorageMergeTree::merge(
|
||||
|
||||
/// Logging
|
||||
Stopwatch stopwatch;
|
||||
MergeTreeData::MutableDataPartPtr new_part;
|
||||
MutableDataPartPtr new_part;
|
||||
|
||||
auto write_part_log = [&] (const ExecutionStatus & execution_status)
|
||||
{
|
||||
@ -554,7 +554,7 @@ bool StorageMergeTree::merge(
|
||||
future_part, *merge_entry, time(nullptr),
|
||||
merging_tagger->reserved_space.get(), deduplicate);
|
||||
merger_mutator.renameMergedTemporaryPart(new_part, future_part.parts, nullptr);
|
||||
data.removeEmptyColumnsFromPart(new_part);
|
||||
removeEmptyColumnsFromPart(new_part);
|
||||
|
||||
merging_tagger->is_successful = true;
|
||||
write_part_log({});
|
||||
@ -587,7 +587,7 @@ bool StorageMergeTree::tryMutatePart()
|
||||
return false;
|
||||
|
||||
auto mutations_end_it = current_mutations_by_version.end();
|
||||
for (const auto & part : data.getDataPartsVector())
|
||||
for (const auto & part : getDataPartsVector())
|
||||
{
|
||||
if (currently_merging.count(part))
|
||||
continue;
|
||||
@ -621,7 +621,7 @@ bool StorageMergeTree::tryMutatePart()
|
||||
MergeList::EntryPtr merge_entry = global_context.getMergeList().insert(database_name, table_name, future_part);
|
||||
|
||||
Stopwatch stopwatch;
|
||||
MergeTreeData::MutableDataPartPtr new_part;
|
||||
MutableDataPartPtr new_part;
|
||||
|
||||
auto write_part_log = [&] (const ExecutionStatus & execution_status)
|
||||
{
|
||||
@ -670,7 +670,7 @@ bool StorageMergeTree::tryMutatePart()
|
||||
try
|
||||
{
|
||||
new_part = merger_mutator.mutatePartToTemporaryPart(future_part, commands, *merge_entry, global_context);
|
||||
data.renameTempPartAndReplace(new_part);
|
||||
renameTempPartAndReplace(new_part);
|
||||
tagger->is_successful = true;
|
||||
write_part_log({});
|
||||
}
|
||||
@ -698,11 +698,11 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::backgroundTask()
|
||||
/// Clear old parts. It is unnecessary to do it more than once a second.
|
||||
if (auto lock = time_after_previous_cleanup.compareAndRestartDeferred(1))
|
||||
{
|
||||
data.clearOldPartsFromFilesystem();
|
||||
clearOldPartsFromFilesystem();
|
||||
{
|
||||
/// TODO: Implement tryLockStructureForShare.
|
||||
auto lock_structure = lockStructureForShare(false, "");
|
||||
data.clearOldTemporaryDirectories();
|
||||
clearOldTemporaryDirectories();
|
||||
}
|
||||
clearOldMutations();
|
||||
}
|
||||
@ -729,7 +729,7 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::backgroundTask()
|
||||
}
|
||||
|
||||
Int64 StorageMergeTree::getCurrentMutationVersion(
|
||||
const MergeTreeData::DataPartPtr & part,
|
||||
const DataPartPtr & part,
|
||||
std::lock_guard<std::mutex> & /* currently_merging_mutex_lock */) const
|
||||
{
|
||||
auto it = current_mutations_by_version.upper_bound(part->info.getDataVersion());
|
||||
@ -741,28 +741,28 @@ Int64 StorageMergeTree::getCurrentMutationVersion(
|
||||
|
||||
void StorageMergeTree::clearOldMutations()
|
||||
{
|
||||
if (!data.settings.finished_mutations_to_keep)
|
||||
if (!settings.finished_mutations_to_keep)
|
||||
return;
|
||||
|
||||
std::vector<MergeTreeMutationEntry> mutations_to_delete;
|
||||
{
|
||||
std::lock_guard lock(currently_merging_mutex);
|
||||
|
||||
if (current_mutations_by_version.size() <= data.settings.finished_mutations_to_keep)
|
||||
if (current_mutations_by_version.size() <= settings.finished_mutations_to_keep)
|
||||
return;
|
||||
|
||||
auto begin_it = current_mutations_by_version.begin();
|
||||
|
||||
std::optional<Int64> min_version = data.getMinPartDataVersion();
|
||||
std::optional<Int64> min_version = getMinPartDataVersion();
|
||||
auto end_it = current_mutations_by_version.end();
|
||||
if (min_version)
|
||||
end_it = current_mutations_by_version.upper_bound(*min_version);
|
||||
|
||||
size_t done_count = std::distance(begin_it, end_it);
|
||||
if (done_count <= data.settings.finished_mutations_to_keep)
|
||||
if (done_count <= settings.finished_mutations_to_keep)
|
||||
return;
|
||||
|
||||
size_t to_delete_count = done_count - data.settings.finished_mutations_to_keep;
|
||||
size_t to_delete_count = done_count - settings.finished_mutations_to_keep;
|
||||
|
||||
auto it = begin_it;
|
||||
for (size_t i = 0; i < to_delete_count; ++i)
|
||||
@ -790,10 +790,10 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi
|
||||
/// We don't change table structure, only data in some parts, parts are locked inside alterDataPart() function
|
||||
auto lock_read_structure = lockStructureForShare(false, context.getCurrentQueryId());
|
||||
|
||||
String partition_id = data.getPartitionIDFromQuery(partition, context);
|
||||
auto parts = data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
|
||||
String partition_id = getPartitionIDFromQuery(partition, context);
|
||||
auto parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
|
||||
|
||||
std::vector<MergeTreeData::AlterDataPartTransactionPtr> transactions;
|
||||
std::vector<AlterDataPartTransactionPtr> transactions;
|
||||
|
||||
AlterCommand alter_command;
|
||||
alter_command.type = AlterCommand::DROP_COLUMN;
|
||||
@ -812,7 +812,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi
|
||||
if (part->info.partition_id != partition_id)
|
||||
throw Exception("Unexpected partition ID " + part->info.partition_id + ". This is a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false))
|
||||
if (auto transaction = alterDataPart(part, columns_for_parts, new_indices.indices, false))
|
||||
transactions.push_back(std::move(transaction));
|
||||
|
||||
LOG_DEBUG(log, "Removing column " << get<String>(column_name) << " from part " << part->name);
|
||||
@ -825,7 +825,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi
|
||||
transaction->commit();
|
||||
|
||||
/// Recalculate columns size (not only for the modified column)
|
||||
data.recalculateColumnSizes();
|
||||
recalculateColumnSizes();
|
||||
}
|
||||
|
||||
|
||||
@ -835,10 +835,10 @@ bool StorageMergeTree::optimize(
|
||||
String disable_reason;
|
||||
if (!partition && final)
|
||||
{
|
||||
MergeTreeData::DataPartsVector data_parts = data.getDataPartsVector();
|
||||
DataPartsVector data_parts = getDataPartsVector();
|
||||
std::unordered_set<String> partition_ids;
|
||||
|
||||
for (const MergeTreeData::DataPartPtr & part : data_parts)
|
||||
for (const DataPartPtr & part : data_parts)
|
||||
partition_ids.emplace(part->info.partition_id);
|
||||
|
||||
for (const String & partition_id : partition_ids)
|
||||
@ -855,7 +855,7 @@ bool StorageMergeTree::optimize(
|
||||
{
|
||||
String partition_id;
|
||||
if (partition)
|
||||
partition_id = data.getPartitionIDFromQuery(partition, context);
|
||||
partition_id = getPartitionIDFromQuery(partition, context);
|
||||
|
||||
if (!merge(true, partition_id, final, deduplicate, &disable_reason))
|
||||
{
|
||||
@ -895,7 +895,7 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma
|
||||
case PartitionCommand::FREEZE_PARTITION:
|
||||
{
|
||||
auto lock = lockStructureForShare(false, context.getCurrentQueryId());
|
||||
data.freezePartition(command.partition, command.with_name, context);
|
||||
freezePartition(command.partition, command.with_name, context);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -906,7 +906,7 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma
|
||||
case PartitionCommand::FREEZE_ALL_PARTITIONS:
|
||||
{
|
||||
auto lock = lockStructureForShare(false, context.getCurrentQueryId());
|
||||
data.freezeAll(command.with_name, context);
|
||||
freezeAll(command.with_name, context);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -925,11 +925,11 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons
|
||||
/// Waits for completion of merge and does not start new ones.
|
||||
auto lock = lockExclusively(context.getCurrentQueryId());
|
||||
|
||||
String partition_id = data.getPartitionIDFromQuery(partition, context);
|
||||
String partition_id = getPartitionIDFromQuery(partition, context);
|
||||
|
||||
/// TODO: should we include PreCommitted parts like in Replicated case?
|
||||
auto parts_to_remove = data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
|
||||
data.removePartsFromWorkingSet(parts_to_remove, true);
|
||||
auto parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
|
||||
removePartsFromWorkingSet(parts_to_remove, true);
|
||||
|
||||
if (detach)
|
||||
{
|
||||
@ -944,7 +944,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons
|
||||
LOG_INFO(log, (detach ? "Detached " : "Removed ") << parts_to_remove.size() << " parts inside partition ID " << partition_id << ".");
|
||||
}
|
||||
|
||||
data.clearOldPartsFromFilesystem();
|
||||
clearOldPartsFromFilesystem();
|
||||
}
|
||||
|
||||
|
||||
@ -957,7 +957,7 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par
|
||||
if (attach_part)
|
||||
partition_id = partition->as<ASTLiteral &>().value.safeGet<String>();
|
||||
else
|
||||
partition_id = data.getPartitionIDFromQuery(partition, context);
|
||||
partition_id = getPartitionIDFromQuery(partition, context);
|
||||
|
||||
String source_dir = "detached/";
|
||||
|
||||
@ -970,12 +970,12 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par
|
||||
else
|
||||
{
|
||||
LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir);
|
||||
ActiveDataPartSet active_parts(data.format_version);
|
||||
ActiveDataPartSet active_parts(format_version);
|
||||
for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it)
|
||||
{
|
||||
const String & name = it.name();
|
||||
MergeTreePartInfo part_info;
|
||||
if (!MergeTreePartInfo::tryParsePartName(name, &part_info, data.format_version)
|
||||
if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version)
|
||||
|| part_info.partition_id != partition_id)
|
||||
{
|
||||
continue;
|
||||
@ -992,10 +992,10 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par
|
||||
String source_path = source_dir + source_part_name;
|
||||
|
||||
LOG_DEBUG(log, "Checking data");
|
||||
MergeTreeData::MutableDataPartPtr part = data.loadPartAndFixMetadata(source_path);
|
||||
MutableDataPartPtr part = loadPartAndFixMetadata(source_path);
|
||||
|
||||
LOG_INFO(log, "Attaching part " << source_part_name << " from " << source_path);
|
||||
data.renameTempPartAndAdd(part, &increment);
|
||||
renameTempPartAndAdd(part, &increment);
|
||||
|
||||
LOG_INFO(log, "Finished attaching part");
|
||||
}
|
||||
@ -1010,22 +1010,22 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con
|
||||
auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId());
|
||||
|
||||
Stopwatch watch;
|
||||
MergeTreeData * src_data = data.checkStructureAndGetMergeTreeData(source_table);
|
||||
String partition_id = data.getPartitionIDFromQuery(partition, context);
|
||||
MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table);
|
||||
String partition_id = getPartitionIDFromQuery(partition, context);
|
||||
|
||||
MergeTreeData::DataPartsVector src_parts = src_data->getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
|
||||
MergeTreeData::MutableDataPartsVector dst_parts;
|
||||
DataPartsVector src_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
|
||||
MutableDataPartsVector dst_parts;
|
||||
|
||||
static const String TMP_PREFIX = "tmp_replace_from_";
|
||||
|
||||
for (const MergeTreeData::DataPartPtr & src_part : src_parts)
|
||||
for (const DataPartPtr & src_part : src_parts)
|
||||
{
|
||||
/// This will generate unique name in scope of current server process.
|
||||
Int64 temp_index = data.insert_increment.get();
|
||||
Int64 temp_index = insert_increment.get();
|
||||
MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level);
|
||||
|
||||
std::shared_lock<std::shared_mutex> part_lock(src_part->columns_lock);
|
||||
dst_parts.emplace_back(data.cloneAndLoadDataPart(src_part, TMP_PREFIX, dst_part_info));
|
||||
dst_parts.emplace_back(cloneAndLoadDataPart(src_part, TMP_PREFIX, dst_part_info));
|
||||
}
|
||||
|
||||
/// ATTACH empty part set
|
||||
@ -1047,19 +1047,19 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con
|
||||
{
|
||||
/// Here we use the transaction just like RAII since rare errors in renameTempPartAndReplace() are possible
|
||||
/// and we should be able to rollback already added (PreCommitted) parts
|
||||
MergeTreeData::Transaction transaction(data);
|
||||
Transaction transaction(*this);
|
||||
|
||||
auto data_parts_lock = data.lockParts();
|
||||
auto data_parts_lock = lockParts();
|
||||
|
||||
/// Populate transaction
|
||||
for (MergeTreeData::MutableDataPartPtr & part : dst_parts)
|
||||
data.renameTempPartAndReplace(part, &increment, &transaction, data_parts_lock);
|
||||
for (MutableDataPartPtr & part : dst_parts)
|
||||
renameTempPartAndReplace(part, &increment, &transaction, data_parts_lock);
|
||||
|
||||
transaction.commit(&data_parts_lock);
|
||||
|
||||
/// If it is REPLACE (not ATTACH), remove all parts whose max_block_number is less than the min_block_number of the first new block
|
||||
if (replace)
|
||||
data.removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock);
|
||||
removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock);
|
||||
}
|
||||
|
||||
PartLog::addNewParts(global_context, dst_parts, watch.elapsed());
|
||||
|
@ -20,34 +20,18 @@ namespace DB
|
||||
|
||||
/** See the description of the data structure in MergeTreeData.
|
||||
*/
|
||||
class StorageMergeTree : public ext::shared_ptr_helper<StorageMergeTree>, public IStorage
|
||||
class StorageMergeTree : public ext::shared_ptr_helper<StorageMergeTree>, public MergeTreeData
|
||||
{
|
||||
public:
|
||||
void startup() override;
|
||||
void shutdown() override;
|
||||
~StorageMergeTree() override;
|
||||
|
||||
std::string getName() const override { return data.merging_params.getModeName() + "MergeTree"; }
|
||||
std::string getName() const override { return merging_params.getModeName() + "MergeTree"; }
|
||||
std::string getTableName() const override { return table_name; }
|
||||
std::string getDatabaseName() const override { return database_name; }
|
||||
|
||||
bool supportsSampling() const override { return data.supportsSampling(); }
|
||||
bool supportsPrewhere() const override { return data.supportsPrewhere(); }
|
||||
bool supportsFinal() const override { return data.supportsFinal(); }
|
||||
bool supportsIndexForIn() const override { return true; }
|
||||
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & /* query_context */) const override
|
||||
{
|
||||
return data.mayBenefitFromIndexForIn(left_in_operand);
|
||||
}
|
||||
|
||||
const ColumnsDescription & getColumns() const override { return data.getColumns(); }
|
||||
void setColumns(ColumnsDescription columns_) override { return data.setColumns(std::move(columns_)); }
|
||||
|
||||
virtual const IndicesDescription & getIndices() const override { return data.getIndices(); }
|
||||
virtual void setIndices(IndicesDescription indices_) override { data.setIndices(std::move(indices_)); }
|
||||
|
||||
NameAndTypePair getColumn(const String & column_name) const override { return data.getColumn(column_name); }
|
||||
bool hasColumn(const String & column_name) const override { return data.hasColumn(column_name); }
|
||||
|
||||
BlockInputStreams read(
|
||||
const Names & column_names,
|
||||
@ -66,7 +50,7 @@ public:
|
||||
void alterPartition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) override;
|
||||
|
||||
void mutate(const MutationCommands & commands, const Context & context) override;
|
||||
std::vector<MergeTreeMutationStatus> getMutationsStatus() const;
|
||||
std::vector<MergeTreeMutationStatus> getMutationsStatus() const override;
|
||||
CancellationCode killMutation(const String & mutation_id) override;
|
||||
|
||||
void drop() override;
|
||||
@ -84,32 +68,13 @@ public:
|
||||
|
||||
ActionLock getActionLock(StorageActionBlockType action_type) override;
|
||||
|
||||
MergeTreeData & getData() { return data; }
|
||||
const MergeTreeData & getData() const { return data; }
|
||||
|
||||
String getDataPath() const override { return full_path; }
|
||||
|
||||
ASTPtr getPartitionKeyAST() const override { return data.partition_by_ast; }
|
||||
ASTPtr getSortingKeyAST() const override { return data.getSortingKeyAST(); }
|
||||
ASTPtr getPrimaryKeyAST() const override { return data.getPrimaryKeyAST(); }
|
||||
ASTPtr getSamplingKeyAST() const override { return data.getSamplingExpression(); }
|
||||
|
||||
Names getColumnsRequiredForPartitionKey() const override { return data.getColumnsRequiredForPartitionKey(); }
|
||||
Names getColumnsRequiredForSortingKey() const override { return data.getColumnsRequiredForSortingKey(); }
|
||||
Names getColumnsRequiredForPrimaryKey() const override { return data.getColumnsRequiredForPrimaryKey(); }
|
||||
Names getColumnsRequiredForSampling() const override { return data.getColumnsRequiredForSampling(); }
|
||||
Names getColumnsRequiredForFinal() const override { return data.getColumnsRequiredForSortingKey(); }
|
||||
|
||||
private:
|
||||
String path;
|
||||
String database_name;
|
||||
String table_name;
|
||||
String full_path;
|
||||
|
||||
Context global_context;
|
||||
BackgroundProcessingPool & background_pool;
|
||||
|
||||
MergeTreeData data;
|
||||
MergeTreeDataSelectExecutor reader;
|
||||
MergeTreeDataWriter writer;
|
||||
MergeTreeDataMergerMutator merger_mutator;
|
||||
@ -121,12 +86,10 @@ private:
|
||||
AtomicStopwatch time_after_previous_cleanup;
|
||||
|
||||
mutable std::mutex currently_merging_mutex;
|
||||
MergeTreeData::DataParts currently_merging;
|
||||
DataParts currently_merging;
|
||||
std::map<String, MergeTreeMutationEntry> current_mutations_by_id;
|
||||
std::multimap<Int64, MergeTreeMutationEntry &> current_mutations_by_version;
|
||||
|
||||
Logger * log;
|
||||
|
||||
std::atomic<bool> shutdown_called {false};
|
||||
|
||||
BackgroundProcessingPool::TaskHandle background_task_handle;
|
||||
@ -137,8 +100,7 @@ private:
|
||||
* If aggressive - when selecting parts, doesn't take into account their size ratio and novelty (used for OPTIMIZE query).
|
||||
* Returns true if merge is finished successfully.
|
||||
*/
|
||||
bool merge(bool aggressive, const String & partition_id, bool final, bool deduplicate,
|
||||
String * out_disable_reason = nullptr);
|
||||
bool merge(bool aggressive, const String & partition_id, bool final, bool deduplicate, String * out_disable_reason = nullptr);
|
||||
|
||||
/// Try and find a single part to mutate and mutate it. If some part was successfully mutated, return true.
|
||||
bool tryMutatePart();
|
||||
@ -146,7 +108,7 @@ private:
|
||||
BackgroundProcessingPoolTaskResult backgroundTask();
|
||||
|
||||
Int64 getCurrentMutationVersion(
|
||||
const MergeTreeData::DataPartPtr & part,
|
||||
const DataPartPtr & part,
|
||||
std::lock_guard<std::mutex> & /* currently_merging_mutex_lock */) const;
|
||||
|
||||
void clearOldMutations();
|
||||
@ -182,7 +144,7 @@ protected:
|
||||
const ASTPtr & primary_key_ast_,
|
||||
const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
|
||||
const ASTPtr & ttl_table_ast_,
|
||||
const MergeTreeData::MergingParams & merging_params_,
|
||||
const MergingParams & merging_params_,
|
||||
const MergeTreeSettings & settings_,
|
||||
bool has_force_restore_data_flag);
|
||||
};
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -72,36 +72,20 @@ namespace DB
|
||||
* as the time will take the time of creation the appropriate part on any of the replicas.
|
||||
*/
|
||||
|
||||
class StorageReplicatedMergeTree : public ext::shared_ptr_helper<StorageReplicatedMergeTree>, public IStorage
|
||||
class StorageReplicatedMergeTree : public ext::shared_ptr_helper<StorageReplicatedMergeTree>, public MergeTreeData
|
||||
{
|
||||
public:
|
||||
void startup() override;
|
||||
void shutdown() override;
|
||||
~StorageReplicatedMergeTree() override;
|
||||
|
||||
std::string getName() const override { return "Replicated" + data.merging_params.getModeName() + "MergeTree"; }
|
||||
std::string getName() const override { return "Replicated" + merging_params.getModeName() + "MergeTree"; }
|
||||
std::string getTableName() const override { return table_name; }
|
||||
std::string getDatabaseName() const override { return database_name; }
|
||||
|
||||
bool supportsSampling() const override { return data.supportsSampling(); }
|
||||
bool supportsFinal() const override { return data.supportsFinal(); }
|
||||
bool supportsPrewhere() const override { return data.supportsPrewhere(); }
|
||||
bool supportsReplication() const override { return true; }
|
||||
bool supportsDeduplication() const override { return true; }
|
||||
|
||||
const ColumnsDescription & getColumns() const override { return data.getColumns(); }
|
||||
void setColumns(ColumnsDescription columns_) override { return data.setColumns(std::move(columns_)); }
|
||||
|
||||
NameAndTypePair getColumn(const String & column_name) const override
|
||||
{
|
||||
return data.getColumn(column_name);
|
||||
}
|
||||
|
||||
bool hasColumn(const String & column_name) const override
|
||||
{
|
||||
return data.hasColumn(column_name);
|
||||
}
|
||||
|
||||
BlockInputStreams read(
|
||||
const Names & column_names,
|
||||
const SelectQueryInfo & query_info,
|
||||
@ -121,7 +105,7 @@ public:
|
||||
void alterPartition(const ASTPtr & query, const PartitionCommands & commands, const Context & query_context) override;
|
||||
|
||||
void mutate(const MutationCommands & commands, const Context & context) override;
|
||||
std::vector<MergeTreeMutationStatus> getMutationsStatus() const;
|
||||
std::vector<MergeTreeMutationStatus> getMutationsStatus() const override;
|
||||
CancellationCode killMutation(const String & mutation_id) override;
|
||||
|
||||
/** Removes a replica from ZooKeeper. If there are no other replicas, it deletes the entire table from ZooKeeper.
|
||||
@ -133,10 +117,6 @@ public:
|
||||
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override;
|
||||
|
||||
bool supportsIndexForIn() const override { return true; }
|
||||
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & /* query_context */) const override
|
||||
{
|
||||
return data.mayBenefitFromIndexForIn(left_in_operand);
|
||||
}
|
||||
|
||||
void checkTableCanBeDropped() const override;
|
||||
|
||||
@ -148,10 +128,6 @@ public:
|
||||
/// If timeout is exceeded returns false
|
||||
bool waitForShrinkingQueueSize(size_t queue_size = 0, UInt64 max_wait_milliseconds = 0);
|
||||
|
||||
MergeTreeData & getData() { return data; }
|
||||
const MergeTreeData & getData() const { return data; }
|
||||
|
||||
|
||||
/** For the system table replicas. */
|
||||
struct Status
|
||||
{
|
||||
@ -194,17 +170,6 @@ public:
|
||||
|
||||
String getDataPath() const override { return full_path; }
|
||||
|
||||
ASTPtr getPartitionKeyAST() const override { return data.partition_by_ast; }
|
||||
ASTPtr getSortingKeyAST() const override { return data.getSortingKeyAST(); }
|
||||
ASTPtr getPrimaryKeyAST() const override { return data.getPrimaryKeyAST(); }
|
||||
ASTPtr getSamplingKeyAST() const override { return data.getSamplingExpression(); }
|
||||
|
||||
Names getColumnsRequiredForPartitionKey() const override { return data.getColumnsRequiredForPartitionKey(); }
|
||||
Names getColumnsRequiredForSortingKey() const override { return data.getColumnsRequiredForSortingKey(); }
|
||||
Names getColumnsRequiredForPrimaryKey() const override { return data.getColumnsRequiredForPrimaryKey(); }
|
||||
Names getColumnsRequiredForSampling() const override { return data.getColumnsRequiredForSampling(); }
|
||||
Names getColumnsRequiredForFinal() const override { return data.getColumnsRequiredForSortingKey(); }
|
||||
|
||||
private:
|
||||
/// Delete old parts from disk and from ZooKeeper.
|
||||
void clearOldPartsAndRemoveFromZK();
|
||||
@ -222,8 +187,6 @@ private:
|
||||
using LogEntry = ReplicatedMergeTreeLogEntry;
|
||||
using LogEntryPtr = LogEntry::Ptr;
|
||||
|
||||
Context global_context;
|
||||
|
||||
zkutil::ZooKeeperPtr current_zookeeper; /// Use only the methods below.
|
||||
std::mutex current_zookeeper_mutex; /// To recreate the session in the background thread.
|
||||
|
||||
@ -234,10 +197,6 @@ private:
|
||||
/// If true, the table is offline and cannot be written to.
|
||||
std::atomic_bool is_readonly {false};
|
||||
|
||||
String database_name;
|
||||
String table_name;
|
||||
String full_path;
|
||||
|
||||
String zookeeper_path;
|
||||
String replica_name;
|
||||
String replica_path;
|
||||
@ -264,7 +223,6 @@ private:
|
||||
|
||||
InterserverIOEndpointHolderPtr data_parts_exchange_endpoint_holder;
|
||||
|
||||
MergeTreeData data;
|
||||
MergeTreeDataSelectExecutor reader;
|
||||
MergeTreeDataWriter writer;
|
||||
MergeTreeDataMergerMutator merger_mutator;
|
||||
@ -325,8 +283,6 @@ private:
|
||||
/// An event that awakens `alter` method from waiting for the completion of the ALTER query.
|
||||
zkutil::EventPtr alter_query_event = std::make_shared<Poco::Event>();
|
||||
|
||||
Logger * log;
|
||||
|
||||
/** Creates the minimum set of nodes in ZooKeeper.
|
||||
*/
|
||||
void createTableIfNotExists();
|
||||
@ -362,24 +318,24 @@ private:
|
||||
* Adds actions to `ops` that add data about the part into ZooKeeper.
|
||||
* Call under TableStructureLock.
|
||||
*/
|
||||
void checkPartChecksumsAndAddCommitOps(const zkutil::ZooKeeperPtr & zookeeper, const MergeTreeData::DataPartPtr & part,
|
||||
void checkPartChecksumsAndAddCommitOps(const zkutil::ZooKeeperPtr & zookeeper, const DataPartPtr & part,
|
||||
Coordination::Requests & ops, String part_name = "", NameSet * absent_replicas_paths = nullptr);
|
||||
|
||||
String getChecksumsForZooKeeper(const MergeTreeDataPartChecksums & checksums) const;
|
||||
|
||||
/// Accepts a PreCommitted part, atomically checks its checksums against the ones on other replicas and commits the part
|
||||
MergeTreeData::DataPartsVector checkPartChecksumsAndCommit(MergeTreeData::Transaction & transaction,
|
||||
const MergeTreeData::DataPartPtr & part);
|
||||
DataPartsVector checkPartChecksumsAndCommit(Transaction & transaction,
|
||||
const DataPartPtr & part);
|
||||
|
||||
void getCommitPartOps(
|
||||
Coordination::Requests & ops,
|
||||
MergeTreeData::MutableDataPartPtr & part,
|
||||
MutableDataPartPtr & part,
|
||||
const String & block_id_path = "") const;
|
||||
|
||||
/// Updates info about part columns and checksums in ZooKeeper and commits transaction if successful.
|
||||
void updatePartHeaderInZooKeeperAndCommit(
|
||||
const zkutil::ZooKeeperPtr & zookeeper,
|
||||
MergeTreeData::AlterDataPartTransaction & transaction);
|
||||
AlterDataPartTransaction & transaction);
|
||||
|
||||
/// Adds actions to `ops` that remove a part from ZooKeeper.
|
||||
/// Set has_children to true for "old-style" parts (those with /columns and /checksums child znodes).
|
||||
@ -390,7 +346,7 @@ private:
|
||||
NameSet * parts_should_be_retried = nullptr);
|
||||
|
||||
bool tryRemovePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries = 5);
|
||||
bool tryRemovePartsFromZooKeeperWithRetries(MergeTreeData::DataPartsVector & parts, size_t max_retries = 5);
|
||||
bool tryRemovePartsFromZooKeeperWithRetries(DataPartsVector & parts, size_t max_retries = 5);
|
||||
|
||||
/// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts.
|
||||
void removePartAndEnqueueFetch(const String & part_name);
|
||||
@ -405,8 +361,8 @@ private:
|
||||
void writePartLog(
|
||||
PartLogElement::Type type, const ExecutionStatus & execution_status, UInt64 elapsed_ns,
|
||||
const String & new_part_name,
|
||||
const MergeTreeData::DataPartPtr & result_part,
|
||||
const MergeTreeData::DataPartsVector & source_parts,
|
||||
const DataPartPtr & result_part,
|
||||
const DataPartsVector & source_parts,
|
||||
const MergeListEntry * merge_entry);
|
||||
|
||||
void executeDropRange(const LogEntry & entry);
|
||||
@ -463,7 +419,7 @@ private:
|
||||
*/
|
||||
bool createLogEntryToMergeParts(
|
||||
zkutil::ZooKeeperPtr & zookeeper,
|
||||
const MergeTreeData::DataPartsVector & parts,
|
||||
const DataPartsVector & parts,
|
||||
const String & merged_name,
|
||||
bool deduplicate,
|
||||
ReplicatedMergeTreeLogEntryData * out_log_entry = nullptr);
|
||||
@ -564,7 +520,7 @@ protected:
|
||||
const ASTPtr & primary_key_ast_,
|
||||
const ASTPtr & sample_by_ast_,
|
||||
const ASTPtr & table_ttl_ast_,
|
||||
const MergeTreeData::MergingParams & merging_params_,
|
||||
const MergingParams & merging_params_,
|
||||
const MergeTreeSettings & settings_,
|
||||
bool has_force_restore_data_flag);
|
||||
};
|
||||
|
@ -1,8 +1,6 @@
|
||||
#include <optional>
|
||||
#include <Storages/System/StorageSystemColumns.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
@ -38,10 +36,10 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_)
|
||||
{ "marks_bytes", std::make_shared<DataTypeUInt64>() },
|
||||
{ "comment", std::make_shared<DataTypeString>() },
|
||||
{ "is_in_partition_key", std::make_shared<DataTypeUInt8>() },
|
||||
{ "is_in_sorting_key", std::make_shared<DataTypeUInt8>() },
|
||||
{ "is_in_primary_key", std::make_shared<DataTypeUInt8>() },
|
||||
{ "is_in_sampling_key", std::make_shared<DataTypeUInt8>() },
|
||||
{ "compression_codec", std::make_shared<DataTypeString>() },
|
||||
{ "is_in_sorting_key", std::make_shared<DataTypeUInt8>() },
|
||||
{ "is_in_primary_key", std::make_shared<DataTypeUInt8>() },
|
||||
{ "is_in_sampling_key", std::make_shared<DataTypeUInt8>() },
|
||||
{ "compression_codec", std::make_shared<DataTypeString>() },
|
||||
}));
|
||||
}
|
||||
|
||||
@ -124,16 +122,10 @@ protected:
|
||||
cols_required_for_sampling = storage->getColumnsRequiredForSampling();
|
||||
|
||||
/** Info about sizes of columns for tables of MergeTree family.
|
||||
* NOTE: It is possible to add getter for this info to IStorage interface.
|
||||
*/
|
||||
if (auto storage_concrete_plain = dynamic_cast<StorageMergeTree *>(storage.get()))
|
||||
{
|
||||
column_sizes = storage_concrete_plain->getData().getColumnSizes();
|
||||
}
|
||||
else if (auto storage_concrete_replicated = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
|
||||
{
|
||||
column_sizes = storage_concrete_replicated->getData().getColumnSizes();
|
||||
}
|
||||
* NOTE: It is possible to add getter for this info to IStorage interface.
|
||||
*/
|
||||
if (auto storage_concrete = dynamic_cast<const MergeTreeData *>(storage.get()))
|
||||
column_sizes = storage_concrete->getColumnSizes();
|
||||
}
|
||||
|
||||
for (const auto & column : columns)
|
||||
|
@ -1,7 +1,5 @@
|
||||
#include <Storages/System/StorageSystemGraphite.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
||||
@ -37,20 +35,10 @@ StorageSystemGraphite::Configs StorageSystemGraphite::getConfigs(const Context &
|
||||
for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next())
|
||||
{
|
||||
auto & table = iterator->table();
|
||||
const MergeTreeData * table_data = nullptr;
|
||||
|
||||
if (const StorageMergeTree * merge_tree = dynamic_cast<StorageMergeTree *>(table.get()))
|
||||
{
|
||||
table_data = &merge_tree->getData();
|
||||
}
|
||||
else if (const StorageReplicatedMergeTree * replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(table.get()))
|
||||
{
|
||||
table_data = &replicated_merge_tree->getData();
|
||||
}
|
||||
else
|
||||
{
|
||||
const MergeTreeData * table_data = dynamic_cast<const MergeTreeData *>(table.get());
|
||||
if (!table_data)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (table_data->merging_params.mode == MergeTreeData::MergingParams::Graphite)
|
||||
{
|
||||
|
@ -4,8 +4,8 @@
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataStreams/OneBlockInputStream.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/MergeTreeMutationStatus.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
|
||||
@ -38,19 +38,10 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex
|
||||
/// Collect a set of *MergeTree tables.
|
||||
std::map<String, std::map<String, StoragePtr>> merge_tree_tables;
|
||||
for (const auto & db : context.getDatabases())
|
||||
{
|
||||
if (context.hasDatabaseAccessRights(db.first))
|
||||
{
|
||||
for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next())
|
||||
{
|
||||
if (dynamic_cast<const StorageMergeTree *>(iterator->table().get())
|
||||
|| dynamic_cast<const StorageReplicatedMergeTree *>(iterator->table().get()))
|
||||
{
|
||||
if (dynamic_cast<const MergeTreeData *>(iterator->table().get()))
|
||||
merge_tree_tables[db.first][iterator->name()] = iterator->table();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MutableColumnPtr col_database_mut = ColumnString::create();
|
||||
MutableColumnPtr col_table_mut = ColumnString::create();
|
||||
@ -92,10 +83,8 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex
|
||||
std::vector<MergeTreeMutationStatus> statuses;
|
||||
{
|
||||
const IStorage * storage = merge_tree_tables[database][table].get();
|
||||
if (const auto * merge_tree = dynamic_cast<const StorageMergeTree *>(storage))
|
||||
if (const auto * merge_tree = dynamic_cast<const MergeTreeData *>(storage))
|
||||
statuses = merge_tree->getMutationsStatus();
|
||||
else if (const auto * replicated = dynamic_cast<const StorageReplicatedMergeTree *>(storage))
|
||||
statuses = replicated->getMutationsStatus();
|
||||
}
|
||||
|
||||
for (const MergeTreeMutationStatus & status : statuses)
|
||||
|
@ -6,8 +6,6 @@
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataStreams/OneBlockInputStream.h>
|
||||
#include <Storages/System/StorageSystemParts.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
|
||||
|
@ -7,8 +7,7 @@
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataStreams/OneBlockInputStream.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
@ -93,8 +92,7 @@ public:
|
||||
StoragePtr storage = iterator->table();
|
||||
String engine_name = storage->getName();
|
||||
|
||||
if (!dynamic_cast<StorageMergeTree *>(&*storage) &&
|
||||
!dynamic_cast<StorageReplicatedMergeTree *>(&*storage))
|
||||
if (!dynamic_cast<MergeTreeData *>(storage.get()))
|
||||
continue;
|
||||
|
||||
storages[std::make_pair(database_name, iterator->name())] = storage;
|
||||
@ -184,20 +182,9 @@ public:
|
||||
|
||||
info.engine = info.storage->getName();
|
||||
|
||||
info.data = nullptr;
|
||||
|
||||
if (auto merge_tree = dynamic_cast<StorageMergeTree *>(&*info.storage))
|
||||
{
|
||||
info.data = &merge_tree->getData();
|
||||
}
|
||||
else if (auto replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(&*info.storage))
|
||||
{
|
||||
info.data = &replicated_merge_tree->getData();
|
||||
}
|
||||
else
|
||||
{
|
||||
info.data = dynamic_cast<MergeTreeData *>(info.storage.get());
|
||||
if (!info.data)
|
||||
throw Exception("Unknown engine " + info.engine, ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
using State = MergeTreeDataPart::State;
|
||||
auto & all_parts_state = info.all_parts_state;
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataStreams/OneBlockInputStream.h>
|
||||
#include <Storages/System/StorageSystemPartsColumns.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
@ -56,7 +56,7 @@ if [ "$DATA_DIR_PATTERN" != "$DATA_DIR" ]; then
|
||||
cat $CLICKHOUSE_CONFIG | sed -e s!$DATA_DIR_PATTERN!$DATA_DIR! > $DATA_DIR/etc/server-config.xml
|
||||
export CLICKHOUSE_CONFIG=$DATA_DIR/etc/server-config.xml
|
||||
cp $CLICKHOUSE_CONFIG_USERS $DATA_DIR/etc
|
||||
cp -r -L $CLICKHOUSE_CONFIG_USERS_D $DATA_DIR/etc
|
||||
cp -R -L $CLICKHOUSE_CONFIG_USERS_D $DATA_DIR/etc
|
||||
fi
|
||||
|
||||
CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="${CLICKHOUSE_EXTRACT} --config=$CLICKHOUSE_CONFIG"}
|
||||
|
@ -22,6 +22,8 @@
|
||||
</any_of>
|
||||
</stop_conditions>
|
||||
|
||||
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE match(URL, ' *tranio\\.ru/spain/*/commercial/*') settings max_threads=5]]></query>
|
||||
|
||||
<query><![CDATA[select count(position(URL, 'yandex')), count(position(URL, 'google')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select count(multiSearchAllPositions(URL, ['yandex', 'google'])) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select count(match(URL, 'yandex|google')) FROM hits_100m_single]]></query>
|
||||
|
@ -21,13 +21,16 @@
|
||||
<total_time_ms>60000</total_time_ms>
|
||||
</any_of>
|
||||
</stop_conditions>
|
||||
|
||||
<query>SELECT DISTINCT URL,Title, ngramDistance(Title, URL) AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT SearchPhrase,Title, ngramDistance(Title, SearchPhrase) AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT Title, ngramDistance(Title, 'what is love') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT Title, ngramDistance(Title, 'baby dont hurt me') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT Title, ngramDistance(Title, 'no more') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'wHAt Is lovE') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'BABY DonT hUrT me') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'nO MOrE') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT URL,Title, ngramDistanceUTF8(Title, URL) AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT SearchPhrase,Title, ngramDistanceUTF8(Title, SearchPhrase) AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT Title, ngramDistanceUTF8(Title, 'метрика') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT URL, ngramDistanceUTF8(URL, 'как дела') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
<query>SELECT DISTINCT URL, ngramDistanceUTF8(URL, 'чем занимаешься') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
|
||||
|
30
dbms/tests/performance/simple_join_query.xml
Normal file
30
dbms/tests/performance/simple_join_query.xml
Normal file
@ -0,0 +1,30 @@
|
||||
<test>
|
||||
<name>Simple Join Query</name>
|
||||
|
||||
<type>once</type>
|
||||
|
||||
<stop_conditions>
|
||||
<all_of>
|
||||
<total_time_ms>30000</total_time_ms>
|
||||
</all_of>
|
||||
<any_of>
|
||||
<min_time_not_changing_for_ms>5000</min_time_not_changing_for_ms>
|
||||
<total_time_ms>60000</total_time_ms>
|
||||
</any_of>
|
||||
</stop_conditions>
|
||||
|
||||
<main_metric>
|
||||
<total_time />
|
||||
</main_metric>
|
||||
|
||||
<create_query>CREATE TABLE join_table(A Int64, S0 String, S1 String, S2 String, S3 String)ENGINE = MergeTree ORDER BY A</create_query>
|
||||
|
||||
<fill_query>INSERT INTO join_table SELECT number AS A, toString(arrayMap(x->x, range(100))) S0, S0 AS S1, S0 AS S2, S0 AS S3 from numbers(500000)</fill_query>
|
||||
|
||||
<query tag='UsingJoinWithoutSubquery'>SELECT COUNT() FROM join_table LEFT JOIN join_table USING A</query>
|
||||
<query tag='UsingJoinWithSubquery'>SELECT COUNT() FROM join_table LEFT JOIN (SELECT A FROM join_table) USING A</query>
|
||||
<query tag='OnExpressionJoinWithoutSubquery'>SELECT COUNT() FROM join_table AS left LEFT JOIN join_table AS right ON left.A = right.A</query>
|
||||
<query tag='OnExpressionJoinWithSubquery'>SELECT COUNT() FROM join_table AS left LEFT JOIN (SELECT A FROM join_table) AS right ON left.A = right.A</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS join_table</drop_query>
|
||||
</test>
|
@ -5,12 +5,12 @@
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
\N
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
\N
|
||||
1
|
||||
0
|
||||
0
|
||||
@ -27,7 +27,7 @@
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
\N
|
||||
0
|
||||
1
|
||||
0
|
||||
@ -35,12 +35,12 @@
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
\N
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
\N
|
||||
1
|
||||
0
|
||||
0
|
||||
@ -57,7 +57,7 @@
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
\N
|
||||
0
|
||||
1
|
||||
0
|
||||
|
@ -15,3 +15,7 @@
|
||||
60 50 70 40 20 30
|
||||
2019-01-01 50
|
||||
2019-01-02 60
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
|
@ -1,3 +1,4 @@
|
||||
USE test;
|
||||
SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5]));
|
||||
SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])));
|
||||
SELECT bitmapToArray(bitmapOr(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])));
|
||||
@ -53,7 +54,7 @@ ALL LEFT JOIN
|
||||
)
|
||||
USING city_id;
|
||||
|
||||
|
||||
-- bitmap state test
|
||||
DROP TABLE IF EXISTS bitmap_state_test;
|
||||
CREATE TABLE bitmap_state_test
|
||||
(
|
||||
@ -72,6 +73,26 @@ GROUP BY pickup_date, city_id;
|
||||
|
||||
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date;
|
||||
|
||||
-- between column and expression test
|
||||
DROP TABLE IF EXISTS bitmap_column_expr_test;
|
||||
CREATE TABLE bitmap_column_expr_test
|
||||
(
|
||||
t DateTime,
|
||||
z AggregateFunction(groupBitmap, UInt32)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PARTITION BY toYYYYMMDD(t)
|
||||
ORDER BY t;
|
||||
|
||||
INSERT INTO bitmap_column_expr_test VALUES (now(), bitmapBuild(cast([3,19,47] as Array(UInt32))));
|
||||
|
||||
SELECT bitmapAndCardinality( bitmapBuild(cast([19,7] as Array(UInt32))), z) from bitmap_column_expr_test;
|
||||
SELECT bitmapAndCardinality( z, bitmapBuild(cast([19,7] as Array(UInt32))) ) from bitmap_column_expr_test;
|
||||
|
||||
select bitmapCardinality(bitmapAnd(bitmapBuild(cast([19,7] as Array(UInt32))), z )) from bitmap_column_expr_test;
|
||||
select bitmapCardinality(bitmapAnd(z, bitmapBuild(cast([19,7] as Array(UInt32))))) from bitmap_column_expr_test;
|
||||
|
||||
DROP TABLE IF EXISTS bitmap_test;
|
||||
DROP TABLE IF EXISTS bitmap_state_test;
|
||||
DROP TABLE IF EXISTS bitmap_column_expr_test;
|
||||
|
||||
|
@ -33,6 +33,76 @@
|
||||
1000
|
||||
1000
|
||||
1000
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
1000
|
||||
1000
|
||||
@ -40,6 +110,39 @@
|
||||
77
|
||||
636
|
||||
1000
|
||||
привет как дела?... Херсон 0
|
||||
привет как дела клип - Яндекс.Видео 0
|
||||
привет 0
|
||||
пап привет как дела - Яндекс.Видео 0
|
||||
привет братан как дела - Яндекс.Видео 0
|
||||
http://metric.ru/ 0
|
||||
http://autometric.ru/ 0
|
||||
http://metrica.yandex.com/ 0
|
||||
http://metris.ru/ 0
|
||||
http://metrika.ru/ 0
|
||||
0
|
||||
0
|
||||
привет как дела?... Херсон 600
|
||||
пап привет как дела - Яндекс.Видео 684
|
||||
привет как дела клип - Яндекс.Видео 692
|
||||
привет братан как дела - Яндекс.Видео 707
|
||||
привет 1000
|
||||
http://metric.ru/ 1000
|
||||
http://autometric.ru/ 1000
|
||||
http://metrica.yandex.com/ 1000
|
||||
http://metris.ru/ 1000
|
||||
http://metrika.ru/ 1000
|
||||
0
|
||||
http://metric.ru/ 765
|
||||
http://metris.ru/ 765
|
||||
http://metrika.ru/ 778
|
||||
http://autometric.ru/ 810
|
||||
http://metrica.yandex.com/ 846
|
||||
привет как дела?... Херсон 1000
|
||||
привет как дела клип - Яндекс.Видео 1000
|
||||
привет 1000
|
||||
пап привет как дела - Яндекс.Видео 1000
|
||||
привет братан как дела - Яндекс.Видео 1000
|
||||
привет как дела?... Херсон 297
|
||||
пап привет как дела - Яндекс.Видео 422
|
||||
привет как дела клип - Яндекс.Видео 435
|
||||
@ -152,6 +255,76 @@ http://metrika.ru/ 1000
|
||||
1000
|
||||
1000
|
||||
1000
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
1000
|
||||
1000
|
||||
@ -159,6 +332,39 @@ http://metrika.ru/ 1000
|
||||
77
|
||||
636
|
||||
1000
|
||||
привет как дела?... Херсон 0
|
||||
привет как дела клип - Яндекс.Видео 0
|
||||
привет 0
|
||||
пап привет как дела - Яндекс.Видео 0
|
||||
привет братан как дела - Яндекс.Видео 0
|
||||
http://metric.ru/ 0
|
||||
http://autometric.ru/ 0
|
||||
http://metrica.yandex.com/ 0
|
||||
http://metris.ru/ 0
|
||||
http://metrika.ru/ 0
|
||||
0
|
||||
0
|
||||
привет как дела?... Херсон 600
|
||||
пап привет как дела - Яндекс.Видео 684
|
||||
привет как дела клип - Яндекс.Видео 692
|
||||
привет братан как дела - Яндекс.Видео 707
|
||||
привет 1000
|
||||
http://metric.ru/ 1000
|
||||
http://autometric.ru/ 1000
|
||||
http://metrica.yandex.com/ 1000
|
||||
http://metris.ru/ 1000
|
||||
http://metrika.ru/ 1000
|
||||
0
|
||||
http://metric.ru/ 765
|
||||
http://metris.ru/ 765
|
||||
http://metrika.ru/ 778
|
||||
http://autometric.ru/ 810
|
||||
http://metrica.yandex.com/ 846
|
||||
привет как дела?... Херсон 1000
|
||||
привет как дела клип - Яндекс.Видео 1000
|
||||
привет 1000
|
||||
пап привет как дела - Яндекс.Видео 1000
|
||||
привет братан как дела - Яндекс.Видео 1000
|
||||
привет как дела?... Херсон 297
|
||||
пап привет как дела - Яндекс.Видео 422
|
||||
привет как дела клип - Яндекс.Видео 435
|
||||
@ -293,6 +499,76 @@ http://metrika.ru/ 1000
|
||||
1000
|
||||
1000
|
||||
1000
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
@ -412,6 +688,76 @@ http://metrika.ru/ 1000
|
||||
1000
|
||||
1000
|
||||
1000
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
|
@ -6,6 +6,22 @@ select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абв
|
||||
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistanceUTF8(materialize(''), materialize('')))=round(1000 * ngramDistanceUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8(materialize('абв'), materialize('')))=round(1000 * ngramDistanceUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8(materialize(''), materialize('абв')))=round(1000 * ngramDistanceUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), materialize('абвгдеёжз')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), materialize('абвгдеёж')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), materialize('гдеёзд')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), materialize('ёёёёёёёё')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistanceUTF8('', materialize('')))=round(1000 * ngramDistanceUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8('абв', materialize('')))=round(1000 * ngramDistanceUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8('', materialize('абв')))=round(1000 * ngramDistanceUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8('абвгдеёжз', materialize('абвгдеёжз')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8('абвгдеёжз', materialize('абвгдеёж')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8('абвгдеёжз', materialize('гдеёзд')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceUTF8('абвгдеёжз', materialize('ёёёёёёёё')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistanceUTF8('', ''));
|
||||
select round(1000 * ngramDistanceUTF8('абв', ''));
|
||||
select round(1000 * ngramDistanceUTF8('', 'абв'));
|
||||
@ -18,6 +34,10 @@ drop table if exists test_distance;
|
||||
create table test_distance (Title String) engine = Memory;
|
||||
insert into test_distance values ('привет как дела?... Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), ('');
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, Title) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'как дела')) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'metr')) as distance;
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'привет как дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'как привет дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrika') as distance;
|
||||
@ -35,6 +55,23 @@ select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('аБВГдеё
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''),materialize(''))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абв'),materialize(''))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), materialize('абв'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвГДЕёжз'), materialize('АбвгдЕёжз'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), materialize('АбвГдеёж'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), materialize('гдеёЗД'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), materialize('ЁЁЁЁЁЁЁЁ'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('', materialize(''))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абв',materialize(''))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('', materialize('абв'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абвГДЕёжз', materialize('АбвгдЕёжз'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('аБВГдеёЖз', materialize('АбвГдеёж'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абвгдеёжз', materialize('гдеёЗД'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абвгдеёжз', materialize('ЁЁЁЁЁЁЁЁ'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
|
||||
|
||||
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('', ''));
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абв', ''));
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('', 'абв'));
|
||||
@ -43,6 +80,10 @@ select round(1000 * ngramDistanceCaseInsensitiveUTF8('аБВГдеёЖз', 'Аб
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абвгдеёжз', 'гдеёЗД'));
|
||||
select round(1000 * ngramDistanceCaseInsensitiveUTF8('АБВГДеёжз', 'ЁЁЁЁЁЁЁЁ'));
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, Title) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, extract(Title, 'как дела')) as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, extract(Title, 'metr')) as distance;
|
||||
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'ПрИвЕт кАК ДЕЛа') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'как ПРИВЕТ дела') as distance;
|
||||
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metrika') as distance;
|
||||
@ -62,6 +103,23 @@ select round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefg')) from syst
|
||||
select round(1000 * ngramDistance(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistance(materialize(''),materialize('')))=round(1000 * ngramDistance(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance(materialize('abc'),materialize('')))=round(1000 * ngramDistance(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance(materialize(''), materialize('abc')))=round(1000 * ngramDistance(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance(materialize('abcdefgh'), materialize('abcdefgh')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance(materialize('abcdefgh'), materialize('abcdefg')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance(materialize('abcdefgh'), materialize('defgh')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance(materialize('abcdefgh'), materialize('aaaaaaaa')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistance('',materialize('')))=round(1000 * ngramDistance(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance('abc', materialize('')))=round(1000 * ngramDistance(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance('', materialize('abc')))=round(1000 * ngramDistance(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance('abcdefgh', materialize('abcdefgh')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance('abcdefgh', materialize('abcdefg')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance('abcdefgh', materialize('defgh')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistance('abcdefgh', materialize('aaaaaaaa')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
|
||||
select round(1000 * ngramDistance('', ''));
|
||||
select round(1000 * ngramDistance('abc', ''));
|
||||
select round(1000 * ngramDistance('', 'abc'));
|
||||
@ -86,6 +144,22 @@ select round(1000 * ngramDistanceCaseInsensitive(materialize('abcdefgh'), 'abcde
|
||||
select round(1000 * ngramDistanceCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistanceCaseInsensitive(materialize(''), materialize('')))=round(1000 * ngramDistanceCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), materialize('')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive(materialize(''), materialize('abc')))=round(1000 * ngramDistanceCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive(materialize('abCdefgH'), materialize('Abcdefgh')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive(materialize('abcdefgh'), materialize('abcdeFG')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive(materialize('AAAAbcdefgh'), materialize('defgh')))=round(1000 * ngramDistanceCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive(materialize('ABCdefgH'), materialize('aaaaaaaa')))=round(1000 * ngramDistanceCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistanceCaseInsensitive('', materialize('')))=round(1000 * ngramDistanceCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive('abc', materialize('')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive('', materialize('abc')))=round(1000 * ngramDistanceCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive('abCdefgH', materialize('Abcdefgh')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive('abcdefgh', materialize('abcdeFG')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive('AAAAbcdefgh', materialize('defgh')))=round(1000 * ngramDistanceCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
|
||||
select round(1000 * ngramDistanceCaseInsensitive('ABCdefgH', materialize('aaaaaaaa')))=round(1000 * ngramDistanceCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
|
||||
|
||||
select round(1000 * ngramDistanceCaseInsensitive('', ''));
|
||||
select round(1000 * ngramDistanceCaseInsensitive('abc', ''));
|
||||
select round(1000 * ngramDistanceCaseInsensitive('', 'abc'));
|
||||
|
@ -0,0 +1,43 @@
|
||||
0 0
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
4 4
|
||||
5 5
|
||||
6 6
|
||||
7 7
|
||||
8 8
|
||||
9 9
|
||||
0 0
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
4 4
|
||||
5 5
|
||||
6 6
|
||||
7 7
|
||||
8 8
|
||||
9 9
|
||||
SimpleAggregateFunction(sum, Float64)
|
||||
0 0
|
||||
1 2
|
||||
2 4
|
||||
3 6
|
||||
4 8
|
||||
5 10
|
||||
6 12
|
||||
7 14
|
||||
8 16
|
||||
9 18
|
||||
0 0
|
||||
1 2
|
||||
2 4
|
||||
3 6
|
||||
4 8
|
||||
5 10
|
||||
6 12
|
||||
7 14
|
||||
8 16
|
||||
9 18
|
||||
1 1 2 2.2.2.2
|
||||
SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4)
|
@ -0,0 +1,27 @@
|
||||
-- basic test
|
||||
drop table if exists test.simple;
|
||||
|
||||
create table test.simple (id UInt64,val SimpleAggregateFunction(sum,Double)) engine=AggregatingMergeTree order by id;
|
||||
insert into test.simple select number,number from system.numbers limit 10;
|
||||
|
||||
select * from test.simple;
|
||||
select * from test.simple final;
|
||||
select toTypeName(val) from test.simple limit 1;
|
||||
|
||||
-- merge
|
||||
insert into test.simple select number,number from system.numbers limit 10;
|
||||
|
||||
select * from test.simple final;
|
||||
|
||||
optimize table test.simple final;
|
||||
select * from test.simple;
|
||||
|
||||
-- complex types
|
||||
drop table if exists test.simple;
|
||||
|
||||
create table test.simple (id UInt64,nullable_str SimpleAggregateFunction(anyLast,Nullable(String)),low_str SimpleAggregateFunction(anyLast,LowCardinality(Nullable(String))),ip SimpleAggregateFunction(anyLast,IPv4)) engine=AggregatingMergeTree order by id;
|
||||
insert into test.simple values(1,'1','1','1.1.1.1');
|
||||
insert into test.simple values(1,null,'2','2.2.2.2');
|
||||
|
||||
select * from test.simple final;
|
||||
select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip) from test.simple limit 1;
|
@ -0,0 +1,16 @@
|
||||
4
|
||||
Object
|
||||
1
|
||||
1
|
||||
a
|
||||
hello
|
||||
hello
|
||||
3
|
||||
Array
|
||||
-100
|
||||
200
|
||||
300
|
||||
('a','hello','b',[-100,200,300])
|
||||
[-100,NULL,300]
|
||||
['a','hello','b',NULL]
|
||||
[(NULL,NULL,NULL),(NULL,NULL,NULL),(NULL,NULL,NULL),(-100,200,44)]
|
16
dbms/tests/queries/0_stateless/00918_json_functions_avx2.sql
Normal file
16
dbms/tests/queries/0_stateless/00918_json_functions_avx2.sql
Normal file
@ -0,0 +1,16 @@
|
||||
select jsonLength('{"a": "hello", "b": [-100, 200.0, 300]}');
|
||||
select jsonType('{"a": "hello", "b": [-100, 200.0, 300]}');
|
||||
select jsonHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'a');
|
||||
select jsonHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1);
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 2);
|
||||
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 'a');
|
||||
select jsonLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
select jsonType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
|
||||
select jsonExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1);
|
||||
select jsonExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2);
|
||||
select jsonExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1);
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(String, String, String, Array(Float64))');
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Int32)', 'b');
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(String)');
|
||||
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Tuple(Int16, Float32, UInt8))');
|
@ -599,3 +599,4 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
|
@ -79,3 +79,4 @@ select 0 != multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['.*goo.*',
|
||||
select 5 = multiMatchAnyIndex(materialize('vladizlvav dabe don\'t heart me no more'), ['what', 'is', 'love', 'baby', 'no mo??', 'dont', 'h.rt me']) from system.numbers limit 10;;
|
||||
|
||||
SELECT multiMatchAny(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']);
|
||||
SELECT 1 = multiMatchAny('фабрикант', ['f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]']);
|
||||
|
@ -0,0 +1,9 @@
|
||||
1
|
||||
\N
|
||||
\N
|
||||
1
|
||||
\N
|
||||
\N
|
||||
1
|
||||
\N
|
||||
\N
|
11
dbms/tests/queries/0_stateless/00939_test_null_in.sql
Normal file
11
dbms/tests/queries/0_stateless/00939_test_null_in.sql
Normal file
@ -0,0 +1,11 @@
|
||||
DROP TABLE IF EXISTS test.nullt;
|
||||
|
||||
CREATE TABLE test.nullt (c1 Nullable(UInt32), c2 Nullable(String))ENGINE = Log;
|
||||
INSERT INTO test.nullt VALUES (1, 'abc'), (2, NULL), (NULL, NULL);
|
||||
|
||||
SELECT c2 = ('abc') FROM test.nullt;
|
||||
SELECT c2 IN ('abc') FROM test.nullt;
|
||||
|
||||
SELECT c2 IN ('abc', NULL) FROM test.nullt;
|
||||
|
||||
DROP TABLE IF EXISTS test.nullt;
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user