Merge branch 'master' into custom_week_functions

Merge latest code from master
Andy Yang 2019-05-11 23:10:30 +08:00
commit f58f6a4d6b
121 changed files with 3094 additions and 1062 deletions

.gitmodules (vendored): 3 changes

@ -79,3 +79,6 @@
[submodule "contrib/hyperscan"]
path = contrib/hyperscan
url = https://github.com/ClickHouse-Extras/hyperscan.git
[submodule "contrib/simdjson"]
path = contrib/simdjson
url = https://github.com/lemire/simdjson.git


@ -1,6 +1,15 @@
project(ClickHouse)
cmake_minimum_required(VERSION 3.3)
cmake_policy(SET CMP0023 NEW)
foreach(policy
CMP0023
CMP0074 # CMake 3.12
)
if(POLICY ${policy})
cmake_policy(SET ${policy} NEW)
endif()
endforeach()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/")
set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json
set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so
@ -318,6 +327,7 @@ include (cmake/find_consistent-hashing.cmake)
include (cmake/find_base64.cmake)
include (cmake/find_hyperscan.cmake)
include (cmake/find_lfalloc.cmake)
include (cmake/find_simdjson.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
find_contrib_lib(metrohash)


@ -12,7 +12,6 @@ ClickHouse is an open-source column-oriented database management system that all
* You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse Community Meetup in Limassol](https://www.facebook.com/events/386638262181785/) on May 7.
* ClickHouse at [Percona Live 2019](https://www.percona.com/live/19/other-open-source-databases-track) in Austin on May 28-30.
* [ClickHouse Community Meetup in Beijing](https://www.huodongxing.com/event/2483759276200) on June 8.
* [ClickHouse Community Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20.


@ -1,6 +1,9 @@
option(ENABLE_ICU "Enable ICU" ON)
if(ENABLE_ICU)
if (APPLE)
set(ICU_ROOT "/usr/local/opt/icu4c" CACHE STRING "")
endif()
find_package(ICU COMPONENTS i18n uc data) # TODO: remove Modules/FindICU.cmake after cmake 3.7
#set (ICU_LIBRARIES ${ICU_I18N_LIBRARY} ${ICU_UC_LIBRARY} ${ICU_DATA_LIBRARY} CACHE STRING "")
if(ICU_FOUND)


@ -1,4 +1,4 @@
if (NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE AND NOT OS_FREEBSD)
if (NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE AND NOT OS_FREEBSD AND NOT APPLE)
option (ENABLE_LFALLOC "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED})
endif ()

cmake/find_simdjson.cmake (new file): 14 lines

@ -0,0 +1,14 @@
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson/jsonparser.h")
message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init --recursive")
return()
endif ()
if (NOT HAVE_AVX2)
message (WARNING "submodule contrib/simdjson requires AVX2 support")
return()
endif ()
option (USE_SIMDJSON "Use simdjson" ON)
set (SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include")
set (SIMDJSON_LIBRARY "simdjson")


@ -227,7 +227,7 @@ if (USE_INTERNAL_POCO_LIBRARY)
set (ENABLE_TESTS 0)
set (POCO_ENABLE_TESTS 0)
set (CMAKE_DISABLE_FIND_PACKAGE_ZLIB 1)
if (MSVC)
if (MSVC OR NOT USE_POCO_DATAODBC)
set (ENABLE_DATA_ODBC 0 CACHE INTERNAL "") # TODO (build fail)
endif ()
add_subdirectory (poco)
@ -313,3 +313,7 @@ endif()
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
add_subdirectory (hyperscan)
endif()
if (USE_SIMDJSON)
add_subdirectory (simdjson-cmake)
endif()

contrib/simdjson (vendored submodule): 1 change

@ -0,0 +1 @@
Subproject commit 681cd3369860f4eada49a387cbff93030f759c95


@ -0,0 +1,26 @@
if (NOT HAVE_AVX2)
message (FATAL_ERROR "No AVX2 support")
endif ()
if(MAKE_STATIC_LIBRARIES)
set(SIMDJSON_LIB_TYPE STATIC)
MESSAGE(STATUS "Building static library ${SIMDJSON_LIBRARY}")
else()
set(SIMDJSON_LIB_TYPE SHARED)
MESSAGE(STATUS "Building dynamic library ${SIMDJSON_LIBRARY}")
endif()
set(SIMDJSON_SRC_DIR "${SIMDJSON_INCLUDE_DIR}/../src")
set(SIMDJSON_SRC
${SIMDJSON_SRC_DIR}/jsonioutil.cpp
${SIMDJSON_SRC_DIR}/jsonminifier.cpp
${SIMDJSON_SRC_DIR}/jsonparser.cpp
${SIMDJSON_SRC_DIR}/stage1_find_marks.cpp
${SIMDJSON_SRC_DIR}/stage2_build_tape.cpp
${SIMDJSON_SRC_DIR}/parsedjson.cpp
${SIMDJSON_SRC_DIR}/parsedjsoniterator.cpp
)
add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_LIB_TYPE} ${SIMDJSON_SRC})
target_include_directories(${SIMDJSON_LIBRARY} PRIVATE "${SIMDJSON_INCLUDE_DIR}")
target_compile_options(${SIMDJSON_LIBRARY} PRIVATE -mavx2 -mbmi -mbmi2 -mpclmul)


@ -1,7 +1,6 @@
#include <Common/Exception.h>
#include <Common/OptimizedRegularExpression.h>
#define MIN_LENGTH_FOR_STRSTR 3
#define MAX_SUBPATTERNS 5
@ -211,20 +210,18 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
{
if (!has_alternative_on_depth_0)
{
/** We choose the non-alternative substring of the maximum length, among the prefixes,
* or a non-alternative substring of maximum length.
*/
/// We choose the non-alternative substring of maximum length for the first search.
/// Tuning for typical usage domain
auto tuning_strings_condition = [](const std::string & str)
{
return str != "://" && str != "http://" && str != "www" && str != "Windows ";
};
size_t max_length = 0;
Substrings::const_iterator candidate_it = trivial_substrings.begin();
for (Substrings::const_iterator it = trivial_substrings.begin(); it != trivial_substrings.end(); ++it)
{
if (((it->second == 0 && candidate_it->second != 0)
|| ((it->second == 0) == (candidate_it->second == 0) && it->first.size() > max_length))
/// Tuning for typical usage domain
&& (it->first.size() > strlen("://") || strncmp(it->first.data(), "://", strlen("://")))
&& (it->first.size() > strlen("http://") || strncmp(it->first.data(), "http", strlen("http")))
&& (it->first.size() > strlen("www.") || strncmp(it->first.data(), "www", strlen("www")))
&& (it->first.size() > strlen("Windows ") || strncmp(it->first.data(), "Windows ", strlen("Windows "))))
if (it->first.size() > max_length && tuning_strings_condition(it->first))
{
max_length = it->first.size();
candidate_it = it;


@ -122,6 +122,9 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
LockHolder res(new LockHolderImpl(shared_from_this(), it_group, it_client));
/// Wait for a notification until our group becomes the first one in the queue.
it_group->cv.wait(lock, [&] () { return it_group == queue.begin(); });
/// Insert myself (a weak_ptr to the holder) into the threads set to implement recursive locking
thread_to_holder.emplace(this_thread_id, res);
res->thread_id = this_thread_id;
@ -130,17 +133,6 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
query_id_to_holder.emplace(query_id, res);
res->query_id = query_id;
/// We are the first; we should not wait for anything
/// If we are not the first client in the group, a notification could already have been sent
if (it_group == queue.begin())
{
finalize_metrics();
return res;
}
/// Wait a notification
it_group->cv.wait(lock, [&] () { return it_group == queue.begin(); });
finalize_metrics();
return res;
}
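A minimal sketch of the recursive acquisition this change enables (hedged: it assumes the RWLockImpl::create() factory and the Read lock type declared in this header):

auto lock = RWLockImpl::create();
auto first = lock->getLock(RWLockImpl::Read, "query-1");
/// Same thread or same query id: the holder is found in thread_to_holder /
/// query_id_to_holder, so the existing group is reused instead of queueing again.
auto second = lock->getLock(RWLockImpl::Read, "query-1");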


@ -1,6 +1,7 @@
#pragma once
#include <Core/Types.h>
#include <boost/core/noncopyable.hpp>
#include <list>
#include <vector>
#include <mutex>


@ -25,6 +25,7 @@
#cmakedefine01 USE_BROTLI
#cmakedefine01 USE_SSL
#cmakedefine01 USE_HYPERSCAN
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_LFALLOC
#cmakedefine01 USE_LFALLOC_RANDOM_HINT


@ -123,3 +123,7 @@
#else
#define OPTIMIZE(x)
#endif
/// This number is only used for compatibility between distributed versions.
/// It could be any magic number.
#define DBMS_DISTRIBUTED_SENDS_MAGIC_NUMBER 0xCAFECABE


@ -109,5 +109,4 @@ void Settings::addProgramOptions(boost::program_options::options_description & o
Settings::getDescription(index).data)));
}
}
}


@ -1,6 +1,8 @@
#include <DataStreams/AggregatingSortedBlockInputStream.h>
#include <Common/typeid_cast.h>
#include <Common/StringUtils/StringUtils.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
namespace DB
@ -22,7 +24,7 @@ AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream(
ColumnWithTypeAndName & column = header.safeGetByPosition(i);
/// We leave only states of aggregate functions.
if (!startsWith(column.type->getName(), "AggregateFunction"))
if (!dynamic_cast<const DataTypeAggregateFunction *>(column.type.get()) && !dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(column.type->getCustomName()))
{
column_numbers_not_to_aggregate.push_back(i);
continue;
@ -40,7 +42,17 @@ AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream(
continue;
}
column_numbers_to_aggregate.push_back(i);
if (auto simple_aggr = dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(column.type->getCustomName()))
{
// simple aggregate function
SimpleAggregateDescription desc{simple_aggr->getFunction(), i};
columns_to_simple_aggregate.emplace_back(std::move(desc));
}
else
{
// standard aggregate function
column_numbers_to_aggregate.push_back(i);
}
}
}
@ -91,7 +103,11 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
/// if there are enough rows accumulated and the last one is calculated completely
if (key_differs && merged_rows >= max_block_size)
{
/// Write the simple aggregation result for the previous group.
insertSimpleAggregationResult(merged_columns);
return;
}
queue.pop();
@ -110,6 +126,14 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
for (auto & column_to_aggregate : columns_to_aggregate)
column_to_aggregate->insertDefault();
/// Write the simple aggregation result for the previous group.
if (merged_rows > 0)
insertSimpleAggregationResult(merged_columns);
/// Reset simple aggregation states for next row
for (auto & desc : columns_to_simple_aggregate)
desc.createState();
++merged_rows;
}
@ -127,6 +151,9 @@ void AggregatingSortedBlockInputStream::merge(MutableColumns & merged_columns, s
}
}
/// Write the simple aggregation result for the previous group.
insertSimpleAggregationResult(merged_columns);
finished = true;
}
@ -138,6 +165,21 @@ void AggregatingSortedBlockInputStream::addRow(SortCursor & cursor)
size_t j = column_numbers_to_aggregate[i];
columns_to_aggregate[i]->insertMergeFrom(*cursor->all_columns[j], cursor->pos);
}
for (auto & desc : columns_to_simple_aggregate)
{
auto & col = cursor->all_columns[desc.column_number];
desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, nullptr);
}
}
void AggregatingSortedBlockInputStream::insertSimpleAggregationResult(MutableColumns & merged_columns)
{
for (auto & desc : columns_to_simple_aggregate)
{
desc.function->insertResultInto(desc.state.data(), *merged_columns[desc.column_number]);
desc.destroyState();
}
}
}


@ -7,6 +7,7 @@
#include <DataStreams/MergingSortedBlockInputStream.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Common/AlignedBuffer.h>
namespace DB
@ -38,10 +39,13 @@ private:
/// Read finished.
bool finished = false;
struct SimpleAggregateDescription;
/// Columns with which numbers should be aggregated.
ColumnNumbers column_numbers_to_aggregate;
ColumnNumbers column_numbers_not_to_aggregate;
std::vector<ColumnAggregateFunction *> columns_to_aggregate;
std::vector<SimpleAggregateDescription> columns_to_simple_aggregate;
RowRef current_key; /// The current primary key.
RowRef next_key; /// The primary key of the next row.
@ -54,6 +58,53 @@ private:
/** Extract all states of aggregate functions and merge them with the current group.
*/
void addRow(SortCursor & cursor);
/** Insert all values of current row for simple aggregate functions
*/
void insertSimpleAggregationResult(MutableColumns & merged_columns);
/// Stores information for aggregation of SimpleAggregateFunction columns
struct SimpleAggregateDescription
{
/// An aggregate function 'anyLast', 'sum'...
AggregateFunctionPtr function;
IAggregateFunction::AddFunc add_function;
size_t column_number;
AlignedBuffer state;
bool created = false;
SimpleAggregateDescription(const AggregateFunctionPtr & function_, const size_t column_number_) : function(function_), column_number(column_number_)
{
add_function = function->getAddressOfAddFunction();
state.reset(function->sizeOfData(), function->alignOfData());
}
void createState()
{
if (created)
return;
function->create(state.data());
created = true;
}
void destroyState()
{
if (!created)
return;
function->destroy(state.data());
created = false;
}
/// Explicitly destroy aggregation state if the stream is terminated
~SimpleAggregateDescription()
{
destroyState();
}
SimpleAggregateDescription() = default;
SimpleAggregateDescription(SimpleAggregateDescription &&) = default;
SimpleAggregateDescription(const SimpleAggregateDescription &) = delete;
};
};
}


@ -1,6 +1,8 @@
#pragma once
#include <memory>
#include <cstddef>
#include <Core/Types.h>
namespace DB
{
@ -10,21 +12,21 @@ class WriteBuffer;
struct FormatSettings;
class IColumn;
/** Further refinement of the properties of a data type.
*
* Contains methods for serialization/deserialization.
* Implementations of this interface represent a data type domain (example: IPv4)
* which is a refinement of the existing type with a name and specific text
* representation.
*
* IDataTypeDomain is a totally immutable object. You can always share them.
/** Allows customizing an existing data type, giving it a different name and/or text serialization/deserialization methods.
* See use in IPv4 and IPv6 data types, and also in SimpleAggregateFunction.
*/
class IDataTypeDomain
class IDataTypeCustomName
{
public:
virtual ~IDataTypeDomain() {}
virtual ~IDataTypeCustomName() {}
virtual const char* getName() const = 0;
virtual String getName() const = 0;
};
class IDataTypeCustomTextSerialization
{
public:
virtual ~IDataTypeCustomTextSerialization() {}
/** Text serialization for displaying on a terminal or saving into a text file, and the like.
* Without escaping or quoting.
@ -56,4 +58,31 @@ public:
virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const = 0;
};
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
/** Describe a data type customization
*/
struct DataTypeCustomDesc
{
DataTypeCustomNamePtr name;
DataTypeCustomTextSerializationPtr text_serialization;
DataTypeCustomDesc(DataTypeCustomNamePtr name_, DataTypeCustomTextSerializationPtr text_serialization_)
: name(std::move(name_)), text_serialization(std::move(text_serialization_)) {}
};
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
/** A simple implementation of IDataTypeCustomName
*/
class DataTypeCustomFixedName : public IDataTypeCustomName
{
private:
String name;
public:
DataTypeCustomFixedName(String name_) : name(name_) {}
String getName() const override { return name; }
};
} // namespace DB


@ -1,9 +1,9 @@
#include <Columns/ColumnsNumber.h>
#include <Common/Exception.h>
#include <Common/formatIPv6.h>
#include <DataTypes/DataTypeDomainWithSimpleSerialization.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/IDataTypeDomain.h>
#include <DataTypes/DataTypeCustom.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsCoding.h>
@ -20,20 +20,15 @@ namespace ErrorCodes
namespace
{
class DataTypeDomainIPv4 : public DataTypeDomainWithSimpleSerialization
class DataTypeCustomIPv4Serialization : public DataTypeCustomSimpleTextSerialization
{
public:
const char * getName() const override
{
return "IPv4";
}
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
const auto col = checkAndGetColumn<ColumnUInt32>(&column);
if (!col)
{
throw Exception(String(getName()) + " domain can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("IPv4 type can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
@ -48,7 +43,7 @@ public:
ColumnUInt32 * col = typeid_cast<ColumnUInt32 *>(&column);
if (!col)
{
throw Exception(String(getName()) + " domain can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("IPv4 type can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
@ -63,20 +58,16 @@ public:
}
};
class DataTypeDomainIPv6 : public DataTypeDomainWithSimpleSerialization
class DataTypeCustomIPv6Serialization : public DataTypeCustomSimpleTextSerialization
{
public:
const char * getName() const override
{
return "IPv6";
}
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
const auto col = checkAndGetColumn<ColumnFixedString>(&column);
if (!col)
{
throw Exception(String(getName()) + " domain can only serialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("IPv6 type domain can only serialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
@ -91,7 +82,7 @@ public:
ColumnFixedString * col = typeid_cast<ColumnFixedString *>(&column);
if (!col)
{
throw Exception(String(getName()) + " domain can only deserialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("IPv6 type domain can only deserialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
@ -100,7 +91,7 @@ public:
std::string ipv6_value(IPV6_BINARY_LENGTH, '\0');
if (!parseIPv6(buffer, reinterpret_cast<unsigned char *>(ipv6_value.data())))
{
throw Exception(String("Invalid ") + getName() + " value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
throw Exception("Invalid IPv6 value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
col->insertString(ipv6_value);
@ -111,8 +102,17 @@ public:
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
{
factory.registerDataTypeDomain("UInt32", std::make_unique<DataTypeDomainIPv4>());
factory.registerDataTypeDomain("FixedString(16)", std::make_unique<DataTypeDomainIPv6>());
factory.registerSimpleDataTypeCustom("IPv4", []
{
return std::make_pair(DataTypeFactory::instance().get("UInt32"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv4"), std::make_unique<DataTypeCustomIPv4Serialization>()));
});
factory.registerSimpleDataTypeCustom("IPv6", []
{
return std::make_pair(DataTypeFactory::instance().get("FixedString(16)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv6"), std::make_unique<DataTypeCustomIPv6Serialization>()));
});
}
} // namespace DB
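A hedged usage sketch of the types registered above (hypothetical table; IPv4 is backed by UInt32 and IPv6 by FixedString(16), each with the custom text serialization defined here):

CREATE TABLE ips (v4 IPv4, v6 IPv6) ENGINE = Memory;
INSERT INTO ips VALUES ('127.0.0.1', '::1');
SELECT v4, toTypeName(v4), v6, toTypeName(v6) FROM ips;
-- toTypeName reports 'IPv4' / 'IPv6' through the custom names,
-- while the values are stored in the underlying base types.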


@ -0,0 +1,137 @@
#include <Common/FieldVisitors.h>
#include <Common/typeid_cast.h>
#include <IO/ReadHelpers.h>
#include <Columns/ColumnAggregateFunction.h>
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFactory.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <boost/algorithm/string/join.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int SYNTAX_ERROR;
extern const int BAD_ARGUMENTS;
extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int LOGICAL_ERROR;
}
static const std::vector<String> supported_functions{"any", "anyLast", "min", "max", "sum"};
String DataTypeCustomSimpleAggregateFunction::getName() const
{
std::stringstream stream;
stream << "SimpleAggregateFunction(" << function->getName();
if (!parameters.empty())
{
stream << "(";
for (size_t i = 0; i < parameters.size(); ++i)
{
if (i)
stream << ", ";
stream << applyVisitor(DB::FieldVisitorToString(), parameters[i]);
}
stream << ")";
}
for (const auto & argument_type : argument_types)
stream << ", " << argument_type->getName();
stream << ")";
return stream.str();
}
static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
{
String function_name;
AggregateFunctionPtr function;
DataTypes argument_types;
Array params_row;
if (!arguments || arguments->children.empty())
throw Exception("Data type SimpleAggregateFunction requires parameters: "
"name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (const ASTFunction * parametric = arguments->children[0]->as<ASTFunction>())
{
if (parametric->parameters)
throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR);
function_name = parametric->name;
const ASTs & parameters = parametric->arguments->as<ASTExpressionList &>().children;
params_row.resize(parameters.size());
for (size_t i = 0; i < parameters.size(); ++i)
{
const ASTLiteral * lit = parameters[i]->as<ASTLiteral>();
if (!lit)
throw Exception("Parameters to aggregate functions must be literals",
ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS);
params_row[i] = lit->value;
}
}
else if (auto opt_name = getIdentifierName(arguments->children[0]))
{
function_name = *opt_name;
}
else if (arguments->children[0]->as<ASTLiteral>())
{
throw Exception("Aggregate function name for data type SimpleAggregateFunction must be passed as identifier (without quotes) or function",
ErrorCodes::BAD_ARGUMENTS);
}
else
throw Exception("Unexpected AST element passed as aggregate function name for data type SimpleAggregateFunction. Must be identifier or function.",
ErrorCodes::BAD_ARGUMENTS);
for (size_t i = 1; i < arguments->children.size(); ++i)
argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]));
if (function_name.empty())
throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR);
function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row);
// check function
if (std::find(std::begin(supported_functions), std::end(supported_functions), function->getName()) == std::end(supported_functions))
{
throw Exception("Unsupported aggregate function " + function->getName() + ", supported functions are " + boost::algorithm::join(supported_functions, ","),
ErrorCodes::BAD_ARGUMENTS);
}
DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName());
if (!function->getReturnType()->equals(*removeLowCardinality(storage_type)))
{
throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getReturnType()->getName() + " and column storage type " + storage_type->getName(),
ErrorCodes::BAD_ARGUMENTS);
}
DataTypeCustomNamePtr custom_name = std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, argument_types, params_row);
return std::make_pair(storage_type, std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
}
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory)
{
factory.registerDataTypeCustom("SimpleAggregateFunction", create);
}
}


@ -0,0 +1,42 @@
#pragma once
#include <DataTypes/DataTypeCustom.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/FieldVisitors.h>
#include <IO/ReadHelpers.h>
namespace DB
{
/** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard
* data type but when rows are merged, an aggregation function is applied.
*
* The aggregation function is limited to simple functions whose merge state is the final result:
* any, anyLast, min, max, sum
*
* Examples:
*
* SimpleAggregateFunction(sum, Nullable(Float64))
* SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String)))
* SimpleAggregateFunction(anyLast, IPv4)
*
* Technically, a standard IDataType is instantiated and customized with IDataTypeCustomName and DataTypeCustomDesc.
*/
class DataTypeCustomSimpleAggregateFunction : public IDataTypeCustomName
{
private:
const AggregateFunctionPtr function;
const DataTypes argument_types;
const Array parameters;
public:
DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_)
: function(function_), argument_types(argument_types_), parameters(parameters_) {}
const AggregateFunctionPtr getFunction() const { return function; }
String getName() const override;
};
}
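A brief, hedged usage sketch (hypothetical table; the merge behavior is the one implemented in AggregatingSortedBlockInputStream above):

CREATE TABLE simple_agg
(
    key UInt64,
    total SimpleAggregateFunction(sum, UInt64)
) ENGINE = AggregatingMergeTree() ORDER BY key;

INSERT INTO simple_agg VALUES (1, 10), (1, 32);
OPTIMIZE TABLE simple_agg FINAL;
SELECT * FROM simple_agg;   -- one row (1, 42): values with equal keys were summed during the merge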


@ -1,4 +1,4 @@
#include <DataTypes/DataTypeDomainWithSimpleSerialization.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
@ -9,7 +9,7 @@ namespace
{
using namespace DB;
static String serializeToString(const DataTypeDomainWithSimpleSerialization & domain, const IColumn & column, size_t row_num, const FormatSettings & settings)
static String serializeToString(const DataTypeCustomSimpleTextSerialization & domain, const IColumn & column, size_t row_num, const FormatSettings & settings)
{
WriteBufferFromOwnString buffer;
domain.serializeText(column, row_num, buffer, settings);
@ -17,7 +17,7 @@ static String serializeToString(const DataTypeDomainWithSimpleSerialization & do
return buffer.str();
}
static void deserializeFromString(const DataTypeDomainWithSimpleSerialization & domain, IColumn & column, const String & s, const FormatSettings & settings)
static void deserializeFromString(const DataTypeCustomSimpleTextSerialization & domain, IColumn & column, const String & s, const FormatSettings & settings)
{
ReadBufferFromString istr(s);
domain.deserializeText(column, istr, settings);
@ -28,59 +28,59 @@ static void deserializeFromString(const DataTypeDomainWithSimpleSerialization &
namespace DB
{
DataTypeDomainWithSimpleSerialization::~DataTypeDomainWithSimpleSerialization()
DataTypeCustomSimpleTextSerialization::~DataTypeCustomSimpleTextSerialization()
{
}
void DataTypeDomainWithSimpleSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeEscapedString(serializeToString(*this, column, row_num, settings), ostr);
}
void DataTypeDomainWithSimpleSerialization::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readEscapedString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeDomainWithSimpleSerialization::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeQuotedString(serializeToString(*this, column, row_num, settings), ostr);
}
void DataTypeDomainWithSimpleSerialization::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readQuotedString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeDomainWithSimpleSerialization::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeCSVString(serializeToString(*this, column, row_num, settings), ostr);
}
void DataTypeDomainWithSimpleSerialization::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readCSVString(str, istr, settings.csv);
deserializeFromString(*this, column, str, settings);
}
void DataTypeDomainWithSimpleSerialization::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeJSONString(serializeToString(*this, column, row_num, settings), ostr, settings);
}
void DataTypeDomainWithSimpleSerialization::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
readJSONString(str, istr);
deserializeFromString(*this, column, str, settings);
}
void DataTypeDomainWithSimpleSerialization::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
void DataTypeCustomSimpleTextSerialization::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeXMLString(serializeToString(*this, column, row_num, settings), ostr);
}


@ -1,6 +1,6 @@
#pragma once
#include <DataTypes/IDataTypeDomain.h>
#include <DataTypes/DataTypeCustom.h>
namespace DB
{
@ -10,12 +10,12 @@ class WriteBuffer;
struct FormatSettings;
class IColumn;
/** Simple DataTypeDomain that uses serializeText/deserializeText
/** Simple IDataTypeCustomTextSerialization that uses serializeText/deserializeText
* for all serialization and deserialization. */
class DataTypeDomainWithSimpleSerialization : public IDataTypeDomain
class DataTypeCustomSimpleTextSerialization : public IDataTypeCustomTextSerialization
{
public:
virtual ~DataTypeDomainWithSimpleSerialization() override;
virtual ~DataTypeCustomSimpleTextSerialization() override;
// Methods that subclasses must override in order to get full serialization/deserialization support.
virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0;


@ -1,5 +1,5 @@
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/IDataTypeDomain.h>
#include <DataTypes/DataTypeCustom.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/ASTFunction.h>
@ -115,19 +115,23 @@ void DataTypeFactory::registerSimpleDataType(const String & name, SimpleCreator
}, case_sensitiveness);
}
void DataTypeFactory::registerDataTypeDomain(const String & type_name, DataTypeDomainPtr domain, CaseSensitiveness case_sensitiveness)
void DataTypeFactory::registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness)
{
all_domains.reserve(all_domains.size() + 1);
auto data_type = get(type_name);
setDataTypeDomain(*data_type, *domain);
registerDataType(domain->getName(), [data_type](const ASTPtr & /*ast*/)
registerDataType(family_name, [creator](const ASTPtr & ast)
{
return data_type;
}, case_sensitiveness);
auto res = creator(ast);
res.first->setCustomization(std::move(res.second));
all_domains.emplace_back(std::move(domain));
return res.first;
}, case_sensitiveness);
}
void DataTypeFactory::registerSimpleDataTypeCustom(const String &name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness)
{
registerDataTypeCustom(name, [creator](const ASTPtr & /*ast*/)
{
return creator();
}, case_sensitiveness);
}
const DataTypeFactory::Creator& DataTypeFactory::findCreatorByName(const String & family_name) const
@ -153,11 +157,6 @@ const DataTypeFactory::Creator& DataTypeFactory::findCreatorByName(const String
throw Exception("Unknown data type family: " + family_name, ErrorCodes::UNKNOWN_TYPE);
}
void DataTypeFactory::setDataTypeDomain(const IDataType & data_type, const IDataTypeDomain & domain)
{
data_type.setDomain(&domain);
}
void registerDataTypeNumbers(DataTypeFactory & factory);
void registerDataTypeDecimal(DataTypeFactory & factory);
void registerDataTypeDate(DataTypeFactory & factory);
@ -175,6 +174,7 @@ void registerDataTypeNested(DataTypeFactory & factory);
void registerDataTypeInterval(DataTypeFactory & factory);
void registerDataTypeLowCardinality(DataTypeFactory & factory);
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory);
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
DataTypeFactory::DataTypeFactory()
@ -196,6 +196,7 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeInterval(*this);
registerDataTypeLowCardinality(*this);
registerDataTypeDomainIPv4AndIPv6(*this);
registerDataTypeDomainSimpleAggregateFunction(*this);
}
DataTypeFactory::~DataTypeFactory()


@ -17,9 +17,6 @@ namespace DB
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
class IDataTypeDomain;
using DataTypeDomainPtr = std::unique_ptr<const IDataTypeDomain>;
/** Creates a data type by name of data type family and parameters.
*/
@ -28,6 +25,8 @@ class DataTypeFactory final : public ext::singleton<DataTypeFactory>, public IFa
private:
using SimpleCreator = std::function<DataTypePtr()>;
using DataTypesDictionary = std::unordered_map<String, Creator>;
using CreatorWithCustom = std::function<std::pair<DataTypePtr,DataTypeCustomDescPtr>(const ASTPtr & parameters)>;
using SimpleCreatorWithCustom = std::function<std::pair<DataTypePtr,DataTypeCustomDescPtr>()>;
public:
DataTypePtr get(const String & full_name) const;
@ -40,11 +39,13 @@ public:
/// Register a simple data type that has no parameters.
void registerSimpleDataType(const String & name, SimpleCreator creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
// Register a domain - a refinement of existing type.
void registerDataTypeDomain(const String & type_name, DataTypeDomainPtr domain, CaseSensitiveness case_sensitiveness = CaseSensitive);
/// Register a customized type family
void registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
/// Register a simple customized data type
void registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
private:
static void setDataTypeDomain(const IDataType & data_type, const IDataTypeDomain & domain);
const Creator& findCreatorByName(const String & family_name) const;
private:
@ -53,9 +54,6 @@ private:
/// Case insensitive data types will be additionally added here with lowercased name.
DataTypesDictionary case_insensitive_data_types;
// All domains are owned by factory and shared amongst DataType instances.
std::vector<DataTypeDomainPtr> all_domains;
DataTypeFactory();
~DataTypeFactory() override;


@ -9,7 +9,7 @@
#include <IO/WriteHelpers.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/IDataTypeDomain.h>
#include <DataTypes/DataTypeCustom.h>
#include <DataTypes/NestedUtils.h>
@ -23,8 +23,7 @@ namespace ErrorCodes
extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
}
IDataType::IDataType()
: domain(nullptr)
IDataType::IDataType() : custom_name(nullptr), custom_text_serialization(nullptr)
{
}
@ -34,9 +33,9 @@ IDataType::~IDataType()
String IDataType::getName() const
{
if (domain)
if (custom_name)
{
return domain->getName();
return custom_name->getName();
}
else
{
@ -142,9 +141,9 @@ void IDataType::insertDefaultInto(IColumn & column) const
void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextEscaped(column, row_num, ostr, settings);
custom_text_serialization->serializeTextEscaped(column, row_num, ostr, settings);
}
else
{
@ -154,9 +153,9 @@ void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, W
void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->deserializeTextEscaped(column, istr, settings);
custom_text_serialization->deserializeTextEscaped(column, istr, settings);
}
else
{
@ -166,9 +165,9 @@ void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, co
void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextQuoted(column, row_num, ostr, settings);
custom_text_serialization->serializeTextQuoted(column, row_num, ostr, settings);
}
else
{
@ -178,9 +177,9 @@ void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, Wr
void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->deserializeTextQuoted(column, istr, settings);
custom_text_serialization->deserializeTextQuoted(column, istr, settings);
}
else
{
@ -190,9 +189,9 @@ void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, con
void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextCSV(column, row_num, ostr, settings);
custom_text_serialization->serializeTextCSV(column, row_num, ostr, settings);
}
else
{
@ -202,9 +201,9 @@ void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, Write
void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->deserializeTextCSV(column, istr, settings);
custom_text_serialization->deserializeTextCSV(column, istr, settings);
}
else
{
@ -214,9 +213,9 @@ void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const
void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeText(column, row_num, ostr, settings);
custom_text_serialization->serializeText(column, row_num, ostr, settings);
}
else
{
@ -226,9 +225,9 @@ void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuf
void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextJSON(column, row_num, ostr, settings);
custom_text_serialization->serializeTextJSON(column, row_num, ostr, settings);
}
else
{
@ -238,9 +237,9 @@ void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, Writ
void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->deserializeTextJSON(column, istr, settings);
custom_text_serialization->deserializeTextJSON(column, istr, settings);
}
else
{
@ -250,9 +249,9 @@ void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const
void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (domain)
if (custom_text_serialization)
{
domain->serializeTextXML(column, row_num, ostr, settings);
custom_text_serialization->serializeTextXML(column, row_num, ostr, settings);
}
else
{
@ -260,13 +259,14 @@ void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, Write
}
}
void IDataType::setDomain(const IDataTypeDomain* const new_domain) const
void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
{
if (domain != nullptr)
{
throw Exception("Type " + getName() + " already has a domain.", ErrorCodes::LOGICAL_ERROR);
}
domain = new_domain;
/// replace only if not null
if (custom_desc_->name)
custom_name = std::move(custom_desc_->name);
if (custom_desc_->text_serialization)
custom_text_serialization = std::move(custom_desc_->text_serialization);
}
}


@ -4,6 +4,7 @@
#include <Common/COW.h>
#include <boost/noncopyable.hpp>
#include <Core/Field.h>
#include <DataTypes/DataTypeCustom.h>
namespace DB
@ -12,7 +13,6 @@ namespace DB
class ReadBuffer;
class WriteBuffer;
class IDataTypeDomain;
class IDataType;
struct FormatSettings;
@ -459,18 +459,19 @@ public:
private:
friend class DataTypeFactory;
/** Sets domain on existing DataType, can be considered as second phase
* of construction explicitly done by DataTypeFactory.
* Will throw an exception if domain is already set.
/** Customize this DataType
*/
void setDomain(const IDataTypeDomain* newDomain) const;
void setCustomization(DataTypeCustomDescPtr custom_desc_) const;
private:
/** This is mutable to allow setting domain on `const IDataType` post construction,
* simplifying creation of domains for all types, without them even knowing
* of domain existence.
/** This is mutable to allow setting custom name and serialization on `const IDataType` post construction.
*/
mutable IDataTypeDomain const* domain;
mutable DataTypeCustomNamePtr custom_name;
mutable DataTypeCustomTextSerializationPtr custom_text_serialization;
public:
const IDataTypeCustomName * getCustomName() const { return custom_name.get(); }
const IDataTypeCustomTextSerialization * getCustomTextSerialization() const { return custom_text_serialization.get(); }
};


@ -69,3 +69,8 @@ if (USE_HYPERSCAN)
target_link_libraries (clickhouse_functions PRIVATE ${HYPERSCAN_LIBRARY})
target_include_directories (clickhouse_functions SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR})
endif ()
if (USE_SIMDJSON)
target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY})
target_include_directories(clickhouse_functions PRIVATE ${SIMDJSON_INCLUDE_DIR})
endif ()


@ -9,7 +9,7 @@ using StoragePtr = std::shared_ptr<IStorage>;
class Join;
using JoinPtr = std::shared_ptr<Join>;
class FunctionJoinGet final : public IFunction, public std::enable_shared_from_this<FunctionJoinGet>
class FunctionJoinGet final : public IFunction
{
public:
static constexpr auto name = "joinGet";


@ -406,7 +406,12 @@ private:
{
const ColumnAggregateFunction * columns[2];
for (size_t i = 0; i < 2; ++i)
columns[i] = typeid_cast<const ColumnAggregateFunction *>(block.getByPosition(arguments[i]).column.get());
{
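// A constant argument arrives wrapped in ColumnConst; unwrap it to reach the underlying ColumnAggregateFunction.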
if (auto argument_column_const = typeid_cast<const ColumnConst *>(block.getByPosition(arguments[i]).column.get()))
columns[i] = typeid_cast<const ColumnAggregateFunction *>(argument_column_const->getDataColumnPtr().get());
else
columns[i] = typeid_cast<const ColumnAggregateFunction *>(block.getByPosition(arguments[i]).column.get());
}
for (size_t i = 0; i < input_rows_count; ++i)
{
@ -511,7 +516,12 @@ private:
{
const ColumnAggregateFunction * columns[2];
for (size_t i = 0; i < 2; ++i)
columns[i] = typeid_cast<const ColumnAggregateFunction *>(block.getByPosition(arguments[i]).column.get());
{
if (auto argument_column_const = typeid_cast<const ColumnConst *>(block.getByPosition(arguments[i]).column.get()))
columns[i] = typeid_cast<const ColumnAggregateFunction *>(argument_column_const->getDataColumnPtr().get());
else
columns[i] = typeid_cast<const ColumnAggregateFunction *>(block.getByPosition(arguments[i]).column.get());
}
auto col_to = ColumnAggregateFunction::create(columns[0]->getAggregateFunction());


@ -0,0 +1,378 @@
#include <Functions/FunctionsJSON.h>
#include <Functions/FunctionFactory.h>
#include <Common/config.h>
#if USE_SIMDJSON
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
namespace DB
{
template <typename T>
class JSONNullableImplBase
{
public:
static DataTypePtr getType() { return std::make_shared<DataTypeNullable>(std::make_shared<T>()); }
static Field getDefault() { return {}; }
};
class JSONHasImpl : public JSONNullableImplBase<DataTypeUInt8>
{
public:
static constexpr auto name{"jsonHas"};
static Field getValue(ParsedJson::iterator &) { return {1}; }
};
class JSONLengthImpl : public JSONNullableImplBase<DataTypeUInt64>
{
public:
static constexpr auto name{"jsonLength"};
static Field getValue(ParsedJson::iterator & pjh)
{
if (!pjh.is_object_or_array())
return getDefault();
size_t size = 0;
if (pjh.down())
{
size += 1;
while (pjh.next())
size += 1;
}
return {size};
}
};
class JSONTypeImpl : public JSONNullableImplBase<DataTypeString>
{
public:
static constexpr auto name{"jsonType"};
static Field getValue(ParsedJson::iterator & pjh)
{
switch (pjh.get_type())
{
case '[':
return "Array";
case '{':
return "Object";
case '"':
return "String";
case 'l':
return "Int64";
case 'd':
return "Float64";
case 't':
return "Bool";
case 'f':
return "Bool";
case 'n':
return "Null";
default:
return "Unknown";
}
}
};
class JSONExtractImpl
{
public:
static constexpr auto name{"jsonExtract"};
static DataTypePtr getType(const DataTypePtr & type)
{
WhichDataType which{type};
if (which.isNativeUInt() || which.isNativeInt() || which.isFloat() || which.isEnum() || which.isDateOrDateTime()
|| which.isStringOrFixedString() || which.isInterval())
return std::make_shared<DataTypeNullable>(type);
if (which.isArray())
{
auto array_type = static_cast<const DataTypeArray *>(type.get());
return std::make_shared<DataTypeArray>(getType(array_type->getNestedType()));
}
if (which.isTuple())
{
auto tuple_type = static_cast<const DataTypeTuple *>(type.get());
DataTypes types;
types.reserve(tuple_type->getElements().size());
for (const DataTypePtr & element : tuple_type->getElements())
{
types.push_back(getType(element));
}
return std::make_shared<DataTypeTuple>(std::move(types));
}
throw Exception{"Unsupported return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
static Field getDefault(const DataTypePtr & type)
{
WhichDataType which{type};
if (which.isNativeUInt() || which.isNativeInt() || which.isFloat() || which.isEnum() || which.isDateOrDateTime()
|| which.isStringOrFixedString() || which.isInterval())
return {};
if (which.isArray())
return {Array{}};
if (which.isTuple())
{
auto tuple_type = static_cast<const DataTypeTuple *>(type.get());
Tuple tuple;
tuple.toUnderType().reserve(tuple_type->getElements().size());
for (const DataTypePtr & element : tuple_type->getElements())
tuple.toUnderType().push_back(getDefault(element));
return {tuple};
}
// should not reach
throw Exception{"Unsupported return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
static Field getValue(ParsedJson::iterator & pjh, const DataTypePtr & type)
{
WhichDataType which{type};
if (which.isNativeUInt() || which.isNativeInt() || which.isEnum() || which.isDateOrDateTime() || which.isInterval())
{
if (pjh.is_integer())
return {pjh.get_integer()};
else
return getDefault(type);
}
if (which.isFloat())
{
if (pjh.is_integer())
return {static_cast<double>(pjh.get_integer())};
else if (pjh.is_double())
return {pjh.get_double()};
else
return getDefault(type);
}
if (which.isStringOrFixedString())
{
if (pjh.is_string())
return {String{pjh.get_string()}};
else
return getDefault(type);
}
if (which.isArray())
{
if (!pjh.is_object_or_array())
return getDefault(type);
auto array_type = static_cast<const DataTypeArray *>(type.get());
Array array;
bool first = true;
while (first ? pjh.down() : pjh.next())
{
first = false;
ParsedJson::iterator pjh1{pjh};
array.push_back(getValue(pjh1, array_type->getNestedType()));
}
return {array};
}
if (which.isTuple())
{
if (!pjh.is_object_or_array())
return getDefault(type);
auto tuple_type = static_cast<const DataTypeTuple *>(type.get());
Tuple tuple;
tuple.toUnderType().reserve(tuple_type->getElements().size());
bool valid = true;
bool first = true;
for (const DataTypePtr & element : tuple_type->getElements())
{
if (valid)
{
valid &= first ? pjh.down() : pjh.next();
first = false;
ParsedJson::iterator pjh1{pjh};
tuple.toUnderType().push_back(getValue(pjh1, element));
}
else
tuple.toUnderType().push_back(getDefault(element));
}
return {tuple};
}
// should not reach
throw Exception{"Unsupported return type schema: " + type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
};
class JSONExtractUIntImpl : public JSONNullableImplBase<DataTypeUInt64>
{
public:
static constexpr auto name{"jsonExtractUInt"};
static Field getValue(ParsedJson::iterator & pjh)
{
if (pjh.is_integer())
return {pjh.get_integer()};
else
return getDefault();
}
};
class JSONExtractIntImpl : public JSONNullableImplBase<DataTypeInt64>
{
public:
static constexpr auto name{"jsonExtractInt"};
static Field getValue(ParsedJson::iterator & pjh)
{
if (pjh.is_integer())
return {pjh.get_integer()};
else
return getDefault();
}
};
class JSONExtractFloatImpl : public JSONNullableImplBase<DataTypeFloat64>
{
public:
static constexpr auto name{"jsonExtractFloat"};
static Field getValue(ParsedJson::iterator & pjh)
{
if (pjh.is_double())
return {pjh.get_double()};
else
return getDefault();
}
};
class JSONExtractBoolImpl : public JSONNullableImplBase<DataTypeUInt8>
{
public:
static constexpr auto name{"jsonExtractBool"};
static Field getValue(ParsedJson::iterator & pjh)
{
if (pjh.get_type() == 't')
return {1};
else if (pjh.get_type() == 'f')
return {0};
else
return getDefault();
}
};
// class JSONExtractRawImpl: public JSONNullableImplBase<DataTypeString>
// {
// public:
// static constexpr auto name {"jsonExtractRaw"};
// static Field getValue(ParsedJson::iterator & pjh)
// {
// //
// }
// };
class JSONExtractStringImpl : public JSONNullableImplBase<DataTypeString>
{
public:
static constexpr auto name{"jsonExtractString"};
static Field getValue(ParsedJson::iterator & pjh)
{
if (pjh.is_string())
return {String{pjh.get_string()}};
else
return getDefault();
}
};
}
#else
namespace DB
{
struct JSONHasImpl { static constexpr auto name{"jsonHas"}; };
struct JSONLengthImpl { static constexpr auto name{"jsonLength"}; };
struct JSONTypeImpl { static constexpr auto name{"jsonType"}; };
struct JSONExtractImpl { static constexpr auto name{"jsonExtract"}; };
struct JSONExtractUIntImpl { static constexpr auto name{"jsonExtractUInt"}; };
struct JSONExtractIntImpl { static constexpr auto name{"jsonExtractInt"}; };
struct JSONExtractFloatImpl { static constexpr auto name{"jsonExtractFloat"}; };
struct JSONExtractBoolImpl { static constexpr auto name{"jsonExtractBool"}; };
//struct JSONExtractRawImpl { static constexpr auto name {"jsonExtractRaw"}; };
struct JSONExtractStringImpl { static constexpr auto name{"jsonExtractString"}; };
}
#endif
namespace DB
{
void registerFunctionsJSON(FunctionFactory & factory)
{
#if USE_SIMDJSON
if (__builtin_cpu_supports("avx2"))
{
factory.registerFunction<FunctionJSONBase<JSONHasImpl, false>>();
factory.registerFunction<FunctionJSONBase<JSONLengthImpl, false>>();
factory.registerFunction<FunctionJSONBase<JSONTypeImpl, false>>();
factory.registerFunction<FunctionJSONBase<JSONExtractImpl, true>>();
factory.registerFunction<FunctionJSONBase<JSONExtractUIntImpl, false>>();
factory.registerFunction<FunctionJSONBase<JSONExtractIntImpl, false>>();
factory.registerFunction<FunctionJSONBase<JSONExtractFloatImpl, false>>();
factory.registerFunction<FunctionJSONBase<JSONExtractBoolImpl, false>>();
// factory.registerFunction<FunctionJSONBase<
// JSONExtractRawImpl,
// false
// >>();
factory.registerFunction<FunctionJSONBase<JSONExtractStringImpl, false>>();
return;
}
#endif
factory.registerFunction<FunctionJSONDummy<JSONHasImpl>>();
factory.registerFunction<FunctionJSONDummy<JSONLengthImpl>>();
factory.registerFunction<FunctionJSONDummy<JSONTypeImpl>>();
factory.registerFunction<FunctionJSONDummy<JSONExtractImpl>>();
factory.registerFunction<FunctionJSONDummy<JSONExtractUIntImpl>>();
factory.registerFunction<FunctionJSONDummy<JSONExtractIntImpl>>();
factory.registerFunction<FunctionJSONDummy<JSONExtractFloatImpl>>();
factory.registerFunction<FunctionJSONDummy<JSONExtractBoolImpl>>();
//factory.registerFunction<FunctionJSONDummy<JSONExtractRawImpl>>();
factory.registerFunction<FunctionJSONDummy<JSONExtractStringImpl>>();
}
}
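A few hedged usage sketches for the functions registered above (lowercase names as registered in this commit; AVX2 support is required at runtime):

SELECT jsonHas('{"a": 1}', 'a');                              -- 1
SELECT jsonLength('[1, 2, 3]');                               -- 3
SELECT jsonType('{"a": 1}');                                  -- 'Object'
SELECT jsonExtractString('{"a": "hello"}', 'a');              -- 'hello'
SELECT jsonExtractInt('[10, 20, 30]', -1);                    -- 30 (a negative index counts from the end)
SELECT jsonExtract('{"a": [1, 2, 3]}', 'Array(Int64)', 'a');  -- [1, 2, 3]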


@ -0,0 +1,243 @@
#pragma once
#include <Functions/IFunction.h>
#include <Common/config.h>
#if USE_SIMDJSON
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeFactory.h>
#include <Common/typeid_cast.h>
#include <ext/range.h>
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
#pragma clang diagnostic ignored "-Wnewline-eof"
#endif
#include <simdjson/jsonparser.h>
#ifdef __clang__
#pragma clang diagnostic pop
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template <typename Impl, bool ExtraArg>
class FunctionJSONBase : public IFunction
{
private:
enum class Action
{
key = 1,
index = 2,
};
mutable std::vector<Action> actions;
mutable DataTypePtr virtual_type;
bool tryMove(ParsedJson::iterator & pjh, Action action, const Field & accessor)
{
switch (action)
{
case Action::key:
if (!pjh.is_object() || !pjh.move_to_key(accessor.get<String>().data()))
return false;
break;
case Action::index:
if (!pjh.is_object_or_array() || !pjh.down())
return false;
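// The accessor is a 1-based index from the start; negative values count from the end (-1 is the last element) and 0 is invalid.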
int steps = accessor.get<Int64>();
if (steps > 0)
steps -= 1;
else if (steps < 0)
{
steps += 1;
ParsedJson::iterator pjh1{pjh};
while (pjh1.next())
steps += 1;
}
else
return false;
for (const auto i : ext::range(0, steps))
{
(void)i;
if (!pjh.next())
return false;
}
break;
}
return true;
}
public:
static constexpr auto name = Impl::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionJSONBase>(); }
String getName() const override { return Impl::name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if constexpr (ExtraArg)
{
if (arguments.size() < 2)
throw Exception{"Function " + getName() + " requires at least two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
auto col_type_const = typeid_cast<const ColumnConst *>(arguments[1].column.get());
if (!col_type_const)
throw Exception{"Illegal non-const column " + arguments[1].column->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN};
virtual_type = DataTypeFactory::instance().get(col_type_const->getValue<String>());
}
else
{
if (arguments.size() < 1)
throw Exception{"Function " + getName() + " requires at least one arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
}
if (!isString(arguments[0].type))
throw Exception{"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
actions.reserve(arguments.size() - 1 - ExtraArg);
for (const auto i : ext::range(1 + ExtraArg, arguments.size()))
{
if (isString(arguments[i].type))
actions.push_back(Action::key);
else if (isInteger(arguments[i].type))
actions.push_back(Action::index);
else
throw Exception{"Illegal type " + arguments[i].type->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
if constexpr (ExtraArg)
return Impl::getType(virtual_type);
else
return Impl::getType();
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result_pos, size_t input_rows_count) override
{
MutableColumnPtr to{block.getByPosition(result_pos).type->createColumn()};
to->reserve(input_rows_count);
const ColumnPtr & arg_json = block.getByPosition(arguments[0]).column;
auto col_json_const = typeid_cast<const ColumnConst *>(arg_json.get());
auto col_json_string
= typeid_cast<const ColumnString *>(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get());
if (!col_json_string)
throw Exception{"Illegal column " + arg_json->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
const ColumnString::Chars & chars = col_json_string->getChars();
const ColumnString::Offsets & offsets = col_json_string->getOffsets();
size_t max_size = 1;
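/// Each ColumnString value is stored with a terminating zero byte, hence the "- 1".
/// Reading offsets[-1] is valid and yields 0, because the offsets array is padded.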
for (const auto i : ext::range(0, input_rows_count))
if (max_size < offsets[i] - offsets[i - 1] - 1)
max_size = offsets[i] - offsets[i - 1] - 1;
ParsedJson pj;
if (!pj.allocateCapacity(max_size))
throw Exception{"Can not allocate memory for " + std::to_string(max_size) + " units when parsing JSON",
ErrorCodes::CANNOT_ALLOCATE_MEMORY};
for (const auto i : ext::range(0, input_rows_count))
{
bool ok = json_parse(&chars[offsets[i - 1]], offsets[i] - offsets[i - 1] - 1, pj) == 0;
ParsedJson::iterator pjh{pj};
for (const auto j : ext::range(0, actions.size()))
{
if (!ok)
break;
ok = tryMove(pjh, actions[j], (*block.getByPosition(arguments[j + 1 + ExtraArg]).column)[i]);
}
if (ok)
{
if constexpr (ExtraArg)
to->insert(Impl::getValue(pjh, virtual_type));
else
to->insert(Impl::getValue(pjh));
}
else
{
if constexpr (ExtraArg)
to->insert(Impl::getDefault(virtual_type));
else
to->insert(Impl::getDefault());
}
}
block.getByPosition(result_pos).column = std::move(to);
}
};
}
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
template <typename Impl>
class FunctionJSONDummy : public IFunction
{
public:
static constexpr auto name = Impl::name;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionJSONDummy>(); }
String getName() const override { return Impl::name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override
{
throw Exception{"Function " + getName() + " is not supported without AVX2", ErrorCodes::NOT_IMPLEMENTED};
}
void executeImpl(Block &, const ColumnNumbers &, size_t, size_t) override
{
throw Exception{"Function " + getName() + " is not supported without AVX2", ErrorCodes::NOT_IMPLEMENTED};
}
};
}
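For illustration, here is a minimal sketch of the Impl interface that FunctionJSONBase<Impl, false> expects, inferred from the calls the template makes above. The JSONHasSketchImpl type, the jsonHasSketch name and its trivial semantics are assumptions made for this example rather than part of the commit, and the sketch presumes the simdjson/AVX2 branch where ParsedJson::iterator is available:

struct JSONHasSketchImpl
{
    static constexpr auto name = "jsonHasSketch";

    /// Used by getReturnTypeImpl() when ExtraArg == false.
    static DataTypePtr getType() { return std::make_shared<DataTypeUInt8>(); }

    /// Called when the whole chain of key/index moves succeeded.
    static Field getValue(ParsedJson::iterator &) { return UInt64(1); }

    /// Called when parsing failed or some accessor did not match.
    static Field getDefault() { return UInt64(0); }
};

using FunctionJSONHasSketch = FunctionJSONBase<JSONHasSketchImpl, false>;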

View File

@ -449,44 +449,27 @@ struct NameMultiSearchFirstPositionCaseInsensitiveUTF8
using FunctionPosition = FunctionsStringSearch<PositionImpl<PositionCaseSensitiveASCII>, NamePosition>;
using FunctionPositionUTF8 = FunctionsStringSearch<PositionImpl<PositionCaseSensitiveUTF8>, NamePositionUTF8>;
using FunctionPositionCaseInsensitive = FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveASCII>, NamePositionCaseInsensitive>;
using FunctionPositionCaseInsensitiveUTF8
= FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveUTF8>, NamePositionCaseInsensitiveUTF8>;
using FunctionPositionCaseInsensitiveUTF8 = FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveUTF8>, NamePositionCaseInsensitiveUTF8>;
using FunctionMultiSearchAllPositions
= FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveASCII>, NameMultiSearchAllPositions>;
using FunctionMultiSearchAllPositionsUTF8
= FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAllPositionsUTF8>;
using FunctionMultiSearchAllPositionsCaseInsensitive
= FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAllPositionsCaseInsensitive>;
using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringPosition<
MultiSearchAllPositionsImpl<PositionCaseInsensitiveUTF8>,
NameMultiSearchAllPositionsCaseInsensitiveUTF8>;
using FunctionMultiSearchAllPositions = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveASCII>, NameMultiSearchAllPositions>;
using FunctionMultiSearchAllPositionsUTF8 = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAllPositionsUTF8>;
using FunctionMultiSearchAllPositionsCaseInsensitive = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAllPositionsCaseInsensitive>;
using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchAllPositionsCaseInsensitiveUTF8>;
using FunctionMultiSearch = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveASCII>, NameMultiSearchAny>;
using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAnyUTF8>;
using FunctionMultiSearchCaseInsensitive
= FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAnyCaseInsensitive>;
using FunctionMultiSearchCaseInsensitiveUTF8
= FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchAnyCaseInsensitiveUTF8>;
using FunctionMultiSearchCaseInsensitive = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAnyCaseInsensitive>;
using FunctionMultiSearchCaseInsensitiveUTF8 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchAnyCaseInsensitiveUTF8>;
using FunctionMultiSearchFirstIndex
= FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstIndex>;
using FunctionMultiSearchFirstIndexUTF8
= FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstIndexUTF8>;
using FunctionMultiSearchFirstIndexCaseInsensitive
= FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstIndexCaseInsensitive>;
using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8
= FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchFirstIndexCaseInsensitiveUTF8>;
using FunctionMultiSearchFirstIndex = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstIndex>;
using FunctionMultiSearchFirstIndexUTF8 = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstIndexUTF8>;
using FunctionMultiSearchFirstIndexCaseInsensitive = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstIndexCaseInsensitive>;
using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8 = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchFirstIndexCaseInsensitiveUTF8>;
using FunctionMultiSearchFirstPosition
= FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstPosition>;
using FunctionMultiSearchFirstPositionUTF8
= FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstPositionUTF8>;
using FunctionMultiSearchFirstPositionCaseInsensitive
= FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstPositionCaseInsensitive>;
using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch<
MultiSearchFirstPositionImpl<PositionCaseInsensitiveUTF8>,
NameMultiSearchFirstPositionCaseInsensitiveUTF8>;
using FunctionMultiSearchFirstPosition = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstPosition>;
using FunctionMultiSearchFirstPositionUTF8 = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstPositionUTF8>;
using FunctionMultiSearchFirstPositionCaseInsensitive = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstPositionCaseInsensitive>;
using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchFirstPositionCaseInsensitiveUTF8>;
void registerFunctionsStringSearch(FunctionFactory & factory)

View File

@ -164,43 +164,46 @@ struct NgramDistanceImpl
return num;
}
template <bool SaveNgrams>
static ALWAYS_INLINE inline size_t calculateNeedleStats(
const char * data,
const size_t size,
NgramStats & ngram_stats,
[[maybe_unused]] UInt16 * ngram_storage,
size_t (*read_code_points)(CodePoint *, const char *&, const char *),
UInt16 (*hash_functor)(const CodePoint *))
{
// To prevent size_t overflow below.
if (size < N)
return 0;
const char * start = data;
const char * end = data + size;
CodePoint cp[simultaneously_codepoints_num] = {};
/// read_code_points returns the position of cp where it stopped reading codepoints.
size_t found = read_code_points(cp, start, end);
/// We start counting from here, because the first N - 1 codepoints do not form a complete n-gram.
size_t i = N - 1;
/// Initialize with this value because for the first time `found` does not initialize first N - 1 codepoints.
size_t len = -N + 1;
size_t len = 0;
do
{
len += found - N + 1;
for (; i + N <= found; ++i)
++ngram_stats[hash_functor(cp + i)];
{
++len;
UInt16 hash = hash_functor(cp + i);
if constexpr (SaveNgrams)
*ngram_storage++ = hash;
++ngram_stats[hash];
}
i = 0;
} while (start < end && (found = read_code_points(cp, start, end)));
return len;
}
template <bool ReuseStats>
static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric(
const char * data,
const size_t size,
NgramStats & ngram_stats,
size_t & distance,
[[maybe_unused]] UInt16 * ngram_storage,
size_t (*read_code_points)(CodePoint *, const char *&, const char *),
UInt16 (*hash_functor)(const CodePoint *))
{
@ -209,18 +212,6 @@ struct NgramDistanceImpl
const char * end = data + size;
CodePoint cp[simultaneously_codepoints_num] = {};
/// allocation tricks, most strings are relatively small
static constexpr size_t small_buffer_size = 256;
std::unique_ptr<UInt16[]> big_buffer;
UInt16 small_buffer[small_buffer_size];
UInt16 * ngram_storage = small_buffer;
if (size > small_buffer_size)
{
ngram_storage = new UInt16[size];
big_buffer.reset(ngram_storage);
}
/// read_code_points returns the position of cp where it stopped reading codepoints.
size_t found = read_code_points(cp, start, end);
/// We start counting from here, because the first N - 1 codepoints do not form a complete n-gram.
@ -235,21 +226,25 @@ struct NgramDistanceImpl
--distance;
else
++distance;
ngram_storage[ngram_cnt++] = hash;
if constexpr (ReuseStats)
ngram_storage[ngram_cnt] = hash;
++ngram_cnt;
--ngram_stats[hash];
}
iter = 0;
} while (start < end && (found = read_code_points(cp, start, end)));
/// Restore the hash map to its initial state.
for (size_t i = 0; i < ngram_cnt; ++i)
++ngram_stats[ngram_storage[i]];
if constexpr (ReuseStats)
{
for (size_t i = 0; i < ngram_cnt; ++i)
++ngram_stats[ngram_storage[i]];
}
return ngram_cnt;
}
template <class Callback, class... Args>
static inline size_t dispatchSearcher(Callback callback, Args &&... args)
static inline auto dispatchSearcher(Callback callback, Args &&... args)
{
if constexpr (!UTF8)
return callback(std::forward<Args>(args)..., readASCIICodePoints, ASCIIHash);
@ -259,8 +254,7 @@ struct NgramDistanceImpl
static void constant_constant(std::string data, std::string needle, Float32 & res)
{
NgramStats common_stats;
memset(common_stats, 0, sizeof(common_stats));
NgramStats common_stats = {};
/// We use unsafe (unchecked) versions of getting ngrams, so the strings are padded.
const size_t needle_size = needle.size();
@ -268,11 +262,11 @@ struct NgramDistanceImpl
needle.resize(needle_size + default_padding);
data.resize(data_size + default_padding);
size_t second_size = dispatchSearcher(calculateNeedleStats, needle.data(), needle_size, common_stats);
size_t second_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats, nullptr);
size_t distance = second_size;
if (data_size <= max_string_size)
{
size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric, data.data(), data_size, common_stats, distance);
size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric<false>, data.data(), data_size, common_stats, distance, nullptr);
res = distance * 1.f / std::max(first_size + second_size, size_t(1));
}
else
@ -281,18 +275,89 @@ struct NgramDistanceImpl
}
}
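/// Editor's sketch (not part of this commit): the same arithmetic as constant_constant above,
/// spelled out with byte-level 4-grams and std::string keys instead of hashing and padding.
/// distance starts at the needle's n-gram count; every haystack n-gram still present in the
/// stats decrements it and every other one increments it, so the result is the size of the
/// multiset symmetric difference, normalized by the total n-gram count (0 = identical,
/// about 1 = disjoint). Assumes <string>, <unordered_map> and <algorithm> are included.
static float ngramDistanceSketch(const std::string & haystack, const std::string & needle)
{
    constexpr size_t n = 4;
    std::unordered_map<std::string, int> stats;

    const size_t needle_ngrams = needle.size() < n ? 0 : needle.size() - n + 1;
    for (size_t i = 0; i + n <= needle.size(); ++i)
        ++stats[needle.substr(i, n)];

    const size_t haystack_ngrams = haystack.size() < n ? 0 : haystack.size() - n + 1;
    size_t distance = needle_ngrams;
    for (size_t i = 0; i + n <= haystack.size(); ++i)
    {
        if (stats[haystack.substr(i, n)]-- > 0)
            --distance;  /// matched one of the needle's n-grams
        else
            ++distance;  /// an n-gram the needle does not have (any more)
    }

    return distance * 1.f / std::max(haystack_ngrams + needle_ngrams, size_t(1));
}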
static void vector_vector(
const ColumnString::Chars & haystack_data,
const ColumnString::Offsets & haystack_offsets,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<Float32> & res)
{
const size_t haystack_offsets_size = haystack_offsets.size();
size_t prev_haystack_offset = 0;
size_t prev_needle_offset = 0;
NgramStats common_stats = {};
/// The main motivation is to avoid allocating more on the stack: a lot (128 KiB) has been allocated already.
/// These storages can be reused within one thread, because only the positions actually written are read back.
std::unique_ptr<UInt16[]> needle_ngram_storage(new UInt16[max_string_size]);
std::unique_ptr<UInt16[]> haystack_ngram_storage(new UInt16[max_string_size]);
for (size_t i = 0; i < haystack_offsets_size; ++i)
{
const char * haystack = reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]);
const size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
const char * needle = reinterpret_cast<const char *>(&needle_data[prev_needle_offset]);
const size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
if (needle_size <= max_string_size && haystack_size <= max_string_size)
{
/// Get needle stats.
const size_t needle_stats_size = dispatchSearcher(
calculateNeedleStats<true>,
needle,
needle_size,
common_stats,
needle_ngram_storage.get());
size_t distance = needle_stats_size;
/// Combine with the haystack stats; the call restores the map to its needle-only state afterwards.
const size_t haystack_stats_size = dispatchSearcher(
calculateHaystackStatsAndMetric<true>,
haystack,
haystack_size,
common_stats,
distance,
haystack_ngram_storage.get());
/// Return the stats array to all zeros.
for (size_t j = 0; j < needle_stats_size; ++j)
--common_stats[needle_ngram_storage[j]];
/// At this point, common_stats is a zero array again.
res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1));
}
else
{
/// The strings are too big, so we assume they are not similar. This is done to limit the number
/// of n-grams stored and to avoid allocating too much memory.
res[i] = 1.f;
}
prev_needle_offset = needle_offsets[i];
prev_haystack_offset = haystack_offsets[i];
}
}
static void vector_constant(
const ColumnString::Chars & data, const ColumnString::Offsets & offsets, std::string needle, PaddedPODArray<Float32> & res)
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
std::string needle,
PaddedPODArray<Float32> & res)
{
/// zeroing our map
NgramStats common_stats;
memset(common_stats, 0, sizeof(common_stats));
NgramStats common_stats = {};
/// The main motivation is to avoid allocating more on the stack: a lot (128 KiB) has been allocated already.
/// These storages can be reused within one thread, because only the positions actually written are read back.
std::unique_ptr<UInt16[]> ngram_storage(new UInt16[max_string_size]);
/// We use unsafe (unchecked) versions of getting ngrams, so padded data is used even for the needle.
const size_t needle_size = needle.size();
needle.resize(needle_size + default_padding);
const size_t needle_stats_size = dispatchSearcher(calculateNeedleStats, needle.data(), needle_size, common_stats);
const size_t needle_stats_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats, nullptr);
size_t distance = needle_stats_size;
size_t prev_offset = 0;
@ -303,7 +368,11 @@ struct NgramDistanceImpl
if (haystack_size <= max_string_size)
{
size_t haystack_stats_size = dispatchSearcher(
calculateHaystackStatsAndMetric, reinterpret_cast<const char *>(haystack), haystack_size, common_stats, distance);
calculateHaystackStatsAndMetric<true>,
reinterpret_cast<const char *>(haystack),
haystack_size, common_stats,
distance,
ngram_storage.get());
res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1));
}
else
@ -339,11 +408,9 @@ struct NameNgramDistanceUTF8CaseInsensitive
};
using FunctionNgramDistance = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, false>, NameNgramDistance>;
using FunctionNgramDistanceCaseInsensitive
= FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, true>, NameNgramDistanceCaseInsensitive>;
using FunctionNgramDistanceCaseInsensitive = FunctionsStringSimilarity<NgramDistanceImpl<4, UInt8, false, true>, NameNgramDistanceCaseInsensitive>;
using FunctionNgramDistanceUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, false>, NameNgramDistanceUTF8>;
using FunctionNgramDistanceCaseInsensitiveUTF8
= FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, true>, NameNgramDistanceUTF8CaseInsensitive>;
using FunctionNgramDistanceCaseInsensitiveUTF8 = FunctionsStringSimilarity<NgramDistanceImpl<3, UInt32, true, true>, NameNgramDistanceUTF8CaseInsensitive>;
void registerFunctionsStringSimilarity(FunctionFactory & factory)
{

View File

@ -62,10 +62,7 @@ public:
const ColumnConst * col_haystack_const = typeid_cast<const ColumnConst *>(&*column_haystack);
const ColumnConst * col_needle_const = typeid_cast<const ColumnConst *>(&*column_needle);
if (!col_needle_const)
throw Exception("Second argument of function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_COLUMN);
if (col_haystack_const)
if (col_haystack_const && col_needle_const)
{
ResultType res{};
const String & needle = col_needle_const->getValue<String>();
@ -88,8 +85,9 @@ public:
vec_res.resize(column_haystack->size());
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(&*column_needle);
if (col_haystack_vector)
if (col_haystack_vector && col_needle_const)
{
const String & needle = col_needle_const->getValue<String>();
if (needle.size() > Impl::max_string_size)
@ -101,6 +99,27 @@ public:
}
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res);
}
else if (col_haystack_vector && col_needle_vector)
{
Impl::vector_vector(
col_haystack_vector->getChars(),
col_haystack_vector->getOffsets(),
col_needle_vector->getChars(),
col_needle_vector->getOffsets(),
vec_res);
}
else if (col_haystack_const && col_needle_vector)
{
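/// The n-gram distance is symmetric, so the roles are swapped here: the constant haystack
/// is treated as the "needle" and the needle column is scanned as if it were the haystack.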
const String & needle = col_haystack_const->getValue<String>();
if (needle.size() > Impl::max_string_size)
{
throw Exception(
"String size of needle is too big for function " + getName() + ". Should be at most "
+ std::to_string(Impl::max_string_size),
ErrorCodes::TOO_LARGE_STRING_SIZE);
}
Impl::vector_constant(col_needle_vector->getChars(), col_needle_vector->getOffsets(), needle, vec_res);
}
else
{
throw Exception(

View File

@ -135,9 +135,10 @@ namespace MultiRegexps
for (const StringRef ref : str_patterns)
{
ptrns.push_back(ref.data);
flags.push_back(HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_SINGLEMATCH);
flags.push_back(HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8);
if constexpr (CompileForEditDistance)
{
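/// Hyperscan does not support approximate (edit distance) matching in UTF-8 mode,
/// so the UTF-8 flag has to be cleared for these patterns.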
flags.back() &= ~HS_FLAG_UTF8;
ext_exprs.emplace_back();
ext_exprs.back().flags = HS_EXT_FLAG_EDIT_DISTANCE;
ext_exprs.back().edit_distance = edit_distance.value();

View File

@ -0,0 +1,51 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypesNumber.h>
namespace DB
{
/** ignoreExceptNull(...) is a function that takes any arguments and always returns 0,
  * except that NULL arguments make the result NULL (the default IFunction handling of NULLs is kept).
  */
class FunctionIgnoreExceptNull : public IFunction
{
public:
static constexpr auto name = "ignoreExceptNull";
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionIgnoreExceptNull>();
}
bool isVariadic() const override
{
return true;
}
size_t getNumberOfArguments() const override
{
return 0;
}
String getName() const override
{
return name;
}
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
return std::make_shared<DataTypeUInt8>();
}
void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override
{
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(0));
}
};
void registerFunctionIgnoreExceptNull(FunctionFactory & factory)
{
factory.registerFunction<FunctionIgnoreExceptNull>();
}
}

View File

@ -73,11 +73,6 @@ public:
return std::make_shared<DataTypeUInt8>();
}
bool useDefaultImplementationForNulls() const override
{
return false;
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
/// Second argument must be ColumnSet.
@ -89,7 +84,7 @@ public:
Block block_of_key_columns;
/// First argument may be tuple or single column.
/// First argument may be a tuple or a single column.
const ColumnWithTypeAndName & left_arg = block.getByPosition(arguments[0]);
const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(left_arg.column.get());
const ColumnConst * const_tuple = checkAndGetColumnConst<ColumnTuple>(left_arg.column.get());

View File

@ -40,6 +40,7 @@ void registerFunctionsMath(FunctionFactory &);
void registerFunctionsGeo(FunctionFactory &);
void registerFunctionsNull(FunctionFactory &);
void registerFunctionsFindCluster(FunctionFactory &);
void registerFunctionsJSON(FunctionFactory &);
void registerFunctionTransform(FunctionFactory &);
#if USE_ICU
@ -82,6 +83,7 @@ void registerFunctions()
registerFunctionsGeo(factory);
registerFunctionsNull(factory);
registerFunctionsFindCluster(factory);
registerFunctionsJSON(factory);
registerFunctionTransform(factory);
#if USE_ICU

View File

@ -19,6 +19,7 @@ void registerFunctionSleep(FunctionFactory &);
void registerFunctionSleepEachRow(FunctionFactory &);
void registerFunctionMaterialize(FunctionFactory &);
void registerFunctionIgnore(FunctionFactory &);
void registerFunctionIgnoreExceptNull(FunctionFactory &);
void registerFunctionIndexHint(FunctionFactory &);
void registerFunctionIdentity(FunctionFactory &);
void registerFunctionArrayJoin(FunctionFactory &);
@ -62,6 +63,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
registerFunctionSleepEachRow(factory);
registerFunctionMaterialize(factory);
registerFunctionIgnore(factory);
registerFunctionIgnoreExceptNull(factory);
registerFunctionIndexHint(factory);
registerFunctionIdentity(factory);
registerFunctionArrayJoin(factory);

View File

@ -1,3 +1,5 @@
#if defined(__linux__) || defined(__FreeBSD__)
#pragma GCC diagnostic ignored "-Wsign-compare"
#ifdef __clang__
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
@ -69,3 +71,5 @@ TEST(ReadBufferAIOTest, TestReadAfterAIO)
EXPECT_EQ(read_after_eof_big, data.length());
EXPECT_TRUE(testbuf.eof());
}
#endif

View File

@ -328,10 +328,10 @@ void ActionsVisitor::visit(const ASTPtr & ast)
if (!only_consts)
{
/// We are in a part of the tree that we are not going to compute; we only need to define the types.
/// Do not run the subquery and do not create sets. We treat "IN" as the "ignore" function.
/// Do not run the subquery and do not create sets. We treat "IN" as the "ignoreExceptNull" function.
actions_stack.addAction(ExpressionAction::applyFunction(
FunctionFactory::instance().get("ignore", context),
FunctionFactory::instance().get("ignoreExceptNull", context),
{ node->arguments->children.at(0)->getColumnName() },
getColumnName()));
}

View File

@ -191,12 +191,12 @@ void AsynchronousMetrics::update()
"Cannot get replica delay for table: " + backQuoteIfNeed(db.first) + "." + backQuoteIfNeed(iterator->name()));
}
calculateMax(max_part_count_for_partition, table_replicated_merge_tree->getData().getMaxPartsCountForPartition());
calculateMax(max_part_count_for_partition, table_replicated_merge_tree->getMaxPartsCountForPartition());
}
if (table_merge_tree)
{
calculateMax(max_part_count_for_partition, table_merge_tree->getData().getMaxPartsCountForPartition());
calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountForPartition());
}
}
}

View File

@ -702,9 +702,8 @@ void Context::checkDatabaseAccessRightsImpl(const std::string & database_name) c
throw Exception("Access denied to database " + database_name + " for user " + client_info.current_user , ErrorCodes::DATABASE_ACCESS_DENIED);
}
void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
void Context::addDependencyUnsafe(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
{
auto lock = getLock();
checkDatabaseAccessRightsImpl(from.first);
checkDatabaseAccessRightsImpl(where.first);
shared->view_dependencies[from].insert(where);
@ -715,9 +714,14 @@ void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAnd
table->updateDependencies();
}
void Context::removeDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
{
auto lock = getLock();
addDependencyUnsafe(from, where);
}
void Context::removeDependencyUnsafe(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
{
checkDatabaseAccessRightsImpl(from.first);
checkDatabaseAccessRightsImpl(where.first);
shared->view_dependencies[from].erase(where);
@ -728,6 +732,12 @@ void Context::removeDependency(const DatabaseAndTableName & from, const Database
table->updateDependencies();
}
void Context::removeDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where)
{
auto lock = getLock();
removeDependencyUnsafe(from, where);
}
Dependencies Context::getDependencies(const String & database_name, const String & table_name) const
{
auto lock = getLock();

View File

@ -215,6 +215,10 @@ public:
void removeDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where);
Dependencies getDependencies(const String & database_name, const String & table_name) const;
/// Variants that do not take the Context lock themselves; the caller is expected to hold it.
void addDependencyUnsafe(const DatabaseAndTableName & from, const DatabaseAndTableName & where);
void removeDependencyUnsafe(const DatabaseAndTableName & from, const DatabaseAndTableName & where);
/// Checking the existence of the table/database. Database can be empty - in this case the current database is used.
bool isTableExist(const String & database_name, const String & table_name) const;
bool isDatabaseExist(const String & database_name) const;

View File

@ -513,13 +513,14 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only
columns_added_by_join_list));
}
static void appendRequiredColumns(NameSet & required_columns, const Block & sample, const AnalyzedJoin & analyzed_join)
static void appendRequiredColumns(
NameSet & required_columns, const Block & sample, const Names & key_names_right, const JoinedColumnsList & columns_added_by_join)
{
for (auto & column : analyzed_join.key_names_right)
for (auto & column : key_names_right)
if (!sample.has(column))
required_columns.insert(column);
for (auto & column : analyzed_join.columns_from_joined_table)
for (auto & column : columns_added_by_join)
if (!sample.has(column.name_and_type.name))
required_columns.insert(column.name_and_type.name);
}
@ -606,7 +607,8 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
Names action_columns = joined_block_actions->getRequiredColumns();
NameSet required_columns(action_columns.begin(), action_columns.end());
appendRequiredColumns(required_columns, joined_block_actions->getSampleBlock(), analyzed_join);
appendRequiredColumns(
required_columns, joined_block_actions->getSampleBlock(), analyzed_join.key_names_right, columns_added_by_join);
Names original_columns = analyzed_join.getOriginalColumnNames(required_columns);

View File

@ -30,6 +30,8 @@
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
@ -43,8 +45,7 @@
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Storages/IStorage.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionFactory.h>
@ -590,13 +591,11 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
if (settings.optimize_move_to_prewhere && query.where() && !query.prewhere() && !query.final())
MergeTreeWhereOptimizer{query_info, context, merge_tree.getData(), query_analyzer->getRequiredSourceColumns(), log};
MergeTreeWhereOptimizer{query_info, context, merge_tree, query_analyzer->getRequiredSourceColumns(), log};
};
if (const StorageMergeTree * merge_tree = dynamic_cast<const StorageMergeTree *>(storage.get()))
optimize_prewhere(*merge_tree);
else if (const StorageReplicatedMergeTree * replicated_merge_tree = dynamic_cast<const StorageReplicatedMergeTree *>(storage.get()))
optimize_prewhere(*replicated_merge_tree);
if (const MergeTreeData * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get()))
optimize_prewhere(*merge_tree_data);
}
AnalysisResult expressions;

View File

@ -377,11 +377,11 @@ namespace
template <typename Map, typename KeyGetter>
struct Inserter<ASTTableJoin::Strictness::Any, Map, KeyGetter>
{
static ALWAYS_INLINE void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
static ALWAYS_INLINE void insert(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
{
auto emplace_result = key_getter.emplaceKey(map, i, pool);
if (emplace_result.isInserted() || emplace_result.getMapped().overwrite)
if (emplace_result.isInserted() || join.anyTakeLastRow())
new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i);
}
};
@ -659,7 +659,7 @@ void addFoundRow(const typename Map::mapped_type & mapped, AddedColumns & added,
if constexpr (STRICTNESS == ASTTableJoin::Strictness::All)
{
for (auto current = &static_cast<const typename Map::mapped_type::Base_t &>(mapped); current != nullptr; current = current->next)
for (auto current = &static_cast<const typename Map::mapped_type::Base &>(mapped); current != nullptr; current = current->next)
{
added.appendFromBlock(*current->block, current->row_num);
++current_offset;
@ -1078,10 +1078,7 @@ void Join::joinGet(Block & block, const String & column_name) const
if (kind == ASTTableJoin::Kind::Left && strictness == ASTTableJoin::Strictness::Any)
{
if (any_take_last_row)
joinGetImpl(block, column_name, std::get<MapsAnyOverwrite>(maps));
else
joinGetImpl(block, column_name, std::get<MapsAny>(maps));
joinGetImpl(block, column_name, std::get<MapsAny>(maps));
}
else
throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::LOGICAL_ERROR);
@ -1156,7 +1153,7 @@ struct AdderNonJoined<ASTTableJoin::Strictness::All, Mapped>
{
static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right)
{
for (auto current = &static_cast<const typename Mapped::Base_t &>(mapped); current != nullptr; current = current->next)
for (auto current = &static_cast<const typename Mapped::Base &>(mapped); current != nullptr; current = current->next)
{
for (size_t j = 0; j < columns_right.size(); ++j)
columns_right[j]->insertFrom(*current->block->getByPosition(j).column.get(), current->row_num);

View File

@ -25,6 +25,43 @@
namespace DB
{
namespace JoinStuff
{
/// Base class with optional flag attached that's needed to implement RIGHT and FULL JOINs.
template <typename T, bool with_used>
struct WithFlags;
template <typename T>
struct WithFlags<T, true> : T
{
using Base = T;
using T::T;
mutable std::atomic<bool> used {};
void setUsed() const { used.store(true, std::memory_order_relaxed); } /// Could be set simultaneously from different threads.
bool getUsed() const { return used; }
};
template <typename T>
struct WithFlags<T, false> : T
{
using Base = T;
using T::T;
void setUsed() const {}
bool getUsed() const { return true; }
};
using MappedAny = WithFlags<RowRef, false>;
using MappedAll = WithFlags<RowRefList, false>;
using MappedAnyFull = WithFlags<RowRef, true>;
using MappedAllFull = WithFlags<RowRefList, true>;
using MappedAsof = WithFlags<AsofRowRefs, false>;
}
/** Data structure for implementation of JOIN.
* It is just a hash table: keys -> rows of joined ("right") table.
* Additionally, CROSS JOIN is supported: instead of a hash table, it uses just a set of blocks without keys.
@ -132,36 +169,7 @@ public:
ASTTableJoin::Kind getKind() const { return kind; }
AsofRowRefs::Type getAsofType() const { return *asof_type; }
/** Depending on template parameter, adds or doesn't add a flag, that element was used (row was joined).
* Depending on template parameter, decide whether to overwrite existing values when encountering the same key again
* with_used is for implementation of RIGHT and FULL JOINs.
* overwrite is for implementation of StorageJoin with overwrite setting enabled
* NOTE: It is possible to store the flag in one bit of pointer to block or row_num. It seems not reasonable, because memory saving is minimal.
*/
template <bool with_used, bool overwrite_, typename Base>
struct WithFlags;
template <bool overwrite_, typename Base>
struct WithFlags<true, overwrite_, Base> : Base
{
static constexpr bool overwrite = overwrite_;
mutable std::atomic<bool> used {};
using Base::Base;
using Base_t = Base;
void setUsed() const { used.store(true, std::memory_order_relaxed); } /// Could be set simultaneously from different threads.
bool getUsed() const { return used; }
};
template <bool overwrite_, typename Base>
struct WithFlags<false, overwrite_, Base> : Base
{
static constexpr bool overwrite = overwrite_;
using Base::Base;
using Base_t = Base;
void setUsed() const {}
bool getUsed() const { return true; }
};
bool anyTakeLastRow() const { return any_take_last_row; }
/// Different types of keys for maps.
#define APPLY_FOR_JOIN_VARIANTS(M) \
@ -257,13 +265,11 @@ public:
}
};
using MapsAny = MapsTemplate<WithFlags<false, false, RowRef>>;
using MapsAnyOverwrite = MapsTemplate<WithFlags<false, true, RowRef>>;
using MapsAll = MapsTemplate<WithFlags<false, false, RowRefList>>;
using MapsAnyFull = MapsTemplate<WithFlags<true, false, RowRef>>;
using MapsAnyFullOverwrite = MapsTemplate<WithFlags<true, true, RowRef>>;
using MapsAllFull = MapsTemplate<WithFlags<true, false, RowRefList>>;
using MapsAsof = MapsTemplate<WithFlags<false, false, AsofRowRefs>>;
using MapsAny = MapsTemplate<JoinStuff::MappedAny>;
using MapsAll = MapsTemplate<JoinStuff::MappedAll>;
using MapsAnyFull = MapsTemplate<JoinStuff::MappedAnyFull>;
using MapsAllFull = MapsTemplate<JoinStuff::MappedAllFull>;
using MapsAsof = MapsTemplate<JoinStuff::MappedAsof>;
template <ASTTableJoin::Kind KIND>
struct KindTrait
@ -276,13 +282,14 @@ public:
static constexpr bool fill_right = static_in_v<KIND, ASTTableJoin::Kind::Right, ASTTableJoin::Kind::Full>;
};
template <bool fill_right, typename ASTTableJoin::Strictness, bool overwrite>
template <bool fill_right, typename ASTTableJoin::Strictness>
struct MapGetterImpl;
template <ASTTableJoin::Kind kind, ASTTableJoin::Strictness strictness, bool overwrite>
using Map = typename MapGetterImpl<KindTrait<kind>::fill_right, strictness, overwrite>::Map;
template <ASTTableJoin::Kind kind, ASTTableJoin::Strictness strictness>
using Map = typename MapGetterImpl<KindTrait<kind>::fill_right, strictness>::Map;
static constexpr std::array<ASTTableJoin::Strictness, 3> STRICTNESSES = {ASTTableJoin::Strictness::Any, ASTTableJoin::Strictness::All, ASTTableJoin::Strictness::Asof};
static constexpr std::array<ASTTableJoin::Strictness, 3> STRICTNESSES
= {ASTTableJoin::Strictness::Any, ASTTableJoin::Strictness::All, ASTTableJoin::Strictness::Asof};
static constexpr std::array<ASTTableJoin::Kind, 4> KINDS
= {ASTTableJoin::Kind::Left, ASTTableJoin::Kind::Inner, ASTTableJoin::Kind::Full, ASTTableJoin::Kind::Right};
@ -298,12 +305,12 @@ public:
if (kind == KINDS[i] && strictness == ASTTableJoin::Strictness::Any)
{
if constexpr (std::is_same_v<Func, MapInitTag>)
maps = Map<KINDS[i], ASTTableJoin::Strictness::Any, true>();
maps = Map<KINDS[i], ASTTableJoin::Strictness::Any>();
else
func(
std::integral_constant<ASTTableJoin::Kind, KINDS[i]>(),
std::integral_constant<ASTTableJoin::Strictness, ASTTableJoin::Strictness::Any>(),
std::get<Map<KINDS[i], ASTTableJoin::Strictness::Any, true>>(maps));
std::get<Map<KINDS[i], ASTTableJoin::Strictness::Any>>(maps));
return true;
}
return false;
@ -320,12 +327,12 @@ public:
if (kind == KINDS[i] && strictness == STRICTNESSES[j])
{
if constexpr (std::is_same_v<Func, MapInitTag>)
maps = Map<KINDS[i], STRICTNESSES[j], false>();
maps = Map<KINDS[i], STRICTNESSES[j]>();
else
func(
std::integral_constant<ASTTableJoin::Kind, KINDS[i]>(),
std::integral_constant<ASTTableJoin::Strictness, STRICTNESSES[j]>(),
std::get<Map<KINDS[i], STRICTNESSES[j], false>>(maps));
std::get<Map<KINDS[i], STRICTNESSES[j]>>(maps));
return true;
}
return false;
@ -359,7 +366,7 @@ private:
*/
BlocksList blocks;
std::variant<MapsAny, MapsAnyOverwrite, MapsAll, MapsAnyFull, MapsAnyFullOverwrite, MapsAllFull, MapsAsof> maps;
std::variant<MapsAny, MapsAll, MapsAnyFull, MapsAllFull, MapsAsof> maps;
/// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows.
Arena pool;
@ -421,32 +428,32 @@ private:
using JoinPtr = std::shared_ptr<Join>;
using Joins = std::vector<JoinPtr>;
template <bool overwrite_>
struct Join::MapGetterImpl<false, ASTTableJoin::Strictness::Any, overwrite_>
template <>
struct Join::MapGetterImpl<false, ASTTableJoin::Strictness::Any>
{
using Map = std::conditional_t<overwrite_, MapsAnyOverwrite, MapsAny>;
};
template <bool overwrite_>
struct Join::MapGetterImpl<true, ASTTableJoin::Strictness::Any, overwrite_>
{
using Map = std::conditional_t<overwrite_, MapsAnyFullOverwrite, MapsAnyFull>;
using Map = MapsAny;
};
template <>
struct Join::MapGetterImpl<false, ASTTableJoin::Strictness::All, false>
struct Join::MapGetterImpl<true, ASTTableJoin::Strictness::Any>
{
using Map = MapsAnyFull;
};
template <>
struct Join::MapGetterImpl<false, ASTTableJoin::Strictness::All>
{
using Map = MapsAll;
};
template <>
struct Join::MapGetterImpl<true, ASTTableJoin::Strictness::All, false>
struct Join::MapGetterImpl<true, ASTTableJoin::Strictness::All>
{
using Map = MapsAllFull;
};
template <bool fill_right>
struct Join::MapGetterImpl<fill_right, ASTTableJoin::Strictness::Asof, false>
struct Join::MapGetterImpl<fill_right, ASTTableJoin::Strictness::Asof>
{
using Map = MapsAsof;
};

View File

@ -1,8 +1,7 @@
#include <Interpreters/MutationsInterpreter.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <DataStreams/FilterBlockInputStream.h>
#include <DataStreams/ExpressionBlockInputStream.h>
#include <DataStreams/CreatingSetsBlockInputStream.h>
@ -86,12 +85,8 @@ bool MutationsInterpreter::isStorageTouchedByMutations() const
static NameSet getKeyColumns(const StoragePtr & storage)
{
const MergeTreeData * merge_tree_data = nullptr;
if (auto merge_tree = dynamic_cast<StorageMergeTree *>(storage.get()))
merge_tree_data = &merge_tree->getData();
else if (auto replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
merge_tree_data = &replicated_merge_tree->getData();
else
const MergeTreeData * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get());
if (!merge_tree_data)
return {};
NameSet key_columns;

View File

@ -220,10 +220,11 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
ReadBufferFromFile in{file_path};
Settings insert_settings;
std::string insert_query;
readStringBinary(insert_query, in);
readQueryAndSettings(in, insert_settings, insert_query);
RemoteBlockOutputStream remote{*connection, insert_query};
RemoteBlockOutputStream remote{*connection, insert_query, &insert_settings};
remote.writePrefix();
remote.writePrepared(in);
@ -240,20 +241,39 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
LOG_TRACE(log, "Finished processing `" << file_path << '`');
}
void StorageDistributedDirectoryMonitor::readQueryAndSettings(
ReadBuffer & in, Settings & insert_settings, std::string & insert_query) const
{
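/// Legacy header: varUInt(query size) followed by the query text.
/// New header: varUInt(magic number), the serialized settings, then varUInt(query size) and the query text.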
UInt64 magic_number_or_query_size;
readVarUInt(magic_number_or_query_size, in);
if (magic_number_or_query_size == UInt64(DBMS_DISTRIBUTED_SENDS_MAGIC_NUMBER))
{
insert_settings.deserialize(in);
readVarUInt(magic_number_or_query_size, in);
}
insert_query.resize(magic_number_or_query_size);
in.readStrict(insert_query.data(), magic_number_or_query_size);
}
struct StorageDistributedDirectoryMonitor::BatchHeader
{
Settings settings;
String query;
Block sample_block;
BatchHeader(String query_, Block sample_block_)
: query(std::move(query_))
BatchHeader(Settings settings_, String query_, Block sample_block_)
: settings(std::move(settings_))
, query(std::move(query_))
, sample_block(std::move(sample_block_))
{
}
bool operator==(const BatchHeader & other) const
{
return query == other.query && blocksHaveEqualStructure(sample_block, other.sample_block);
return settings == other.settings && query == other.query &&
blocksHaveEqualStructure(sample_block, other.sample_block);
}
struct Hash
@ -320,6 +340,7 @@ struct StorageDistributedDirectoryMonitor::Batch
bool batch_broken = false;
try
{
Settings insert_settings;
String insert_query;
std::unique_ptr<RemoteBlockOutputStream> remote;
bool first = true;
@ -335,12 +356,12 @@ struct StorageDistributedDirectoryMonitor::Batch
}
ReadBufferFromFile in(file_path->second);
readStringBinary(insert_query, in); /// NOTE: all files must have the same insert_query
parent.readQueryAndSettings(in, insert_settings, insert_query);
if (first)
{
first = false;
remote = std::make_unique<RemoteBlockOutputStream>(*connection, insert_query);
remote = std::make_unique<RemoteBlockOutputStream>(*connection, insert_query, &insert_settings);
remote->writePrefix();
}
@ -436,12 +457,13 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
size_t total_rows = 0;
size_t total_bytes = 0;
Block sample_block;
Settings insert_settings;
String insert_query;
try
{
/// Determine metadata of the current file and check if it is not broken.
ReadBufferFromFile in{file_path};
readStringBinary(insert_query, in);
readQueryAndSettings(in, insert_settings, insert_query);
CompressedReadBuffer decompressing_in(in);
NativeBlockInputStream block_in(decompressing_in, ClickHouseRevision::get());
@ -468,7 +490,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
throw;
}
BatchHeader batch_header(std::move(insert_query), std::move(sample_block));
BatchHeader batch_header(std::move(insert_settings), std::move(insert_query), std::move(sample_block));
Batch & batch = header_to_batch.try_emplace(batch_header, *this, files).first->second;
batch.file_indices.push_back(file_idx);

View File

@ -7,6 +7,7 @@
#include <thread>
#include <mutex>
#include <condition_variable>
#include <IO/ReadBufferFromFile.h>
namespace DB
@ -57,6 +58,9 @@ private:
std::condition_variable cond;
Logger * log;
ThreadFromGlobalPool thread{&StorageDistributedDirectoryMonitor::run, this};
/// Read the insert query and the insert settings in a backward compatible way.
void readQueryAndSettings(ReadBuffer & in, Settings & insert_settings, std::string & insert_query) const;
};
}

View File

@ -59,10 +59,10 @@ namespace ErrorCodes
DistributedBlockOutputStream::DistributedBlockOutputStream(
StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_,
const Settings & settings_, bool insert_sync_, UInt64 insert_timeout_)
: storage(storage), query_ast(query_ast), query_string(queryToString(query_ast)),
cluster(cluster_), settings(settings_), insert_sync(insert_sync_),
const Context & context_, StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_,
bool insert_sync_, UInt64 insert_timeout_)
: context(context_), storage(storage), query_ast(query_ast), query_string(queryToString(query_ast)),
cluster(cluster_), insert_sync(insert_sync_),
insert_timeout(insert_timeout_), log(&Logger::get("DistributedBlockOutputStream"))
{
}
@ -249,7 +249,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
throw Exception("There are several writing job for an automatically replicated shard", ErrorCodes::LOGICAL_ERROR);
/// TODO: it makes sense to rewrite skip_unavailable_shards and max_parallel_replicas here
auto connections = shard_info.pool->getMany(&settings, PoolMode::GET_ONE);
auto connections = shard_info.pool->getMany(&context.getSettingsRef(), PoolMode::GET_ONE);
if (connections.empty() || connections.front().isNull())
throw Exception("Expected exactly one connection for shard " + toString(job.shard_index), ErrorCodes::LOGICAL_ERROR);
@ -263,7 +263,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
if (!connection_pool)
throw Exception("Connection pool for replica " + replica.readableString() + " does not exist", ErrorCodes::LOGICAL_ERROR);
job.connection_entry = connection_pool->get(&settings);
job.connection_entry = connection_pool->get(&context.getSettingsRef());
if (job.connection_entry.isNull())
throw Exception("Got empty connection for replica" + replica.readableString(), ErrorCodes::LOGICAL_ERROR);
}
@ -271,7 +271,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
if (throttler)
job.connection_entry->setThrottler(throttler);
job.stream = std::make_shared<RemoteBlockOutputStream>(*job.connection_entry, query_string, &settings);
job.stream = std::make_shared<RemoteBlockOutputStream>(*job.connection_entry, query_string, &context.getSettingsRef());
job.stream->writePrefix();
}
@ -283,8 +283,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
if (!job.stream)
{
/// Forward user settings
job.local_context = std::make_unique<Context>(storage.global_context);
job.local_context->setSettings(settings);
job.local_context = std::make_unique<Context>(context);
InterpreterInsertQuery interp(query_ast, *job.local_context);
job.stream = interp.execute().out;
@ -304,6 +303,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
void DistributedBlockOutputStream::writeSync(const Block & block)
{
const Settings & settings = context.getSettingsRef();
const auto & shards_info = cluster->getShardsInfo();
size_t num_shards = shards_info.size();
@ -504,7 +504,7 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const siz
void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_t repeats)
{
/// Async insert does not support settings forwarding yet, whereas the sync one does
InterpreterInsertQuery interp(query_ast, storage.global_context);
InterpreterInsertQuery interp(query_ast, context);
auto block_io = interp.execute();
block_io.out->writePrefix();
@ -553,6 +553,8 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std::
CompressedWriteBuffer compress{out};
NativeBlockOutputStream stream{compress, ClickHouseRevision::get(), block.cloneEmpty()};
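/// The header written below is the counterpart of readQueryAndSettings(): the magic number
/// marks that forwarded settings precede the query.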
writeVarUInt(UInt64(DBMS_DISTRIBUTED_SENDS_MAGIC_NUMBER), out);
context.getSettingsRef().serialize(out);
writeStringBinary(query_string, out);
stream.writePrefix();

View File

@ -35,8 +35,8 @@ class StorageDistributed;
class DistributedBlockOutputStream : public IBlockOutputStream
{
public:
DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_,
const Settings & settings_, bool insert_sync_, UInt64 insert_timeout_);
DistributedBlockOutputStream(const Context & context_, StorageDistributed & storage, const ASTPtr & query_ast,
const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_);
Block getHeader() const override;
void write(const Block & block) override;
@ -78,11 +78,11 @@ private:
std::string getCurrentStateDescription();
private:
const Context & context;
StorageDistributed & storage;
ASTPtr query_ast;
String query_string;
ClusterPtr cluster;
const Settings & settings;
size_t inserted_blocks = 0;
size_t inserted_rows = 0;

View File

@ -23,6 +23,16 @@ namespace ErrorCodes
extern const int EMPTY_LIST_OF_COLUMNS_PASSED;
}
const ColumnsDescription & ITableDeclaration::getColumns() const
{
return columns;
}
const IndicesDescription & ITableDeclaration::getIndices() const
{
return indices;
}
void ITableDeclaration::setColumns(ColumnsDescription columns_)
{

View File

@ -13,11 +13,11 @@ namespace DB
class ITableDeclaration
{
public:
virtual const ColumnsDescription & getColumns() const { return columns; }
virtual void setColumns(ColumnsDescription columns_);
const ColumnsDescription & getColumns() const;
void setColumns(ColumnsDescription columns_);
virtual const IndicesDescription & getIndices() const { return indices; }
virtual void setIndices(IndicesDescription indices_);
const IndicesDescription & getIndices() const;
void setIndices(IndicesDescription indices_);
/// NOTE: These methods should include virtual columns, but should NOT include ALIAS columns
/// (they are treated separately).

View File

@ -84,30 +84,30 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPart()
MergeTreeReadTask & current_task, MergeTreeRangeReader & current_reader)
{
if (!current_task.size_predictor)
return current_max_block_size_rows;
return static_cast<size_t>(current_max_block_size_rows);
/// Calculates the number of rows that will be read, using preferred_block_size_bytes.
/// Can't be less than avg_index_granularity.
UInt64 rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes);
size_t rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes);
if (!rows_to_read)
return rows_to_read;
UInt64 total_row_in_current_granule = current_reader.numRowsInCurrentGranule();
rows_to_read = std::max<UInt64>(total_row_in_current_granule, rows_to_read);
auto total_row_in_current_granule = current_reader.numRowsInCurrentGranule();
rows_to_read = std::max(total_row_in_current_granule, rows_to_read);
if (current_preferred_max_column_in_block_size_bytes)
{
/// Calculates the number of rows that will be read, using preferred_max_column_in_block_size_bytes.
UInt64 rows_to_read_for_max_size_column
auto rows_to_read_for_max_size_column
= current_task.size_predictor->estimateNumRowsForMaxSizeColumn(current_preferred_max_column_in_block_size_bytes);
double filtration_ratio = std::max(min_filtration_ratio, 1.0 - current_task.size_predictor->filtered_rows_ratio);
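/// Only about filtration_ratio of the rows read are expected to survive the filter,
/// so the per-column budget below is scaled up by 1 / filtration_ratio.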
auto rows_to_read_for_max_size_column_with_filtration
= static_cast<UInt64>(rows_to_read_for_max_size_column / filtration_ratio);
= static_cast<size_t>(rows_to_read_for_max_size_column / filtration_ratio);
/// If preferred_max_column_in_block_size_bytes is used, the number of rows to read can be less than current_index_granularity.
rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration);
}
UInt64 unread_rows_in_current_granule = current_reader.numPendingRowsInCurrentGranule();
auto unread_rows_in_current_granule = current_reader.numPendingRowsInCurrentGranule();
if (unread_rows_in_current_granule >= rows_to_read)
return rows_to_read;

View File

@ -14,7 +14,7 @@ Block MergeTreeBlockOutputStream::getHeader() const
void MergeTreeBlockOutputStream::write(const Block & block)
{
storage.data.delayInsertOrThrowIfNeeded();
storage.delayInsertOrThrowIfNeeded();
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block);
for (auto & current_block : part_blocks)
@ -22,7 +22,7 @@ void MergeTreeBlockOutputStream::write(const Block & block)
Stopwatch watch;
MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block);
storage.data.renameTempPartAndAdd(part, &storage.increment);
storage.renameTempPartAndAdd(part, &storage.increment);
PartLog::addNewPart(storage.global_context, part, watch.elapsed());

View File

@ -116,7 +116,7 @@ MergeTreeData::MergeTreeData(
database_name(database_), table_name(table_),
full_path(full_path_),
broken_part_callback(broken_part_callback_),
log_name(database_name + "." + table_name), log(&Logger::get(log_name + " (Data)")),
log_name(database_name + "." + table_name), log(&Logger::get(log_name)),
data_parts_by_info(data_parts_indexes.get<TagByInfo>()),
data_parts_by_state_and_info(data_parts_indexes.get<TagByStateAndInfo>())
{
@ -730,7 +730,7 @@ String MergeTreeData::MergingParams::getModeName() const
}
Int64 MergeTreeData::getMaxBlockNumber()
Int64 MergeTreeData::getMaxBlockNumber() const
{
auto lock = lockParts();
@ -2665,7 +2665,7 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const A
return false;
}
bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const
bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &) const
{
/// Make sure that the left side of the IN operator contain part of the key.
/// If there is a tuple on the left side of the IN operator, at least one item of the tuple
@ -2694,18 +2694,12 @@ bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) con
}
}
MergeTreeData * MergeTreeData::checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const
MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const
{
MergeTreeData * src_data;
if (auto storage_merge_tree = dynamic_cast<StorageMergeTree *>(source_table.get()))
src_data = &storage_merge_tree->data;
else if (auto storage_replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(source_table.get()))
src_data = &storage_replicated_merge_tree->data;
else
{
throw Exception("Table " + table_name + " supports attachPartitionFrom only for MergeTree or ReplicatedMergeTree engines."
MergeTreeData * src_data = dynamic_cast<MergeTreeData *>(source_table.get());
if (!src_data)
throw Exception("Table " + table_name + " supports attachPartitionFrom only for MergeTree family of table engines."
" Got " + source_table->getName(), ErrorCodes::NOT_IMPLEMENTED);
}
if (getColumns().getAllPhysical().sizeOfDifference(src_data->getColumns().getAllPhysical()))
throw Exception("Tables have different structure", ErrorCodes::INCOMPATIBLE_COLUMNS);
@ -2724,7 +2718,7 @@ MergeTreeData * MergeTreeData::checkStructureAndGetMergeTreeData(const StoragePt
if (format_version != src_data->format_version)
throw Exception("Tables have different format_version", ErrorCodes::BAD_ARGUMENTS);
return src_data;
return *src_data;
}
MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPart(const MergeTreeData::DataPartPtr & src_part,

View File

@ -3,10 +3,11 @@
#include <Common/SimpleIncrement.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Storages/ITableDeclaration.h>
#include <Storages/IStorage.h>
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/MergeTree/MergeTreeMutationStatus.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromFile.h>
@ -89,7 +90,7 @@ namespace ErrorCodes
/// - MergeTreeDataWriter
/// - MergeTreeDataMergerMutator
class MergeTreeData : public ITableDeclaration
class MergeTreeData : public IStorage
{
public:
/// Function to call if the part is suspected to contain corrupt data.
@ -344,12 +345,21 @@ public:
bool attach,
BrokenPartCallback broken_part_callback_ = [](const String &){});
/// Load the set of data parts from disk. Call once - immediately after the object is created.
void loadDataParts(bool skip_sanity_checks);
ASTPtr getPartitionKeyAST() const override { return partition_by_ast; }
ASTPtr getSortingKeyAST() const override { return sorting_key_expr_ast; }
ASTPtr getPrimaryKeyAST() const override { return primary_key_expr_ast; }
ASTPtr getSamplingKeyAST() const override { return sample_by_ast; }
bool supportsPrewhere() const { return true; }
Names getColumnsRequiredForPartitionKey() const override { return (partition_key_expr ? partition_key_expr->getRequiredColumns() : Names{}); }
Names getColumnsRequiredForSortingKey() const override { return sorting_key_expr->getRequiredColumns(); }
Names getColumnsRequiredForPrimaryKey() const override { return primary_key_expr->getRequiredColumns(); }
Names getColumnsRequiredForSampling() const override { return columns_required_for_sampling; }
Names getColumnsRequiredForFinal() const override { return sorting_key_expr->getRequiredColumns(); }
bool supportsFinal() const
bool supportsPrewhere() const override { return true; }
bool supportsSampling() const override { return sample_by_ast != nullptr; }
bool supportsFinal() const override
{
return merging_params.mode == MergingParams::Collapsing
|| merging_params.mode == MergingParams::Summing
@ -358,9 +368,7 @@ public:
|| merging_params.mode == MergingParams::VersionedCollapsing;
}
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const;
Int64 getMaxBlockNumber();
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &) const override;
NameAndTypePair getColumn(const String & column_name) const override
{
@ -385,14 +393,17 @@ public:
|| column_name == "_sample_factor";
}
String getDatabaseName() const { return database_name; }
String getDatabaseName() const override { return database_name; }
String getTableName() const override { return table_name; }
String getTableName() const { return table_name; }
/// Load the set of data parts from disk. Call once - immediately after the object is created.
void loadDataParts(bool skip_sanity_checks);
String getFullPath() const { return full_path; }
String getLogName() const { return log_name; }
Int64 getMaxBlockNumber() const;
/// Returns a copy of the list so that the caller shouldn't worry about locks.
DataParts getDataParts(const DataPartStates & affordable_states) const;
/// Returns sorted list of the parts with specified states
@ -539,23 +550,11 @@ public:
*/
static ASTPtr extractKeyExpressionList(const ASTPtr & node);
Names getColumnsRequiredForPartitionKey() const { return (partition_key_expr ? partition_key_expr->getRequiredColumns() : Names{}); }
bool hasSortingKey() const { return !sorting_key_columns.empty(); }
bool hasPrimaryKey() const { return !primary_key_columns.empty(); }
bool hasSkipIndices() const { return !skip_indices.empty(); }
bool hasTableTTL() const { return ttl_table_ast != nullptr; }
ASTPtr getSortingKeyAST() const { return sorting_key_expr_ast; }
ASTPtr getPrimaryKeyAST() const { return primary_key_expr_ast; }
Names getColumnsRequiredForSortingKey() const { return sorting_key_expr->getRequiredColumns(); }
Names getColumnsRequiredForPrimaryKey() const { return primary_key_expr->getRequiredColumns(); }
bool supportsSampling() const { return sample_by_ast != nullptr; }
ASTPtr getSamplingExpression() const { return sample_by_ast; }
Names getColumnsRequiredForSampling() const { return columns_required_for_sampling; }
/// Check that the part is not broken and calculate the checksums for it if they are not present.
MutableDataPartPtr loadPartAndFixMetadata(const String & relative_path);
@ -592,11 +591,13 @@ public:
/// Extracts the MergeTreeData of another *MergeTree* storage
/// and checks that its structure is suitable for ALTER TABLE ATTACH PARTITION FROM.
/// The structures of both tables should be locked.
MergeTreeData * checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const;
MergeTreeData & checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const;
MergeTreeData::MutableDataPartPtr cloneAndLoadDataPart(const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix,
const MergeTreePartInfo & dst_part_info);
virtual std::vector<MergeTreeMutationStatus> getMutationsStatus() const = 0;
MergeTreeDataFormatVersion format_version;
Context global_context;
@ -655,13 +656,12 @@ public:
/// For generating names of temporary parts during insertion.
SimpleIncrement insert_increment;
private:
protected:
friend struct MergeTreeDataPart;
friend class StorageMergeTree;
friend class StorageReplicatedMergeTree;
friend class MergeTreeDataMergerMutator;
friend class ReplicatedMergeTreeAlterThread;
friend struct ReplicatedMergeTreeTableMetadata;
friend class StorageReplicatedMergeTree;
ASTPtr partition_by_ast;
ASTPtr order_by_ast;
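Taken together, these header changes replace delegation with inheritance: MergeTreeData itself now implements IStorage, and the concrete storages derive from it instead of owning a `data` member. A compressed sketch of the pattern (not the literal declarations, which also mix in ext::shared_ptr_helper):

    // Before: every IStorage method was forwarded to the member.
    class StorageMergeTree : public IStorage
    {
        MergeTreeData data;
        bool supportsFinal() const override { return data.supportsFinal(); }
    };

    // After: the forwarding layer disappears.
    class MergeTreeData : public IStorage { /* ... */ };
    class StorageMergeTree : public MergeTreeData
    {
        // supportsFinal(), getColumns(), etc. are inherited directly.
    };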


@ -449,7 +449,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);
ASTPtr args = std::make_shared<ASTExpressionList>();
args->children.push_back(data.getSamplingExpression());
args->children.push_back(data.getSamplingKeyAST());
args->children.push_back(std::make_shared<ASTLiteral>(lower));
lower_function = std::make_shared<ASTFunction>();
@ -466,7 +466,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);
ASTPtr args = std::make_shared<ASTExpressionList>();
args->children.push_back(data.getSamplingExpression());
args->children.push_back(data.getSamplingKeyAST());
args->children.push_back(std::make_shared<ASTLiteral>(upper));
upper_function = std::make_shared<ASTFunction>();
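Both hunks swap getSamplingExpression() for the getSamplingKeyAST() accessor now declared on IStorage. For context, the surrounding code assembles the AST of a comparison such as sample_key < upper; a sketch of how the upper-bound node is typically built (the "less" function name is an assumption, not shown in this hunk):

    ASTPtr args = std::make_shared<ASTExpressionList>();
    args->children.push_back(data.getSamplingKeyAST());            // left operand: sampling key
    args->children.push_back(std::make_shared<ASTLiteral>(upper)); // right operand: the bound

    upper_function = std::make_shared<ASTFunction>();
    upper_function->name = "less";                                 // i.e. sample_key < upper
    upper_function->arguments = args;
    upper_function->children.push_back(upper_function->arguments);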


@ -36,7 +36,7 @@ void ReplicatedMergeTreeAlterThread::run()
try
{
/** We have a description of columns in ZooKeeper, common for all replicas (Example: /clickhouse/tables/02-06/visits/columns),
* as well as a description of columns in local file with metadata (storage.data.getColumnsList()).
* as well as a description of columns in local file with metadata (storage.getColumnsList()).
*
* If these descriptions are different - you need to do ALTER.
*
@ -83,7 +83,7 @@ void ReplicatedMergeTreeAlterThread::run()
const String & metadata_str = metadata_znode.contents;
auto metadata_in_zk = ReplicatedMergeTreeTableMetadata::parse(metadata_str);
auto metadata_diff = ReplicatedMergeTreeTableMetadata(storage.data).checkAndFindDiff(metadata_in_zk, /* allow_alter = */ true);
auto metadata_diff = ReplicatedMergeTreeTableMetadata(storage).checkAndFindDiff(metadata_in_zk, /* allow_alter = */ true);
/// If you need to lock table structure, then suspend merges.
ActionLock merge_blocker = storage.merger_mutator.actions_blocker.cancel();
@ -123,7 +123,7 @@ void ReplicatedMergeTreeAlterThread::run()
}
/// You need to get a list of parts under table lock to avoid race condition with merge.
parts = storage.data.getDataParts();
parts = storage.getDataParts();
storage.columns_version = columns_version;
storage.metadata_version = metadata_version;
@ -140,7 +140,7 @@ void ReplicatedMergeTreeAlterThread::run()
int changed_parts = 0;
if (!changed_columns_version)
parts = storage.data.getDataParts();
parts = storage.getDataParts();
const auto columns_for_parts = storage.getColumns().getAllPhysical();
const auto indices_for_parts = storage.getIndices();
@ -150,7 +150,7 @@ void ReplicatedMergeTreeAlterThread::run()
/// Update the part and write result to temporary files.
/// TODO: You can skip checking for too large changes if ZooKeeper has, for example,
/// node /flags/force_alter.
auto transaction = storage.data.alterDataPart(part, columns_for_parts, indices_for_parts.indices, false);
auto transaction = storage.alterDataPart(part, columns_for_parts, indices_for_parts.indices, false);
if (!transaction)
continue;
@ -160,7 +160,7 @@ void ReplicatedMergeTreeAlterThread::run()
}
/// Column sizes could be quietly changed in case of MODIFY/ADD COLUMN
storage.data.recalculateColumnSizes();
storage.recalculateColumnSizes();
if (changed_columns_version)
{


@ -35,7 +35,7 @@ namespace ErrorCodes
ReplicatedMergeTreeBlockOutputStream::ReplicatedMergeTreeBlockOutputStream(
StorageReplicatedMergeTree & storage_, size_t quorum_, size_t quorum_timeout_ms_, size_t max_parts_per_block, bool deduplicate_)
: storage(storage_), quorum(quorum_), quorum_timeout_ms(quorum_timeout_ms_), max_parts_per_block(max_parts_per_block), deduplicate(deduplicate_),
log(&Logger::get(storage.data.getLogName() + " (Replicated OutputStream)"))
log(&Logger::get(storage.getLogName() + " (Replicated OutputStream)"))
{
/// The quorum value `1` has the same meaning as if it is disabled.
if (quorum == 1)
@ -109,7 +109,7 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block)
last_block_is_duplicate = false;
/// TODO Is it possible to not lock the table structure here?
storage.data.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event);
storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event);
auto zookeeper = storage.getZooKeeper();
assertSessionIsNotExpired(zookeeper);
@ -297,8 +297,8 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo
quorum_info.host_node_version));
}
MergeTreeData::Transaction transaction(storage.data); /// If you can not add a part to ZK, we'll remove it back from the working set.
storage.data.renameTempPartAndAdd(part, nullptr, &transaction);
MergeTreeData::Transaction transaction(storage); /// If you can not add a part to ZK, we'll remove it back from the working set.
storage.renameTempPartAndAdd(part, nullptr, &transaction);
Coordination::Responses responses;
int32_t multi_code = zookeeper->tryMultiNoThrow(ops, responses); /// 1 RTT
@ -414,7 +414,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo
void ReplicatedMergeTreeBlockOutputStream::writePrefix()
{
storage.data.throwInsertIfNeeded();
storage.throwInsertIfNeeded();
}


@ -27,8 +27,8 @@ ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplic
void ReplicatedMergeTreeCleanupThread::run()
{
const auto CLEANUP_SLEEP_MS = storage.data.settings.cleanup_delay_period * 1000
+ std::uniform_int_distribution<UInt64>(0, storage.data.settings.cleanup_delay_period_random_add * 1000)(rng);
const auto CLEANUP_SLEEP_MS = storage.settings.cleanup_delay_period * 1000
+ std::uniform_int_distribution<UInt64>(0, storage.settings.cleanup_delay_period_random_add * 1000)(rng);
try
{
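The cleanup period is now read directly from the storage's settings. A worked example with assumed values (cleanup_delay_period = 30, cleanup_delay_period_random_add = 10; the real defaults may differ):

    // CLEANUP_SLEEP_MS = 30 * 1000 + uniform(0, 10 * 1000)
    //                  => a value in [30000, 40000] ms,
    // so replicas jitter their cleanup runs instead of hitting ZooKeeper in lockstep.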
@ -57,7 +57,7 @@ void ReplicatedMergeTreeCleanupThread::iterate()
{
/// TODO: Implement tryLockStructureForShare.
auto lock = storage.lockStructureForShare(false, "");
storage.data.clearOldTemporaryDirectories();
storage.clearOldTemporaryDirectories();
}
/// This is loose condition: no problem if we actually had lost leadership at this moment
@ -82,7 +82,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
int children_count = stat.numChildren;
/// We will wait for 1.1 times more records to accumulate than necessary.
if (static_cast<double>(children_count) < storage.data.settings.min_replicated_logs_to_keep * 1.1)
if (static_cast<double>(children_count) < storage.settings.min_replicated_logs_to_keep * 1.1)
return;
Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas", &stat);
@ -100,8 +100,8 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
std::sort(entries.begin(), entries.end());
String min_saved_record_log_str = entries[
entries.size() > storage.data.settings.max_replicated_logs_to_keep.value
? entries.size() - storage.data.settings.max_replicated_logs_to_keep.value
entries.size() > storage.settings.max_replicated_logs_to_keep.value
? entries.size() - storage.settings.max_replicated_logs_to_keep.value
: 0];
/// Replicas that were marked is_lost but are active.
@ -203,7 +203,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
min_saved_log_pointer = std::min(min_saved_log_pointer, min_log_pointer_lost_candidate);
/// We will not touch the last `min_replicated_logs_to_keep` records.
entries.erase(entries.end() - std::min<UInt64>(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end());
entries.erase(entries.end() - std::min<UInt64>(entries.size(), storage.settings.min_replicated_logs_to_keep.value), entries.end());
/// We will not touch records that are no less than `min_saved_log_pointer`.
entries.erase(std::lower_bound(entries.begin(), entries.end(), "log-" + padIndex(min_saved_log_pointer)), entries.end());
@ -294,12 +294,12 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
/// Use ZooKeeper's first node (last according to time) timestamp as "current" time.
Int64 current_time = timed_blocks.front().ctime;
Int64 time_threshold = std::max(static_cast<Int64>(0), current_time - static_cast<Int64>(1000 * storage.data.settings.replicated_deduplication_window_seconds));
Int64 time_threshold = std::max(static_cast<Int64>(0), current_time - static_cast<Int64>(1000 * storage.settings.replicated_deduplication_window_seconds));
/// Virtual node, all nodes that are "greater" than this one will be deleted
NodeWithStat block_threshold{{}, time_threshold};
size_t current_deduplication_window = std::min<size_t>(timed_blocks.size(), storage.data.settings.replicated_deduplication_window.value);
size_t current_deduplication_window = std::min<size_t>(timed_blocks.size(), storage.settings.replicated_deduplication_window.value);
auto first_outdated_block_fixed_threshold = timed_blocks.begin() + current_deduplication_window;
auto first_outdated_block_time_threshold = std::upper_bound(timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime);
auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold);
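To make the two thresholds concrete, a worked example with assumed settings (replicated_deduplication_window = 100, replicated_deduplication_window_seconds = 7 days) over 250 block nodes sorted newest-first:

    // fixed threshold: iterator at position 100 (keep at most the 100 newest blocks)
    // time threshold:  say blocks from position 180 onward are older than 7 days
    // std::min takes the earlier iterator (position 100), so blocks 100..249 are
    // removed: a block survives only while it is inside BOTH windows.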
@ -392,10 +392,10 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper &
void ReplicatedMergeTreeCleanupThread::clearOldMutations()
{
if (!storage.data.settings.finished_mutations_to_keep)
if (!storage.settings.finished_mutations_to_keep)
return;
if (storage.queue.countFinishedMutations() <= storage.data.settings.finished_mutations_to_keep)
if (storage.queue.countFinishedMutations() <= storage.settings.finished_mutations_to_keep)
{
/// Not strictly necessary, but helps to avoid unnecessary ZooKeeper requests.
/// If even this replica hasn't finished enough mutations yet, then we don't need to clean anything.
@ -422,10 +422,10 @@ void ReplicatedMergeTreeCleanupThread::clearOldMutations()
/// Do not remove entries that are greater than `min_pointer` (they are not done yet).
entries.erase(std::upper_bound(entries.begin(), entries.end(), padIndex(min_pointer)), entries.end());
/// Do not remove last `storage.data.settings.finished_mutations_to_keep` entries.
if (entries.size() <= storage.data.settings.finished_mutations_to_keep)
/// Do not remove last `storage.settings.finished_mutations_to_keep` entries.
if (entries.size() <= storage.settings.finished_mutations_to_keep)
return;
entries.erase(entries.end() - storage.data.settings.finished_mutations_to_keep, entries.end());
entries.erase(entries.end() - storage.settings.finished_mutations_to_keep, entries.end());
if (entries.empty())
return;
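After the two erase calls, `entries` holds exactly the znodes to delete. A worked example with assumed values (10 mutation znodes m01..m10, min_pointer = 8, finished_mutations_to_keep = 2):

    // drop entries above the pointer:  m01..m08 remain (m09, m10 are not finished yet)
    // keep the newest 2 finished ones: m01..m06 remain (m07, m08 stay as history)
    // m01..m06 are then removed from ZooKeeper.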


@ -90,7 +90,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
}
/// If the part is not in ZooKeeper, we'll check if it's at least somewhere.
auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.data.format_version);
auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version);
/** The logic is as follows:
* - if some live or inactive replica has such a part, or a part covering it
@ -126,7 +126,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPart(const String & par
Strings parts = zookeeper->getChildren(storage.zookeeper_path + "/replicas/" + replica + "/parts");
for (const String & part_on_replica : parts)
{
auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica, storage.data.format_version);
auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica, storage.format_version);
if (part_on_replica_info.contains(part_info))
{
@ -189,9 +189,9 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
/// If the part is still in the PreCommitted -> Committed transition, it is not lost
/// and there is no need to go searching for it on other replicas. To definitely find the needed part
/// if it exists (or a part containing it) we first search among the PreCommitted parts.
auto part = storage.data.getPartIfExists(part_name, {MergeTreeDataPartState::PreCommitted});
auto part = storage.getPartIfExists(part_name, {MergeTreeDataPartState::PreCommitted});
if (!part)
part = storage.data.getActiveContainingPart(part_name);
part = storage.getActiveContainingPart(part_name);
/// We do not have this or a covering part.
if (!part)
@ -235,8 +235,8 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
checkDataPart(
part,
true,
storage.data.primary_key_data_types,
storage.data.skip_indices,
storage.primary_key_data_types,
storage.skip_indices,
[this] { return need_stop.load(); });
if (need_stop)
@ -259,7 +259,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
storage.removePartAndEnqueueFetch(part_name);
/// Delete part locally.
storage.data.forgetPartAndMoveToDetached(part, "broken_");
storage.forgetPartAndMoveToDetached(part, "broken_");
}
}
else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < time(nullptr))
@ -270,7 +270,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
LOG_ERROR(log, "Unexpected part " << part_name << " in filesystem. Removing.");
storage.data.forgetPartAndMoveToDetached(part, "unexpected_");
storage.forgetPartAndMoveToDetached(part, "unexpected_");
}
else
{


@ -20,7 +20,7 @@ namespace ErrorCodes
ReplicatedMergeTreeQueue::ReplicatedMergeTreeQueue(StorageReplicatedMergeTree & storage_)
: storage(storage_)
, format_version(storage.data.format_version)
, format_version(storage.format_version)
, current_parts(format_version)
, virtual_parts(format_version)
{}
@ -62,14 +62,14 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper)
Strings children = zookeeper->getChildren(queue_path);
auto to_remove_it = std::remove_if(
children.begin(), children.end(), [&](const String & path)
{
return already_loaded_paths.count(path);
});
children.begin(), children.end(), [&](const String & path)
{
return already_loaded_paths.count(path);
});
LOG_DEBUG(log,
"Having " << (to_remove_it - children.begin()) << " queue entries to load, "
<< (children.end() - to_remove_it) << " entries already loaded.");
"Having " << (to_remove_it - children.begin()) << " queue entries to load, "
<< (children.end() - to_remove_it) << " entries already loaded.");
children.erase(to_remove_it, children.end());
std::sort(children.begin(), children.end());


@ -44,11 +44,11 @@ ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(Storage
, log(&Logger::get(log_name))
, active_node_identifier(generateActiveNodeIdentifier())
{
check_period_ms = storage.data.settings.zookeeper_session_expiration_check_period.totalSeconds() * 1000;
check_period_ms = storage.settings.zookeeper_session_expiration_check_period.totalSeconds() * 1000;
/// Periodicity of checking lag of replica.
if (check_period_ms > static_cast<Int64>(storage.data.settings.check_delay_period) * 1000)
check_period_ms = storage.data.settings.check_delay_period * 1000;
if (check_period_ms > static_cast<Int64>(storage.settings.check_delay_period) * 1000)
check_period_ms = storage.settings.check_delay_period * 1000;
task = storage.global_context.getSchedulePool().createTask(log_name, [this]{ run(); });
}
@ -121,7 +121,7 @@ void ReplicatedMergeTreeRestartingThread::run()
}
time_t current_time = time(nullptr);
if (current_time >= prev_time_of_check_delay + static_cast<time_t>(storage.data.settings.check_delay_period))
if (current_time >= prev_time_of_check_delay + static_cast<time_t>(storage.settings.check_delay_period))
{
/// Find out lag of replicas.
time_t absolute_delay = 0;
@ -136,10 +136,10 @@ void ReplicatedMergeTreeRestartingThread::run()
/// We give up leadership if the relative lag is greater than threshold.
if (storage.is_leader
&& relative_delay > static_cast<time_t>(storage.data.settings.min_relative_delay_to_yield_leadership))
&& relative_delay > static_cast<time_t>(storage.settings.min_relative_delay_to_yield_leadership))
{
LOG_INFO(log, "Relative replica delay (" << relative_delay << " seconds) is bigger than threshold ("
<< storage.data.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership.");
<< storage.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership.");
ProfileEvents::increment(ProfileEvents::ReplicaYieldLeadership);
@ -181,7 +181,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup()
updateQuorumIfWeHavePart();
if (storage.data.settings.replicated_can_become_leader)
if (storage.settings.replicated_can_become_leader)
storage.enterLeaderElection();
else
LOG_INFO(log, "Will not enter leader election because replicated_can_become_leader=0");
@ -239,13 +239,13 @@ void ReplicatedMergeTreeRestartingThread::removeFailedQuorumParts()
for (auto part_name : failed_parts)
{
auto part = storage.data.getPartIfExists(
auto part = storage.getPartIfExists(
part_name, {MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated});
if (part)
{
LOG_DEBUG(log, "Found part " << part_name << " with failed quorum. Moving to detached. This shouldn't happen often.");
storage.data.forgetPartAndMoveToDetached(part, "noquorum_");
storage.forgetPartAndMoveToDetached(part, "noquorum_");
storage.queue.removeFromVirtualParts(part->info);
}
}


@ -32,9 +32,9 @@ public:
bool supportsIndexForIn() const override { return true; }
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & /* query_context */) const override
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const override
{
return part->storage.mayBenefitFromIndexForIn(left_in_operand);
return part->storage.mayBenefitFromIndexForIn(left_in_operand, query_context);
}
protected:


@ -333,7 +333,8 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const Context & c
/// DistributedBlockOutputStream will not own cluster, but will own ConnectionPools of the cluster
return std::make_shared<DistributedBlockOutputStream>(
*this, createInsertToRemoteTableQuery(remote_database, remote_table, getSampleBlock()), cluster, settings, insert_sync, timeout);
context, *this, createInsertToRemoteTableQuery(remote_database, remote_table, getSampleBlock()), cluster,
insert_sync, timeout);
}


@ -338,7 +338,7 @@ private:
throw Exception("ASOF join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED);
}
else
for (auto current = &static_cast<const typename Map::mapped_type::Base_t &>(it->getSecond()); current != nullptr;
for (auto current = &static_cast<const typename Map::mapped_type::Base &>(it->getSecond()); current != nullptr;
current = current->next)
{
for (size_t j = 0; j < columns.size(); ++j)


@ -8,6 +8,7 @@
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/InterpreterDropQuery.h>
#include <Interpreters/InterpreterRenameQuery.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/AddDefaultDatabaseVisitor.h>
@ -26,6 +27,10 @@ namespace ErrorCodes
extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW;
}
static inline String generateInnerTableName(const String & table_name)
{
return ".inner." + table_name;
}
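generateInnerTableName centralizes the ".inner." prefix so the new rename() support below derives the target table name the same way the constructor does. For example:

    generateInnerTableName("visits");  // -> ".inner.visits"
    // after RENAME ... TO visits_v2, the inner table becomes ".inner.visits_v2"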
static void extractDependentTable(ASTSelectQuery & query, String & select_database_name, String & select_table_name)
{
@ -128,7 +133,7 @@ StorageMaterializedView::StorageMaterializedView(
else
{
target_database_name = database_name;
target_table_name = ".inner." + table_name;
target_table_name = generateInnerTableName(table_name);
has_inner_table = true;
}
@ -265,6 +270,53 @@ void StorageMaterializedView::mutate(const MutationCommands & commands, const Co
getTargetTable()->mutate(commands, context);
}
static void executeRenameQuery(Context & global_context, const String & database_name, const String & table_original_name, const String & new_table_name)
{
if (global_context.tryGetTable(database_name, table_original_name))
{
auto rename = std::make_shared<ASTRenameQuery>();
ASTRenameQuery::Table from;
from.database = database_name;
from.table = table_original_name;
ASTRenameQuery::Table to;
to.database = database_name;
to.table = new_table_name;
ASTRenameQuery::Element elem;
elem.from = from;
elem.to = to;
rename->elements.emplace_back(elem);
InterpreterRenameQuery(rename, global_context).execute();
}
}
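executeRenameQuery builds the same AST that a RENAME TABLE statement would produce and runs it through InterpreterRenameQuery, so the inner table moves through the normal rename machinery. A hypothetical call (the names are invented for illustration):

    // Renames `.inner.mv` to `.inner.mv2` within the same database, if the source exists.
    executeRenameQuery(global_context, "default", ".inner.mv", ".inner.mv2");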
void StorageMaterializedView::rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & new_table_name)
{
if (has_inner_table && tryGetTargetTable())
{
String new_target_table_name = generateInnerTableName(new_table_name);
executeRenameQuery(global_context, target_database_name, target_table_name, new_target_table_name);
target_table_name = new_target_table_name;
}
auto lock = global_context.getLock();
global_context.removeDependencyUnsafe(
DatabaseAndTableName(select_database_name, select_table_name),
DatabaseAndTableName(database_name, table_name));
table_name = new_table_name;
global_context.addDependencyUnsafe(
DatabaseAndTableName(select_database_name, select_table_name),
DatabaseAndTableName(database_name, table_name));
}
void StorageMaterializedView::shutdown()
{
/// Make sure the dependency is removed after DETACH TABLE


@ -39,6 +39,8 @@ public:
void mutate(const MutationCommands & commands, const Context & context) override;
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override;
void shutdown() override;
void checkTableCanBeDropped() const override;


@ -60,28 +60,28 @@ StorageMergeTree::StorageMergeTree(
const ASTPtr & primary_key_ast_,
const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
const ASTPtr & ttl_table_ast_,
const MergeTreeData::MergingParams & merging_params_,
const MergingParams & merging_params_,
const MergeTreeSettings & settings_,
bool has_force_restore_data_flag)
: path(path_), database_name(database_name_), table_name(table_name_), full_path(path + escapeForFileName(table_name) + '/'),
global_context(context_), background_pool(context_.getBackgroundPool()),
data(database_name, table_name,
full_path, columns_, indices_,
context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
sample_by_ast_, ttl_table_ast_, merging_params_,
settings_, false, attach),
reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()),
log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)"))
: MergeTreeData(database_name_, table_name_,
path_ + escapeForFileName(table_name_) + '/',
columns_, indices_,
context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
sample_by_ast_, ttl_table_ast_, merging_params_,
settings_, false, attach),
path(path_),
background_pool(context_.getBackgroundPool()),
reader(*this), writer(*this), merger_mutator(*this, global_context.getBackgroundPool())
{
if (path_.empty())
throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME);
if (path.empty())
throw Exception("MergeTree require data path", ErrorCodes::INCORRECT_FILE_NAME);
data.loadDataParts(has_force_restore_data_flag);
loadDataParts(has_force_restore_data_flag);
if (!attach && !data.getDataParts().empty())
if (!attach && !getDataParts().empty())
throw Exception("Data directory for table already containing data parts - probably it was unclean DROP table or manual intervention. You must either clear directory by hand or use ATTACH TABLE instead of CREATE TABLE if you need to use that parts.", ErrorCodes::INCORRECT_DATA);
increment.set(data.getMaxBlockNumber());
increment.set(getMaxBlockNumber());
loadMutations();
}
@ -89,11 +89,11 @@ StorageMergeTree::StorageMergeTree(
void StorageMergeTree::startup()
{
data.clearOldPartsFromFilesystem();
clearOldPartsFromFilesystem();
/// Temporary directories contain incomplete results of merges (after a forced restart)
/// and cannot be reinitialized, so delete each of them immediately
data.clearOldTemporaryDirectories(0);
clearOldTemporaryDirectories(0);
/// NOTE background task will also do the above cleanups periodically.
time_after_previous_cleanup.restart();
@ -135,16 +135,16 @@ BlockOutputStreamPtr StorageMergeTree::write(const ASTPtr & /*query*/, const Con
void StorageMergeTree::checkTableCanBeDropped() const
{
const_cast<MergeTreeData &>(getData()).recalculateColumnSizes();
global_context.checkTableCanBeDropped(database_name, table_name, getData().getTotalActiveSizeInBytes());
const_cast<StorageMergeTree &>(*this).recalculateColumnSizes();
global_context.checkTableCanBeDropped(database_name, table_name, getTotalActiveSizeInBytes());
}
void StorageMergeTree::checkPartitionCanBeDropped(const ASTPtr & partition)
{
const_cast<MergeTreeData &>(getData()).recalculateColumnSizes();
const_cast<StorageMergeTree &>(*this).recalculateColumnSizes();
const String partition_id = data.getPartitionIDFromQuery(partition, global_context);
auto parts_to_remove = data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
const String partition_id = getPartitionIDFromQuery(partition, global_context);
auto parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
UInt64 partition_size = 0;
@ -158,7 +158,7 @@ void StorageMergeTree::checkPartitionCanBeDropped(const ASTPtr & partition)
void StorageMergeTree::drop()
{
shutdown();
data.dropAllData();
dropAllData();
}
void StorageMergeTree::truncate(const ASTPtr &, const Context &)
@ -170,20 +170,20 @@ void StorageMergeTree::truncate(const ASTPtr &, const Context &)
/// NOTE: It's assumed that this method is called under lockForAlter.
auto parts_to_remove = data.getDataPartsVector();
data.removePartsFromWorkingSet(parts_to_remove, true);
auto parts_to_remove = getDataPartsVector();
removePartsFromWorkingSet(parts_to_remove, true);
LOG_INFO(log, "Removed " << parts_to_remove.size() << " parts.");
}
data.clearOldPartsFromFilesystem();
clearOldPartsFromFilesystem();
}
void StorageMergeTree::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name)
{
std::string new_full_path = new_path_to_db + escapeForFileName(new_table_name) + '/';
data.setPath(new_full_path);
setPath(new_full_path);
path = new_path_to_db;
table_name = new_table_name;
@ -216,21 +216,21 @@ void StorageMergeTree::alter(
lockNewDataStructureExclusively(table_lock_holder, context.getCurrentQueryId());
data.checkAlter(params, context);
checkAlter(params, context);
auto new_columns = data.getColumns();
auto new_indices = data.getIndices();
ASTPtr new_order_by_ast = data.order_by_ast;
ASTPtr new_primary_key_ast = data.primary_key_ast;
ASTPtr new_ttl_table_ast = data.ttl_table_ast;
auto new_columns = getColumns();
auto new_indices = getIndices();
ASTPtr new_order_by_ast = order_by_ast;
ASTPtr new_primary_key_ast = primary_key_ast;
ASTPtr new_ttl_table_ast = ttl_table_ast;
params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast);
auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated});
auto parts = getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated});
auto columns_for_parts = new_columns.getAllPhysical();
std::vector<MergeTreeData::AlterDataPartTransactionPtr> transactions;
for (const MergeTreeData::DataPartPtr & part : parts)
std::vector<AlterDataPartTransactionPtr> transactions;
for (const DataPartPtr & part : parts)
{
if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false))
if (auto transaction = alterDataPart(part, columns_for_parts, new_indices.indices, false))
transactions.push_back(std::move(transaction));
}
@ -240,28 +240,28 @@ void StorageMergeTree::alter(
{
auto & storage_ast = ast.as<ASTStorage &>();
if (new_order_by_ast.get() != data.order_by_ast.get())
if (new_order_by_ast.get() != order_by_ast.get())
storage_ast.set(storage_ast.order_by, new_order_by_ast);
if (new_primary_key_ast.get() != data.primary_key_ast.get())
if (new_primary_key_ast.get() != primary_key_ast.get())
storage_ast.set(storage_ast.primary_key, new_primary_key_ast);
if (new_ttl_table_ast.get() != data.ttl_table_ast.get())
if (new_ttl_table_ast.get() != ttl_table_ast.get())
storage_ast.set(storage_ast.ttl_table, new_ttl_table_ast);
};
context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier);
/// Reinitialize primary key because primary key column types might have changed.
data.setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices);
setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices);
data.setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast);
setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast);
for (auto & transaction : transactions)
transaction->commit();
/// Column sizes could have changed
data.recalculateColumnSizes();
recalculateColumnSizes();
}
@ -341,7 +341,7 @@ public:
void StorageMergeTree::mutate(const MutationCommands & commands, const Context &)
{
MergeTreeMutationEntry entry(commands, full_path, data.insert_increment.get());
MergeTreeMutationEntry entry(commands, full_path, insert_increment.get());
String file_name;
{
std::lock_guard lock(currently_merging_mutex);
@ -362,7 +362,7 @@ std::vector<MergeTreeMutationStatus> StorageMergeTree::getMutationsStatus() cons
std::lock_guard lock(currently_merging_mutex);
std::vector<Int64> part_data_versions;
auto data_parts = data.getDataPartsVector();
auto data_parts = getDataPartsVector();
part_data_versions.reserve(data_parts.size());
for (const auto & part : data_parts)
part_data_versions.push_back(part->info.getDataVersion());
@ -471,7 +471,7 @@ bool StorageMergeTree::merge(
{
std::lock_guard lock(currently_merging_mutex);
auto can_merge = [this, &lock] (const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String *)
auto can_merge = [this, &lock] (const DataPartPtr & left, const DataPartPtr & right, String *)
{
return !currently_merging.count(left) && !currently_merging.count(right)
&& getCurrentMutationVersion(left, lock) == getCurrentMutationVersion(right, lock);
@ -503,7 +503,7 @@ bool StorageMergeTree::merge(
/// Logging
Stopwatch stopwatch;
MergeTreeData::MutableDataPartPtr new_part;
MutableDataPartPtr new_part;
auto write_part_log = [&] (const ExecutionStatus & execution_status)
{
@ -554,7 +554,7 @@ bool StorageMergeTree::merge(
future_part, *merge_entry, time(nullptr),
merging_tagger->reserved_space.get(), deduplicate);
merger_mutator.renameMergedTemporaryPart(new_part, future_part.parts, nullptr);
data.removeEmptyColumnsFromPart(new_part);
removeEmptyColumnsFromPart(new_part);
merging_tagger->is_successful = true;
write_part_log({});
@ -587,7 +587,7 @@ bool StorageMergeTree::tryMutatePart()
return false;
auto mutations_end_it = current_mutations_by_version.end();
for (const auto & part : data.getDataPartsVector())
for (const auto & part : getDataPartsVector())
{
if (currently_merging.count(part))
continue;
@ -621,7 +621,7 @@ bool StorageMergeTree::tryMutatePart()
MergeList::EntryPtr merge_entry = global_context.getMergeList().insert(database_name, table_name, future_part);
Stopwatch stopwatch;
MergeTreeData::MutableDataPartPtr new_part;
MutableDataPartPtr new_part;
auto write_part_log = [&] (const ExecutionStatus & execution_status)
{
@ -670,7 +670,7 @@ bool StorageMergeTree::tryMutatePart()
try
{
new_part = merger_mutator.mutatePartToTemporaryPart(future_part, commands, *merge_entry, global_context);
data.renameTempPartAndReplace(new_part);
renameTempPartAndReplace(new_part);
tagger->is_successful = true;
write_part_log({});
}
@ -698,11 +698,11 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::backgroundTask()
/// Clear old parts. It is unnecessary to do it more than once a second.
if (auto lock = time_after_previous_cleanup.compareAndRestartDeferred(1))
{
data.clearOldPartsFromFilesystem();
clearOldPartsFromFilesystem();
{
/// TODO: Implement tryLockStructureForShare.
auto lock_structure = lockStructureForShare(false, "");
data.clearOldTemporaryDirectories();
clearOldTemporaryDirectories();
}
clearOldMutations();
}
@ -729,7 +729,7 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::backgroundTask()
}
Int64 StorageMergeTree::getCurrentMutationVersion(
const MergeTreeData::DataPartPtr & part,
const DataPartPtr & part,
std::lock_guard<std::mutex> & /* currently_merging_mutex_lock */) const
{
auto it = current_mutations_by_version.upper_bound(part->info.getDataVersion());
@ -741,28 +741,28 @@ Int64 StorageMergeTree::getCurrentMutationVersion(
void StorageMergeTree::clearOldMutations()
{
if (!data.settings.finished_mutations_to_keep)
if (!settings.finished_mutations_to_keep)
return;
std::vector<MergeTreeMutationEntry> mutations_to_delete;
{
std::lock_guard lock(currently_merging_mutex);
if (current_mutations_by_version.size() <= data.settings.finished_mutations_to_keep)
if (current_mutations_by_version.size() <= settings.finished_mutations_to_keep)
return;
auto begin_it = current_mutations_by_version.begin();
std::optional<Int64> min_version = data.getMinPartDataVersion();
std::optional<Int64> min_version = getMinPartDataVersion();
auto end_it = current_mutations_by_version.end();
if (min_version)
end_it = current_mutations_by_version.upper_bound(*min_version);
size_t done_count = std::distance(begin_it, end_it);
if (done_count <= data.settings.finished_mutations_to_keep)
if (done_count <= settings.finished_mutations_to_keep)
return;
size_t to_delete_count = done_count - data.settings.finished_mutations_to_keep;
size_t to_delete_count = done_count - settings.finished_mutations_to_keep;
auto it = begin_it;
for (size_t i = 0; i < to_delete_count; ++i)
@ -790,10 +790,10 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi
/// We don't change the table structure, only the data in some parts; the parts are locked inside the alterDataPart() function
auto lock_read_structure = lockStructureForShare(false, context.getCurrentQueryId());
String partition_id = data.getPartitionIDFromQuery(partition, context);
auto parts = data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
String partition_id = getPartitionIDFromQuery(partition, context);
auto parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
std::vector<MergeTreeData::AlterDataPartTransactionPtr> transactions;
std::vector<AlterDataPartTransactionPtr> transactions;
AlterCommand alter_command;
alter_command.type = AlterCommand::DROP_COLUMN;
@ -812,7 +812,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi
if (part->info.partition_id != partition_id)
throw Exception("Unexpected partition ID " + part->info.partition_id + ". This is a bug.", ErrorCodes::LOGICAL_ERROR);
if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false))
if (auto transaction = alterDataPart(part, columns_for_parts, new_indices.indices, false))
transactions.push_back(std::move(transaction));
LOG_DEBUG(log, "Removing column " << get<String>(column_name) << " from part " << part->name);
@ -825,7 +825,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi
transaction->commit();
/// Recalculate column sizes (not only for the modified column)
data.recalculateColumnSizes();
recalculateColumnSizes();
}
@ -835,10 +835,10 @@ bool StorageMergeTree::optimize(
String disable_reason;
if (!partition && final)
{
MergeTreeData::DataPartsVector data_parts = data.getDataPartsVector();
DataPartsVector data_parts = getDataPartsVector();
std::unordered_set<String> partition_ids;
for (const MergeTreeData::DataPartPtr & part : data_parts)
for (const DataPartPtr & part : data_parts)
partition_ids.emplace(part->info.partition_id);
for (const String & partition_id : partition_ids)
@ -855,7 +855,7 @@ bool StorageMergeTree::optimize(
{
String partition_id;
if (partition)
partition_id = data.getPartitionIDFromQuery(partition, context);
partition_id = getPartitionIDFromQuery(partition, context);
if (!merge(true, partition_id, final, deduplicate, &disable_reason))
{
@ -895,7 +895,7 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma
case PartitionCommand::FREEZE_PARTITION:
{
auto lock = lockStructureForShare(false, context.getCurrentQueryId());
data.freezePartition(command.partition, command.with_name, context);
freezePartition(command.partition, command.with_name, context);
}
break;
@ -906,7 +906,7 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma
case PartitionCommand::FREEZE_ALL_PARTITIONS:
{
auto lock = lockStructureForShare(false, context.getCurrentQueryId());
data.freezeAll(command.with_name, context);
freezeAll(command.with_name, context);
}
break;
@ -925,11 +925,11 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons
/// Waits for running merges to complete and does not start new ones.
auto lock = lockExclusively(context.getCurrentQueryId());
String partition_id = data.getPartitionIDFromQuery(partition, context);
String partition_id = getPartitionIDFromQuery(partition, context);
/// TODO: should we include PreCommitted parts like in the Replicated case?
auto parts_to_remove = data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
data.removePartsFromWorkingSet(parts_to_remove, true);
auto parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
removePartsFromWorkingSet(parts_to_remove, true);
if (detach)
{
@ -944,7 +944,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons
LOG_INFO(log, (detach ? "Detached " : "Removed ") << parts_to_remove.size() << " parts inside partition ID " << partition_id << ".");
}
data.clearOldPartsFromFilesystem();
clearOldPartsFromFilesystem();
}
@ -957,7 +957,7 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par
if (attach_part)
partition_id = partition->as<ASTLiteral &>().value.safeGet<String>();
else
partition_id = data.getPartitionIDFromQuery(partition, context);
partition_id = getPartitionIDFromQuery(partition, context);
String source_dir = "detached/";
@ -970,12 +970,12 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par
else
{
LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir);
ActiveDataPartSet active_parts(data.format_version);
ActiveDataPartSet active_parts(format_version);
for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it)
{
const String & name = it.name();
MergeTreePartInfo part_info;
if (!MergeTreePartInfo::tryParsePartName(name, &part_info, data.format_version)
if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version)
|| part_info.partition_id != partition_id)
{
continue;
@ -992,10 +992,10 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par
String source_path = source_dir + source_part_name;
LOG_DEBUG(log, "Checking data");
MergeTreeData::MutableDataPartPtr part = data.loadPartAndFixMetadata(source_path);
MutableDataPartPtr part = loadPartAndFixMetadata(source_path);
LOG_INFO(log, "Attaching part " << source_part_name << " from " << source_path);
data.renameTempPartAndAdd(part, &increment);
renameTempPartAndAdd(part, &increment);
LOG_INFO(log, "Finished attaching part");
}
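For reference, the attach flow these renames touch (all names from the surrounding code):

    // 1. scan <table>/detached/ for directories whose names parse as parts of the
    //    requested partition (MergeTreePartInfo::tryParsePartName);
    // 2. loadPartAndFixMetadata() validates the part and fixes its metadata;
    // 3. renameTempPartAndAdd(part, &increment) commits it into the working set.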
@ -1010,22 +1010,22 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con
auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId());
Stopwatch watch;
MergeTreeData * src_data = data.checkStructureAndGetMergeTreeData(source_table);
String partition_id = data.getPartitionIDFromQuery(partition, context);
MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table);
String partition_id = getPartitionIDFromQuery(partition, context);
MergeTreeData::DataPartsVector src_parts = src_data->getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
MergeTreeData::MutableDataPartsVector dst_parts;
DataPartsVector src_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
MutableDataPartsVector dst_parts;
static const String TMP_PREFIX = "tmp_replace_from_";
for (const MergeTreeData::DataPartPtr & src_part : src_parts)
for (const DataPartPtr & src_part : src_parts)
{
/// This will generate a unique name in the scope of the current server process.
Int64 temp_index = data.insert_increment.get();
Int64 temp_index = insert_increment.get();
MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level);
std::shared_lock<std::shared_mutex> part_lock(src_part->columns_lock);
dst_parts.emplace_back(data.cloneAndLoadDataPart(src_part, TMP_PREFIX, dst_part_info));
dst_parts.emplace_back(cloneAndLoadDataPart(src_part, TMP_PREFIX, dst_part_info));
}
/// ATTACH empty part set
@ -1047,19 +1047,19 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con
{
/// Here we use the transaction just like RAII since rare errors in renameTempPartAndReplace() are possible
/// and we should be able to roll back already added (PreCommitted) parts
MergeTreeData::Transaction transaction(data);
Transaction transaction(*this);
auto data_parts_lock = data.lockParts();
auto data_parts_lock = lockParts();
/// Populate transaction
for (MergeTreeData::MutableDataPartPtr & part : dst_parts)
data.renameTempPartAndReplace(part, &increment, &transaction, data_parts_lock);
for (MutableDataPartPtr & part : dst_parts)
renameTempPartAndReplace(part, &increment, &transaction, data_parts_lock);
transaction.commit(&data_parts_lock);
/// If it is REPLACE (not ATTACH), remove all parts whose max_block_number is less than the min_block_number of the first new block
if (replace)
data.removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock);
removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock);
}
PartLog::addNewParts(global_context, dst_parts, watch.elapsed());


@ -20,34 +20,18 @@ namespace DB
/** See the description of the data structure in MergeTreeData.
*/
class StorageMergeTree : public ext::shared_ptr_helper<StorageMergeTree>, public IStorage
class StorageMergeTree : public ext::shared_ptr_helper<StorageMergeTree>, public MergeTreeData
{
public:
void startup() override;
void shutdown() override;
~StorageMergeTree() override;
std::string getName() const override { return data.merging_params.getModeName() + "MergeTree"; }
std::string getName() const override { return merging_params.getModeName() + "MergeTree"; }
std::string getTableName() const override { return table_name; }
std::string getDatabaseName() const override { return database_name; }
bool supportsSampling() const override { return data.supportsSampling(); }
bool supportsPrewhere() const override { return data.supportsPrewhere(); }
bool supportsFinal() const override { return data.supportsFinal(); }
bool supportsIndexForIn() const override { return true; }
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & /* query_context */) const override
{
return data.mayBenefitFromIndexForIn(left_in_operand);
}
const ColumnsDescription & getColumns() const override { return data.getColumns(); }
void setColumns(ColumnsDescription columns_) override { return data.setColumns(std::move(columns_)); }
virtual const IndicesDescription & getIndices() const override { return data.getIndices(); }
virtual void setIndices(IndicesDescription indices_) override { data.setIndices(std::move(indices_)); }
NameAndTypePair getColumn(const String & column_name) const override { return data.getColumn(column_name); }
bool hasColumn(const String & column_name) const override { return data.hasColumn(column_name); }
BlockInputStreams read(
const Names & column_names,
@ -66,7 +50,7 @@ public:
void alterPartition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) override;
void mutate(const MutationCommands & commands, const Context & context) override;
std::vector<MergeTreeMutationStatus> getMutationsStatus() const;
std::vector<MergeTreeMutationStatus> getMutationsStatus() const override;
CancellationCode killMutation(const String & mutation_id) override;
void drop() override;
@ -84,32 +68,13 @@ public:
ActionLock getActionLock(StorageActionBlockType action_type) override;
MergeTreeData & getData() { return data; }
const MergeTreeData & getData() const { return data; }
String getDataPath() const override { return full_path; }
ASTPtr getPartitionKeyAST() const override { return data.partition_by_ast; }
ASTPtr getSortingKeyAST() const override { return data.getSortingKeyAST(); }
ASTPtr getPrimaryKeyAST() const override { return data.getPrimaryKeyAST(); }
ASTPtr getSamplingKeyAST() const override { return data.getSamplingExpression(); }
Names getColumnsRequiredForPartitionKey() const override { return data.getColumnsRequiredForPartitionKey(); }
Names getColumnsRequiredForSortingKey() const override { return data.getColumnsRequiredForSortingKey(); }
Names getColumnsRequiredForPrimaryKey() const override { return data.getColumnsRequiredForPrimaryKey(); }
Names getColumnsRequiredForSampling() const override { return data.getColumnsRequiredForSampling(); }
Names getColumnsRequiredForFinal() const override { return data.getColumnsRequiredForSortingKey(); }
private:
String path;
String database_name;
String table_name;
String full_path;
Context global_context;
BackgroundProcessingPool & background_pool;
MergeTreeData data;
MergeTreeDataSelectExecutor reader;
MergeTreeDataWriter writer;
MergeTreeDataMergerMutator merger_mutator;
@ -121,12 +86,10 @@ private:
AtomicStopwatch time_after_previous_cleanup;
mutable std::mutex currently_merging_mutex;
MergeTreeData::DataParts currently_merging;
DataParts currently_merging;
std::map<String, MergeTreeMutationEntry> current_mutations_by_id;
std::multimap<Int64, MergeTreeMutationEntry &> current_mutations_by_version;
Logger * log;
std::atomic<bool> shutdown_called {false};
BackgroundProcessingPool::TaskHandle background_task_handle;
@ -137,8 +100,7 @@ private:
* If aggressive, selects parts without taking into account their size ratio and novelty (used for the OPTIMIZE query).
* Returns true if the merge finished successfully.
*/
bool merge(bool aggressive, const String & partition_id, bool final, bool deduplicate,
String * out_disable_reason = nullptr);
bool merge(bool aggressive, const String & partition_id, bool final, bool deduplicate, String * out_disable_reason = nullptr);
/// Try and find a single part to mutate and mutate it. If some part was successfully mutated, return true.
bool tryMutatePart();
@ -146,7 +108,7 @@ private:
BackgroundProcessingPoolTaskResult backgroundTask();
Int64 getCurrentMutationVersion(
const MergeTreeData::DataPartPtr & part,
const DataPartPtr & part,
std::lock_guard<std::mutex> & /* currently_merging_mutex_lock */) const;
void clearOldMutations();
@ -182,7 +144,7 @@ protected:
const ASTPtr & primary_key_ast_,
const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
const ASTPtr & ttl_table_ast_,
const MergeTreeData::MergingParams & merging_params_,
const MergingParams & merging_params_,
const MergeTreeSettings & settings_,
bool has_force_restore_data_flag);
};

File diff suppressed because it is too large


@ -72,36 +72,20 @@ namespace DB
* as the time, the creation time of the corresponding part on any of the replicas is taken.
*/
class StorageReplicatedMergeTree : public ext::shared_ptr_helper<StorageReplicatedMergeTree>, public IStorage
class StorageReplicatedMergeTree : public ext::shared_ptr_helper<StorageReplicatedMergeTree>, public MergeTreeData
{
public:
void startup() override;
void shutdown() override;
~StorageReplicatedMergeTree() override;
std::string getName() const override { return "Replicated" + data.merging_params.getModeName() + "MergeTree"; }
std::string getName() const override { return "Replicated" + merging_params.getModeName() + "MergeTree"; }
std::string getTableName() const override { return table_name; }
std::string getDatabaseName() const override { return database_name; }
bool supportsSampling() const override { return data.supportsSampling(); }
bool supportsFinal() const override { return data.supportsFinal(); }
bool supportsPrewhere() const override { return data.supportsPrewhere(); }
bool supportsReplication() const override { return true; }
bool supportsDeduplication() const override { return true; }
const ColumnsDescription & getColumns() const override { return data.getColumns(); }
void setColumns(ColumnsDescription columns_) override { return data.setColumns(std::move(columns_)); }
NameAndTypePair getColumn(const String & column_name) const override
{
return data.getColumn(column_name);
}
bool hasColumn(const String & column_name) const override
{
return data.hasColumn(column_name);
}
BlockInputStreams read(
const Names & column_names,
const SelectQueryInfo & query_info,
@ -121,7 +105,7 @@ public:
void alterPartition(const ASTPtr & query, const PartitionCommands & commands, const Context & query_context) override;
void mutate(const MutationCommands & commands, const Context & context) override;
std::vector<MergeTreeMutationStatus> getMutationsStatus() const;
std::vector<MergeTreeMutationStatus> getMutationsStatus() const override;
CancellationCode killMutation(const String & mutation_id) override;
/** Removes a replica from ZooKeeper. If there are no other replicas, it deletes the entire table from ZooKeeper.
@ -133,10 +117,6 @@ public:
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override;
bool supportsIndexForIn() const override { return true; }
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & /* query_context */) const override
{
return data.mayBenefitFromIndexForIn(left_in_operand);
}
void checkTableCanBeDropped() const override;
@ -148,10 +128,6 @@ public:
/// If the timeout is exceeded, returns false
bool waitForShrinkingQueueSize(size_t queue_size = 0, UInt64 max_wait_milliseconds = 0);
MergeTreeData & getData() { return data; }
const MergeTreeData & getData() const { return data; }
/** For the system table replicas. */
struct Status
{
@ -194,17 +170,6 @@ public:
String getDataPath() const override { return full_path; }
ASTPtr getPartitionKeyAST() const override { return data.partition_by_ast; }
ASTPtr getSortingKeyAST() const override { return data.getSortingKeyAST(); }
ASTPtr getPrimaryKeyAST() const override { return data.getPrimaryKeyAST(); }
ASTPtr getSamplingKeyAST() const override { return data.getSamplingExpression(); }
Names getColumnsRequiredForPartitionKey() const override { return data.getColumnsRequiredForPartitionKey(); }
Names getColumnsRequiredForSortingKey() const override { return data.getColumnsRequiredForSortingKey(); }
Names getColumnsRequiredForPrimaryKey() const override { return data.getColumnsRequiredForPrimaryKey(); }
Names getColumnsRequiredForSampling() const override { return data.getColumnsRequiredForSampling(); }
Names getColumnsRequiredForFinal() const override { return data.getColumnsRequiredForSortingKey(); }
private:
/// Delete old parts from disk and from ZooKeeper.
void clearOldPartsAndRemoveFromZK();
@ -222,8 +187,6 @@ private:
using LogEntry = ReplicatedMergeTreeLogEntry;
using LogEntryPtr = LogEntry::Ptr;
Context global_context;
zkutil::ZooKeeperPtr current_zookeeper; /// Use only the methods below.
std::mutex current_zookeeper_mutex; /// To recreate the session in the background thread.
@ -234,10 +197,6 @@ private:
/// If true, the table is offline and cannot be written to.
std::atomic_bool is_readonly {false};
String database_name;
String table_name;
String full_path;
String zookeeper_path;
String replica_name;
String replica_path;
@ -264,7 +223,6 @@ private:
InterserverIOEndpointHolderPtr data_parts_exchange_endpoint_holder;
MergeTreeData data;
MergeTreeDataSelectExecutor reader;
MergeTreeDataWriter writer;
MergeTreeDataMergerMutator merger_mutator;
@ -325,8 +283,6 @@ private:
/// An event that awakens the `alter` method from waiting for the completion of the ALTER query.
zkutil::EventPtr alter_query_event = std::make_shared<Poco::Event>();
Logger * log;
/** Creates the minimum set of nodes in ZooKeeper.
*/
void createTableIfNotExists();
@ -362,24 +318,24 @@ private:
* Adds actions to `ops` that add data about the part into ZooKeeper.
* Call under TableStructureLock.
*/
void checkPartChecksumsAndAddCommitOps(const zkutil::ZooKeeperPtr & zookeeper, const MergeTreeData::DataPartPtr & part,
void checkPartChecksumsAndAddCommitOps(const zkutil::ZooKeeperPtr & zookeeper, const DataPartPtr & part,
Coordination::Requests & ops, String part_name = "", NameSet * absent_replicas_paths = nullptr);
String getChecksumsForZooKeeper(const MergeTreeDataPartChecksums & checksums) const;
/// Accepts a PreCommitted part, atomically checks its checksums against those on other replicas and commits the part
MergeTreeData::DataPartsVector checkPartChecksumsAndCommit(MergeTreeData::Transaction & transaction,
const MergeTreeData::DataPartPtr & part);
DataPartsVector checkPartChecksumsAndCommit(Transaction & transaction,
const DataPartPtr & part);
void getCommitPartOps(
Coordination::Requests & ops,
MergeTreeData::MutableDataPartPtr & part,
MutableDataPartPtr & part,
const String & block_id_path = "") const;
/// Updates info about part columns and checksums in ZooKeeper and commits the transaction if successful.
void updatePartHeaderInZooKeeperAndCommit(
const zkutil::ZooKeeperPtr & zookeeper,
MergeTreeData::AlterDataPartTransaction & transaction);
AlterDataPartTransaction & transaction);
/// Adds actions to `ops` that remove a part from ZooKeeper.
/// Set has_children to true for "old-style" parts (those with /columns and /checksums child znodes).
@ -390,7 +346,7 @@ private:
NameSet * parts_should_be_retried = nullptr);
bool tryRemovePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries = 5);
bool tryRemovePartsFromZooKeeperWithRetries(MergeTreeData::DataPartsVector & parts, size_t max_retries = 5);
bool tryRemovePartsFromZooKeeperWithRetries(DataPartsVector & parts, size_t max_retries = 5);
/// Removes a part from ZooKeeper and adds a task to the queue to download it. This is intended for broken parts.
void removePartAndEnqueueFetch(const String & part_name);
@ -405,8 +361,8 @@ private:
void writePartLog(
PartLogElement::Type type, const ExecutionStatus & execution_status, UInt64 elapsed_ns,
const String & new_part_name,
const MergeTreeData::DataPartPtr & result_part,
const MergeTreeData::DataPartsVector & source_parts,
const DataPartPtr & result_part,
const DataPartsVector & source_parts,
const MergeListEntry * merge_entry);
void executeDropRange(const LogEntry & entry);
@ -463,7 +419,7 @@ private:
*/
bool createLogEntryToMergeParts(
zkutil::ZooKeeperPtr & zookeeper,
const MergeTreeData::DataPartsVector & parts,
const DataPartsVector & parts,
const String & merged_name,
bool deduplicate,
ReplicatedMergeTreeLogEntryData * out_log_entry = nullptr);
@ -564,7 +520,7 @@ protected:
const ASTPtr & primary_key_ast_,
const ASTPtr & sample_by_ast_,
const ASTPtr & table_ttl_ast_,
const MergeTreeData::MergingParams & merging_params_,
const MergingParams & merging_params_,
const MergeTreeSettings & settings_,
bool has_force_restore_data_flag);
};

View File

@ -1,8 +1,6 @@
#include <optional>
#include <Storages/System/StorageSystemColumns.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
@ -38,10 +36,10 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_)
{ "marks_bytes", std::make_shared<DataTypeUInt64>() },
{ "comment", std::make_shared<DataTypeString>() },
{ "is_in_partition_key", std::make_shared<DataTypeUInt8>() },
{ "is_in_sorting_key", std::make_shared<DataTypeUInt8>() },
{ "is_in_primary_key", std::make_shared<DataTypeUInt8>() },
{ "is_in_sampling_key", std::make_shared<DataTypeUInt8>() },
{ "compression_codec", std::make_shared<DataTypeString>() },
{ "is_in_sorting_key", std::make_shared<DataTypeUInt8>() },
{ "is_in_primary_key", std::make_shared<DataTypeUInt8>() },
{ "is_in_sampling_key", std::make_shared<DataTypeUInt8>() },
{ "compression_codec", std::make_shared<DataTypeString>() },
}));
}
@ -124,16 +122,10 @@ protected:
cols_required_for_sampling = storage->getColumnsRequiredForSampling();
/** Info about sizes of columns for tables of MergeTree family.
* NOTE: It is possible to add getter for this info to IStorage interface.
*/
if (auto storage_concrete_plain = dynamic_cast<StorageMergeTree *>(storage.get()))
{
column_sizes = storage_concrete_plain->getData().getColumnSizes();
}
else if (auto storage_concrete_replicated = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
{
column_sizes = storage_concrete_replicated->getData().getColumnSizes();
}
* NOTE: It is possible to add getter for this info to IStorage interface.
*/
if (auto storage_concrete = dynamic_cast<const MergeTreeData *>(storage.get()))
column_sizes = storage_concrete->getColumnSizes();
}
for (const auto & column : columns)

View File

@ -1,7 +1,5 @@
#include <Storages/System/StorageSystemGraphite.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Interpreters/Context.h>
@ -37,20 +35,10 @@ StorageSystemGraphite::Configs StorageSystemGraphite::getConfigs(const Context &
for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next())
{
auto & table = iterator->table();
const MergeTreeData * table_data = nullptr;
if (const StorageMergeTree * merge_tree = dynamic_cast<StorageMergeTree *>(table.get()))
{
table_data = &merge_tree->getData();
}
else if (const StorageReplicatedMergeTree * replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(table.get()))
{
table_data = &replicated_merge_tree->getData();
}
else
{
const MergeTreeData * table_data = dynamic_cast<const MergeTreeData *>(table.get());
if (!table_data)
continue;
}
if (table_data->merging_params.mode == MergeTreeData::MergingParams::Graphite)
{

View File

@ -4,8 +4,8 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeArray.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeMutationStatus.h>
#include <Storages/VirtualColumnUtils.h>
#include <Databases/IDatabase.h>
@ -38,19 +38,10 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex
/// Collect a set of *MergeTree tables.
std::map<String, std::map<String, StoragePtr>> merge_tree_tables;
for (const auto & db : context.getDatabases())
{
if (context.hasDatabaseAccessRights(db.first))
{
for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next())
{
if (dynamic_cast<const StorageMergeTree *>(iterator->table().get())
|| dynamic_cast<const StorageReplicatedMergeTree *>(iterator->table().get()))
{
if (dynamic_cast<const MergeTreeData *>(iterator->table().get()))
merge_tree_tables[db.first][iterator->name()] = iterator->table();
}
}
}
}
MutableColumnPtr col_database_mut = ColumnString::create();
MutableColumnPtr col_table_mut = ColumnString::create();
@ -92,10 +83,8 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex
std::vector<MergeTreeMutationStatus> statuses;
{
const IStorage * storage = merge_tree_tables[database][table].get();
if (const auto * merge_tree = dynamic_cast<const StorageMergeTree *>(storage))
if (const auto * merge_tree = dynamic_cast<const MergeTreeData *>(storage))
statuses = merge_tree->getMutationsStatus();
else if (const auto * replicated = dynamic_cast<const StorageReplicatedMergeTree *>(storage))
statuses = replicated->getMutationsStatus();
}
for (const MergeTreeMutationStatus & status : statuses)

View File

@ -6,8 +6,6 @@
#include <DataTypes/DataTypeDate.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Storages/System/StorageSystemParts.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/VirtualColumnUtils.h>
#include <Databases/IDatabase.h>

View File

@ -7,8 +7,7 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDate.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/VirtualColumnUtils.h>
#include <Databases/IDatabase.h>
#include <Parsers/queryToString.h>
@ -93,8 +92,7 @@ public:
StoragePtr storage = iterator->table();
String engine_name = storage->getName();
if (!dynamic_cast<StorageMergeTree *>(&*storage) &&
!dynamic_cast<StorageReplicatedMergeTree *>(&*storage))
if (!dynamic_cast<MergeTreeData *>(storage.get()))
continue;
storages[std::make_pair(database_name, iterator->name())] = storage;
@ -184,20 +182,9 @@ public:
info.engine = info.storage->getName();
info.data = nullptr;
if (auto merge_tree = dynamic_cast<StorageMergeTree *>(&*info.storage))
{
info.data = &merge_tree->getData();
}
else if (auto replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(&*info.storage))
{
info.data = &replicated_merge_tree->getData();
}
else
{
info.data = dynamic_cast<MergeTreeData *>(info.storage.get());
if (!info.data)
throw Exception("Unknown engine " + info.engine, ErrorCodes::LOGICAL_ERROR);
}
using State = MergeTreeDataPart::State;
auto & all_parts_state = info.all_parts_state;

View File

@ -6,7 +6,6 @@
#include <DataTypes/DataTypeDate.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Storages/System/StorageSystemPartsColumns.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/VirtualColumnUtils.h>
#include <Databases/IDatabase.h>
#include <Parsers/queryToString.h>

View File

@ -56,7 +56,7 @@ if [ "$DATA_DIR_PATTERN" != "$DATA_DIR" ]; then
cat $CLICKHOUSE_CONFIG | sed -e s!$DATA_DIR_PATTERN!$DATA_DIR! > $DATA_DIR/etc/server-config.xml
export CLICKHOUSE_CONFIG=$DATA_DIR/etc/server-config.xml
cp $CLICKHOUSE_CONFIG_USERS $DATA_DIR/etc
cp -r -L $CLICKHOUSE_CONFIG_USERS_D $DATA_DIR/etc
cp -R -L $CLICKHOUSE_CONFIG_USERS_D $DATA_DIR/etc
fi
CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="${CLICKHOUSE_EXTRACT} --config=$CLICKHOUSE_CONFIG"}

View File

@ -22,6 +22,8 @@
</any_of>
</stop_conditions>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE match(URL, ' *tranio\\.ru/spain/*/commercial/*') settings max_threads=5]]></query>
<query><![CDATA[select count(position(URL, 'yandex')), count(position(URL, 'google')) FROM hits_100m_single]]></query>
<query><![CDATA[select count(multiSearchAllPositions(URL, ['yandex', 'google'])) FROM hits_100m_single]]></query>
<query><![CDATA[select count(match(URL, 'yandex|google')) FROM hits_100m_single]]></query>
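Of the new queries, the multiSearchAllPositions one scans the haystack once for every needle and returns an array of 1-based first-occurrence positions, with 0 for needles that never occur. A minimal sketch of the shape (hypothetical input, expected values hedged):

SELECT multiSearchAllPositions('https://yandex.ru/', ['yandex', 'google']);
-- expected [9, 0]: 'yandex' starts at byte 9, 'google' is absent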

View File

@ -21,13 +21,16 @@
<total_time_ms>60000</total_time_ms>
</any_of>
</stop_conditions>
<query>SELECT DISTINCT URL,Title, ngramDistance(Title, URL) AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT SearchPhrase,Title, ngramDistance(Title, SearchPhrase) AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT Title, ngramDistance(Title, 'what is love') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT Title, ngramDistance(Title, 'baby dont hurt me') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT Title, ngramDistance(Title, 'no more') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'wHAt Is lovE') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'BABY DonT hUrT me') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'nO MOrE') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT URL,Title, ngramDistanceUTF8(Title, URL) AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT SearchPhrase,Title, ngramDistanceUTF8(Title, SearchPhrase) AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT Title, ngramDistanceUTF8(Title, 'метрика') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT URL, ngramDistanceUTF8(URL, 'как дела') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>
<query>SELECT DISTINCT URL, ngramDistanceUTF8(URL, 'чем занимаешься') AS distance FROM hits_100m_single ORDER BY distance ASC LIMIT 50</query>

View File

@ -0,0 +1,30 @@
<test>
<name>Simple Join Query</name>
<type>once</type>
<stop_conditions>
<all_of>
<total_time_ms>30000</total_time_ms>
</all_of>
<any_of>
<min_time_not_changing_for_ms>5000</min_time_not_changing_for_ms>
<total_time_ms>60000</total_time_ms>
</any_of>
</stop_conditions>
<main_metric>
<total_time />
</main_metric>
<create_query>CREATE TABLE join_table(A Int64, S0 String, S1 String, S2 String, S3 String)ENGINE = MergeTree ORDER BY A</create_query>
<fill_query>INSERT INTO join_table SELECT number AS A, toString(arrayMap(x->x, range(100))) S0, S0 AS S1, S0 AS S2, S0 AS S3 from numbers(500000)</fill_query>
<query tag='UsingJoinWithoutSubquery'>SELECT COUNT() FROM join_table LEFT JOIN join_table USING A</query>
<query tag='UsingJoinWithSubquery'>SELECT COUNT() FROM join_table LEFT JOIN (SELECT A FROM join_table) USING A</query>
<query tag='OnExpressionJoinWithoutSubquery'>SELECT COUNT() FROM join_table AS left LEFT JOIN join_table AS right ON left.A = right.A</query>
<query tag='OnExpressionJoinWithSubquery'>SELECT COUNT() FROM join_table AS left LEFT JOIN (SELECT A FROM join_table) AS right ON left.A = right.A</query>
<drop_query>DROP TABLE IF EXISTS join_table</drop_query>
</test>
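The four tagged queries pair the two join-condition spellings with and without a wrapping subquery; USING A is shorthand for an equality ON clause that also exposes a single unqualified key column. A minimal sketch of the equivalence (hypothetical table, expected counts hedged):

CREATE TABLE jt (A Int64) ENGINE = Memory;
INSERT INTO jt VALUES (1), (2);
SELECT count() FROM jt AS l LEFT JOIN jt AS r USING A;      -- expected 2
SELECT count() FROM jt AS l LEFT JOIN jt AS r ON l.A = r.A; -- expected 2
DROP TABLE jt;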

View File

@ -5,12 +5,12 @@
0
0
1
0
\N
1
0
0
1
0
\N
1
0
0
@ -27,7 +27,7 @@
1
0
1
0
\N
0
1
0
@ -35,12 +35,12 @@
0
0
1
0
\N
1
0
0
1
0
\N
1
0
0
@ -57,7 +57,7 @@
1
0
1
0
\N
0
1
0

View File

@ -15,3 +15,7 @@
60 50 70 40 20 30
2019-01-01 50
2019-01-02 60
1
1
1
1

View File

@ -1,3 +1,4 @@
USE test;
SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5]));
SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])));
SELECT bitmapToArray(bitmapOr(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])));
@ -53,7 +54,7 @@ ALL LEFT JOIN
)
USING city_id;
-- bitmap state test
DROP TABLE IF EXISTS bitmap_state_test;
CREATE TABLE bitmap_state_test
(
@ -72,6 +73,26 @@ GROUP BY pickup_date, city_id;
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date;
-- between column and expression test
DROP TABLE IF EXISTS bitmap_column_expr_test;
CREATE TABLE bitmap_column_expr_test
(
t DateTime,
z AggregateFunction(groupBitmap, UInt32)
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(t)
ORDER BY t;
INSERT INTO bitmap_column_expr_test VALUES (now(), bitmapBuild(cast([3,19,47] as Array(UInt32))));
SELECT bitmapAndCardinality( bitmapBuild(cast([19,7] as Array(UInt32))), z) from bitmap_column_expr_test;
SELECT bitmapAndCardinality( z, bitmapBuild(cast([19,7] as Array(UInt32))) ) from bitmap_column_expr_test;
select bitmapCardinality(bitmapAnd(bitmapBuild(cast([19,7] as Array(UInt32))), z )) from bitmap_column_expr_test;
select bitmapCardinality(bitmapAnd(z, bitmapBuild(cast([19,7] as Array(UInt32))))) from bitmap_column_expr_test;
DROP TABLE IF EXISTS bitmap_test;
DROP TABLE IF EXISTS bitmap_state_test;
DROP TABLE IF EXISTS bitmap_column_expr_test;
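The state table above relies on the usual combinator pattern: a -State aggregate serializes a roaring bitmap into an AggregateFunction(groupBitmap, UInt32) column, and groupBitmapMerge folds the stored states back into a cardinality at read time. A minimal sketch of that round trip (hypothetical table name, expected output hedged):

drop table if exists bitmap_sketch;
create table bitmap_sketch (d Date, uv AggregateFunction(groupBitmap, UInt32)) engine = AggregatingMergeTree order by d;
insert into bitmap_sketch select toDate('2019-01-01'), groupBitmapState(toUInt32(number)) from numbers(3);
select d, groupBitmapMerge(uv) from bitmap_sketch group by d; -- expected: 2019-01-01  3
drop table bitmap_sketch;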

View File

@ -33,6 +33,76 @@
1000
1000
1000
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1000
1000
@ -40,6 +110,39 @@
77
636
1000
привет как дела?... Херсон 0
привет как дела клип - Яндекс.Видео 0
привет 0
пап привет как дела - Яндекс.Видео 0
привет братан как дела - Яндекс.Видео 0
http://metric.ru/ 0
http://autometric.ru/ 0
http://metrica.yandex.com/ 0
http://metris.ru/ 0
http://metrika.ru/ 0
0
0
привет как дела?... Херсон 600
пап привет как дела - Яндекс.Видео 684
привет как дела клип - Яндекс.Видео 692
привет братан как дела - Яндекс.Видео 707
привет 1000
http://metric.ru/ 1000
http://autometric.ru/ 1000
http://metrica.yandex.com/ 1000
http://metris.ru/ 1000
http://metrika.ru/ 1000
0
http://metric.ru/ 765
http://metris.ru/ 765
http://metrika.ru/ 778
http://autometric.ru/ 810
http://metrica.yandex.com/ 846
привет как дела?... Херсон 1000
привет как дела клип - Яндекс.Видео 1000
привет 1000
пап привет как дела - Яндекс.Видео 1000
привет братан как дела - Яндекс.Видео 1000
привет как дела?... Херсон 297
пап привет как дела - Яндекс.Видео 422
привет как дела клип - Яндекс.Видео 435
@ -152,6 +255,76 @@ http://metrika.ru/ 1000
1000
1000
1000
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1000
1000
@ -159,6 +332,39 @@ http://metrika.ru/ 1000
77
636
1000
привет как дела?... Херсон 0
привет как дела клип - Яндекс.Видео 0
привет 0
пап привет как дела - Яндекс.Видео 0
привет братан как дела - Яндекс.Видео 0
http://metric.ru/ 0
http://autometric.ru/ 0
http://metrica.yandex.com/ 0
http://metris.ru/ 0
http://metrika.ru/ 0
0
0
привет как дела?... Херсон 600
пап привет как дела - Яндекс.Видео 684
привет как дела клип - Яндекс.Видео 692
привет братан как дела - Яндекс.Видео 707
привет 1000
http://metric.ru/ 1000
http://autometric.ru/ 1000
http://metrica.yandex.com/ 1000
http://metris.ru/ 1000
http://metrika.ru/ 1000
0
http://metric.ru/ 765
http://metris.ru/ 765
http://metrika.ru/ 778
http://autometric.ru/ 810
http://metrica.yandex.com/ 846
привет как дела?... Херсон 1000
привет как дела клип - Яндекс.Видео 1000
привет 1000
пап привет как дела - Яндекс.Видео 1000
привет братан как дела - Яндекс.Видео 1000
привет как дела?... Херсон 297
пап привет как дела - Яндекс.Видео 422
привет как дела клип - Яндекс.Видео 435
@ -293,6 +499,76 @@ http://metrika.ru/ 1000
1000
1000
1000
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
@ -412,6 +688,76 @@ http://metrika.ru/ 1000
1000
1000
1000
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0

View File

@ -6,6 +6,22 @@ select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абв
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8(materialize(''), materialize('')))=round(1000 * ngramDistanceUTF8(materialize(''), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8(materialize('абв'), materialize('')))=round(1000 * ngramDistanceUTF8(materialize('абв'), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8(materialize(''), materialize('абв')))=round(1000 * ngramDistanceUTF8(materialize(''), 'абв')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), materialize('абвгдеёжз')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), materialize('абвгдеёж')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), materialize('гдеёзд')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), materialize('ёёёёёёёё')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8('', materialize('')))=round(1000 * ngramDistanceUTF8(materialize(''), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8('абв', materialize('')))=round(1000 * ngramDistanceUTF8(materialize('абв'), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8('', materialize('абв')))=round(1000 * ngramDistanceUTF8(materialize(''), 'абв')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8('абвгдеёжз', materialize('абвгдеёжз')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абвгдеёжз')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8('абвгдеёжз', materialize('абвгдеёж')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'абвгдеёж')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8('абвгдеёжз', materialize('гдеёзд')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'гдеёзд')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8('абвгдеёжз', materialize('ёёёёёёёё')))=round(1000 * ngramDistanceUTF8(materialize('абвгдеёжз'), 'ёёёёёёёё')) from system.numbers limit 5;
select round(1000 * ngramDistanceUTF8('', ''));
select round(1000 * ngramDistanceUTF8('абв', ''));
select round(1000 * ngramDistanceUTF8('', 'абв'));
@ -18,6 +34,10 @@ drop table if exists test_distance;
create table test_distance (Title String) engine = Memory;
insert into test_distance values ('привет как дела?... Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), ('');
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, Title) as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'как дела')) as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'metr')) as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'привет как дела') as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'как привет дела') as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrika') as distance;
@ -35,6 +55,23 @@ select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('аБВГдеё
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''),materialize(''))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абв'),materialize(''))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), materialize('абв'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвГДЕёжз'), materialize('АбвгдЕёжз'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), materialize('АбвГдеёж'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), materialize('гдеёЗД'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), materialize('ЁЁЁЁЁЁЁЁ'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8('', materialize(''))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абв',materialize(''))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абв'), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8('', materialize('абв'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), 'абв')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абвГДЕёжз', materialize('АбвгдЕёжз'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвГДЕёжз'), 'АбвгдЕёжз')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8('аБВГдеёЖз', materialize('АбвГдеёж'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('аБВГдеёЖз'), 'АбвГдеёж')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абвгдеёжз', materialize('гдеёЗД'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'гдеёЗД')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абвгдеёжз', materialize('ЁЁЁЁЁЁЁЁ'))) = round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize('абвгдеёжз'), 'ЁЁЁЁЁЁЁЁ')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitiveUTF8('', ''));
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абв', ''));
select round(1000 * ngramDistanceCaseInsensitiveUTF8('', 'абв'));
@ -43,6 +80,10 @@ select round(1000 * ngramDistanceCaseInsensitiveUTF8('аБВГдеёЖз', 'Аб
select round(1000 * ngramDistanceCaseInsensitiveUTF8('абвгдеёжз', 'гдеёЗД'));
select round(1000 * ngramDistanceCaseInsensitiveUTF8('АБВГДеёжз', 'ЁЁЁЁЁЁЁЁ'));
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, Title) as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, extract(Title, 'как дела')) as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, extract(Title, 'metr')) as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'ПрИвЕт кАК ДЕЛа') as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'как ПРИВЕТ дела') as distance;
SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metrika') as distance;
@ -62,6 +103,23 @@ select round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefg')) from syst
select round(1000 * ngramDistance(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
select round(1000 * ngramDistance(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
select round(1000 * ngramDistance(materialize(''),materialize('')))=round(1000 * ngramDistance(materialize(''), '')) from system.numbers limit 5;
select round(1000 * ngramDistance(materialize('abc'),materialize('')))=round(1000 * ngramDistance(materialize('abc'), '')) from system.numbers limit 5;
select round(1000 * ngramDistance(materialize(''), materialize('abc')))=round(1000 * ngramDistance(materialize(''), 'abc')) from system.numbers limit 5;
select round(1000 * ngramDistance(materialize('abcdefgh'), materialize('abcdefgh')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
select round(1000 * ngramDistance(materialize('abcdefgh'), materialize('abcdefg')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
select round(1000 * ngramDistance(materialize('abcdefgh'), materialize('defgh')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
select round(1000 * ngramDistance(materialize('abcdefgh'), materialize('aaaaaaaa')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
select round(1000 * ngramDistance('',materialize('')))=round(1000 * ngramDistance(materialize(''), '')) from system.numbers limit 5;
select round(1000 * ngramDistance('abc', materialize('')))=round(1000 * ngramDistance(materialize('abc'), '')) from system.numbers limit 5;
select round(1000 * ngramDistance('', materialize('abc')))=round(1000 * ngramDistance(materialize(''), 'abc')) from system.numbers limit 5;
select round(1000 * ngramDistance('abcdefgh', materialize('abcdefgh')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefgh')) from system.numbers limit 5;
select round(1000 * ngramDistance('abcdefgh', materialize('abcdefg')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'abcdefg')) from system.numbers limit 5;
select round(1000 * ngramDistance('abcdefgh', materialize('defgh')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'defgh')) from system.numbers limit 5;
select round(1000 * ngramDistance('abcdefgh', materialize('aaaaaaaa')))=round(1000 * ngramDistance(materialize('abcdefgh'), 'aaaaaaaa')) from system.numbers limit 5;
select round(1000 * ngramDistance('', ''));
select round(1000 * ngramDistance('abc', ''));
select round(1000 * ngramDistance('', 'abc'));
@ -86,6 +144,22 @@ select round(1000 * ngramDistanceCaseInsensitive(materialize('abcdefgh'), 'abcde
select round(1000 * ngramDistanceCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive(materialize(''), materialize('')))=round(1000 * ngramDistanceCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), materialize('')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive(materialize(''), materialize('abc')))=round(1000 * ngramDistanceCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive(materialize('abCdefgH'), materialize('Abcdefgh')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive(materialize('abcdefgh'), materialize('abcdeFG')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive(materialize('AAAAbcdefgh'), materialize('defgh')))=round(1000 * ngramDistanceCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive(materialize('ABCdefgH'), materialize('aaaaaaaa')))=round(1000 * ngramDistanceCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive('', materialize('')))=round(1000 * ngramDistanceCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive('abc', materialize('')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive('', materialize('abc')))=round(1000 * ngramDistanceCaseInsensitive(materialize(''), 'abc')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive('abCdefgH', materialize('Abcdefgh')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abCdefgH'), 'Abcdefgh')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive('abcdefgh', materialize('abcdeFG')))=round(1000 * ngramDistanceCaseInsensitive(materialize('abcdefgh'), 'abcdeFG')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive('AAAAbcdefgh', materialize('defgh')))=round(1000 * ngramDistanceCaseInsensitive(materialize('AAAAbcdefgh'), 'defgh')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive('ABCdefgH', materialize('aaaaaaaa')))=round(1000 * ngramDistanceCaseInsensitive(materialize('ABCdefgH'), 'aaaaaaaa')) from system.numbers limit 5;
select round(1000 * ngramDistanceCaseInsensitive('', ''));
select round(1000 * ngramDistanceCaseInsensitive('abc', ''));
select round(1000 * ngramDistanceCaseInsensitive('', 'abc'));
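Two invariants run through all of these assertions: the constant and materialize spellings must agree, and the distance is normalized to [0, 1], so round(1000 * ...) yields 0 for identical strings and 1000 when the strings share no 4-grams. A minimal sketch of the endpoints (expected values hedged):

select round(1000 * ngramDistance('abcdefgh', 'abcdefgh')); -- expected 0: identical strings
select round(1000 * ngramDistance('abcdefgh', 'aaaaaaaa')); -- expected 1000: no common 4-grams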

View File

@ -0,0 +1,43 @@
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
SimpleAggregateFunction(sum, Float64)
0 0
1 2
2 4
3 6
4 8
5 10
6 12
7 14
8 16
9 18
0 0
1 2
2 4
3 6
4 8
5 10
6 12
7 14
8 16
9 18
1 1 2 2.2.2.2
SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4)

View File

@ -0,0 +1,27 @@
-- basic test
drop table if exists test.simple;
create table test.simple (id UInt64,val SimpleAggregateFunction(sum,Double)) engine=AggregatingMergeTree order by id;
insert into test.simple select number,number from system.numbers limit 10;
select * from test.simple;
select * from test.simple final;
select toTypeName(val) from test.simple limit 1;
-- merge
insert into test.simple select number,number from system.numbers limit 10;
select * from test.simple final;
optimize table test.simple final;
select * from test.simple;
-- complex types
drop table if exists test.simple;
create table test.simple (id UInt64,nullable_str SimpleAggregateFunction(anyLast,Nullable(String)),low_str SimpleAggregateFunction(anyLast,LowCardinality(Nullable(String))),ip SimpleAggregateFunction(anyLast,IPv4)) engine=AggregatingMergeTree order by id;
insert into test.simple values(1,'1','1','1.1.1.1');
insert into test.simple values(1,null,'2','2.2.2.2');
select * from test.simple final;
select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip) from test.simple limit 1;
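Unlike AggregateFunction, a SimpleAggregateFunction column stores the current value rather than an opaque intermediate state: rows are inserted and read as plain values, and AggregatingMergeTree re-applies the function (sum, anyLast, ...) whenever parts merge, so no -State/-Merge combinators are needed. A minimal sketch of the contrast (hypothetical table names, expected outputs hedged):

create table t_plain (k UInt64, v SimpleAggregateFunction(sum, Double)) engine = AggregatingMergeTree order by k;
insert into t_plain values (1, 2), (1, 3);
select k, v from t_plain final; -- expected: 1  5, merged without any combinator
create table t_state (k UInt64, v AggregateFunction(sum, Float64)) engine = AggregatingMergeTree order by k;
insert into t_state select 1, sumState(toFloat64(2));
insert into t_state select 1, sumState(toFloat64(3));
select k, sumMerge(v) from t_state group by k; -- expected: 1  5, via -State/-Merge
drop table t_plain;
drop table t_state;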

View File

@ -0,0 +1,16 @@
4
Object
1
1
a
hello
hello
3
Array
-100
200
300
('a','hello','b',[-100,200,300])
[-100,NULL,300]
['a','hello','b',NULL]
[(NULL,NULL,NULL),(NULL,NULL,NULL),(NULL,NULL,NULL),(-100,200,44)]

View File

@ -0,0 +1,16 @@
select jsonLength('{"a": "hello", "b": [-100, 200.0, 300]}');
select jsonType('{"a": "hello", "b": [-100, 200.0, 300]}');
select jsonHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'a');
select jsonHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1);
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 2);
select jsonExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 'a');
select jsonLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
select jsonType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b');
select jsonExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1);
select jsonExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2);
select jsonExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1);
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(String, String, String, Array(Float64))');
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Int32)', 'b');
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(String)');
select jsonExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Tuple(Int16, Float32, UInt8))');

View File

@ -599,3 +599,4 @@
1
1
1
1

View File

@ -79,3 +79,4 @@ select 0 != multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['.*goo.*',
select 5 = multiMatchAnyIndex(materialize('vladizlvav dabe don\'t heart me no more'), ['what', 'is', 'love', 'baby', 'no mo??', 'dont', 'h.rt me']) from system.numbers limit 10;
SELECT multiMatchAny(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']);
SELECT 1 = multiMatchAny('фабрикант', ['f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]']);
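multiMatchAny answers only whether some pattern matched, while multiMatchAnyIndex, pinned to 5 above, reports the 1-based index of a matching pattern. A minimal sketch where exactly one pattern can fire (hypothetical input, expected value hedged):

select multiMatchAnyIndex('no more tears', ['absent', 'no mo??']); -- expected 2: only the second pattern matches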

View File

@ -0,0 +1,9 @@
1
\N
\N
1
\N
\N
1
\N
\N

View File

@ -0,0 +1,11 @@
DROP TABLE IF EXISTS test.nullt;
CREATE TABLE test.nullt (c1 Nullable(UInt32), c2 Nullable(String))ENGINE = Log;
INSERT INTO test.nullt VALUES (1, 'abc'), (2, NULL), (NULL, NULL);
SELECT c2 = ('abc') FROM test.nullt;
SELECT c2 IN ('abc') FROM test.nullt;
SELECT c2 IN ('abc', NULL) FROM test.nullt;
DROP TABLE IF EXISTS test.nullt;
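The \N rows in the reference are three-valued logic at work: = or IN with a NULL operand yields NULL rather than 0, and only a genuine match returns 1. A minimal sketch of the same rule on bare literals (expected values hedged):

SELECT NULL = 'abc';           -- expected \N: comparison with NULL is NULL
SELECT NULL IN ('abc');        -- expected \N
SELECT 'abc' IN ('abc', NULL); -- expected 1: a positive match still wins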

Some files were not shown because too many files have changed in this diff.