mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-18 13:42:02 +00:00
714 lines
23 KiB
C++
714 lines
23 KiB
C++
#include <DB/Core/Field.h>
|
||
|
||
#include <DB/Columns/ColumnString.h>
|
||
#include <DB/Columns/ColumnFixedString.h>
|
||
#include <DB/Columns/ColumnsNumber.h>
|
||
|
||
#include <DB/DataStreams/IProfilingBlockInputStream.h>
|
||
#include <DB/DataStreams/OneBlockInputStream.h>
|
||
|
||
#include <DB/Parsers/ASTExpressionList.h>
|
||
#include <DB/Parsers/ASTFunction.h>
|
||
#include <DB/Parsers/ASTLiteral.h>
|
||
|
||
#include <DB/Interpreters/Set.h>
|
||
#include <DB/Interpreters/ExpressionAnalyzer.h>
|
||
#include <DB/Interpreters/ExpressionActions.h>
|
||
|
||
#include <DB/DataTypes/DataTypeArray.h>
|
||
#include <DB/DataTypes/DataTypesNumberFixed.h>
|
||
#include <DB/DataTypes/DataTypeString.h>
|
||
#include <DB/DataTypes/DataTypeFixedString.h>
|
||
#include <DB/DataTypes/DataTypeDate.h>
|
||
#include <DB/DataTypes/DataTypeDateTime.h>
|
||
|
||
|
||
namespace DB
|
||
{
|
||
|
||
void SetVariants::init(Type type_)
|
||
{
|
||
type = type_;
|
||
|
||
switch (type)
|
||
{
|
||
case Type::EMPTY: break;
|
||
|
||
#define M(NAME) \
|
||
case Type::NAME: NAME.reset(new decltype(NAME)::element_type); break;
|
||
APPLY_FOR_SET_VARIANTS(M)
|
||
#undef M
|
||
|
||
default:
|
||
throw Exception("Unknown Set variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
|
||
}
|
||
}
|
||
|
||
|
||
size_t SetVariants::getTotalRowCount() const
|
||
{
|
||
switch (type)
|
||
{
|
||
case Type::EMPTY: return 0;
|
||
|
||
#define M(NAME) \
|
||
case Type::NAME: return NAME->data.size();
|
||
APPLY_FOR_SET_VARIANTS(M)
|
||
#undef M
|
||
|
||
default:
|
||
throw Exception("Unknown Set variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
|
||
}
|
||
}
|
||
|
||
|
||
size_t SetVariants::getTotalByteCount() const
|
||
{
|
||
switch (type)
|
||
{
|
||
case Type::EMPTY: return 0;
|
||
|
||
#define M(NAME) \
|
||
case Type::NAME: return NAME->data.getBufferSizeInBytes();
|
||
APPLY_FOR_SET_VARIANTS(M)
|
||
#undef M
|
||
|
||
default:
|
||
throw Exception("Unknown Set variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
|
||
}
|
||
}
|
||
|
||
|
||
bool Set::checkSetSizeLimits() const
|
||
{
|
||
if (max_rows && data.getTotalRowCount() > max_rows)
|
||
return false;
|
||
if (max_bytes && data.getTotalByteCount() > max_bytes)
|
||
return false;
|
||
return true;
|
||
}
|
||
|
||
|
||
SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
|
||
{
|
||
size_t keys_size = key_columns.size();
|
||
|
||
bool all_fixed = true;
|
||
size_t keys_bytes = 0;
|
||
key_sizes.resize(keys_size);
|
||
for (size_t j = 0; j < keys_size; ++j)
|
||
{
|
||
if (!key_columns[j]->isFixed())
|
||
{
|
||
all_fixed = false;
|
||
break;
|
||
}
|
||
key_sizes[j] = key_columns[j]->sizeOfField();
|
||
keys_bytes += key_sizes[j];
|
||
}
|
||
|
||
/// Если есть один числовой ключ, который помещается в 64 бита
|
||
if (keys_size == 1 && key_columns[0]->isNumeric())
|
||
{
|
||
size_t size_of_field = key_columns[0]->sizeOfField();
|
||
if (size_of_field == 1)
|
||
return SetVariants::Type::key8;
|
||
if (size_of_field == 2)
|
||
return SetVariants::Type::key16;
|
||
if (size_of_field == 4)
|
||
return SetVariants::Type::key32;
|
||
if (size_of_field == 8)
|
||
return SetVariants::Type::key64;
|
||
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.", ErrorCodes::LOGICAL_ERROR);
|
||
}
|
||
|
||
/// Если ключи помещаются в N бит, будем использовать хэш-таблицу по упакованным в N-бит ключам
|
||
if (all_fixed && keys_bytes <= 16)
|
||
return SetVariants::Type::keys128;
|
||
if (all_fixed && keys_bytes <= 32)
|
||
return SetVariants::Type::keys256;
|
||
|
||
/// Если есть один строковый ключ, то используем хэш-таблицу с ним
|
||
if (keys_size == 1 && (typeid_cast<const ColumnString *>(key_columns[0]) || typeid_cast<const ColumnConstString *>(key_columns[0])))
|
||
return SetVariants::Type::key_string;
|
||
|
||
if (keys_size == 1 && typeid_cast<const ColumnFixedString *>(key_columns[0]))
|
||
return SetVariants::Type::key_fixed_string;
|
||
|
||
/// Иначе будем агрегировать по конкатенации ключей.
|
||
return SetVariants::Type::hashed;
|
||
}
|
||
|
||
|
||
template <typename Method>
|
||
void NO_INLINE Set::insertFromBlockImpl(
|
||
Method & method,
|
||
const ConstColumnPlainPtrs & key_columns,
|
||
size_t rows,
|
||
SetVariants & variants)
|
||
{
|
||
typename Method::State state;
|
||
state.init(key_columns);
|
||
size_t keys_size = key_columns.size();
|
||
|
||
/// Для всех строчек
|
||
for (size_t i = 0; i < rows; ++i)
|
||
{
|
||
/// Строим ключ
|
||
typename Method::Key key = state.getKey(key_columns, keys_size, i, key_sizes);
|
||
|
||
typename Method::Data::iterator it = method.data.find(key);
|
||
bool inserted;
|
||
method.data.emplace(key, it, inserted);
|
||
|
||
if (inserted)
|
||
method.onNewKey(*it, keys_size, i, variants.string_pool);
|
||
}
|
||
}
|
||
|
||
|
||
bool Set::insertFromBlock(const Block & block, bool create_ordered_set)
|
||
{
|
||
Poco::ScopedWriteRWLock lock(rwlock);
|
||
|
||
size_t keys_size = block.columns();
|
||
ConstColumnPlainPtrs key_columns(keys_size);
|
||
data_types.resize(keys_size);
|
||
|
||
/// Константные столбцы справа от IN поддерживается не напрямую. Для этого, они сначала материализуется.
|
||
Columns materialized_columns;
|
||
|
||
/// Запоминаем столбцы, с которыми будем работать
|
||
for (size_t i = 0; i < keys_size; ++i)
|
||
{
|
||
key_columns[i] = block.getByPosition(i).column;
|
||
data_types[i] = block.getByPosition(i).type;
|
||
|
||
if (key_columns[i]->isConst())
|
||
{
|
||
materialized_columns.emplace_back(static_cast<const IColumnConst *>(key_columns[i])->convertToFullColumn());
|
||
key_columns[i] = materialized_columns.back().get();
|
||
}
|
||
}
|
||
|
||
size_t rows = block.rows();
|
||
|
||
/// Какую структуру данных для множества использовать?
|
||
if (empty())
|
||
data.init(data.chooseMethod(key_columns, key_sizes));
|
||
|
||
if (false) {}
|
||
#define M(NAME) \
|
||
else if (data.type == SetVariants::Type::NAME) \
|
||
insertFromBlockImpl(*data.NAME, key_columns, rows, data);
|
||
APPLY_FOR_SET_VARIANTS(M)
|
||
#undef M
|
||
else
|
||
throw Exception("Unknown set variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
||
|
||
if (create_ordered_set)
|
||
for (size_t i = 0; i < rows; ++i)
|
||
ordered_set_elements->push_back((*key_columns[0])[i]); /// ordered_set для индекса работает только если IN одному ключу.
|
||
|
||
if (!checkSetSizeLimits())
|
||
{
|
||
if (overflow_mode == OverflowMode::THROW)
|
||
throw Exception("IN-set size exceeded."
|
||
" Rows: " + toString(data.getTotalRowCount()) +
|
||
", limit: " + toString(max_rows) +
|
||
". Bytes: " + toString(data.getTotalByteCount()) +
|
||
", limit: " + toString(max_bytes) + ".",
|
||
ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
|
||
|
||
if (overflow_mode == OverflowMode::BREAK)
|
||
return false;
|
||
|
||
throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR);
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
|
||
/** Чтобы корректно работали выражения вида 1.0 IN (1).
|
||
* Проверяет совместимость типов, проверяет попадание значений в диапазон допустимых значений типа, делает преобразование типа.
|
||
* Код слегка дурацкий.
|
||
*/
|
||
static Field convertToType(const Field & src, const IDataType & type)
|
||
{
|
||
if (type.isNumeric())
|
||
{
|
||
bool is_uint8 = false;
|
||
bool is_uint16 = false;
|
||
bool is_uint32 = false;
|
||
bool is_uint64 = false;
|
||
bool is_int8 = false;
|
||
bool is_int16 = false;
|
||
bool is_int32 = false;
|
||
bool is_int64 = false;
|
||
bool is_float32 = false;
|
||
bool is_float64 = false;
|
||
bool is_date = false;
|
||
bool is_datetime = false;
|
||
|
||
false
|
||
|| (is_uint8 = typeid_cast<const DataTypeUInt8 * >(&type))
|
||
|| (is_uint16 = typeid_cast<const DataTypeUInt16 * >(&type))
|
||
|| (is_uint32 = typeid_cast<const DataTypeUInt32 * >(&type))
|
||
|| (is_uint64 = typeid_cast<const DataTypeUInt64 * >(&type))
|
||
|| (is_int8 = typeid_cast<const DataTypeInt8 * >(&type))
|
||
|| (is_int16 = typeid_cast<const DataTypeInt16 * >(&type))
|
||
|| (is_int32 = typeid_cast<const DataTypeInt32 * >(&type))
|
||
|| (is_int64 = typeid_cast<const DataTypeInt64 * >(&type))
|
||
|| (is_float32 = typeid_cast<const DataTypeFloat32 * >(&type))
|
||
|| (is_float64 = typeid_cast<const DataTypeFloat64 * >(&type))
|
||
|| (is_date = typeid_cast<const DataTypeDate * >(&type))
|
||
|| (is_datetime = typeid_cast<const DataTypeDateTime * >(&type))
|
||
;
|
||
|
||
if (is_uint8 || is_uint16 || is_uint32 || is_uint64)
|
||
{
|
||
if (src.getType() == Field::Types::Int64)
|
||
throw Exception("Type mismatch in IN section: " + type.getName() + " at left, signed at right");
|
||
|
||
if (src.getType() == Field::Types::Float64)
|
||
throw Exception("Type mismatch in IN section: " + type.getName() + " at left, floating point at right");
|
||
|
||
if (src.getType() == Field::Types::UInt64)
|
||
{
|
||
UInt64 value = src.get<const UInt64 &>();
|
||
if ((is_uint8 && value > std::numeric_limits<uint8_t>::max())
|
||
|| (is_uint16 && value > std::numeric_limits<uint16_t>::max())
|
||
|| (is_uint32 && value > std::numeric_limits<uint32_t>::max()))
|
||
throw Exception("Value (" + toString(value) + ") in IN section is out of range of type " + type.getName() + " at left");
|
||
|
||
return src;
|
||
}
|
||
|
||
throw Exception("Type mismatch in IN section: " + type.getName() + " at left, "
|
||
+ Field::Types::toString(src.getType()) + " at right");
|
||
}
|
||
else if (is_int8 || is_int16 || is_int32 || is_int64)
|
||
{
|
||
if (src.getType() == Field::Types::Float64)
|
||
throw Exception("Type mismatch in IN section: " + type.getName() + " at left, floating point at right");
|
||
|
||
if (src.getType() == Field::Types::UInt64)
|
||
{
|
||
UInt64 value = src.get<const UInt64 &>();
|
||
|
||
if ((is_int8 && value > uint8_t(std::numeric_limits<int8_t>::max()))
|
||
|| (is_int16 && value > uint16_t(std::numeric_limits<int16_t>::max()))
|
||
|| (is_int32 && value > uint32_t(std::numeric_limits<int32_t>::max()))
|
||
|| (is_int64 && value > uint64_t(std::numeric_limits<int64_t>::max())))
|
||
throw Exception("Value (" + toString(value) + ") in IN section is out of range of type " + type.getName() + " at left");
|
||
|
||
return Field(Int64(value));
|
||
}
|
||
|
||
if (src.getType() == Field::Types::Int64)
|
||
{
|
||
Int64 value = src.get<const Int64 &>();
|
||
if ((is_int8 && (value < std::numeric_limits<int8_t>::min() || value > std::numeric_limits<int8_t>::max()))
|
||
|| (is_int16 && (value < std::numeric_limits<int16_t>::min() || value > std::numeric_limits<int16_t>::max()))
|
||
|| (is_int32 && (value < std::numeric_limits<int32_t>::min() || value > std::numeric_limits<int32_t>::max())))
|
||
throw Exception("Value (" + toString(value) + ") in IN section is out of range of type " + type.getName() + " at left");
|
||
|
||
return src;
|
||
}
|
||
|
||
throw Exception("Type mismatch in IN section: " + type.getName() + " at left, "
|
||
+ Field::Types::toString(src.getType()) + " at right");
|
||
}
|
||
else if (is_float32 || is_float64)
|
||
{
|
||
if (src.getType() == Field::Types::UInt64)
|
||
return Field(Float64(src.get<UInt64>()));
|
||
|
||
if (src.getType() == Field::Types::Int64)
|
||
return Field(Float64(src.get<Int64>()));
|
||
|
||
if (src.getType() == Field::Types::Float64)
|
||
return src;
|
||
|
||
throw Exception("Type mismatch in IN section: " + type.getName() + " at left, "
|
||
+ Field::Types::toString(src.getType()) + " at right");
|
||
}
|
||
else if (is_date || is_datetime)
|
||
{
|
||
if (src.getType() == Field::Types::UInt64)
|
||
return src;
|
||
|
||
if (src.getType() == Field::Types::String)
|
||
{
|
||
/// Возможность сравнивать даты и даты-с-временем со строкой.
|
||
const String & str = src.get<const String &>();
|
||
ReadBufferFromString in(str);
|
||
|
||
if (is_date)
|
||
{
|
||
DayNum_t date{};
|
||
readDateText(date, in);
|
||
if (!in.eof())
|
||
throw Exception("String is too long for Date: " + str);
|
||
|
||
return Field(UInt64(date));
|
||
}
|
||
else
|
||
{
|
||
time_t date_time{};
|
||
readDateTimeText(date_time, in);
|
||
if (!in.eof())
|
||
throw Exception("String is too long for DateTime: " + str);
|
||
|
||
return Field(UInt64(date_time));
|
||
}
|
||
}
|
||
|
||
throw Exception("Type mismatch in IN section: " + type.getName() + " at left, "
|
||
+ Field::Types::toString(src.getType()) + " at right");
|
||
}
|
||
}
|
||
else
|
||
{
|
||
if (src.getType() == Field::Types::UInt64
|
||
|| src.getType() == Field::Types::Int64
|
||
|| src.getType() == Field::Types::Float64
|
||
|| src.getType() == Field::Types::Null
|
||
|| (src.getType() == Field::Types::String
|
||
&& !typeid_cast<const DataTypeString *>(&type)
|
||
&& !typeid_cast<const DataTypeFixedString *>(&type))
|
||
|| (src.getType() == Field::Types::Array
|
||
&& !typeid_cast<const DataTypeArray *>(&type)))
|
||
throw Exception("Type mismatch in IN section: " + type.getName() + " at left, "
|
||
+ Field::Types::toString(src.getType()) + " at right");
|
||
}
|
||
|
||
return src;
|
||
}
|
||
|
||
|
||
/** Выполнить константное выражение (для элемента множества в IN). Весьма неоптимально. */
|
||
static Field evaluateConstantExpression(ASTPtr & node, const Context & context)
|
||
{
|
||
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(
|
||
node, context, nullptr, NamesAndTypesList{{ "_dummy", new DataTypeUInt8 }}).getConstActions();
|
||
|
||
/// В блоке должен быть хотя бы один столбец, чтобы у него было известно число строк.
|
||
Block block_with_constants{{ new ColumnConstUInt8(1, 0), new DataTypeUInt8, "_dummy" }};
|
||
|
||
expr_for_constant_folding->execute(block_with_constants);
|
||
|
||
if (!block_with_constants || block_with_constants.rows() == 0)
|
||
throw Exception("Logical error: empty block after evaluation constant expression for IN", ErrorCodes::LOGICAL_ERROR);
|
||
|
||
String name = node->getColumnName();
|
||
|
||
if (!block_with_constants.has(name))
|
||
throw Exception("Element of set in IN is not a constant expression: " + name, ErrorCodes::BAD_ARGUMENTS);
|
||
|
||
const IColumn & result_column = *block_with_constants.getByName(name).column;
|
||
|
||
if (!result_column.isConst())
|
||
throw Exception("Element of set in IN is not a constant expression: " + name, ErrorCodes::BAD_ARGUMENTS);
|
||
|
||
return result_column[0];
|
||
}
|
||
|
||
|
||
static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context)
|
||
{
|
||
if (ASTLiteral * lit = typeid_cast<ASTLiteral *>(node.get()))
|
||
return convertToType(lit->value, type);
|
||
else if (typeid_cast<ASTFunction *>(node.get()))
|
||
return convertToType(evaluateConstantExpression(node, context), type);
|
||
else
|
||
throw Exception("Incorrect element of set. Must be literal or constant expression.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);
|
||
}
|
||
|
||
|
||
void Set::createFromAST(DataTypes & types, ASTPtr node, const Context & context, bool create_ordered_set)
|
||
{
|
||
data_types = types;
|
||
|
||
/// Засунем множество в блок.
|
||
Block block;
|
||
for (size_t i = 0, size = data_types.size(); i < size; ++i)
|
||
{
|
||
ColumnWithNameAndType col;
|
||
col.type = data_types[i];
|
||
col.column = data_types[i]->createColumn();
|
||
col.name = "_" + toString(i);
|
||
|
||
block.insert(col);
|
||
}
|
||
|
||
ASTExpressionList & list = typeid_cast<ASTExpressionList &>(*node);
|
||
for (ASTs::iterator it = list.children.begin(); it != list.children.end(); ++it)
|
||
{
|
||
if (data_types.size() == 1)
|
||
{
|
||
block.getByPosition(0).column->insert(extractValueFromNode(*it, *data_types[0], context));
|
||
}
|
||
else if (ASTFunction * func = typeid_cast<ASTFunction *>(&**it))
|
||
{
|
||
if (func->name != "tuple")
|
||
throw Exception("Incorrect element of set. Must be tuple.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);
|
||
|
||
size_t tuple_size = func->arguments->children.size();
|
||
if (tuple_size != data_types.size())
|
||
throw Exception("Incorrect size of tuple in set.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);
|
||
|
||
for (size_t j = 0; j < tuple_size; ++j)
|
||
{
|
||
block.getByPosition(j).column->insert(extractValueFromNode(func->arguments->children[j], *data_types[j], context));
|
||
}
|
||
}
|
||
else
|
||
throw Exception("Incorrect element of set", ErrorCodes::INCORRECT_ELEMENT_OF_SET);
|
||
}
|
||
|
||
if (create_ordered_set)
|
||
ordered_set_elements = OrderedSetElementsPtr(new OrderedSetElements());
|
||
|
||
insertFromBlock(block, create_ordered_set);
|
||
|
||
if (create_ordered_set)
|
||
std::sort(ordered_set_elements->begin(), ordered_set_elements->end());
|
||
}
|
||
|
||
|
||
void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result, bool negative) const
|
||
{
|
||
ColumnUInt8 * c_res = new ColumnUInt8;
|
||
block.getByPosition(result).column = c_res;
|
||
ColumnUInt8::Container_t & vec_res = c_res->getData();
|
||
vec_res.resize(block.getByPosition(arguments[0]).column->size());
|
||
|
||
Poco::ScopedReadRWLock lock(rwlock);
|
||
|
||
/// Если множество пусто
|
||
if (data_types.empty())
|
||
{
|
||
if (negative)
|
||
memset(&vec_res[0], 1, vec_res.size());
|
||
else
|
||
memset(&vec_res[0], 0, vec_res.size());
|
||
return;
|
||
}
|
||
|
||
DataTypeArray * array_type = typeid_cast<DataTypeArray *>(&*block.getByPosition(arguments[0]).type);
|
||
|
||
if (array_type)
|
||
{
|
||
if (data_types.size() != 1 || arguments.size() != 1)
|
||
throw Exception("Number of columns in section IN doesn't match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
|
||
if (array_type->getNestedType()->getName() != data_types[0]->getName())
|
||
throw Exception(std::string() + "Types in section IN don't match: " + data_types[0]->getName() + " on the right, " + array_type->getNestedType()->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH);
|
||
|
||
IColumn * in_column = &*block.getByPosition(arguments[0]).column;
|
||
|
||
/// Константный столбец слева от IN поддерживается не напрямую. Для этого, он сначала материализуется.
|
||
ColumnPtr materialized_column;
|
||
if (in_column->isConst())
|
||
{
|
||
materialized_column = static_cast<const IColumnConst *>(in_column)->convertToFullColumn();
|
||
in_column = materialized_column.get();
|
||
}
|
||
|
||
if (ColumnArray * col = typeid_cast<ColumnArray *>(in_column))
|
||
executeArray(col, vec_res, negative);
|
||
else
|
||
throw Exception("Unexpected array column type: " + in_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||
}
|
||
else
|
||
{
|
||
if (data_types.size() != arguments.size())
|
||
throw Exception("Number of columns in section IN doesn't match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
|
||
|
||
/// Запоминаем столбцы, с которыми будем работать. Также проверим, что типы данных правильные.
|
||
ConstColumnPlainPtrs key_columns(arguments.size());
|
||
for (size_t i = 0; i < arguments.size(); ++i)
|
||
{
|
||
key_columns[i] = block.getByPosition(arguments[i]).column;
|
||
|
||
if (data_types[i]->getName() != block.getByPosition(arguments[i]).type->getName())
|
||
throw Exception("Types of column " + toString(i + 1) + " in section IN don't match: " + data_types[i]->getName() + " on the right, " + block.getByPosition(arguments[i]).type->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH);
|
||
}
|
||
|
||
/// Константные столбцы слева от IN поддерживается не напрямую. Для этого, они сначала материализуется.
|
||
Columns materialized_columns;
|
||
for (auto & column_ptr : key_columns)
|
||
{
|
||
if (column_ptr->isConst())
|
||
{
|
||
materialized_columns.emplace_back(static_cast<const IColumnConst *>(column_ptr)->convertToFullColumn());
|
||
column_ptr = materialized_columns.back().get();
|
||
}
|
||
}
|
||
|
||
executeOrdinary(key_columns, vec_res, negative);
|
||
}
|
||
}
|
||
|
||
|
||
template <typename Method>
|
||
void NO_INLINE Set::executeImpl(
|
||
Method & method,
|
||
const ConstColumnPlainPtrs & key_columns,
|
||
ColumnUInt8::Container_t & vec_res,
|
||
bool negative,
|
||
size_t rows) const
|
||
{
|
||
typename Method::State state;
|
||
state.init(key_columns);
|
||
size_t keys_size = key_columns.size();
|
||
|
||
/// NOTE Не используется оптимизация для подряд идущих одинаковых значений.
|
||
|
||
/// Для всех строчек
|
||
for (size_t i = 0; i < rows; ++i)
|
||
{
|
||
/// Строим ключ
|
||
typename Method::Key key = state.getKey(key_columns, keys_size, i, key_sizes);
|
||
vec_res[i] = negative ^ (method.data.end() != method.data.find(key));
|
||
}
|
||
}
|
||
|
||
template <typename Method>
|
||
void NO_INLINE Set::executeArrayImpl(
|
||
Method & method,
|
||
const ConstColumnPlainPtrs & key_columns,
|
||
const ColumnArray::Offsets_t & offsets,
|
||
ColumnUInt8::Container_t & vec_res,
|
||
bool negative,
|
||
size_t rows) const
|
||
{
|
||
typename Method::State state;
|
||
state.init(key_columns);
|
||
size_t keys_size = key_columns.size();
|
||
|
||
size_t prev_offset = 0;
|
||
/// Для всех строчек
|
||
for (size_t i = 0; i < rows; ++i)
|
||
{
|
||
UInt8 res = 0;
|
||
/// Для всех элементов
|
||
for (size_t j = prev_offset; j < offsets[i]; ++j)
|
||
{
|
||
/// Строим ключ
|
||
typename Method::Key key = state.getKey(key_columns, keys_size, j, key_sizes);
|
||
res |= negative ^ (method.data.end() != method.data.find(key));
|
||
if (res)
|
||
break;
|
||
}
|
||
vec_res[i] = res;
|
||
prev_offset = offsets[i];
|
||
}
|
||
}
|
||
|
||
|
||
void Set::executeOrdinary(const ConstColumnPlainPtrs & key_columns, ColumnUInt8::Container_t & vec_res, bool negative) const
|
||
{
|
||
size_t rows = key_columns[0]->size();
|
||
|
||
if (false) {}
|
||
#define M(NAME) \
|
||
else if (data.type == SetVariants::Type::NAME) \
|
||
executeImpl(*data.NAME, key_columns, vec_res, negative, rows);
|
||
APPLY_FOR_SET_VARIANTS(M)
|
||
#undef M
|
||
else
|
||
throw Exception("Unknown set variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
||
}
|
||
|
||
void Set::executeArray(const ColumnArray * key_column, ColumnUInt8::Container_t & vec_res, bool negative) const
|
||
{
|
||
size_t rows = key_column->size();
|
||
const ColumnArray::Offsets_t & offsets = key_column->getOffsets();
|
||
const IColumn & nested_column = key_column->getData();
|
||
|
||
if (false) {}
|
||
#define M(NAME) \
|
||
else if (data.type == SetVariants::Type::NAME) \
|
||
executeArrayImpl(*data.NAME, ConstColumnPlainPtrs{&nested_column}, offsets, vec_res, negative, rows);
|
||
APPLY_FOR_SET_VARIANTS(M)
|
||
#undef M
|
||
else
|
||
throw Exception("Unknown set variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
||
}
|
||
|
||
|
||
BoolMask Set::mayBeTrueInRange(const Range & range) const
|
||
{
|
||
if (!ordered_set_elements)
|
||
throw DB::Exception("Ordered set in not created.");
|
||
|
||
if (ordered_set_elements->empty())
|
||
return BoolMask(false, true);
|
||
|
||
const Field & left = range.left;
|
||
const Field & right = range.right;
|
||
|
||
bool can_be_true;
|
||
bool can_be_false = true;
|
||
|
||
/// Если во всем диапазоне одинаковый ключ и он есть в Set, то выбираем блок для in и не выбираем для notIn
|
||
if (range.left_bounded && range.right_bounded && range.right_included && range.left_included && left == right)
|
||
{
|
||
if (std::binary_search(ordered_set_elements->begin(), ordered_set_elements->end(), left))
|
||
{
|
||
can_be_false = false;
|
||
can_be_true = true;
|
||
}
|
||
else
|
||
{
|
||
can_be_true = false;
|
||
can_be_false = true;
|
||
}
|
||
}
|
||
else
|
||
{
|
||
auto left_it = range.left_bounded
|
||
? std::lower_bound(ordered_set_elements->begin(), ordered_set_elements->end(), left)
|
||
: ordered_set_elements->begin();
|
||
|
||
if (range.left_bounded && !range.left_included && left_it != ordered_set_elements->end() && *left_it == left)
|
||
++left_it;
|
||
|
||
/// если весь диапазон, правее in
|
||
if (left_it == ordered_set_elements->end())
|
||
{
|
||
can_be_true = false;
|
||
}
|
||
else
|
||
{
|
||
auto right_it = range.right_bounded
|
||
? std::upper_bound(ordered_set_elements->begin(), ordered_set_elements->end(), right)
|
||
: ordered_set_elements->end();
|
||
|
||
if (range.right_bounded && !range.right_included && right_it != ordered_set_elements->begin() && *(right_it--) == right)
|
||
--right_it;
|
||
|
||
/// весь диапазон, левее in
|
||
if (right_it == ordered_set_elements->begin())
|
||
{
|
||
can_be_true = false;
|
||
}
|
||
else
|
||
{
|
||
--right_it;
|
||
/// в диапазон не попадает ни одного ключа из in
|
||
if (*right_it < *left_it)
|
||
can_be_true = false;
|
||
else
|
||
can_be_true = true;
|
||
}
|
||
}
|
||
}
|
||
|
||
return BoolMask(can_be_true, can_be_false);
|
||
}
|
||
|
||
}
|