mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
reverted the code back to constant args handling
This commit is contained in:
parent
5a742ec0fc
commit
403a47bd1e
@ -113,71 +113,18 @@ public:
|
||||
|
||||
UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); }
|
||||
|
||||
inline UInt64 getValueIndex(StringRef value) override { return reverse_index.getInsertionPoint(value); }
|
||||
|
||||
inline void buildIndexColumn(size_t origin_index_type_size, IColumn& target, const IColumn& origin) override
|
||||
std::optional<UInt64> getOrFindValueIndex(StringRef value) const override
|
||||
{
|
||||
const size_t origin_size = isColumnConst(origin)
|
||||
? 1
|
||||
: origin.size();
|
||||
if (std::optional<UInt64> res = reverse_index.getIndex(value); res)
|
||||
return res;
|
||||
|
||||
auto dispatch_target_col = [this, origin_size, &origin](auto & target_dispatched)
|
||||
{
|
||||
auto dispatch_origin_col = [this, origin_size, &target_dispatched](const auto & origin_dispatched)
|
||||
{
|
||||
for (size_t i = 0; i < origin_size; ++i)
|
||||
{
|
||||
const StringRef elem = origin_dispatched.getDataAt(i);
|
||||
auto& nested = *getNestedColumn();
|
||||
|
||||
target_dispatched.getElement(i) = (elem == EMPTY_STRING_REF)
|
||||
? 0 // NULL value index
|
||||
: reverse_index.getInsertionPoint(elem);
|
||||
}
|
||||
};
|
||||
for (size_t i = 0; i < nested.size(); ++i)
|
||||
if (nested.getDataAt(i) == value)
|
||||
return i;
|
||||
|
||||
#define dispatch(TYPE) case TypeIndex::TYPE: dispatch_origin_col(*typeid_cast<const Column##TYPE *>(&col)); break;
|
||||
|
||||
auto dispatch_origin = [dispatch_origin_col = std::move(dispatch_origin_col)](const auto& col)
|
||||
{
|
||||
switch (col.getDataType())
|
||||
{
|
||||
dispatch(UInt8)
|
||||
dispatch(UInt16)
|
||||
dispatch(UInt32)
|
||||
dispatch(UInt64)
|
||||
dispatch(UInt128)
|
||||
dispatch(Int8)
|
||||
dispatch(Int16)
|
||||
dispatch(Int32)
|
||||
dispatch(Int64)
|
||||
dispatch(Float32)
|
||||
dispatch(Float64)
|
||||
dispatch(String)
|
||||
dispatch(FixedString)
|
||||
// dispatch(Array) cannot be forward-declared -- typeid on incomplete type is prohibited
|
||||
// dispatch(Tuple)
|
||||
// dispatch(Set)
|
||||
// dispatch(Interval)
|
||||
dispatch(Nullable)
|
||||
default: dispatch_origin_col(col); break;
|
||||
}
|
||||
};
|
||||
|
||||
#undef dispatch
|
||||
|
||||
if (isColumnConst(origin)) // special case as dispatch_origin would produce wrong results if invoked ditectly.
|
||||
dispatch_origin(typeid_cast<const ColumnConst *>(&origin)->getDataColumn());
|
||||
else
|
||||
dispatch_origin(origin);
|
||||
};
|
||||
|
||||
switch (origin_index_type_size)
|
||||
{
|
||||
case sizeof(UInt8): dispatch_target_col(*typeid_cast<ColumnUInt8 *>(&target)); break;
|
||||
case sizeof(UInt16): dispatch_target_col(*typeid_cast<ColumnUInt16 *>(&target)); break;
|
||||
case sizeof(UInt32): dispatch_target_col(*typeid_cast<ColumnUInt32 *>(&target)); break;
|
||||
case sizeof(UInt64): dispatch_target_col(*typeid_cast<ColumnUInt64 *>(&target)); break;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -76,7 +76,7 @@ public:
|
||||
*
|
||||
* The reverse index (StringRef => UInt64) is built lazily, so there are two variants:
|
||||
* - On the function call it's present. Therefore we obtain the index in O(1).
|
||||
* - The reverse index is absent. We build the index.
|
||||
* - The reverse index is absent. We search for the index linearly.
|
||||
*
|
||||
* @see DB::ReverseIndex
|
||||
* @see DB::ColumnUnique
|
||||
@ -86,14 +86,7 @@ public:
|
||||
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
|
||||
* index in the dictionary, which can be used to operate with the indices column.
|
||||
*/
|
||||
virtual UInt64 getValueIndex(StringRef value) = 0;
|
||||
|
||||
/**
|
||||
* Same as above, but operates on the columns.
|
||||
* Given a #origin column, writes to another column #target (of type equal to the current column's index type)
|
||||
* with indices corresponding to values in the original column.
|
||||
*/
|
||||
virtual void buildIndexColumn(size_t origin_index_type_size, IColumn& target, const IColumn& origin) = 0;
|
||||
virtual std::optional<UInt64> getOrFindValueIndex(StringRef value) const = 0;
|
||||
|
||||
void insert(const Field &) override
|
||||
{
|
||||
|
@ -68,8 +68,7 @@ template <
|
||||
class Initial,
|
||||
class Result,
|
||||
class ConcreteAction,
|
||||
bool InvokedNotFromLCSpec = true,
|
||||
bool ResColumnIsConst = false>
|
||||
bool InvokedNotFromLCSpec = true>
|
||||
struct ArrayIndexNumImpl
|
||||
{
|
||||
private:
|
||||
@ -137,7 +136,7 @@ private:
|
||||
if (!compare(data[current_offset + j], target, i))
|
||||
continue;
|
||||
}
|
||||
else if (0 != data.compareAt(current_offset + j, ResColumnIsConst ? 0 : i , target, 1))
|
||||
else if (data.compareAt(current_offset + j, /* index */ 0, target, 1))
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -148,7 +147,7 @@ private:
|
||||
if (!compare(data[current_offset + j], target, i))
|
||||
continue;
|
||||
}
|
||||
else if (0 != data.compareAt(current_offset + j, ResColumnIsConst ? 0 : i , target, 1))
|
||||
else if (data.compareAt(current_offset + j, 0, target, 1))
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -734,7 +733,17 @@ private:
|
||||
}
|
||||
|
||||
/**
|
||||
* Catches arguments of type LC(T).
|
||||
* Catches arguments of type LC(T) (left) and ColumnConst (right).
|
||||
*
|
||||
* The perftests
|
||||
* https://clickhouse-test-reports.s3.yandex.net/12550/2d27fa0fa8c198a82bf1fe3625050ccf56695976/integration_tests_(release).html
|
||||
* showed that the amount of action needed to convert the non-constant right argument to the index column
|
||||
* (similar to the left one's) is significantly higher than converting the array itself to an ordinary column.
|
||||
*
|
||||
* So, in terms of performance it's more optimal to fall back to default implementation and catch only constant
|
||||
* right arguments here.
|
||||
*
|
||||
* Tips and tricks tried can be found at https://github.com/ClickHouse/ClickHouse/pull/12550 .
|
||||
*/
|
||||
bool executeLowCardinality(Block & block, const ColumnNumbers & arguments, size_t result) const
|
||||
{
|
||||
@ -750,19 +759,13 @@ private:
|
||||
return false;
|
||||
|
||||
auto col_result = ResultColumnType::create();
|
||||
const IColumn * const col_arg = block.getByPosition(arguments[1]).column.get();
|
||||
|
||||
/**
|
||||
* If the types of #col_arg and #col_lc (or its nested column if Nullable) are
|
||||
* non-integral, everything is ok as equal values would give equal StringRef representations.
|
||||
* But this is not true for different integral types:
|
||||
* consider #col_arg = UInt8 and #col_lc = UInt16.
|
||||
* The right argument StringRef's size would be 2 (and the left one's -- 1).
|
||||
*
|
||||
* So, for integral types, it's not enough to simply call getDataAt on both arguments.
|
||||
* The left argument (the value whose index is being searched in the indices column) must be casted
|
||||
* to the right argument's side to ensure the StringRefs' equality.
|
||||
*/
|
||||
const ColumnConst * const col_arg = checkAndGetColumnConst<ColumnConst>(
|
||||
block.getByPosition(arguments[1]).column.get());
|
||||
|
||||
if (!col_arg)
|
||||
return false;
|
||||
|
||||
const IColumnUnique& col_lc_dict = col_lc->getDictionary();
|
||||
|
||||
const bool different_inner_types = col_lc_dict.isNullable()
|
||||
@ -784,25 +787,32 @@ private:
|
||||
? castColumn(block.getByPosition(arguments[1]), target_type_ptr)
|
||||
: col_arg->getPtr();
|
||||
|
||||
const size_t col_arg_is_const = isColumnConst(*col_arg);
|
||||
|
||||
const IColumn & lc_indices = col_lc->getIndexes();
|
||||
const MutableColumnPtr col_arg_indices = col_lc->buildIndexColumn(*col_arg_cloned.get());
|
||||
const ColumnArray::Offsets& lc_offsets = col_array->getOffsets();
|
||||
PaddedPODArray<ResultType> & res_data = col_result->getData();
|
||||
const auto [null_map_data, null_map_item] = getNullMaps(block, arguments);
|
||||
|
||||
if (col_arg_is_const)
|
||||
ArrayIndexNumImpl<UInt64, UInt64, ConcreteAction, false /* Invoking from LC spec */, true /* const column */>::vector(
|
||||
lc_indices, /* where the value will be searched */
|
||||
lc_offsets,
|
||||
*col_arg_indices.get(), /* target value to search */
|
||||
res_data,
|
||||
const StringRef elem = col_arg_cloned->getDataAt(0);
|
||||
|
||||
UInt64 index {0};
|
||||
|
||||
if (elem != EMPTY_STRING_REF)
|
||||
{
|
||||
if (std::optional<UInt64> maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index)
|
||||
index = *maybe_index;
|
||||
else
|
||||
{
|
||||
col_result->getData()[0] = 0;
|
||||
block.getByPosition(result).column = std::move(col_result);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
ArrayIndexNumImpl<UInt64, UInt64, ConcreteAction,
|
||||
false /* Invoking from LC spec */>::vector(
|
||||
col_lc->getIndexes(), /* where the value will be searched */
|
||||
col_array->getOffsets(),
|
||||
index, /* target value to search */
|
||||
col_result->getData(),
|
||||
null_map_data,
|
||||
null_map_item);
|
||||
else
|
||||
ArrayIndexNumImpl<UInt64, UInt64, ConcreteAction, false>::vector(
|
||||
lc_indices, lc_offsets, *col_arg_indices.get(), res_data, null_map_data, null_map_item);
|
||||
|
||||
block.getByPosition(result).column = std::move(col_result);
|
||||
return true;
|
||||
|
Loading…
Reference in New Issue
Block a user