reverted the code back to constant args handling

myrrc 2020-08-10 13:34:19 +03:00
parent 5a742ec0fc
commit 403a47bd1e
3 changed files with 55 additions and 105 deletions

View File

@@ -113,71 +113,18 @@ public:
UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); }
inline UInt64 getValueIndex(StringRef value) override { return reverse_index.getInsertionPoint(value); }
inline void buildIndexColumn(size_t origin_index_type_size, IColumn& target, const IColumn& origin) override
std::optional<UInt64> getOrFindValueIndex(StringRef value) const override
{
const size_t origin_size = isColumnConst(origin)
? 1
: origin.size();
if (std::optional<UInt64> res = reverse_index.getIndex(value); res)
return res;
auto dispatch_target_col = [this, origin_size, &origin](auto & target_dispatched)
{
auto dispatch_origin_col = [this, origin_size, &target_dispatched](const auto & origin_dispatched)
{
for (size_t i = 0; i < origin_size; ++i)
{
const StringRef elem = origin_dispatched.getDataAt(i);
auto& nested = *getNestedColumn();
target_dispatched.getElement(i) = (elem == EMPTY_STRING_REF)
? 0 // NULL value index
: reverse_index.getInsertionPoint(elem);
}
};
for (size_t i = 0; i < nested.size(); ++i)
if (nested.getDataAt(i) == value)
return i;
#define dispatch(TYPE) case TypeIndex::TYPE: dispatch_origin_col(*typeid_cast<const Column##TYPE *>(&col)); break;
auto dispatch_origin = [dispatch_origin_col = std::move(dispatch_origin_col)](const auto& col)
{
switch (col.getDataType())
{
dispatch(UInt8)
dispatch(UInt16)
dispatch(UInt32)
dispatch(UInt64)
dispatch(UInt128)
dispatch(Int8)
dispatch(Int16)
dispatch(Int32)
dispatch(Int64)
dispatch(Float32)
dispatch(Float64)
dispatch(String)
dispatch(FixedString)
// dispatch(Array) cannot be forward-declared -- typeid on incomplete type is prohibited
// dispatch(Tuple)
// dispatch(Set)
// dispatch(Interval)
dispatch(Nullable)
default: dispatch_origin_col(col); break;
}
};
#undef dispatch
if (isColumnConst(origin)) // special case as dispatch_origin would produce wrong results if invoked directly.
dispatch_origin(typeid_cast<const ColumnConst *>(&origin)->getDataColumn());
else
dispatch_origin(origin);
};
switch (origin_index_type_size)
{
case sizeof(UInt8): dispatch_target_col(*typeid_cast<ColumnUInt8 *>(&target)); break;
case sizeof(UInt16): dispatch_target_col(*typeid_cast<ColumnUInt16 *>(&target)); break;
case sizeof(UInt32): dispatch_target_col(*typeid_cast<ColumnUInt32 *>(&target)); break;
case sizeof(UInt64): dispatch_target_col(*typeid_cast<ColumnUInt64 *>(&target)); break;
}
return {};
}
private:

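Read with the removed buildIndexColumn lines filtered out, the method added in this hunk amounts to roughly the following (a sketch assuming the surrounding ColumnUnique context, i.e. reverse_index and getNestedColumn() as shown above):

std::optional<UInt64> getOrFindValueIndex(StringRef value) const override
{
    // Fast path: the lazily built reverse index already knows the value.
    if (std::optional<UInt64> res = reverse_index.getIndex(value); res)
        return res;

    // Slow path: the reverse index is absent, so scan the dictionary linearly.
    auto & nested = *getNestedColumn();

    for (size_t i = 0; i < nested.size(); ++i)
        if (nested.getDataAt(i) == value)
            return i;

    return {};
}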
View File

@@ -76,7 +76,7 @@ public:
*
* The reverse index (StringRef => UInt64) is built lazily, so there are two variants:
* - On the function call it's present. Therefore we obtain the index in O(1).
* - The reverse index is absent. We build the index.
* - The reverse index is absent. We search for the index linearly.
*
* @see DB::ReverseIndex
* @see DB::ColumnUnique
@@ -86,14 +86,7 @@ public:
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
* index in the dictionary, which can be used to operate with the indices column.
*/
virtual UInt64 getValueIndex(StringRef value) = 0;
/**
* Same as above, but operates on the columns.
* Given a #origin column, writes to another column #target (of type equal to the current column's index type)
* with indices corresponding to values in the original column.
*/
virtual void buildIndexColumn(size_t origin_index_type_size, IColumn& target, const IColumn& origin) = 0;
virtual std::optional<UInt64> getOrFindValueIndex(StringRef value) const = 0;
void insert(const Field &) override
{

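To make the two cases described in the comment above concrete, here is a small self-contained model (illustrative types only, not ClickHouse's): a dictionary whose reverse index may or may not have been built yet, looked up in O(1) when it exists and linearly otherwise.

#include <cstdint>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

// Toy stand-in for a unique dictionary column; values[0] plays the role of the
// NULL value, and reverse_index is built lazily, so it may be absent.
struct ToyDictionary
{
    std::vector<std::string> values;
    std::optional<std::unordered_map<std::string, uint64_t>> reverse_index;

    // Mirrors the getOrFindValueIndex contract: an empty result means "not in the dictionary".
    std::optional<uint64_t> getOrFindValueIndex(const std::string & value) const
    {
        // Case 1: the reverse index is present, so the lookup is O(1).
        if (reverse_index)
            if (auto it = reverse_index->find(value); it != reverse_index->end())
                return it->second;

        // Case 2: the reverse index is absent, so search for the index linearly.
        for (uint64_t i = 0; i < values.size(); ++i)
            if (values[i] == value)
                return i;

        return {};
    }
};

A caller can then treat an empty optional as "the value cannot occur in the LowCardinality column at all", which is what the array-index specialization further below relies on.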
View File

@@ -68,8 +68,7 @@ template <
class Initial,
class Result,
class ConcreteAction,
bool InvokedNotFromLCSpec = true,
bool ResColumnIsConst = false>
bool InvokedNotFromLCSpec = true>
struct ArrayIndexNumImpl
{
private:
@@ -137,7 +136,7 @@ private:
if (!compare(data[current_offset + j], target, i))
continue;
}
else if (0 != data.compareAt(current_offset + j, ResColumnIsConst ? 0 : i , target, 1))
else if (data.compareAt(current_offset + j, /* index */ 0, target, 1))
continue;
}
}
@@ -148,7 +147,7 @@ private:
if (!compare(data[current_offset + j], target, i))
continue;
}
else if (0 != data.compareAt(current_offset + j, ResColumnIsConst ? 0 : i , target, 1))
else if (data.compareAt(current_offset + j, 0, target, 1))
continue;
}
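For orientation, a self-contained model of the search that the LowCardinality specialization further down ends up running (illustrative code, not the real ArrayIndexNumImpl): the dictionary indices of all arrays are stored flat, offsets delimit the individual arrays, and a single target index is looked for in each array, which is why the compareAt calls in the two hunks above can read row 0 of the target.

#include <cstdint>
#include <vector>

// For every array delimited by `offsets` over the flat `indices`, report whether
// `target_index` occurs in it (the "has" flavour of ConcreteAction).
std::vector<uint8_t> hasIndex(
    const std::vector<uint64_t> & indices,   // flat dictionary indices of all arrays
    const std::vector<uint64_t> & offsets,   // offsets[i] is the end of the i-th array
    uint64_t target_index)                   // single index derived from the constant argument
{
    std::vector<uint8_t> result(offsets.size(), 0);
    uint64_t current_offset = 0;

    for (size_t i = 0; i < offsets.size(); ++i)
    {
        for (uint64_t j = current_offset; j < offsets[i]; ++j)
            if (indices[j] == target_index)
            {
                result[i] = 1;
                break;
            }

        current_offset = offsets[i];
    }

    return result;
}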
@@ -734,7 +733,17 @@ private:
}
/**
* Catches arguments of type LC(T).
* Catches arguments of type LC(T) (left) and ColumnConst (right).
*
* The perftests
* https://clickhouse-test-reports.s3.yandex.net/12550/2d27fa0fa8c198a82bf1fe3625050ccf56695976/integration_tests_(release).html
* showed that converting the non-constant right argument to an index column (similar to the left one's)
* takes significantly more work than converting the array itself to an ordinary column.
*
* So, performance-wise, it is better to fall back to the default implementation and handle only constant
* right arguments here.
*
* The approaches that were tried can be found at https://github.com/ClickHouse/ClickHouse/pull/12550 .
*/
bool executeLowCardinality(Block & block, const ColumnNumbers & arguments, size_t result) const
{
@@ -750,20 +759,14 @@ private:
return false;
auto col_result = ResultColumnType::create();
const IColumn * const col_arg = block.getByPosition(arguments[1]).column.get();
/**
* If the types of #col_arg and #col_lc (or its nested column if Nullable) are
* non-integral, everything is ok as equal values would give equal StringRef representations.
* But this is not true for different integral types:
* consider #col_arg = UInt8 and #col_lc = UInt16.
* The right argument StringRef's size would be 2 (and the left one's -- 1).
*
* So, for integral types, it's not enough to simply call getDataAt on both arguments.
* The left argument (the value whose index is being searched in the indices column) must be cast
* to the right argument's type so that equal values produce equal StringRefs.
*/
const IColumnUnique & col_lc_dict = col_lc->getDictionary();
const ColumnConst * const col_arg = checkAndGetColumnConst<ColumnConst>(
block.getByPosition(arguments[1]).column.get());
if (!col_arg)
return false;
const IColumnUnique& col_lc_dict = col_lc->getDictionary();
const bool different_inner_types = col_lc_dict.isNullable()
? !col_arg->structureEquals(*col_lc_dict.getNestedColumn().get())
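A tiny standalone illustration of the problem the removed comment above describes (purely illustrative values): the same number stored as UInt8 and as UInt16 occupies byte sequences of different lengths, so a raw byte comparison, which is what a StringRef comparison boils down to, can never match without a cast.

#include <cstdint>
#include <cstring>
#include <iostream>

int main()
{
    uint8_t arg = 5;   // the searched value stored as UInt8
    uint16_t dict = 5; // the dictionary value stored as UInt16

    // Sizes differ (1 vs 2), so the byte-wise comparison below cannot succeed.
    std::cout << sizeof(arg) << " vs " << sizeof(dict) << '\n';

    const bool equal_as_bytes =
        sizeof(arg) == sizeof(dict) && 0 == std::memcmp(&arg, &dict, sizeof(arg));
    std::cout << (equal_as_bytes ? "match" : "no match") << '\n'; // prints "no match"
}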
@@ -784,25 +787,32 @@ private:
? castColumn(block.getByPosition(arguments[1]), target_type_ptr)
: col_arg->getPtr();
const size_t col_arg_is_const = isColumnConst(*col_arg);
const IColumn & lc_indices = col_lc->getIndexes();
const MutableColumnPtr col_arg_indices = col_lc->buildIndexColumn(*col_arg_cloned.get());
const ColumnArray::Offsets& lc_offsets = col_array->getOffsets();
PaddedPODArray<ResultType> & res_data = col_result->getData();
const auto [null_map_data, null_map_item] = getNullMaps(block, arguments);
if (col_arg_is_const)
ArrayIndexNumImpl<UInt64, UInt64, ConcreteAction, false /* Invoking from LC spec */, true /* const column */>::vector(
lc_indices, /* where the value will be searched */
lc_offsets,
*col_arg_indices.get(), /* target value to search */
res_data,
const StringRef elem = col_arg_cloned->getDataAt(0);
UInt64 index {0};
if (elem != EMPTY_STRING_REF)
{
if (std::optional<UInt64> maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index)
index = *maybe_index;
else
{
col_result->getData()[0] = 0;
block.getByPosition(result).column = std::move(col_result);
return true;
}
}
ArrayIndexNumImpl<UInt64, UInt64, ConcreteAction,
false /* Invoking from LC spec */>::vector(
col_lc->getIndexes(), /* where the value will be searched */
col_array->getOffsets(),
index, /* target value to search */
col_result->getData(),
null_map_data,
null_map_item);
else
ArrayIndexNumImpl<UInt64, UInt64, ConcreteAction, false>::vector(
lc_indices, lc_offsets, *col_arg_indices.get(), res_data, null_map_data, null_map_item);
block.getByPosition(result).column = std::move(col_result);
return true;
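Filtered down to the added lines, the constant-argument path of executeLowCardinality introduced by the last two hunks reads roughly as follows (a sketch, not a verbatim copy; the checks on col_lc and col_array, the Nullable handling and the result-column sizing are as in the hunks above):

// Only a constant right argument is handled here; for anything else, returning
// false makes the caller fall back to the default (non-LowCardinality) implementation.
const ColumnConst * const col_arg = checkAndGetColumnConst<ColumnConst>(
    block.getByPosition(arguments[1]).column.get());
if (!col_arg)
    return false;

const IColumnUnique & col_lc_dict = col_lc->getDictionary();

// If the inner types differ (e.g. a UInt8 argument vs a UInt16 dictionary), cast the
// argument to the dictionary's type so that equal values produce equal StringRefs.
const ColumnPtr col_arg_cloned = different_inner_types
    ? castColumn(block.getByPosition(arguments[1]), target_type_ptr)
    : col_arg->getPtr();

const StringRef elem = col_arg_cloned->getDataAt(0);

UInt64 index = 0; // 0 is the NULL value's index in the dictionary
if (elem != EMPTY_STRING_REF)
{
    if (std::optional<UInt64> maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index)
        index = *maybe_index;
    else
    {
        // The value does not occur in the dictionary at all; short-circuit as in the hunk above.
        col_result->getData()[0] = 0;
        block.getByPosition(result).column = std::move(col_result);
        return true;
    }
}

ArrayIndexNumImpl<UInt64, UInt64, ConcreteAction, false /* Invoking from LC spec */>::vector(
    col_lc->getIndexes(),     // where the value will be searched
    col_array->getOffsets(),
    index,                    // target dictionary index to search for
    col_result->getData(),
    null_map_data,
    null_map_item);

block.getByPosition(result).column = std::move(col_result);
return true;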