Remove strange code (#40195)

* Remove strange code

* Even more code removal

* Fix style

* Remove even more code

* Simplify code by making it slower

* Attempt to do something

* Attempt to do something

* Well do something with this horrible trash

* Add a test
This commit is contained in:
Alexey Milovidov 2022-09-12 17:29:23 +03:00 committed by GitHub
parent 49e0b1316d
commit 2aedd41023
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 56 additions and 69 deletions

View File

@ -61,11 +61,6 @@ struct StringRef
constexpr explicit operator std::string_view() const { return std::string_view(data, size); }
};
/// Here constexpr doesn't implicate inline, see https://www.viva64.com/en/w/v1043/
/// nullptr can't be used because the StringRef values are used in SipHash's pointer arithmetic
/// and the UBSan thinks that something like nullptr + 8 is UB.
constexpr const inline char empty_string_ref_addr{};
constexpr const inline StringRef EMPTY_STRING_REF{&empty_string_ref_addr, 0};
using StringRefs = std::vector<StringRef>;

View File

@ -24,6 +24,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_COLUMN;
extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT;
extern const int NOT_IMPLEMENTED;
}
@ -40,6 +41,14 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
throw Exception{"ColumnNullable cannot have constant null map", ErrorCodes::ILLEGAL_COLUMN};
}
StringRef ColumnNullable::getDataAt(size_t n) const
{
if (!isNullAt(n))
return getNestedColumn().getDataAt(n);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDataAt is not supported for {} in case if value is NULL", getName());
}
void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
{
const auto & arr = getNullMapData();

View File

@ -59,19 +59,7 @@ public:
bool getBool(size_t n) const override { return isNullAt(n) ? false : nested_column->getBool(n); }
UInt64 get64(size_t n) const override { return nested_column->get64(n); }
bool isDefaultAt(size_t n) const override { return isNullAt(n); }
/**
* If isNullAt(n) returns false, returns the nested column's getDataAt(n), otherwise returns a special value
* EMPTY_STRING_REF indicating that data is not present.
*/
StringRef getDataAt(size_t n) const override
{
if (isNullAt(n))
return EMPTY_STRING_REF;
return getNestedColumn().getDataAt(n);
}
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;

View File

@ -136,15 +136,16 @@ public:
UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); }
/// This is strange. Please remove this method as soon as possible.
std::optional<UInt64> getOrFindValueIndex(StringRef value) const override
{
if (std::optional<UInt64> res = reverse_index.getIndex(value); res)
return res;
auto& nested = *getNestedColumn();
const IColumn & nested = *getNestedColumn();
for (size_t i = 0; i < nested.size(); ++i)
if (nested.getDataAt(i) == value)
if (!nested.isNullAt(i) && nested.getDataAt(i) == value)
return i;
return {};

View File

@ -65,30 +65,12 @@ public:
virtual size_t uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos) = 0;
/// Returns dictionary hash which is sipHash is applied to each row of nested column.
/// Returns dictionary hash which is SipHash is applied to each row of nested column.
virtual UInt128 getHash() const = 0;
const char * getFamilyName() const override { return "ColumnUnique"; }
TypeIndex getDataType() const override { return getNestedColumn()->getDataType(); }
/**
* Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its
* index in the DB::ColumnUnique::reverse_index hashtable.
*
* The reverse index (StringRef => UInt64) is built lazily, so there are two variants:
* - On the function call it's present. Therefore we obtain the index in O(1).
* - The reverse index is absent. We search for the index linearly.
*
* @see DB::ReverseIndex
* @see DB::ColumnUnique
*
* The most common example uses https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality/ columns.
* Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
* index in the dictionary, which can be used to operate with the indices column.
*/
virtual std::optional<UInt64> getOrFindValueIndex(StringRef value) const = 0;
void insert(const Field &) override
{
throw Exception("Method insert is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
@ -190,6 +172,23 @@ public:
{
throw Exception("Method hasEqualValues is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
}
/** Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its
* index in the DB::ColumnUnique::reverse_index hashtable.
*
* The reverse index (StringRef => UInt64) is built lazily, so there are two variants:
* - On the function call it's present. Therefore we obtain the index in O(1).
* - The reverse index is absent. We search for the index linearly.
*
* @see DB::ReverseIndex
* @see DB::ColumnUnique
*
* The most common example uses https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality/ columns.
* Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
* index in the dictionary, which can be used to operate with the indices column.
*/
virtual std::optional<UInt64> getOrFindValueIndex(StringRef value) const = 0;
};
using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr;

View File

@ -740,42 +740,34 @@ private:
const auto [null_map_data, null_map_item] = getNullMaps(arguments);
const IColumn& col_arg = *arguments[1].column.get();
if (const ColumnConst * const col_arg_const = checkAndGetColumn<ColumnConst>(col_arg))
if (const ColumnConst * col_arg_const = checkAndGetColumn<ColumnConst>(*arguments[1].column))
{
const IColumnUnique& col_lc_dict = col_lc->getDictionary();
const bool different_inner_types = col_lc_dict.isNullable()
? !col_arg_const->structureEquals(*col_lc_dict.getNestedColumn().get())
: true; // Can't compare so ignore this check
const bool use_cloned_arg = col_arg_const->isNumeric()
// outer types do not match
&& !col_arg_const->structureEquals(col_lc_dict)
// inner types do not match (like A and Nullable(B) or A and Const(B));
&& different_inner_types;
const IColumnUnique & col_lc_dict = col_lc->getDictionary();
const DataTypeArray * const array_type = checkAndGetDataType<DataTypeArray>(arguments[0].type.get());
const DataTypePtr target_type_ptr = recursiveRemoveLowCardinality(array_type->getNestedType());
const ColumnPtr col_arg_cloned = use_cloned_arg
? castColumn(arguments[1], target_type_ptr)
: col_arg_const->getPtr();
ColumnPtr col_arg_cloned = castColumn(
{col_arg_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}, target_type_ptr);
const StringRef elem = col_arg_cloned->getDataAt(0);
ResultColumnPtr col_result = ResultColumnType::create();
UInt64 index = 0;
if (elem != EMPTY_STRING_REF)
if (!col_arg_cloned->isNullAt(0))
{
if (col_arg_cloned->isNullable())
col_arg_cloned = checkAndGetColumn<ColumnNullable>(*col_arg_cloned)->getNestedColumnPtr();
StringRef elem = col_arg_cloned->getDataAt(0);
if (std::optional<UInt64> maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index)
{
index = *maybe_index;
}
else
{
const size_t offsets_size = col_array->getOffsets().size();
auto& data = col_result->getData();
auto & data = col_result->getData();
data.resize_fill(offsets_size);
@ -786,7 +778,7 @@ private:
Impl::Main<ConcreteAction, true>::vector(
col_lc->getIndexes(),
col_array->getOffsets(),
index,
index, /** Assuming LowCardinality has index of NULL always as zero. */
col_result->getData(),
null_map_data,
null_map_item);
@ -800,9 +792,9 @@ private:
const NullMap * const null_map_left_casted = &left_nullable.getNullMapColumn().getData();
const IColumn& left_ptr = left_nullable.getNestedColumn();
const IColumn & left_ptr = left_nullable.getNestedColumn();
const ColumnPtr right_casted = col_arg.convertToFullColumnIfLowCardinality();
const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality();
const ColumnNullable * const right_nullable = checkAndGetColumn<ColumnNullable>(right_casted.get());
const NullMap * const null_map_right_casted = right_nullable
@ -825,17 +817,17 @@ private:
}
else // LowCardinality(T) and U, T not Nullable
{
if (col_arg.isNullable())
if (arguments[1].column->isNullable())
return nullptr;
if (const auto* const arg_lc = checkAndGetColumn<ColumnLowCardinality>(&col_arg);
if (const auto* const arg_lc = checkAndGetColumn<ColumnLowCardinality>(arguments[1].column.get());
arg_lc && arg_lc->isNullable())
return nullptr;
// LowCardinality(T) and U (possibly LowCardinality(V))
const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality();
const ColumnPtr right_casted = col_arg.convertToFullColumnIfLowCardinality();
const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality();
ExecutionData data =
{

View File

@ -17,8 +17,11 @@ RawBLOBRowOutputFormat::RawBLOBRowOutputFormat(
void RawBLOBRowOutputFormat::writeField(const IColumn & column, const ISerialization &, size_t row_num)
{
std::string_view value = column.getDataAt(row_num).toView();
out.write(value.data(), value.size());
if (!column.isNullAt(row_num))
{
auto value = column.getDataAt(row_num);
out.write(value.data, value.size);
}
}
@ -35,4 +38,3 @@ void registerOutputFormatRawBLOB(FormatFactory & factory)
}
}

View File

@ -0,0 +1 @@
SELECT contingency(1, [1, NULL]); -- { serverError 48 }