mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge pull request #64098 from ClickHouse/speed-up-index-set-a-little
Speed up Set index a little
This commit is contained in:
commit
deb5b47829
@ -1296,8 +1296,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
|
||||
size_t last_index_mark = 0;
|
||||
|
||||
PostingsCacheForStore cache_in_store;
|
||||
|
||||
if (dynamic_cast<const MergeTreeIndexFullText *>(&*index_helper) != nullptr)
|
||||
if (dynamic_cast<const MergeTreeIndexFullText *>(index_helper.get()))
|
||||
cache_in_store.store = GinIndexStoreFactory::instance().get(index_helper->getFileName(), part->getDataPartStoragePtr());
|
||||
|
||||
for (size_t i = 0; i < ranges.size(); ++i)
|
||||
@ -1315,12 +1314,12 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
|
||||
auto ann_condition = std::dynamic_pointer_cast<IMergeTreeIndexConditionApproximateNearestNeighbor>(condition);
|
||||
if (ann_condition != nullptr)
|
||||
{
|
||||
// vector of indexes of useful ranges
|
||||
/// An array of indices of useful ranges.
|
||||
auto result = ann_condition->getUsefulRanges(granule);
|
||||
|
||||
for (auto range : result)
|
||||
{
|
||||
// range for corresponding index
|
||||
/// The range for the corresponding index.
|
||||
MarkRange data_range(
|
||||
std::max(ranges[i].begin, index_mark * index_granularity + range),
|
||||
std::min(ranges[i].end, index_mark * index_granularity + range + 1));
|
||||
@ -1344,8 +1343,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
|
||||
continue;
|
||||
|
||||
MarkRange data_range(
|
||||
std::max(ranges[i].begin, index_mark * index_granularity),
|
||||
std::min(ranges[i].end, (index_mark + 1) * index_granularity));
|
||||
std::max(ranges[i].begin, index_mark * index_granularity),
|
||||
std::min(ranges[i].end, (index_mark + 1) * index_granularity));
|
||||
|
||||
if (res.empty() || data_range.begin - res.back().end > min_marks_for_seek)
|
||||
res.push_back(data_range);
|
||||
|
@ -35,8 +35,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet(
|
||||
size_t max_rows_)
|
||||
: index_name(index_name_)
|
||||
, max_rows(max_rows_)
|
||||
, index_sample_block(index_sample_block_)
|
||||
, block(index_sample_block)
|
||||
, block(index_sample_block_.cloneEmpty())
|
||||
{
|
||||
}
|
||||
|
||||
@ -47,8 +46,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet(
|
||||
MutableColumns && mutable_columns_)
|
||||
: index_name(index_name_)
|
||||
, max_rows(max_rows_)
|
||||
, index_sample_block(index_sample_block_)
|
||||
, block(index_sample_block.cloneWithColumns(std::move(mutable_columns_)))
|
||||
, block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_)))
|
||||
{
|
||||
}
|
||||
|
||||
@ -67,10 +65,11 @@ void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const
|
||||
}
|
||||
|
||||
size_serialization->serializeBinary(size(), ostr, {});
|
||||
size_t num_columns = block.columns();
|
||||
|
||||
for (size_t i = 0; i < index_sample_block.columns(); ++i)
|
||||
for (size_t i = 0; i < num_columns; ++i)
|
||||
{
|
||||
const auto & type = index_sample_block.getByPosition(i).type;
|
||||
const auto & type = block.getByPosition(i).type;
|
||||
|
||||
ISerialization::SerializeBinaryBulkSettings settings;
|
||||
settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; };
|
||||
@ -92,8 +91,6 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd
|
||||
if (version != 1)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version);
|
||||
|
||||
block.clear();
|
||||
|
||||
Field field_rows;
|
||||
const auto & size_type = DataTypePtr(std::make_shared<DataTypeUInt64>());
|
||||
size_type->getDefaultSerialization()->deserializeBinary(field_rows, istr, {});
|
||||
@ -102,24 +99,22 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd
|
||||
if (rows_to_read == 0)
|
||||
return;
|
||||
|
||||
for (size_t i = 0; i < index_sample_block.columns(); ++i)
|
||||
size_t num_columns = block.columns();
|
||||
|
||||
ISerialization::DeserializeBinaryBulkSettings settings;
|
||||
settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; };
|
||||
settings.position_independent_encoding = false;
|
||||
|
||||
for (size_t i = 0; i < num_columns; ++i)
|
||||
{
|
||||
const auto & column = index_sample_block.getByPosition(i);
|
||||
const auto & type = column.type;
|
||||
ColumnPtr new_column = type->createColumn();
|
||||
|
||||
|
||||
ISerialization::DeserializeBinaryBulkSettings settings;
|
||||
settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; };
|
||||
settings.position_independent_encoding = false;
|
||||
auto & elem = block.getByPosition(i);
|
||||
elem.column = elem.column->cloneEmpty();
|
||||
|
||||
ISerialization::DeserializeBinaryBulkStatePtr state;
|
||||
auto serialization = type->getDefaultSerialization();
|
||||
auto serialization = elem.type->getDefaultSerialization();
|
||||
|
||||
serialization->deserializeBinaryBulkStatePrefix(settings, state);
|
||||
serialization->deserializeBinaryBulkWithMultipleStreams(new_column, rows_to_read, settings, state, nullptr);
|
||||
|
||||
block.insert(ColumnWithTypeAndName(new_column, type, column.name));
|
||||
serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -272,6 +267,8 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
|
||||
|
||||
filter_actions_dag->removeUnusedActions();
|
||||
actions = std::make_shared<ExpressionActions>(filter_actions_dag);
|
||||
|
||||
actions_output_column_name = filter_actions_dag->getOutputs().at(0)->result_name;
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const
|
||||
@ -284,42 +281,19 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
|
||||
if (isUseless())
|
||||
return true;
|
||||
|
||||
auto granule = std::dynamic_pointer_cast<MergeTreeIndexGranuleSet>(idx_granule);
|
||||
if (!granule)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Set index condition got a granule with the wrong type");
|
||||
const MergeTreeIndexGranuleSet & granule = assert_cast<const MergeTreeIndexGranuleSet &>(*idx_granule);
|
||||
|
||||
if (isUseless() || granule->empty() || (max_rows != 0 && granule->size() > max_rows))
|
||||
size_t size = granule.size();
|
||||
if (size == 0 || (max_rows != 0 && size > max_rows))
|
||||
return true;
|
||||
|
||||
Block result = granule->block;
|
||||
Block result = granule.block;
|
||||
actions->execute(result);
|
||||
|
||||
const auto & filter_node_name = actions->getActionsDAG().getOutputs().at(0)->result_name;
|
||||
auto column = result.getByName(filter_node_name).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality();
|
||||
const auto & column = result.getByName(actions_output_column_name).column;
|
||||
|
||||
if (column->onlyNull())
|
||||
return false;
|
||||
|
||||
const auto * col_uint8 = typeid_cast<const ColumnUInt8 *>(column.get());
|
||||
|
||||
const NullMap * null_map = nullptr;
|
||||
|
||||
if (const auto * col_nullable = checkAndGetColumn<ColumnNullable>(&*column))
|
||||
{
|
||||
col_uint8 = typeid_cast<const ColumnUInt8 *>(&col_nullable->getNestedColumn());
|
||||
null_map = &col_nullable->getNullMapData();
|
||||
}
|
||||
|
||||
if (!col_uint8)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"ColumnUInt8 expected as Set index condition result");
|
||||
|
||||
const auto & condition = col_uint8->getData();
|
||||
size_t column_size = column->size();
|
||||
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
if ((!null_map || (*null_map)[i] == 0) && condition[i] & 1)
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
if (!column->isNullAt(i) && (column->get64(i) & 1))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -34,7 +34,6 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule
|
||||
|
||||
const String index_name;
|
||||
const size_t max_rows;
|
||||
const Block index_sample_block;
|
||||
|
||||
Block block;
|
||||
};
|
||||
@ -127,6 +126,7 @@ private:
|
||||
|
||||
std::unordered_set<String> key_columns;
|
||||
ExpressionActionsPtr actions;
|
||||
String actions_output_column_name;
|
||||
};
|
||||
|
||||
|
||||
|
14
tests/performance/set_index_analysis.xml
Normal file
14
tests/performance/set_index_analysis.xml
Normal file
@ -0,0 +1,14 @@
|
||||
<!-- Performance test for the MergeTree `set` data-skipping index: fills a table that has a set(10) index and runs a single equality lookup on the indexed column. -->
<test>
|
||||
<!-- Table under test: ordered by k, with a set(10) skipping index on x (index GRANULARITY 1) and a non-default index_granularity of 111 rows. -->
<create_query>
|
||||
CREATE TABLE test_set (k UInt32, x UInt32, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 111;
|
||||
</create_query>
|
||||
<!-- Merges are stopped before the insert; presumably so the inserted parts are not merged while the test runs (confirm intent). -->
<fill_query>SYSTEM STOP MERGES</fill_query>
|
||||
<!-- Inserts 3,000,000 rows; x is `number DIV 100` plus a pseudo-random offset in 0..6 (rand() % 7). -->
<fill_query>INSERT INTO test_set SELECT number, number DIV 100 + rand() % 7 FROM numbers(3000000) SETTINGS max_insert_threads = 4;</fill_query>
|
||||
|
||||
<!-- Measured query: counts rows with x = 1234, a predicate on the indexed column, using up to 8 threads. -->
<query>
|
||||
SELECT count() FROM test_set WHERE x = 1234 SETTINGS max_threads = 8;
|
||||
</query>
|
||||
|
||||
<!-- Cleanup: re-enable merges and drop the test table. -->
<drop_query>SYSTEM START MERGES</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS test_set</drop_query>
|
||||
</test>
|
Loading…
Reference in New Issue
Block a user