Merge pull request #66722 from ClickHouse/chesema-fix-isUniqTypes

fixing assert `isUniqTypes`
Alexey Milovidov 2024-07-19 01:48:00 +00:00 committed by GitHub
commit c4c8e999c2
5 changed files with 97 additions and 7 deletions


@@ -84,12 +84,18 @@ public:
return result;
}
void append(Self && other)
// append items from the other instance only if there is no such item in the current instance
void appendIfUniq(Self && other)
{
auto middle_idx = records.size();
std::move(other.records.begin(), other.records.end(), std::back_inserter(records));
// merge is stable
std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end());
chassert(isUniqTypes());
// remove duplicates
records.erase(std::unique(records.begin(), records.end()), records.end());
assert(std::is_sorted(records.begin(), records.end()));
assert(isUniqTypes());
}
template <class T>
@@ -142,7 +148,6 @@ private:
bool isUniqTypes() const
{
auto uniq_it = std::adjacent_find(records.begin(), records.end());
return uniq_it == records.end();
}
@@ -161,8 +166,6 @@ private:
records.emplace(it, type_idx, item);
chassert(isUniqTypes());
}
Records::const_iterator getImpl(std::type_index type_idx) const
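
Why the new code restores the invariant deserves a brief note: std::inplace_merge is stable, so for records with equal type keys the ones already in the current instance precede the ones moved in from other, and std::unique then keeps only the first element of each run of equal elements. Together that gives exactly what the comment promises: a record from other survives only if no record of that type was already present, so the check in isUniqTypes holds again. Below is a minimal, self-contained sketch of the same pattern, with plain int keys standing in for std::type_index (an illustration only, not ClickHouse's actual collection class):

#include <algorithm>
#include <cassert>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

// Stand-in for the records container: pairs kept sorted and unique by key.
using Record = std::pair<int, std::string>;

// Sketch of the appendIfUniq logic: on a key collision, the record already
// in `records` wins, because inplace_merge is stable and std::unique keeps
// the first element of each equal run.
void appendIfUniq(std::vector<Record> & records, std::vector<Record> && other)
{
    auto key_less = [](const Record & a, const Record & b) { return a.first < b.first; };
    auto key_eq = [](const Record & a, const Record & b) { return a.first == b.first; };

    auto middle_idx = records.size();
    std::move(other.begin(), other.end(), std::back_inserter(records));
    // merge is stable: for equal keys, elements of the first range come first
    std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end(), key_less);
    // remove duplicates: later (moved-in) copies of a key are dropped
    records.erase(std::unique(records.begin(), records.end(), key_eq), records.end());
    assert(std::is_sorted(records.begin(), records.end(), key_less));
}

int main()
{
    std::vector<Record> current{{1, "current"}, {3, "current"}};
    std::vector<Record> incoming{{1, "incoming"}, {2, "incoming"}};
    appendIfUniq(current, std::move(incoming));
    for (const auto & [key, origin] : current)
        std::cout << key << " -> " << origin << '\n';
    // prints: 1 -> current, 2 -> incoming, 3 -> current
}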


@@ -134,7 +134,7 @@ Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoColl
Chunk result;
result.setColumns(std::move(mutable_columns), rows);
result.setChunkInfos(infos);
result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos()));
result.getChunkInfos().appendIfUniq(std::move(input_chunks.back().getChunkInfos()));
chassert(result);
return result;


@@ -20,7 +20,7 @@ namespace ErrorCodes
void RestoreChunkInfosTransform::transform(Chunk & chunk)
{
chunk.getChunkInfos().append(chunk_infos.clone());
chunk.getChunkInfos().appendIfUniq(chunk_infos.clone());
}
namespace DeduplicationToken
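
Both call sites above can hit the collision case that used to trip the assert: squash merges chunk infos from several input chunks into one result, and RestoreChunkInfosTransform appends the same cloned infos to every chunk that passes through, so an info of a given type may already be present in the destination collection. With the old append, which only merged, the duplicate survived and chassert(isUniqTypes()) fired. A hypothetical repro of that failure mode, assuming a simplified record keyed on std::type_index (names and layout are illustrative, not the real ClickHouse types):

#include <algorithm>
#include <cassert>
#include <iterator>
#include <typeindex>
#include <vector>

// Hypothetical simplified record keyed on std::type_index, like the real
// collection; payload marks which instance a record came from.
struct Info
{
    std::type_index type;
    int payload;
};
bool operator<(const Info & a, const Info & b) { return a.type < b.type; }
bool operator==(const Info & a, const Info & b) { return a.type == b.type; }

// The uniqueness check from the diff: a sorted range has unique keys
// iff no two adjacent elements compare equal.
bool isUniqTypes(const std::vector<Info> & records)
{
    return std::adjacent_find(records.begin(), records.end()) == records.end();
}

int main()
{
    std::vector<Info> records{{typeid(int), 1}};
    std::vector<Info> other{{typeid(int), 2}}; // same type already present

    // old append: stable merge only
    auto middle_idx = records.size();
    std::move(other.begin(), other.end(), std::back_inserter(records));
    std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end());
    assert(!isUniqTypes(records)); // duplicate survives: the old chassert would fire here

    // the appendIfUniq fix: drop later duplicates, first occurrence wins
    records.erase(std::unique(records.begin(), records.end()), records.end());
    assert(isUniqTypes(records));
    assert(records.front().payload == 1); // the current instance's record was kept
}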


@@ -0,0 +1,36 @@
-- { echoOn }
SELECT count() FROM src;
100
SELECT a, sum(b), uniq(b), FROM src GROUP BY a ORDER BY a;
0 450 10
1 460 10
2 470 10
3 480 10
4 490 10
5 500 10
6 510 10
7 520 10
8 530 10
9 540 10
SELECT count() FROM remote('127.0.0.{1..2}', currentDatabase(), src);
200
-- { echoOn }
INSERT INTO dst_null
SELECT a, b FROM src;
SELECT
a,
sumMerge(sum_b) AS sum_b,
uniqMerge(uniq_b) AS uniq_b
FROM mv_dst
GROUP BY a
ORDER BY a;
0 450 10
1 460 10
2 470 10
3 480 10
4 490 10
5 500 10
6 510 10
7 520 10
8 530 10
9 540 10


@@ -0,0 +1,51 @@
DROP TABLE IF EXISTS src;
CREATE TABLE src (a UInt64, b UInt64)
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_remote_insert_select/src', '{replica}')
ORDER BY tuple();
INSERT INTO src SELECT number % 10 as a, number as b FROM numbers(100);
SET allow_experimental_parallel_reading_from_replicas=1;
SET max_parallel_replicas=3;
SET parallel_replicas_for_non_replicated_merge_tree=1;
SET cluster_for_parallel_replicas='parallel_replicas';
-- { echoOn }
SELECT count() FROM src;
SELECT a, sum(b), uniq(b), FROM src GROUP BY a ORDER BY a;
SELECT count() FROM remote('127.0.0.{1..2}', currentDatabase(), src);
-- { echoOff }
DROP TABLE IF EXISTS dst_null;
CREATE TABLE dst_null(a UInt64, b UInt64)
ENGINE = Null;
DROP TABLE IF EXISTS mv_dst;
CREATE MATERIALIZED VIEW mv_dst
ENGINE = AggregatingMergeTree()
ORDER BY a
AS SELECT
a,
sumState(b) AS sum_b,
uniqState(b) AS uniq_b
FROM dst_null
GROUP BY a;
-- { echoOn }
INSERT INTO dst_null
SELECT a, b FROM src;
SELECT
a,
sumMerge(sum_b) AS sum_b,
uniqMerge(uniq_b) AS uniq_b
FROM mv_dst
GROUP BY a
ORDER BY a;
-- { echoOff }
DROP TABLE src;
DROP TABLE mv_dst;
DROP TABLE dst_null;