Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-22 15:42:02 +00:00)

Commit e9f93028b8: serialize partition and minmax index values [#CLICKHOUSE-3000]
Parent: 2f80c150db
@@ -266,7 +266,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl(
    new_data_part->modification_time = time(nullptr);
    new_data_part->loadColumnsChecksumsIndexes(true, false);
    new_data_part->is_sharded = false;
    new_data_part->checksums.checkEqual(checksums, false);

    return new_data_part;
@@ -221,8 +221,14 @@ void MergeTreeData::initPartitionKey()
    partition_expr_ast = parseQuery(
        parser, partition_expr_str.data(), partition_expr_str.data() + partition_expr_str.length(), "partition expression");
    partition_expr = ExpressionAnalyzer(partition_expr_ast, context, nullptr, getColumnsList()).getActions(false);
    partition_expr_columns.clear();
    partition_expr_column_types.clear();
    for (const ASTPtr & ast : partition_expr_ast->children)
        partition_expr_columns.emplace_back(ast->getColumnName());
    {
        String col_name = ast->getColumnName();
        partition_expr_columns.emplace_back(col_name);
        partition_expr_column_types.emplace_back(partition_expr->getSampleBlock().getByName(col_name).type);
    }

    const NamesAndTypesList & minmax_idx_columns_with_types = partition_expr->getRequiredColumnsWithTypes();
    minmax_idx_expr = std::make_shared<ExpressionActions>(minmax_idx_columns_with_types, context.getSettingsRef());
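Note (not part of the diff): the new partition_expr_column_types vector stays index-aligned with partition_expr_columns, so value i of a part's partition key can later be written and read back with the binary serialization of type i; the partition.dat writer added further down in this commit relies on exactly that pairing. As an illustrative example (not taken from the commit), a table partitioned by (toMonday(date), event_type) would end up with partition_expr_columns = { "toMonday(date)", "event_type" } and partition_expr_column_types holding the corresponding result types (Date and the type of event_type).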
@@ -485,6 +485,7 @@ public:
    ASTPtr partition_expr_ast;
    ExpressionActionsPtr partition_expr;
    Names partition_expr_columns;
    DataTypes partition_expr_column_types;

    ExpressionActionsPtr minmax_idx_expr;
    Names minmax_idx_columns;
@@ -739,22 +739,15 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart
            << merge_entry->bytes_read_uncompressed / 1000000.0 / elapsed_seconds << " MB/sec.");
    }

    new_data_part->columns = all_columns;
    if (merge_alg != MergeAlgorithm::Vertical)
        new_data_part->checksums = to.writeSuffixAndGetChecksums();
        to.writeSuffixAndFinalizePart(new_data_part);
    else
        new_data_part->checksums = to.writeSuffixAndGetChecksums(all_columns, &checksums_gathered_columns);
        new_data_part->index.swap(to.getIndex());
        to.writeSuffixAndFinalizePart(new_data_part, &all_columns, &checksums_gathered_columns);

    /// For convenience, even CollapsingSortedBlockInputStream can not return zero rows.
    if (0 == to.marksCount())
        throw Exception("Empty part after merge", ErrorCodes::LOGICAL_ERROR);

    new_data_part->size = to.marksCount();
    new_data_part->modification_time = time(nullptr);
    new_data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_part_tmp_path);
    new_data_part->is_sharded = false;

    return new_data_part;
}
@@ -1056,14 +1049,7 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition(
        }

        MergeTreeData::MutableDataPartPtr & data_part = per_shard_data_parts.at(shard_no);

        data_part->columns = column_names_and_types;
        data_part->checksums = output_stream->writeSuffixAndGetChecksums();
        data_part->index.swap(output_stream->getIndex());
        data_part->size = output_stream->marksCount();
        data_part->modification_time = time(nullptr);
        data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(output_stream->getPartPath());
        data_part->is_sharded = true;
        output_stream->writeSuffixAndFinalizePart(data_part);
        data_part->shard_no = shard_no;
    }
@@ -158,7 +158,6 @@ struct MergeTreeDataPart
    bool is_temp = false;

    /// For resharding.
    bool is_sharded = false;
    size_t shard_no = 0;

    /// Primary key (correspond to primary.idx file).
@@ -124,8 +124,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
{
    Block & block = block_with_partition.block;

    size_t part_size = (block.rows() + data.index_granularity - 1) / data.index_granularity;

    static const String TMP_PREFIX = "tmp_insert_";

    /// This will generate unique name in scope of current server process.
@@ -205,14 +203,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
    out.writePrefix();
    out.writeWithPermutation(block, perm_ptr);
    MergeTreeData::DataPart::Checksums checksums = out.writeSuffixAndGetChecksums();

    new_data_part->size = part_size;
    new_data_part->modification_time = time(nullptr);
    new_data_part->columns = columns;
    new_data_part->checksums = checksums;
    new_data_part->index.swap(out.getIndex());
    new_data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_data_part->getFullPath());
    out.writeSuffixAndFinalizePart(new_data_part);

    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows());
    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes());
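Note (not part of the diff): the same mechanical change repeats at every call site in this commit. A condensed before/after sketch, assembled from the lines above rather than quoted verbatim from either version of the code:

    // Before: each caller copied the output stream's results into the part by hand.
    new_data_part->columns = columns;
    new_data_part->checksums = out.writeSuffixAndGetChecksums();
    new_data_part->index.swap(out.getIndex());
    new_data_part->size = out.marksCount();
    new_data_part->modification_time = time(nullptr);
    new_data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_data_part->getFullPath());

    // After: a single call; the stream itself fills in columns, checksums, index,
    // size, modification_time and size_in_bytes (see the MergedBlockOutputStream hunks below).
    out.writeSuffixAndFinalizePart(new_data_part);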
@@ -385,10 +385,14 @@ void MergedBlockOutputStream::writeSuffix()
    throw Exception("Method writeSuffix is not supported by MergedBlockOutputStream", ErrorCodes::NOT_IMPLEMENTED);
}

MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChecksums(
    const NamesAndTypesList & total_column_list,
    MergeTreeData::DataPart::Checksums * additional_column_checksums)
void MergedBlockOutputStream::writeSuffixAndFinalizePart(
    MergeTreeData::MutableDataPartPtr & new_part,
    const NamesAndTypesList * total_column_list,
    MergeTreeData::DataPart::Checksums * additional_column_checksums)
{
    if (!total_column_list)
        total_column_list = &columns_list;

    /// Finish write and get checksums.
    MergeTreeData::DataPart::Checksums checksums;
@@ -415,14 +419,36 @@ MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChe
    {
        /// A part is empty - all records are deleted.
        Poco::File(part_path).remove(true);
        checksums.files.clear();
        return checksums;
        return;
    }

    if (!storage.partition_expr_columns.empty())
    {
        WriteBufferFromFile out(part_path + "partition.dat");
        HashingWriteBuffer out_hashing(out);
        for (size_t i = 0; i < new_part->partition.size(); ++i)
            storage.partition_expr_column_types[i]->serializeBinary(new_part->partition[i], out_hashing);
        checksums.files["partition.dat"].file_size = out_hashing.count();
        checksums.files["partition.dat"].file_hash = out_hashing.getHash();
    }

    for (size_t i = 0; i < storage.minmax_idx_columns.size(); ++i)
    {
        String file_name = "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx";
        const DataTypePtr & type = storage.minmax_idx_column_types[i];

        WriteBufferFromFile out(part_path + file_name);
        HashingWriteBuffer out_hashing(out);
        type->serializeBinary(new_part->minmax_idx.min_column_values[i], out_hashing);
        type->serializeBinary(new_part->minmax_idx.max_column_values[i], out_hashing);
        checksums.files[file_name].file_size = out_hashing.count();
        checksums.files[file_name].file_hash = out_hashing.getHash();
    }

    {
        /// Write a file with a description of columns.
        WriteBufferFromFile out(part_path + "columns.txt", 4096);
        total_column_list.writeText(out);
        total_column_list->writeText(out);
    }

    {
@@ -431,17 +457,12 @@ MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChe
        checksums.write(out);
    }

    return checksums;
}

MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChecksums()
{
    return writeSuffixAndGetChecksums(columns_list, nullptr);
}

MergeTreeData::DataPart::Index & MergedBlockOutputStream::getIndex()
{
    return index_columns;
    new_part->size = marks_count;
    new_part->modification_time = time(nullptr);
    new_part->columns = *total_column_list;
    new_part->index.swap(index_columns);
    new_part->checksums = checksums;
    new_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_part->getFullPath());
}

size_t MergedBlockOutputStream::marksCount()
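Note (not part of the diff): the hunks above write one small sidecar file per concern, partition.dat with one value per partition key column and a minmax_<column>.idx with the (min, max) pair for each minmax index column, and hash both into the part's checksums. Only the writing side appears in this commit; a minimal reading counterpart could look roughly like the sketch below, assuming the same storage fields and the IDataType::deserializeBinary(Field &, ReadBuffer &) interface (illustrative, not code from this commit):

    // Illustrative reader for the files serialized above (not part of the commit).
    ReadBufferFromFile partition_file(part_path + "partition.dat");
    Row partition_values(storage.partition_expr_column_types.size());
    for (size_t i = 0; i < partition_values.size(); ++i)
        storage.partition_expr_column_types[i]->deserializeBinary(partition_values[i], partition_file);

    for (size_t i = 0; i < storage.minmax_idx_columns.size(); ++i)
    {
        ReadBufferFromFile idx_file(part_path + "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx");
        const DataTypePtr & type = storage.minmax_idx_column_types[i];
        Field min_value;
        Field max_value;
        type->deserializeBinary(min_value, idx_file);   /// min is written first, so it is read first
        type->deserializeBinary(max_value, idx_file);
    }

Centralizing the finalization in writeSuffixAndFinalizePart also means the stream that wrote these files is the single place that fills in the in-memory part (size, columns, index, checksums, size_in_bytes), so no individual call site can forget the new files.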
@@ -122,13 +122,10 @@ public:
    void writeSuffix() override;

    MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums(
        const NamesAndTypesList & total_column_list,
        MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);

    MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums();

    MergeTreeData::DataPart::Index & getIndex();
    void writeSuffixAndFinalizePart(
        MergeTreeData::MutableDataPartPtr & new_part,
        const NamesAndTypesList * total_columns_list = nullptr,
        MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);

    /// How many marks are already written.
    size_t marksCount();
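Note (not part of the diff): with both trailing parameters defaulted to nullptr, writeSuffixAndFinalizePart(new_part) covers the callers that previously used the parameterless writeSuffixAndGetChecksums(), while the vertical merge path passes the gathered column list and its extra checksums explicitly, as shown in the MergeTreeDataMerger hunk above.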
@@ -105,7 +105,6 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
    data_part->modification_time = time(nullptr);
    data_part->loadColumnsChecksumsIndexes(true, false);
    data_part->is_sharded = false;
    data_part->checksums.checkEqual(checksums, false);

    /// Now store permanently the received part.
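Note (not part of the diff): on the receiving side, loadColumnsChecksumsIndexes re-reads the part's metadata from disk and checksums.checkEqual then compares it against the checksums that came with the transfer; since partition.dat and the minmax_*.idx files are now recorded in the part's checksums, they participate in this verification as well.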