serialize partition and minmax index values [#CLICKHOUSE-3000]

Alexey Zatelepin 2017-08-30 22:03:19 +03:00 committed by alexey-milovidov
parent 2f80c150db
commit e9f93028b8
9 changed files with 54 additions and 55 deletions

View File

@@ -266,7 +266,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl(
new_data_part->modification_time = time(nullptr);
new_data_part->loadColumnsChecksumsIndexes(true, false);
new_data_part->is_sharded = false;
new_data_part->checksums.checkEqual(checksums, false);
return new_data_part;

View File

@@ -221,8 +221,14 @@ void MergeTreeData::initPartitionKey()
partition_expr_ast = parseQuery(
parser, partition_expr_str.data(), partition_expr_str.data() + partition_expr_str.length(), "partition expression");
partition_expr = ExpressionAnalyzer(partition_expr_ast, context, nullptr, getColumnsList()).getActions(false);
partition_expr_columns.clear();
partition_expr_column_types.clear();
for (const ASTPtr & ast : partition_expr_ast->children)
partition_expr_columns.emplace_back(ast->getColumnName());
{
String col_name = ast->getColumnName();
partition_expr_columns.emplace_back(col_name);
partition_expr_column_types.emplace_back(partition_expr->getSampleBlock().getByName(col_name).type);
}
const NamesAndTypesList & minmax_idx_columns_with_types = partition_expr->getRequiredColumnsWithTypes();
minmax_idx_expr = std::make_shared<ExpressionActions>(minmax_idx_columns_with_types, context.getSettingsRef());
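Judging from this hunk, the DataType of every partition expression column is now recorded next to its name so that the per-part partition value (a tuple of Fields) can be written in a stable, type-aware binary form (see the partition.dat hunk further below). As a hedged, standalone illustration of that primitive -- not code from this commit, and with purely illustrative names -- the Field overloads of IDataType::serializeBinary / deserializeBinary round-trip a single partition value like this:

/// Hedged sketch (not part of the commit): round-tripping one partition value
/// through the Field overloads of IDataType::serializeBinary / deserializeBinary.
#include <Core/Field.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteBufferFromString.h>
#include <IO/ReadBufferFromString.h>

void partitionValueRoundTrip()                  /// illustrative helper, not in the codebase
{
    DB::DataTypeUInt32 type;                    /// stands in for partition_expr_column_types[i]
    DB::Field value = DB::UInt64(201708);       /// stands in for new_part->partition[i]

    std::string serialized;
    {
        DB::WriteBufferFromString out(serialized);
        type.serializeBinary(value, out);       /// the same call later fills partition.dat
    }

    DB::Field restored;
    DB::ReadBufferFromString in(serialized);
    type.deserializeBinary(restored, in);       /// restored now equals value
}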

View File

@@ -485,6 +485,7 @@ public:
ASTPtr partition_expr_ast;
ExpressionActionsPtr partition_expr;
Names partition_expr_columns;
DataTypes partition_expr_column_types;
ExpressionActionsPtr minmax_idx_expr;
Names minmax_idx_columns;

View File

@@ -739,22 +739,15 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart
<< merge_entry->bytes_read_uncompressed / 1000000.0 / elapsed_seconds << " MB/sec.");
}
new_data_part->columns = all_columns;
if (merge_alg != MergeAlgorithm::Vertical)
new_data_part->checksums = to.writeSuffixAndGetChecksums();
to.writeSuffixAndFinalizePart(new_data_part);
else
new_data_part->checksums = to.writeSuffixAndGetChecksums(all_columns, &checksums_gathered_columns);
new_data_part->index.swap(to.getIndex());
to.writeSuffixAndFinalizePart(new_data_part, &all_columns, &checksums_gathered_columns);
/// For convenience, even CollapsingSortedBlockInputStream can not return zero rows.
if (0 == to.marksCount())
throw Exception("Empty part after merge", ErrorCodes::LOGICAL_ERROR);
new_data_part->size = to.marksCount();
new_data_part->modification_time = time(nullptr);
new_data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_part_tmp_path);
new_data_part->is_sharded = false;
return new_data_part;
}
@@ -1056,14 +1049,7 @@ MergeTreeData::PerShardDataParts MergeTreeDataMerger::reshardPartition(
}
MergeTreeData::MutableDataPartPtr & data_part = per_shard_data_parts.at(shard_no);
data_part->columns = column_names_and_types;
data_part->checksums = output_stream->writeSuffixAndGetChecksums();
data_part->index.swap(output_stream->getIndex());
data_part->size = output_stream->marksCount();
data_part->modification_time = time(nullptr);
data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(output_stream->getPartPath());
data_part->is_sharded = true;
output_stream->writeSuffixAndFinalizePart(data_part);
data_part->shard_no = shard_no;
}

View File

@@ -158,7 +158,6 @@ struct MergeTreeDataPart
bool is_temp = false;
/// For resharding.
bool is_sharded = false;
size_t shard_no = 0;
/// Primary key (correspond to primary.idx file).

View File

@@ -124,8 +124,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
{
Block & block = block_with_partition.block;
size_t part_size = (block.rows() + data.index_granularity - 1) / data.index_granularity;
static const String TMP_PREFIX = "tmp_insert_";
/// This will generate unique name in scope of current server process.
@@ -205,14 +203,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
out.writePrefix();
out.writeWithPermutation(block, perm_ptr);
MergeTreeData::DataPart::Checksums checksums = out.writeSuffixAndGetChecksums();
new_data_part->size = part_size;
new_data_part->modification_time = time(nullptr);
new_data_part->columns = columns;
new_data_part->checksums = checksums;
new_data_part->index.swap(out.getIndex());
new_data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_data_part->getFullPath());
out.writeSuffixAndFinalizePart(new_data_part);
ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows());
ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes());

View File

@@ -385,10 +385,14 @@ void MergedBlockOutputStream::writeSuffix()
throw Exception("Method writeSuffix is not supported by MergedBlockOutputStream", ErrorCodes::NOT_IMPLEMENTED);
}
MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChecksums(
const NamesAndTypesList & total_column_list,
MergeTreeData::DataPart::Checksums * additional_column_checksums)
void MergedBlockOutputStream::writeSuffixAndFinalizePart(
MergeTreeData::MutableDataPartPtr & new_part,
const NamesAndTypesList * total_column_list,
MergeTreeData::DataPart::Checksums * additional_column_checksums)
{
if (!total_column_list)
total_column_list = &columns_list;
/// Finish write and get checksums.
MergeTreeData::DataPart::Checksums checksums;
@@ -415,14 +419,36 @@ MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChe
{
/// A part is empty - all records are deleted.
Poco::File(part_path).remove(true);
checksums.files.clear();
return checksums;
return;
}
if (!storage.partition_expr_columns.empty())
{
WriteBufferFromFile out(part_path + "partition.dat");
HashingWriteBuffer out_hashing(out);
for (size_t i = 0; i < new_part->partition.size(); ++i)
storage.partition_expr_column_types[i]->serializeBinary(new_part->partition[i], out_hashing);
checksums.files["partition.dat"].file_size = out_hashing.count();
checksums.files["partition.dat"].file_hash = out_hashing.getHash();
}
for (size_t i = 0; i < storage.minmax_idx_columns.size(); ++i)
{
String file_name = "minmax_" + escapeForFileName(storage.minmax_idx_columns[i]) + ".idx";
const DataTypePtr & type = storage.minmax_idx_column_types[i];
WriteBufferFromFile out(part_path + file_name);
HashingWriteBuffer out_hashing(out);
type->serializeBinary(new_part->minmax_idx.min_column_values[i], out_hashing);
type->serializeBinary(new_part->minmax_idx.max_column_values[i], out_hashing);
checksums.files[file_name].file_size = out_hashing.count();
checksums.files[file_name].file_hash = out_hashing.getHash();
}
{
/// Write a file with a description of columns.
WriteBufferFromFile out(part_path + "columns.txt", 4096);
total_column_list.writeText(out);
total_column_list->writeText(out);
}
{
@@ -431,17 +457,12 @@ MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChe
checksums.write(out);
}
return checksums;
}
MergeTreeData::DataPart::Checksums MergedBlockOutputStream::writeSuffixAndGetChecksums()
{
return writeSuffixAndGetChecksums(columns_list, nullptr);
}
MergeTreeData::DataPart::Index & MergedBlockOutputStream::getIndex()
{
return index_columns;
new_part->size = marks_count;
new_part->modification_time = time(nullptr);
new_part->columns = *total_column_list;
new_part->index.swap(index_columns);
new_part->checksums = checksums;
new_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_part->getFullPath());
}
size_t MergedBlockOutputStream::marksCount()
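The hunk above only adds the writing side of the new on-disk format: partition.dat holds one binary-serialized Field per partition expression column, and each minmax_<column>.idx holds the min value of that column followed by the max value. For orientation, the reading side might look like the following hedged sketch; the function names and signatures here are illustrative, not what the codebase actually uses:

/// Hedged sketch of the reading side, mirroring the writer above; not part of this commit.
#include <vector>
#include <Common/escapeForFileName.h>
#include <Core/Field.h>
#include <DataTypes/IDataType.h>
#include <IO/ReadBufferFromFile.h>

/// Restores the whole partition value: one Field per partition expression column,
/// deserialized with the types collected in MergeTreeData::partition_expr_column_types.
std::vector<DB::Field> readPartitionDat(const std::string & part_path, const DB::DataTypes & types)
{
    DB::ReadBufferFromFile file(part_path + "partition.dat");
    std::vector<DB::Field> partition(types.size());
    for (size_t i = 0; i < types.size(); ++i)
        types[i]->deserializeBinary(partition[i], file);    /// inverse of serializeBinary above
    return partition;
}

/// Restores the (min, max) pair of one minmax index column from minmax_<column>.idx.
std::pair<DB::Field, DB::Field> readMinMaxIdx(
    const std::string & part_path, const std::string & column_name, const DB::DataTypePtr & type)
{
    DB::ReadBufferFromFile file(part_path + "minmax_" + DB::escapeForFileName(column_name) + ".idx");
    std::pair<DB::Field, DB::Field> min_max;
    type->deserializeBinary(min_max.first, file);           /// min first, then max, as written above
    type->deserializeBinary(min_max.second, file);
    return min_max;
}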

View File

@@ -122,13 +122,10 @@ public:
void writeSuffix() override;
MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums(
const NamesAndTypesList & total_column_list,
MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);
MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums();
MergeTreeData::DataPart::Index & getIndex();
void writeSuffixAndFinalizePart(
MergeTreeData::MutableDataPartPtr & new_part,
const NamesAndTypesList * total_columns_list = nullptr,
MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);
/// How many marks are already written.
size_t marksCount();
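With the new interface a call site just streams blocks and hands the part to the output stream for finalization; checksums, the primary index, sizes, the columns list and now partition.dat and the minmax_*.idx files are all produced inside writeSuffixAndFinalizePart. A hedged sketch of the calling pattern, mirroring the MergeTreeDataWriter hunk above; construction of the stream, block and part is elided and assumed to match that call site:

#include <Storages/MergeTree/MergedBlockOutputStream.h>

/// Hedged usage sketch; 'out', 'block' and 'new_data_part' are assumed to be set up
/// by the caller much like MergeTreeDataWriter::writeTempPart does above.
void writeOnePart(DB::MergedBlockOutputStream & out,
                  const DB::Block & block,
                  DB::MergeTreeData::MutableDataPartPtr & new_data_part)
{
    out.writePrefix();
    out.write(block);                               /// or writeWithPermutation(block, perm_ptr)
    out.writeSuffixAndFinalizePart(new_data_part);  /// one call now finalizes the whole part
}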

View File

@@ -105,7 +105,6 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
data_part->modification_time = time(nullptr);
data_part->loadColumnsChecksumsIndexes(true, false);
data_part->is_sharded = false;
data_part->checksums.checkEqual(checksums, false);
/// Now store permanently the received part.