2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeDataWriter.h>
|
|
|
|
#include <Storages/MergeTree/MergedBlockOutputStream.h>
|
|
|
|
#include <Common/escapeForFileName.h>
|
2017-08-19 18:11:20 +00:00
|
|
|
#include <Common/HashTable/HashMap.h>
|
|
|
|
#include <Interpreters/AggregationCommon.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/HashingWriteBuffer.h>
|
2017-01-21 04:24:28 +00:00
|
|
|
#include <Poco/File.h>
|
2014-03-13 17:44:00 +00:00
|
|
|
|
2016-10-24 02:02:37 +00:00
|
|
|
|
|
|
|
namespace ProfileEvents
{
    /// Profiling counters incremented by MergeTreeDataWriter below.
    /// Only declared here; the Event objects are defined elsewhere in the project.
    extern const Event MergeTreeDataWriterBlocks;
    extern const Event MergeTreeDataWriterBlocksAlreadySorted;
    extern const Event MergeTreeDataWriterRows;
    extern const Event MergeTreeDataWriterUncompressedBytes;
    extern const Event MergeTreeDataWriterCompressedBytes;
}
|
|
|
|
|
2014-03-13 17:44:00 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-08-19 18:11:20 +00:00
|
|
|
namespace
|
2014-03-13 17:44:00 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-08-19 18:11:20 +00:00
|
|
|
/// Computes, for a set of rows described by the partition key `columns`:
///  - partition_num_to_first_row: for each distinct partition key (in order of first
///    appearance), the index of the first row carrying that key;
///  - selector: a per-row mapping row index -> partition number. As an optimization,
///    `selector` is left untouched (empty) when only one partition was found.
void buildScatterSelector(
    const ColumnRawPtrs & columns,
    PODArray<size_t> & partition_num_to_first_row,
    IColumn::Selector & selector)
{
    /// Use generic hashed variant since partitioning is unlikely to be a bottleneck.
    using Data = HashMap<UInt128, size_t, UInt128TrivialHash>;
    Data partitions_map;

    size_t num_rows = columns[0]->size();
    size_t partitions_count = 0;
    for (size_t i = 0; i < num_rows; ++i)
    {
        /// 128-bit hash of row i over all partition key columns, used as the map key.
        Data::key_type key = hash128(i, columns.size(), columns);
        typename Data::iterator it;
        bool inserted;
        partitions_map.emplace(key, it, inserted);

        if (inserted)
        {
            /// First time this partition key is seen: remember its first row and
            /// assign it the next partition number.
            partition_num_to_first_row.push_back(i);
            it->second = partitions_count;

            ++partitions_count;

            /// Optimization for common case when there is only one partition - defer selector initialization.
            if (partitions_count == 2)
            {
                /// A second partition just appeared: allocate the selector now and
                /// backfill all previously seen rows, which necessarily belong to partition 0.
                selector = IColumn::Selector(num_rows);
                std::fill(selector.begin(), selector.begin() + i, 0);
            }
        }

        /// Only written once the selector has been materialized above.
        if (partitions_count > 1)
            selector[i] = it->second;
    }
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-08-19 18:11:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/** Split an inserted block into one block per partition, each paired with the
  * materialized values of its partition key.
  * Returns an empty result for an absent/empty block. For a non-partitioned
  * table the whole block is returned as a single entry with an empty Row.
  */
BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block)
{
    BlocksWithPartition result;
    if (!block || !block.rows())
        return result;

    /// Validate the block against the table structure before splitting anything.
    data.check(block, true);
    block.checkNumberOfRows();

    if (!data.partition_expr) /// Table is not partitioned.
    {
        result.emplace_back(Block(block), Row());
        return result;
    }

    /// Evaluate the partition expression on a copy so the caller's block is untouched.
    Block block_with_partition_key = block;
    data.partition_expr->execute(block_with_partition_key);

    ColumnRawPtrs partition_columns;
    partition_columns.reserve(data.partition_key_sample.columns());
    for (const ColumnWithTypeAndName & element : data.partition_key_sample)
        partition_columns.emplace_back(block_with_partition_key.getByName(element.name).column.get());

    PODArray<size_t> partition_num_to_first_row;
    IColumn::Selector selector;
    buildScatterSelector(partition_columns, partition_num_to_first_row, selector);

    const size_t partitions_count = partition_num_to_first_row.size();
    result.reserve(partitions_count);

    /// Materialize the partition key values of partition `num`, read from its first row.
    auto extract_partition = [&](size_t num)
    {
        const size_t first_row = partition_num_to_first_row[num];
        Row partition_row(partition_columns.size());
        for (size_t i = 0; i < partition_columns.size(); ++i)
            partition_row[i] = Field((*partition_columns[i])[first_row]);
        return partition_row;
    };

    if (partitions_count == 1)
    {
        /// A typical case is when there is one partition (you do not need to split anything).
        /// NOTE: returning a copy of the original block so that calculated partition key columns
        /// do not interfere with possible calculated primary key columns of the same name.
        result.emplace_back(Block(block), extract_partition(0));
        return result;
    }

    for (size_t num = 0; num < partitions_count; ++num)
        result.emplace_back(block.cloneEmpty(), extract_partition(num));

    /// Scatter every source column across the per-partition blocks.
    for (size_t col = 0; col < block.columns(); ++col)
    {
        MutableColumns scattered = block.getByPosition(col).column->scatter(partitions_count, selector);
        for (size_t num = 0; num < partitions_count; ++num)
            result[num].block.getByPosition(col).column = std::move(scattered[num]);
    }

    return result;
}
|
|
|
|
|
2017-08-18 19:46:26 +00:00
|
|
|
/** Write a single block (already split by partition) to disk as a temporary part
  * (directory name prefixed with "tmp_insert_"). Sorts the block by the table's
  * sort columns if needed, computes the minmax index, and returns the new part.
  * NOTE: mutates the caller's block (primary/secondary sort expressions are
  * executed on it) and moves out block_with_partition.partition.
  */
MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPartition & block_with_partition)
{
    Block & block = block_with_partition.block;

    static const String TMP_PREFIX = "tmp_insert_";

    /// This will generate unique name in scope of current server process.
    Int64 temp_index = data.insert_increment.get();

    MergeTreeDataPart::MinMaxIndex minmax_idx;
    minmax_idx.update(block, data.minmax_idx_columns);

    MergeTreePartition partition(std::move(block_with_partition.partition));

    /// Single-block part: min and max block numbers are both temp_index, level 0.
    MergeTreePartInfo new_part_info(partition.getID(data), temp_index, temp_index, 0);
    String part_name;
    if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
    {
        /// Old (V0) naming scheme derives the part name from the min/max date range,
        /// taken from the minmax index entry of the date column.
        DayNum min_date(minmax_idx.parallelogram[data.minmax_idx_date_column_pos].left.get<UInt64>());
        DayNum max_date(minmax_idx.parallelogram[data.minmax_idx_date_column_pos].right.get<UInt64>());

        const auto & date_lut = DateLUT::instance();

        DayNum min_month = date_lut.toFirstDayNumOfMonth(DayNum(min_date));
        DayNum max_month = date_lut.toFirstDayNumOfMonth(DayNum(max_date));

        /// The caller is expected to have split the block by month already (V0 parts
        /// cannot span months) — presumably guaranteed upstream; verify against callers.
        if (min_month != max_month)
            throw Exception("Logical error: part spans more than one month.");

        part_name = new_part_info.getPartNameV0(min_date, max_date);
    }
    else
        part_name = new_part_info.getPartName();

    MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared<MergeTreeData::DataPart>(data, part_name, new_part_info);
    new_data_part->partition = std::move(partition);
    new_data_part->minmax_idx = std::move(minmax_idx);
    new_data_part->relative_path = TMP_PREFIX + part_name;
    new_data_part->is_temp = true;

    /// The name could be non-unique in case of stale files from previous runs.
    String full_path = new_data_part->getFullPath();
    Poco::File dir(full_path);

    if (dir.exists())
    {
        LOG_WARNING(log, "Removing old temporary directory " + full_path);
        dir.remove(true);
    }

    dir.createDirectories();

    /// If we need to calculate some columns to sort.
    if (data.hasPrimaryKey())
    {
        data.getPrimaryExpression()->execute(block);
        auto secondary_sort_expr = data.getSecondarySortExpression();
        if (secondary_sort_expr)
            secondary_sort_expr->execute(block);
    }

    /// Build the sort description from the table's sort columns
    /// (direction 1, nulls direction 1 for every column).
    Names sort_columns = data.getSortColumns();
    SortDescription sort_description;
    size_t sort_columns_size = sort_columns.size();
    sort_description.reserve(sort_columns_size);

    for (size_t i = 0; i < sort_columns_size; ++i)
        sort_description.emplace_back(block.getPositionByName(sort_columns[i]), 1, 1);

    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocks);

    /// Sort.
    /// Instead of physically sorting the block, compute a permutation and let the
    /// output stream write rows in permuted order; nullptr means "already sorted".
    IColumn::Permutation * perm_ptr = nullptr;
    IColumn::Permutation perm;
    if (data.hasPrimaryKey())
    {
        if (!isAlreadySorted(block, sort_description))
        {
            stableGetPermutation(block, sort_description, perm);
            perm_ptr = &perm;
        }
        else
            ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted);
    }

    /// This effectively chooses minimal compression method:
    /// either default lz4 or compression method with zero thresholds on absolute and relative part size.
    auto compression_settings = data.context.chooseCompressionSettings(0, 0);

    /// Write only the physical columns actually present in the block.
    NamesAndTypesList columns = data.getColumns().getAllPhysical().filter(block.getNames());
    MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_settings);

    out.writePrefix();
    out.writeWithPermutation(block, perm_ptr);
    out.writeSuffixAndFinalizePart(new_data_part);

    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows());
    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes());
    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterCompressedBytes, new_data_part->bytes_on_disk);

    return new_data_part;
}
|
|
|
|
|
|
|
|
}
|