parallelogram -> hyperrectangle

This commit is contained in:
Amos Bird 2020-03-10 22:56:55 +08:00
parent 27f07b513e
commit 7659a56266
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
9 changed files with 80 additions and 80 deletions

View File

@ -570,7 +570,7 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
return true;
};
/** Because each parallelogram maps to a contiguous sequence of elements
/** Because each hyperrectangle maps to a contiguous sequence of elements
* laid out in the lexicographically increasing order, the set intersects the range
* if and only if either bound coincides with an element or at least one element
* is between the lower bounds

View File

@ -43,7 +43,7 @@ static std::unique_ptr<ReadBufferFromFileBase> openForReading(const DiskPtr & di
void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const DiskPtr & disk_, const String & part_path)
{
size_t minmax_idx_size = data.minmax_idx_column_types.size();
parallelogram.reserve(minmax_idx_size);
hyperrectangle.reserve(minmax_idx_size);
for (size_t i = 0; i < minmax_idx_size; ++i)
{
String file_name = part_path + "minmax_" + escapeForFileName(data.minmax_idx_columns[i]) + ".idx";
@ -55,7 +55,7 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Dis
Field max_val;
data_type->deserializeBinary(max_val, *file);
parallelogram.emplace_back(min_val, true, max_val, true);
hyperrectangle.emplace_back(min_val, true, max_val, true);
}
initialized = true;
}
@ -84,8 +84,8 @@ void IMergeTreeDataPart::MinMaxIndex::store(
auto out = disk_->writeFile(part_path + file_name);
HashingWriteBuffer out_hashing(*out);
data_type->serializeBinary(parallelogram[i].left, out_hashing);
data_type->serializeBinary(parallelogram[i].right, out_hashing);
data_type->serializeBinary(hyperrectangle[i].left, out_hashing);
data_type->serializeBinary(hyperrectangle[i].right, out_hashing);
out_hashing.next();
out_checksums.files[file_name].file_size = out_hashing.count();
out_checksums.files[file_name].file_hash = out_hashing.getHash();
@ -95,7 +95,7 @@ void IMergeTreeDataPart::MinMaxIndex::store(
void IMergeTreeDataPart::MinMaxIndex::update(const Block & block, const Names & column_names)
{
if (!initialized)
parallelogram.reserve(column_names.size());
hyperrectangle.reserve(column_names.size());
for (size_t i = 0; i < column_names.size(); ++i)
{
@ -105,11 +105,11 @@ void IMergeTreeDataPart::MinMaxIndex::update(const Block & block, const Names &
column.column->getExtremes(min_value, max_value);
if (!initialized)
parallelogram.emplace_back(min_value, true, max_value, true);
hyperrectangle.emplace_back(min_value, true, max_value, true);
else
{
parallelogram[i].left = std::min(parallelogram[i].left, min_value);
parallelogram[i].right = std::max(parallelogram[i].right, max_value);
hyperrectangle[i].left = std::min(hyperrectangle[i].left, min_value);
hyperrectangle[i].right = std::max(hyperrectangle[i].right, max_value);
}
}
@ -123,15 +123,15 @@ void IMergeTreeDataPart::MinMaxIndex::merge(const MinMaxIndex & other)
if (!initialized)
{
parallelogram = other.parallelogram;
hyperrectangle = other.hyperrectangle;
initialized = true;
}
else
{
for (size_t i = 0; i < parallelogram.size(); ++i)
for (size_t i = 0; i < hyperrectangle.size(); ++i)
{
parallelogram[i].left = std::min(parallelogram[i].left, other.parallelogram[i].left);
parallelogram[i].right = std::max(parallelogram[i].right, other.parallelogram[i].right);
hyperrectangle[i].left = std::min(hyperrectangle[i].left, other.hyperrectangle[i].left);
hyperrectangle[i].right = std::max(hyperrectangle[i].right, other.hyperrectangle[i].right);
}
}
}
@ -195,7 +195,7 @@ std::optional<size_t> IMergeTreeDataPart::getColumnPosition(const String & colum
DayNum IMergeTreeDataPart::getMinDate() const
{
if (storage.minmax_idx_date_column_pos != -1 && minmax_idx.initialized)
return DayNum(minmax_idx.parallelogram[storage.minmax_idx_date_column_pos].left.get<UInt64>());
return DayNum(minmax_idx.hyperrectangle[storage.minmax_idx_date_column_pos].left.get<UInt64>());
else
return DayNum();
}
@ -204,7 +204,7 @@ DayNum IMergeTreeDataPart::getMinDate() const
DayNum IMergeTreeDataPart::getMaxDate() const
{
if (storage.minmax_idx_date_column_pos != -1 && minmax_idx.initialized)
return DayNum(minmax_idx.parallelogram[storage.minmax_idx_date_column_pos].right.get<UInt64>());
return DayNum(minmax_idx.hyperrectangle[storage.minmax_idx_date_column_pos].right.get<UInt64>());
else
return DayNum();
}
@ -212,7 +212,7 @@ DayNum IMergeTreeDataPart::getMaxDate() const
time_t IMergeTreeDataPart::getMinTime() const
{
if (storage.minmax_idx_time_column_pos != -1 && minmax_idx.initialized)
return minmax_idx.parallelogram[storage.minmax_idx_time_column_pos].left.get<UInt64>();
return minmax_idx.hyperrectangle[storage.minmax_idx_time_column_pos].left.get<UInt64>();
else
return 0;
}
@ -221,7 +221,7 @@ time_t IMergeTreeDataPart::getMinTime() const
time_t IMergeTreeDataPart::getMaxTime() const
{
if (storage.minmax_idx_time_column_pos != -1 && minmax_idx.initialized)
return minmax_idx.parallelogram[storage.minmax_idx_time_column_pos].right.get<UInt64>();
return minmax_idx.hyperrectangle[storage.minmax_idx_time_column_pos].right.get<UInt64>();
else
return 0;
}

View File

@ -255,7 +255,7 @@ public:
struct MinMaxIndex
{
/// A direct product of ranges for each key column. See Storages/MergeTree/KeyCondition.cpp for details.
std::vector<Range> parallelogram;
std::vector<Range> hyperrectangle;
bool initialized = false;
public:
@ -263,7 +263,7 @@ public:
/// For month-based partitioning.
MinMaxIndex(DayNum min_date, DayNum max_date)
: parallelogram(1, Range(min_date, true, max_date, true))
: hyperrectangle(1, Range(min_date, true, max_date, true))
, initialized(true)
{
}

View File

@ -938,40 +938,40 @@ String KeyCondition::toString() const
* The set of all possible tuples can be considered as an n-dimensional space, where n is the size of the tuple.
* A range of tuples specifies some subset of this space.
*
* Parallelograms (you can also find the term "rail")
* Hyperrectangles (you can also find the term "rail")
* will be the subrange of an n-dimensional space that is a direct product of one-dimensional ranges.
* In this case, the one-dimensional range can be: a point, a segment, an interval, a half-interval, unbounded on the left, unbounded on the right ...
*
* The range of tuples can always be represented as a combination of parallelograms.
* For example, the range [ x1 y1 .. x2 y2 ] given x1 != x2 is equal to the union of the following three parallelograms:
* The range of tuples can always be represented as a combination of hyperrectangles.
* For example, the range [ x1 y1 .. x2 y2 ] given x1 != x2 is equal to the union of the following three hyperrectangles:
* [x1] x [y1 .. +inf)
* (x1 .. x2) x (-inf .. +inf)
* [x2] x (-inf .. y2]
*
* Or, for example, the range [ x1 y1 .. +inf ] is equal to the union of the following two parallelograms:
* Or, for example, the range [ x1 y1 .. +inf ] is equal to the union of the following two hyperrectangles:
* [x1] x [y1 .. +inf)
* (x1 .. +inf) x (-inf .. +inf)
* It's easy to see that this is a special case of the variant above.
*
* This is important because it is easy for us to check the feasibility of the condition over the parallelogram,
* This is important because it is easy for us to check the feasibility of the condition over the hyperrectangle,
* and therefore, feasibility of condition on the range of tuples will be checked by feasibility of condition
* over at least one of the parallelograms of which this range consists.
* over at least one of the hyperrectangles of which this range consists.
*/
template <typename F>
static BoolMask forAnyParallelogram(
static BoolMask forAnyHyperrectangle(
size_t key_size,
const Field * key_left,
const Field * key_right,
bool left_bounded,
bool right_bounded,
std::vector<Range> & parallelogram,
std::vector<Range> & hyperrectangle,
size_t prefix_size,
BoolMask initial_mask,
F && callback)
{
if (!left_bounded && !right_bounded)
return callback(parallelogram);
return callback(hyperrectangle);
if (left_bounded && right_bounded)
{
@ -981,7 +981,7 @@ static BoolMask forAnyParallelogram(
if (key_left[prefix_size] == key_right[prefix_size])
{
/// Point ranges.
parallelogram[prefix_size] = Range(key_left[prefix_size]);
hyperrectangle[prefix_size] = Range(key_left[prefix_size]);
++prefix_size;
}
else
@ -990,35 +990,35 @@ static BoolMask forAnyParallelogram(
}
if (prefix_size == key_size)
return callback(parallelogram);
return callback(hyperrectangle);
if (prefix_size + 1 == key_size)
{
if (left_bounded && right_bounded)
parallelogram[prefix_size] = Range(key_left[prefix_size], true, key_right[prefix_size], true);
hyperrectangle[prefix_size] = Range(key_left[prefix_size], true, key_right[prefix_size], true);
else if (left_bounded)
parallelogram[prefix_size] = Range::createLeftBounded(key_left[prefix_size], true);
hyperrectangle[prefix_size] = Range::createLeftBounded(key_left[prefix_size], true);
else if (right_bounded)
parallelogram[prefix_size] = Range::createRightBounded(key_right[prefix_size], true);
hyperrectangle[prefix_size] = Range::createRightBounded(key_right[prefix_size], true);
return callback(parallelogram);
return callback(hyperrectangle);
}
/// (x1 .. x2) x (-inf .. +inf)
if (left_bounded && right_bounded)
parallelogram[prefix_size] = Range(key_left[prefix_size], false, key_right[prefix_size], false);
hyperrectangle[prefix_size] = Range(key_left[prefix_size], false, key_right[prefix_size], false);
else if (left_bounded)
parallelogram[prefix_size] = Range::createLeftBounded(key_left[prefix_size], false);
hyperrectangle[prefix_size] = Range::createLeftBounded(key_left[prefix_size], false);
else if (right_bounded)
parallelogram[prefix_size] = Range::createRightBounded(key_right[prefix_size], false);
hyperrectangle[prefix_size] = Range::createRightBounded(key_right[prefix_size], false);
for (size_t i = prefix_size + 1; i < key_size; ++i)
parallelogram[i] = Range();
hyperrectangle[i] = Range();
BoolMask result = initial_mask;
result = result | callback(parallelogram);
result = result | callback(hyperrectangle);
/// There are several early-exit conditions (like the one below) hereinafter.
/// They are important; in particular, if initial_mask == BoolMask::consider_only_can_be_true
@ -1031,8 +1031,8 @@ static BoolMask forAnyParallelogram(
if (left_bounded)
{
parallelogram[prefix_size] = Range(key_left[prefix_size]);
result = result | forAnyParallelogram(key_size, key_left, key_right, true, false, parallelogram, prefix_size + 1, initial_mask, callback);
hyperrectangle[prefix_size] = Range(key_left[prefix_size]);
result = result | forAnyHyperrectangle(key_size, key_left, key_right, true, false, hyperrectangle, prefix_size + 1, initial_mask, callback);
if (result.isComplete())
return result;
}
@ -1041,8 +1041,8 @@ static BoolMask forAnyParallelogram(
if (right_bounded)
{
parallelogram[prefix_size] = Range(key_right[prefix_size]);
result = result | forAnyParallelogram(key_size, key_left, key_right, false, true, parallelogram, prefix_size + 1, initial_mask, callback);
hyperrectangle[prefix_size] = Range(key_right[prefix_size]);
result = result | forAnyHyperrectangle(key_size, key_left, key_right, false, true, hyperrectangle, prefix_size + 1, initial_mask, callback);
if (result.isComplete())
return result;
}
@ -1075,12 +1075,12 @@ BoolMask KeyCondition::checkInRange(
else
std::cerr << "+inf)\n";*/
return forAnyParallelogram(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0, initial_mask,
[&] (const std::vector<Range> & key_ranges_parallelogram)
return forAnyHyperrectangle(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0, initial_mask,
[&] (const std::vector<Range> & key_ranges_hyperrectangle)
{
auto res = checkInParallelogram(key_ranges_parallelogram, data_types);
auto res = checkInHyperrectangle(key_ranges_hyperrectangle, data_types);
/* std::cerr << "Parallelogram: ";
/* std::cerr << "Hyperrectangle: ";
for (size_t i = 0, size = key_ranges.size(); i != size; ++i)
std::cerr << (i != 0 ? " x " : "") << key_ranges[i].toString();
std::cerr << ": " << res << "\n";*/
@ -1126,8 +1126,8 @@ std::optional<Range> KeyCondition::applyMonotonicFunctionsChainToRange(
return key_range;
}
BoolMask KeyCondition::checkInParallelogram(
const std::vector<Range> & parallelogram,
BoolMask KeyCondition::checkInHyperrectangle(
const std::vector<Range> & hyperrectangle,
const DataTypes & data_types) const
{
std::vector<BoolMask> rpn_stack;
@ -1141,7 +1141,7 @@ BoolMask KeyCondition::checkInParallelogram(
else if (element.function == RPNElement::FUNCTION_IN_RANGE
|| element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
{
const Range * key_range = &parallelogram[element.key_column];
const Range * key_range = &hyperrectangle[element.key_column];
/// The case when the column is wrapped in a chain of possibly monotonic functions.
Range transformed_range;
@ -1176,7 +1176,7 @@ BoolMask KeyCondition::checkInParallelogram(
if (!element.set_index)
throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR);
rpn_stack.emplace_back(element.set_index->checkInRange(parallelogram, data_types));
rpn_stack.emplace_back(element.set_index->checkInRange(hyperrectangle, data_types));
if (element.function == RPNElement::FUNCTION_NOT_IN_SET)
rpn_stack.back() = !rpn_stack.back();
}

View File

@ -249,9 +249,9 @@ public:
const Names & key_column_names,
const ExpressionActionsPtr & key_expr);
/// Whether the condition and its negation are feasible in the direct product of single column ranges specified by `parallelogram`.
BoolMask checkInParallelogram(
const std::vector<Range> & parallelogram,
/// Whether the condition and its negation are feasible in the direct product of single column ranges specified by `hyperrectangle`.
BoolMask checkInHyperrectangle(
const std::vector<Range> & hyperrectangle,
const DataTypes & data_types) const;
/// Whether the condition and its negation are (independently) feasible in the key range.

View File

@ -276,8 +276,8 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
if (part->isEmpty())
continue;
if (minmax_idx_condition && !minmax_idx_condition->checkInParallelogram(
part->minmax_idx.parallelogram, data.minmax_idx_column_types).can_be_true)
if (minmax_idx_condition && !minmax_idx_condition->checkInHyperrectangle(
part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true)
continue;
if (max_block_numbers_to_read)

View File

@ -205,8 +205,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
String part_name;
if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{
DayNum min_date(minmax_idx.parallelogram[data.minmax_idx_date_column_pos].left.get<UInt64>());
DayNum max_date(minmax_idx.parallelogram[data.minmax_idx_date_column_pos].right.get<UInt64>());
DayNum min_date(minmax_idx.hyperrectangle[data.minmax_idx_date_column_pos].left.get<UInt64>());
DayNum max_date(minmax_idx.hyperrectangle[data.minmax_idx_date_column_pos].right.get<UInt64>());
const auto & date_lut = DateLUT::instance();

View File

@ -17,11 +17,11 @@ namespace ErrorCodes
MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index_)
: IMergeTreeIndexGranule(), index(index_), parallelogram() {}
: IMergeTreeIndexGranule(), index(index_), hyperrectangle() {}
MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax(
const MergeTreeIndexMinMax & index_, std::vector<Range> && parallelogram_)
: IMergeTreeIndexGranule(), index(index_), parallelogram(std::move(parallelogram_)) {}
const MergeTreeIndexMinMax & index_, std::vector<Range> && hyperrectangle_)
: IMergeTreeIndexGranule(), index(index_), hyperrectangle(std::move(hyperrectangle_)) {}
void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const
{
@ -34,17 +34,17 @@ void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const
const DataTypePtr & type = index.data_types[i];
if (!type->isNullable())
{
type->serializeBinary(parallelogram[i].left, ostr);
type->serializeBinary(parallelogram[i].right, ostr);
type->serializeBinary(hyperrectangle[i].left, ostr);
type->serializeBinary(hyperrectangle[i].right, ostr);
}
else
{
bool is_null = parallelogram[i].left.isNull() || parallelogram[i].right.isNull(); // one is enough
bool is_null = hyperrectangle[i].left.isNull() || hyperrectangle[i].right.isNull(); // one is enough
writeBinary(is_null, ostr);
if (!is_null)
{
type->serializeBinary(parallelogram[i].left, ostr);
type->serializeBinary(parallelogram[i].right, ostr);
type->serializeBinary(hyperrectangle[i].left, ostr);
type->serializeBinary(hyperrectangle[i].right, ostr);
}
}
}
@ -52,7 +52,7 @@ void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const
void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr)
{
parallelogram.clear();
hyperrectangle.clear();
Field min_val;
Field max_val;
for (size_t i = 0; i < index.columns.size(); ++i)
@ -78,7 +78,7 @@ void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr)
max_val = Null();
}
}
parallelogram.emplace_back(min_val, true, max_val, true);
hyperrectangle.emplace_back(min_val, true, max_val, true);
}
}
@ -88,7 +88,7 @@ MergeTreeIndexAggregatorMinMax::MergeTreeIndexAggregatorMinMax(const MergeTreeIn
MergeTreeIndexGranulePtr MergeTreeIndexAggregatorMinMax::getGranuleAndReset()
{
return std::make_shared<MergeTreeIndexGranuleMinMax>(index, std::move(parallelogram));
return std::make_shared<MergeTreeIndexGranuleMinMax>(index, std::move(hyperrectangle));
}
void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, size_t limit)
@ -107,14 +107,14 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s
const auto & column = block.getByName(index.columns[i]).column;
column->cut(*pos, rows_read)->getExtremes(field_min, field_max);
if (parallelogram.size() <= i)
if (hyperrectangle.size() <= i)
{
parallelogram.emplace_back(field_min, true, field_max, true);
hyperrectangle.emplace_back(field_min, true, field_max, true);
}
else
{
parallelogram[i].left = std::min(parallelogram[i].left, field_min);
parallelogram[i].right = std::max(parallelogram[i].right, field_max);
hyperrectangle[i].left = std::min(hyperrectangle[i].left, field_min);
hyperrectangle[i].right = std::max(hyperrectangle[i].right, field_max);
}
}
@ -140,10 +140,10 @@ bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr
if (!granule)
throw Exception(
"Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);
for (const auto & range : granule->parallelogram)
for (const auto & range : granule->hyperrectangle)
if (range.left.isNull() || range.right.isNull())
return true;
return condition.checkInParallelogram(granule->parallelogram, index.data_types).can_be_true;
return condition.checkInHyperrectangle(granule->hyperrectangle, index.data_types).can_be_true;
}

View File

@ -16,16 +16,16 @@ class MergeTreeIndexMinMax;
struct MergeTreeIndexGranuleMinMax : public IMergeTreeIndexGranule
{
explicit MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index_);
MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index_, std::vector<Range> && parallelogram_);
MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index_, std::vector<Range> && hyperrectangle_);
~MergeTreeIndexGranuleMinMax() override = default;
void serializeBinary(WriteBuffer & ostr) const override;
void deserializeBinary(ReadBuffer & istr) override;
bool empty() const override { return parallelogram.empty(); }
bool empty() const override { return hyperrectangle.empty(); }
const MergeTreeIndexMinMax & index;
std::vector<Range> parallelogram;
std::vector<Range> hyperrectangle;
};
@ -34,12 +34,12 @@ struct MergeTreeIndexAggregatorMinMax : IMergeTreeIndexAggregator
explicit MergeTreeIndexAggregatorMinMax(const MergeTreeIndexMinMax & index);
~MergeTreeIndexAggregatorMinMax() override = default;
bool empty() const override { return parallelogram.empty(); }
bool empty() const override { return hyperrectangle.empty(); }
MergeTreeIndexGranulePtr getGranuleAndReset() override;
void update(const Block & block, size_t * pos, size_t limit) override;
const MergeTreeIndexMinMax & index;
std::vector<Range> parallelogram;
std::vector<Range> hyperrectangle;
};