Merge pull request #59741 from vitlibar/fix-skipping-unused-shards-with_analyzer

Fix skipping unused shards with analyzer
This commit is contained in:
Ilya Yatsishin 2024-02-09 02:44:59 +04:00 committed by GitHub
commit 1c58cafcf3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 41 additions and 7 deletions

View File

@ -554,6 +554,21 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
}
/// Builds a new column holding a copy of the nested data elements that belong
/// to the rows [start, start + length) of this array column.
///
/// Throws PARAMETER_OUT_OF_BOUND when the requested row range does not fit
/// into the number of rows (i.e. the size of the offsets column).
MutableColumnPtr ColumnArray::getDataInRange(size_t start, size_t length) const
{
    const size_t rows = getOffsets().size();

    /// Two separate comparisons instead of `start + length > rows`, so that a huge
    /// `start` or `length` cannot wrap around size_t and slip past the check.
    if (start > rows || length > rows - start)
        throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnArray::getDataInRange method. "
            "[start({}) + length({}) > offsets.size({})]", start, length, rows);

    /// Translate the row range into a range of positions inside the nested data column.
    size_t start_offset = offsetAt(start);
    size_t end_offset = offsetAt(start + length);

    /// Copy only the selected elements into an empty column of the same type.
    auto res = getData().cloneEmpty();
    res->insertRangeFrom(getData(), start_offset, end_offset - start_offset);
    return res;
}
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const
{
if (typeid_cast<const ColumnUInt8 *>(data.get()))

View File

@ -143,6 +143,10 @@ public:
/// Read-only access to the pointer stored in the `offsets` member.
const ColumnPtr & getOffsetsPtr() const { return offsets; }
/// Mutable access to the pointer stored in the `offsets` member; the caller may rebind it.
ColumnPtr & getOffsetsPtr() { return offsets; }
/// Returns a copy of the part of the data column that corresponds to the rows [start, start + length).
/// For example, `getDataInRange(0, size())` is the same as `getDataPtr()->clone()`.
/// Throws PARAMETER_OUT_OF_BOUND if `start + length` is greater than the size of the offsets column.
MutableColumnPtr getDataInRange(size_t start, size_t length) const;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnArray>(num_columns, selector);

View File

@ -605,7 +605,7 @@ ActionsDAGPtr ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool rem
return actions;
}
static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * node, ColumnsWithTypeAndName arguments)
static ColumnWithTypeAndName executeActionForPartialResult(const ActionsDAG::Node * node, ColumnsWithTypeAndName arguments, size_t input_rows_count)
{
ColumnWithTypeAndName res_column;
res_column.type = node->result_type;
@ -615,7 +615,7 @@ static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * nod
{
case ActionsDAG::ActionType::FUNCTION:
{
res_column.column = node->function->execute(arguments, res_column.type, 0, true);
res_column.column = node->function->execute(arguments, res_column.type, input_rows_count, true);
break;
}
@ -628,13 +628,24 @@ static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * nod
if (!array)
throw Exception(ErrorCodes::TYPE_MISMATCH,
"ARRAY JOIN of not array nor map: {}", node->result_name);
res_column.column = array->getDataPtr()->cloneEmpty();
ColumnPtr data;
if (input_rows_count < array->size())
data = array->getDataInRange(0, input_rows_count);
else
data = array->getDataPtr();
res_column.column = data;
break;
}
case ActionsDAG::ActionType::COLUMN:
{
res_column.column = node->column->cloneResized(0);
auto column = node->column;
if (input_rows_count < column->size())
column = column->cloneResized(input_rows_count);
res_column.column = column;
break;
}
@ -681,7 +692,7 @@ Block ActionsDAG::updateHeader(Block header) const
ColumnsWithTypeAndName result_columns;
try
{
result_columns = evaluatePartialResult(node_to_column, outputs, true);
result_columns = evaluatePartialResult(node_to_column, outputs, /* input_rows_count= */ 0, /* throw_on_error= */ true);
}
catch (Exception & e)
{
@ -710,8 +721,11 @@ Block ActionsDAG::updateHeader(Block header) const
ColumnsWithTypeAndName ActionsDAG::evaluatePartialResult(
IntermediateExecutionResult & node_to_column,
const NodeRawConstPtrs & outputs,
size_t input_rows_count,
bool throw_on_error)
{
chassert(input_rows_count <= 1); /// evaluatePartialResult() should be used only to evaluate headers or constants
ColumnsWithTypeAndName result_columns;
result_columns.reserve(outputs.size());
@ -768,7 +782,7 @@ ColumnsWithTypeAndName ActionsDAG::evaluatePartialResult(
node->result_name);
if (node->type != ActionsDAG::ActionType::INPUT && has_all_arguments)
node_to_column[node] = executeActionForHeader(node, std::move(arguments));
node_to_column[node] = executeActionForPartialResult(node, std::move(arguments), input_rows_count);
}
}

View File

@ -278,6 +278,7 @@ public:
static ColumnsWithTypeAndName evaluatePartialResult(
IntermediateExecutionResult & node_to_column,
const NodeRawConstPtrs & outputs,
size_t input_rows_count,
bool throw_on_error);
/// For apply materialize() function for every output.

View File

@ -661,7 +661,7 @@ namespace
const ActionsDAG::NodeRawConstPtrs & target_expr,
ConjunctionMap && conjunction)
{
auto columns = ActionsDAG::evaluatePartialResult(conjunction, target_expr, false);
auto columns = ActionsDAG::evaluatePartialResult(conjunction, target_expr, /* input_rows_count= */ 1, /* throw_on_error= */ false);
for (const auto & column : columns)
if (!column.column)
return {};