dbms: improvements [#METR-19266]

This commit is contained in:
Alexey Arno 2016-08-18 17:25:02 +03:00
parent da06da5528
commit f65e7b4a0f
13 changed files with 291 additions and 254 deletions

View File

@ -39,6 +39,7 @@ add_library (dbms
include/DB/Functions/FunctionsTransform.h
include/DB/Functions/Conditional/CondException.h
include/DB/Functions/Conditional/common.h
include/DB/Functions/Conditional/NullMapBuilder.h
include/DB/Functions/Conditional/getArrayType.h
include/DB/Functions/Conditional/ArgsInfo.h
include/DB/Functions/Conditional/CondSource.h
@ -891,6 +892,7 @@ add_library (dbms
src/Functions/Conditional/getArrayType.cpp
src/Functions/Conditional/ArgsInfo.cpp
src/Functions/Conditional/CondSource.cpp
src/Functions/Conditional/NullMapBuilder.cpp
src/Functions/Conditional/NumericPerformer.cpp
src/Functions/Conditional/StringEvaluator.cpp
src/Functions/Conditional/StringArrayEvaluator.cpp

View File

@ -2,6 +2,7 @@
#include <DB/Functions/Conditional/CondException.h>
#include <DB/Functions/Conditional/common.h>
#include <DB/Functions/Conditional/NullMapBuilder.h>
#include <DB/Functions/Conditional/CondSource.h>
#include <DB/DataTypes/DataTypeArray.h>
#include <DB/Columns/ColumnVector.h>
@ -315,20 +316,15 @@ template <typename TResult>
class ArrayEvaluator final
{
public:
static void perform(const Branches & branches, Block & block, const ColumnNumbers & args, size_t result, size_t tracker)
static void perform(const Branches & branches, Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder)
{
const CondSources conds = createConds(block, args);
size_t row_count = conds[0].getSize();
IArraySources<TResult> sources = createSources(block, args, branches);
ArraySink<TResult> sink = createSink(block, sources, result, row_count);
ColumnUInt16 * tracker_col = nullptr;
if (tracker != result)
{
auto & col = block.unsafeGetByPosition(tracker).column;
col = std::make_shared<ColumnUInt16>(row_count);
tracker_col = static_cast<ColumnUInt16 *>(col.get());
}
if (builder)
builder.init(args);
for (size_t cur_row = 0; cur_row < row_count; ++cur_row)
{
@ -340,11 +336,8 @@ public:
if (cond.get(cur_row))
{
sink.store(sources[cur_source]->get());
if (tracker_col != nullptr)
{
auto & data = tracker_col->getData();
data[cur_row] = args[branches[cur_source].index];
}
if (builder)
builder.update(args[branches[cur_source].index], cur_row);
has_triggered_cond = true;
break;
}
@ -354,11 +347,8 @@ public:
if (!has_triggered_cond)
{
sink.store(sources.back()->get());
if (tracker_col != nullptr)
{
auto & data = tracker_col->getData();
data[cur_row] = args[branches.back().index];
}
if (builder)
builder.update(args[branches.back().index], cur_row);
}
for (auto & source : sources)
@ -446,10 +436,8 @@ template <>
class ArrayEvaluator<NumberTraits::Error>
{
public:
/// The tracker parameter is an index to a column that tracks the originating column of each value of
/// the result column. Calling this function with result == tracker means that no such tracking is
/// required, which happens if multiIf is called with no nullable parameters.
static void perform(const Branches & branches, Block & block, const ColumnNumbers & args, size_t result, size_t tracker)
/// For the meaning of the builder parameter, see the FunctionMultiIf::perform() declaration.
static void perform(const Branches & branches, Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder)
{
throw CondException{CondErrorCodes::ARRAY_EVALUATOR_INVALID_TYPES};
}

View File

@ -0,0 +1,78 @@
#pragma once
#include <DB/Core/Block.h>
namespace DB
{
namespace Conditional
{
/// Incrementally builds the null map of the result column of a multiIf
/// invocation when the type of that result is nullable.
///
/// Two usage patterns exist:
///  - non-trivial execution paths call init() once, then update() for each
///    processed row;
///  - the trivial execution path calls build() once.
/// In both cases the resulting map is retrieved with getNullMap().
///
/// The builder only keeps a reference to the block it is given, so the block
/// must outlive the builder.
class NullMapBuilder final
{
public:
    /// Create a dummy builder when we don't need any builder, i.e. when the result
    /// of multiIf is not nullable. A dummy builder refers to a shared static block
    /// (empty_block) and its row count is zero.
    NullMapBuilder()
        : block{empty_block}
    {
    }

    /// This constructor takes the block that contains the original data received
    /// by multiIf, i.e. they have not been processed. The number of rows of the
    /// null map is taken from this block at construction time.
    NullMapBuilder(Block & block_)
        : block{block_}, row_count{block.rowsInFirstColumn()}
    {
    }

    /// Check whether the builder is dummy or not. Both operators simply forward
    /// to the corresponding conversion of the referenced block.
    operator bool() const { return block; }
    bool operator!() const { return !block; }

    /// Initialize the builder. For the non-trivial execution paths of multiIf.
    /// The parameter args lists the positions, inside the block, of the
    /// arguments of multiIf.
    void init(const ColumnNumbers & args);

    /// Update the null map being built at the row that has just been processed
    /// by multiIf. The parameter index indicates the index of the column being
    /// checked for nullity. For non-trivial execution paths of multiIf.
    /// init() must have been called beforehand.
    void update(size_t index, size_t row);

    /// Build the null map. The parameter index has the same meaning as above.
    /// For the trivial execution path of multiIf.
    void build(size_t index);

    /// Accessor needed to return the fully built null map.
    ColumnPtr getNullMap() const { return null_map; }

private:
    /// Property of a column.
    enum Property
    {
        /// Neither nullable nor null.
        IS_ORDINARY = 0,
        /// Nullable column.
        IS_NULLABLE,
        /// Null column.
        IS_NULL
    };

private:
    /// The block holding the original (unprocessed) arguments of multiIf.
    Block & block;

    /// Remember for each column representing an argument whether it is
    /// nullable, null, or neither of them. This avoids us many costly
    /// calls to virtual functions.
    std::vector<Property> cols_properties;

    /// The null map being built; unset until init() or build() is called.
    ColumnPtr null_map;

    /// Number of rows of the null map. Default-initialized so that a dummy
    /// builder never carries an indeterminate value (the default constructor
    /// does not set it explicitly).
    size_t row_count = 0;

private:
    /// Block referenced by every dummy builder.
    static Block empty_block;
};
}
}

View File

@ -2,6 +2,7 @@
#include <DB/Functions/Conditional/CondException.h>
#include <DB/Functions/Conditional/common.h>
#include <DB/Functions/Conditional/NullMapBuilder.h>
#include <DB/Functions/Conditional/CondSource.h>
#include <DB/Functions/NumberTraits.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
@ -141,20 +142,15 @@ template <typename TResult>
class NumericEvaluator final
{
public:
static void perform(const Branches & branches, Block & block, const ColumnNumbers & args, size_t result, size_t tracker)
static void perform(const Branches & branches, Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder)
{
const CondSources conds = createConds(block, args);
const NumericSources<TResult> sources = createNumericSources(block, args, branches);
size_t row_count = conds[0].getSize();
PaddedPODArray<TResult> & res = createSink(block, result, row_count);
ColumnUInt16 * tracker_col = nullptr;
if (tracker != result)
{
auto & col = block.unsafeGetByPosition(tracker).column;
col = std::make_shared<ColumnUInt16>(row_count);
tracker_col = static_cast<ColumnUInt16 *>(col.get());
}
if (builder)
builder.init(args);
for (size_t cur_row = 0; cur_row < row_count; ++cur_row)
{
@ -166,11 +162,8 @@ public:
if (cond.get(cur_row))
{
res[cur_row] = sources[cur_source]->get(cur_row);
if (tracker_col != nullptr)
{
auto & data = tracker_col->getData();
data[cur_row] = args[branches[cur_source].index];
}
if (builder)
builder.update(args[branches[cur_source].index], cur_row);
has_triggered_cond = true;
break;
}
@ -180,11 +173,8 @@ public:
if (!has_triggered_cond)
{
res[cur_row] = sources.back()->get(cur_row);
if (tracker_col != nullptr)
{
auto & data = tracker_col->getData();
data[cur_row] = args[branches.back().index];
}
if (builder)
builder.update(args[branches.back().index], cur_row);
}
}
}
@ -249,10 +239,8 @@ template <>
class NumericEvaluator<NumberTraits::Error>
{
public:
/// The tracker parameter is an index to a column that tracks the originating column of each value of
/// the result column. Calling this function with result == tracker means that no such tracking is
/// required, which happens if multiIf is called with no nullable parameters.
static void perform(const Branches & branches, Block & block, const ColumnNumbers & args, size_t result, size_t tracker)
/// For the meaning of the builder parameter, see the FunctionMultiIf::perform() declaration.
static void perform(const Branches & branches, Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder)
{
throw Exception{"Internal logic error", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}

View File

@ -9,14 +9,14 @@ namespace DB
namespace Conditional
{
class NullMapBuilder;
struct NumericPerformer
{
/// Perform a multiIf function for numeric branch (then, else) arguments
/// that may have either scalar types or array types.
/// The tracker parameter is an index to a column that tracks the originating column of each value of
/// the result column. Calling this function with result == tracker means that no such tracking is
/// required, which happens if multiIf is called with no nullable parameters.
static bool perform(Block & block, const ColumnNumbers & args, size_t result, size_t tracker);
/// For the meaning of the builder parameter, see the FunctionMultiIf::perform() declaration.
static bool perform(Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder);
};
}

View File

@ -9,13 +9,15 @@ namespace DB
namespace Conditional
{
class NullMapBuilder;
class StringArrayEvaluator
{
public:
/// The tracker parameter is an index to a column that tracks the originating column of each value of
/// the result column. Calling this function with result == tracker means that no such tracking is
/// required, which happens if multiIf is called with no nullable parameters.
static bool perform(Block & block, const ColumnNumbers & args, size_t result, size_t tracker);
static bool perform(Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder);
};
}

View File

@ -9,12 +9,12 @@ namespace DB
namespace Conditional
{
class NullMapBuilder;
struct StringEvaluator final
{
/// The tracker parameter is an index to a column that tracks the originating column of each value of
/// the result column. Calling this function with result == tracker means that no such tracking is
/// required, which happens if multiIf is called with no nullable parameters.
static bool perform(Block & block, const ColumnNumbers & args, size_t result, size_t tracker);
/// For the meaning of the builder parameter, see the FunctionMultiIf::perform() declaration.
static bool perform(Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder);
};
}

View File

@ -1458,6 +1458,7 @@ public:
namespace Conditional
{
class NullMapBuilder;
class CondException;
}
@ -1494,15 +1495,15 @@ private:
DataTypePtr getReturnTypeInternal(const DataTypes & args) const;
/// Internal version of multiIf.
/// The tracker parameter is an index to a column that tracks the originating column of each value of
/// the result column. The condition result == tracker means that no such tracking is
/// required, which happens if multiIf is called with no nullable parameters.
void perform(Block & block, const ColumnNumbers & args, size_t result, size_t tracker);
/// The builder parameter is an object that incrementally builds the null map
/// of the result column if it is nullable. When no builder is necessary,
/// just pass a default parameter.
void perform(Block & block, const ColumnNumbers & args, size_t result, Conditional::NullMapBuilder & builder);
/// Perform multiIf in the case where all the non-null branches have the same type and all
/// the conditions are constant. The same remark as above applies with regards to
/// the tracker parameter.
bool performTrivialCase(Block & block, const ColumnNumbers & args, size_t result, size_t tracker);
/// the builder parameter.
bool performTrivialCase(Block & block, const ColumnNumbers & args, size_t result, Conditional::NullMapBuilder & builder);
/// Translate a context-free error into a contextual error.
void rethrowContextually(const Conditional::CondException & ex) const;

View File

@ -0,0 +1,76 @@
#include <DB/Functions/Conditional/NullMapBuilder.h>
#include <DB/Columns/ColumnsNumber.h>
#include <DB/Columns/ColumnNullable.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace Conditional
{
/// Default-constructed block to which every dummy (default-constructed)
/// NullMapBuilder binds its block reference.
Block NullMapBuilder::empty_block;
void NullMapBuilder::init(const ColumnNumbers & args)
{
null_map = std::make_shared<ColumnUInt8>(row_count);
cols_properties.resize(block.columns());
for (const auto & arg : args)
{
const auto & col = *block.unsafeGetByPosition(arg).column;
if (col.isNullable())
cols_properties[arg] = IS_NULLABLE;
else if (col.isNull())
cols_properties[arg] = IS_NULL;
else
cols_properties[arg] = IS_ORDINARY;
}
}
void NullMapBuilder::update(size_t index, size_t row)
{
const IColumn & from = *block.unsafeGetByPosition(index).column;
bool is_null;
auto property = cols_properties[index];
if (property == IS_NULL)
is_null = true;
else if (property == IS_NULLABLE)
{
const auto & nullable_col = static_cast<const ColumnNullable &>(from);
is_null = nullable_col.isNullAt(row);
}
else if (property == IS_ORDINARY)
is_null = false;
else
throw Exception{"NullMapBuilder: internal error", ErrorCodes::LOGICAL_ERROR};
auto & null_map_data = static_cast<ColumnUInt8 &>(*null_map).getData();
null_map_data[row] = is_null ? 1 : 0;
}
void NullMapBuilder::build(size_t index)
{
const IColumn & from = *block.unsafeGetByPosition(index).column;
if (from.isNull())
null_map = std::make_shared<ColumnUInt8>(row_count, 1);
else if (from.isNullable())
{
const auto & nullable_col = static_cast<const ColumnNullable &>(from);
null_map = nullable_col.getNullValuesByteMap();
}
else
null_map = std::make_shared<ColumnUInt8>(row_count, 0);
}
}
}

View File

@ -1,4 +1,5 @@
#include <DB/Functions/Conditional/NumericPerformer.h>
#include <DB/Functions/Conditional/NullMapBuilder.h>
#include <DB/Functions/Conditional/CondException.h>
#include <DB/Functions/Conditional/ArgsInfo.h>
#include <DB/Functions/Conditional/NumericEvaluator.h>
@ -163,7 +164,7 @@ struct ElsePredicate final : public PredicateBase<TType>
using TFinal2 = typename RemoveNullable<TFinal>::Type;
static bool execute(size_t index, Block & block, const ColumnNumbers & args,
size_t result, size_t tracker, Branches & branches)
size_t result, NullMapBuilder & builder, Branches & branches)
{
if (!Base::appendBranchInfo(index, block, args, branches))
return false;
@ -179,10 +180,10 @@ struct ElsePredicate final : public PredicateBase<TType>
{
if (category & Category::NUMERIC_ARRAY)
throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR};
NumericEvaluator<TFinal2>::perform(branches, block, args, result, tracker);
NumericEvaluator<TFinal2>::perform(branches, block, args, result, builder);
}
else if (category & Category::NUMERIC_ARRAY)
ArrayEvaluator<TFinal2>::perform(branches, block, args, result, tracker);
ArrayEvaluator<TFinal2>::perform(branches, block, args, result, builder);
else
throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR};
@ -195,7 +196,7 @@ template <typename Nullity>
struct ElsePredicate<NumberTraits::Enriched::Void<Nullity>, Null> final : public PredicateBase<Null>
{
static bool execute(size_t index, Block & block, const ColumnNumbers & args,
size_t result, size_t tracker, Branches & branches)
size_t result, NullMapBuilder & builder, Branches & branches)
{
throw Exception{"Internal logic error", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
@ -206,7 +207,7 @@ template <typename TResult>
struct ElsePredicate<TResult, NumberTraits::Error> : public PredicateBase<NumberTraits::Error>
{
static bool execute(size_t index, Block & block, const ColumnNumbers & args,
size_t result, size_t tracker, Branches & branches)
size_t result, NullMapBuilder & builder, Branches & branches)
{
throw Exception{"Internal logic error", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
@ -217,7 +218,7 @@ template <typename TType>
struct ElsePredicate<NumberTraits::Error, TType>
{
static bool execute(size_t index, Block & block, const ColumnNumbers & args,
size_t result, size_t tracker, Branches & branches)
size_t result, NullMapBuilder & builder, Branches & branches)
{
throw Exception{"Internal logic error", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
@ -236,7 +237,7 @@ struct ThenPredicate final : public PredicateBase<TType>
>::Type;
static bool execute(size_t index, Block & block, const ColumnNumbers & args,
size_t result, size_t tracker, Branches & branches)
size_t result, NullMapBuilder & builder, Branches & branches)
{
if (!Base::appendBranchInfo(index, block, args, branches))
return false;
@ -247,33 +248,33 @@ struct ThenPredicate final : public PredicateBase<TType>
if (index2 != elseArg(args))
{
/// We have a pair Cond-Then. Process the next Then.
if (! (ThenPredicate<TCombined, UInt8>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, UInt16>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, UInt32>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, UInt64>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, Int8>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, Int16>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, Int32>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, Int64>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, Float32>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, Float64>::execute(index2 + 1, block, args, result, tracker, branches)
|| ThenPredicate<TCombined, Null>::execute(index2 + 1, block, args, result, tracker, branches)))
if (! (ThenPredicate<TCombined, UInt8>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, UInt16>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, UInt32>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, UInt64>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, Int8>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, Int16>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, Int32>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, Int64>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, Float32>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, Float64>::execute(index2 + 1, block, args, result, builder, branches)
|| ThenPredicate<TCombined, Null>::execute(index2 + 1, block, args, result, builder, branches)))
return false;
}
else
{
/// We have an Else which ends the multiIf. Process it.
if (! (ElsePredicate<TCombined, UInt8>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, UInt16>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, UInt32>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, UInt64>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, Int8>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, Int16>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, Int32>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, Int64>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, Float32>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, Float64>::execute(index2, block, args, result, tracker, branches)
|| ElsePredicate<TCombined, Null>::execute(index2, block, args, result, tracker, branches)))
if (! (ElsePredicate<TCombined, UInt8>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, UInt16>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, UInt32>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, UInt64>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, Int8>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, Int16>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, Int32>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, Int64>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, Float32>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, Float64>::execute(index2, block, args, result, builder, branches)
|| ElsePredicate<TCombined, Null>::execute(index2, block, args, result, builder, branches)))
return false;
}
@ -286,7 +287,7 @@ template <typename TResult>
struct ThenPredicate<TResult, NumberTraits::Error>
{
static bool execute(size_t index, Block & block, const ColumnNumbers & args,
size_t result, size_t tracker, Branches & branches)
size_t result, NullMapBuilder & builder, Branches & branches)
{
throw Exception{"Internal logic error", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
@ -297,7 +298,7 @@ template <typename TType>
struct ThenPredicate<NumberTraits::Error, TType>
{
static bool execute(size_t index, Block & block, const ColumnNumbers & args,
size_t result, size_t tracker, Branches & branches)
size_t result, NullMapBuilder & builder, Branches & branches)
{
throw Exception{"Internal logic error", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
@ -306,31 +307,31 @@ struct ThenPredicate<NumberTraits::Error, TType>
/// First Then
struct FirstThenPredicate final
{
static bool execute(Block & block, const ColumnNumbers & args, size_t result, size_t tracker)
static bool execute(Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder)
{
using Void = NumberTraits::Enriched::Void<NumberTraits::HasNoNull>;
Branches branches;
return ThenPredicate<Void, UInt8>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, UInt16>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, UInt32>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, UInt64>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, Int8>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, Int16>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, Int32>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, Int64>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, Float32>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, Float64>::execute(firstThen(), block, args, result, tracker, branches)
|| ThenPredicate<Void, Null>::execute(firstThen(), block, args, result, tracker, branches);
return ThenPredicate<Void, UInt8>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, UInt16>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, UInt32>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, UInt64>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, Int8>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, Int16>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, Int32>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, Int64>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, Float32>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, Float64>::execute(firstThen(), block, args, result, builder, branches)
|| ThenPredicate<Void, Null>::execute(firstThen(), block, args, result, builder, branches);
}
};
}
bool NumericPerformer::perform(Block & block, const ColumnNumbers & args,
size_t result, size_t tracker)
size_t result, NullMapBuilder & builder)
{
return FirstThenPredicate::execute(block, args, result, tracker);
return FirstThenPredicate::execute(block, args, result, builder);
}
}

View File

@ -1,6 +1,7 @@
#include <DB/Functions/Conditional/StringArrayEvaluator.h>
#include <DB/Functions/Conditional/CondSource.h>
#include <DB/Functions/Conditional/common.h>
#include <DB/Functions/Conditional/NullMapBuilder.h>
#include <DB/Columns/ColumnVector.h>
#include <DB/Columns/ColumnString.h>
#include <DB/Columns/ColumnConst.h>
@ -393,7 +394,7 @@ VarStringArraySink createSink(Block & block, const StringArraySources & sources,
}
/// Process a multiIf.
bool StringArrayEvaluator::perform(Block & block, const ColumnNumbers & args, size_t result, size_t tracker)
bool StringArrayEvaluator::perform(Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder)
{
StringArraySources sources;
if (!createStringArraySources(sources, block, args))
@ -403,13 +404,8 @@ bool StringArrayEvaluator::perform(Block & block, const ColumnNumbers & args, si
size_t row_count = conds[0].getSize();
VarStringArraySink sink = createSink(block, sources, result, row_count);
ColumnUInt16 * tracker_col = nullptr;
if (tracker != result)
{
auto & col = block.unsafeGetByPosition(tracker).column;
col = std::make_shared<ColumnUInt16>(row_count);
tracker_col = static_cast<ColumnUInt16 *>(col.get());
}
if (builder)
builder.init(args);
for (size_t cur_row = 0; cur_row < row_count; ++cur_row)
{
@ -421,11 +417,8 @@ bool StringArrayEvaluator::perform(Block & block, const ColumnNumbers & args, si
if (cond.get(cur_row))
{
sink.store(sources[cur_source]->get());
if (tracker_col != nullptr)
{
auto & data = tracker_col->getData();
data[cur_row] = sources[cur_source]->getIndex();
}
if (builder)
builder.update(sources[cur_source]->getIndex(), cur_row);
has_triggered_cond = true;
break;
}
@ -435,11 +428,8 @@ bool StringArrayEvaluator::perform(Block & block, const ColumnNumbers & args, si
if (!has_triggered_cond)
{
sink.store(sources.back()->get());
if (tracker_col != nullptr)
{
auto & data = tracker_col->getData();
data[cur_row] = sources.back()->getIndex();
}
if (builder)
builder.update(sources.back()->getIndex(), cur_row);
}
for (auto & source : sources)

View File

@ -1,5 +1,6 @@
#include <DB/Functions/Conditional/StringEvaluator.h>
#include <DB/Functions/Conditional/common.h>
#include <DB/Functions/Conditional/NullMapBuilder.h>
#include <DB/Functions/Conditional/CondSource.h>
#include <DB/DataTypes/DataTypeArray.h>
#include <DB/DataTypes/DataTypeString.h>
@ -437,15 +438,11 @@ class SinkUpdater
{
public:
static void execute(Block & block, const StringSources & sources, const CondSources & conds,
SinkType & sink, size_t row_count, size_t result, size_t tracker)
SinkType & sink, size_t row_count, const ColumnNumbers & args, size_t result,
NullMapBuilder & builder)
{
ColumnUInt16 * tracker_col = nullptr;
if (tracker != result)
{
auto & col = block.unsafeGetByPosition(tracker).column;
col = std::make_shared<ColumnUInt16>(row_count);
tracker_col = static_cast<ColumnUInt16 *>(col.get());
}
if (builder)
builder.init(args);
for (size_t cur_row = 0; cur_row < row_count; ++cur_row)
{
@ -457,11 +454,8 @@ public:
if (cond.get(cur_row))
{
sink.store(sources[cur_source]->get());
if (tracker_col != nullptr)
{
auto & data = tracker_col->getData();
data[cur_row] = sources[cur_source]->getIndex();
}
if (builder)
builder.update(sources[cur_source]->getIndex(), cur_row);
has_triggered_cond = true;
break;
}
@ -471,11 +465,8 @@ public:
if (!has_triggered_cond)
{
sink.store(sources.back()->get());
if (tracker_col != nullptr)
{
auto & data = tracker_col->getData();
data[cur_row] = sources.back()->getIndex();
}
if (builder)
builder.update(sources.back()->getIndex(), cur_row);
}
for (auto & source : sources)
@ -493,10 +484,12 @@ class Performer<true>
{
public:
static void execute(const StringSources & sources, const CondSources & conds,
size_t row_count, Block & block, size_t result, size_t tracker)
size_t row_count, Block & block, const ColumnNumbers & args, size_t result,
NullMapBuilder & builder)
{
FixedStringSink sink = createSink(block, sources, result, row_count);
SinkUpdater<FixedStringSink>::execute(block, sources, conds, sink, row_count, result, tracker);
SinkUpdater<FixedStringSink>::execute(block, sources, conds, sink, row_count,
args, result, builder);
}
private:
@ -525,10 +518,12 @@ class Performer<false>
{
public:
static void execute(const StringSources & sources, const CondSources & conds,
size_t row_count, Block & block, size_t result, size_t tracker)
size_t row_count, Block & block, const ColumnNumbers & args, size_t result,
NullMapBuilder & builder)
{
VarStringSink sink = createSink(block, sources, result, row_count);
SinkUpdater<VarStringSink>::execute(block, sources, conds, sink, row_count, result, tracker);
SinkUpdater<VarStringSink>::execute(block, sources, conds, sink, row_count,
args, result, builder);
}
private:
@ -549,7 +544,7 @@ private:
}
/// Process a multiIf.
bool StringEvaluator::perform(Block & block, const ColumnNumbers & args, size_t result, size_t tracker)
bool StringEvaluator::perform(Block & block, const ColumnNumbers & args, size_t result, NullMapBuilder & builder)
{
StringSources sources;
if (!createStringSources(sources, block, args))
@ -569,9 +564,9 @@ bool StringEvaluator::perform(Block & block, const ColumnNumbers & args, size_t
}
if (has_only_fixed_sources)
Performer<true>::execute(sources, conds, row_count, block, result, tracker);
Performer<true>::execute(sources, conds, row_count, block, args, result, builder);
else
Performer<false>::execute(sources, conds, row_count, block, result, tracker);
Performer<false>::execute(sources, conds, row_count, block, args, result, builder);
return true;
}

View File

@ -3,6 +3,7 @@
#include <DB/Functions/FunctionsTransform.h>
#include <DB/Functions/FunctionFactory.h>
#include <DB/Functions/Conditional/common.h>
#include <DB/Functions/Conditional/NullMapBuilder.h>
#include <DB/Functions/Conditional/ArgsInfo.h>
#include <DB/Functions/Conditional/CondSource.h>
#include <DB/Functions/Conditional/NumericPerformer.h>
@ -137,19 +138,11 @@ void FunctionMultiIf::executeImpl(Block & block, const ColumnNumbers & args, siz
if (!blockHasSpecialBranches(block, args))
{
/// All the branch types are ordinary. No special processing required.
perform(block, args, result, result);
Conditional::NullMapBuilder builder;
perform(block, args, result, builder);
return;
}
/// The adopted approach is quite similar to how ordinary functions deal
/// with nullable arguments. From the original block, we create a new block
/// that contains only non-nullable types and an extra column, namely a "tracker"
/// column that tracks the originating column of each row of the result column.
/// This way, after having run multiIf on this new block, we can create
/// a correct null byte map for the result column.
size_t row_count = block.rowsInFirstColumn();
/// From the block to be processed, deduce a block whose specified
/// columns are not nullable. We accept null columns because they
/// are processed independently later.
@ -161,96 +154,21 @@ void FunctionMultiIf::executeImpl(Block & block, const ColumnNumbers & args, siz
Block block_with_nested_cols = createBlockWithNestedColumns(block, args_to_transform);
/// Append a column that tracks, for each result of multiIf, the index
/// of the originating column. UInt16 is enough for 65536 columns.
/// A table with such a big number of columns is highly unlikely to appear.
ColumnWithTypeAndName elem;
elem.type = std::make_shared<DataTypeUInt16>();
size_t tracker = block_with_nested_cols.columns();
block_with_nested_cols.insert(elem);
/// Create an object that will incrementally build the null map of the
/// result column to be returned.
Conditional::NullMapBuilder builder{block};
/// Now perform multiIf.
perform(block_with_nested_cols, args, result, tracker);
perform(block_with_nested_cols, args, result, builder);
/// Store the result.
const ColumnWithTypeAndName & source_col = block_with_nested_cols.unsafeGetByPosition(result);
ColumnWithTypeAndName & dest_col = block.unsafeGetByPosition(result);
if (source_col.column->isNull())
{
/// Degenerate case: the result is a null column.
dest_col.column = source_col.column;
return;
}
/// Set up the null byte map of the result column by using the branch tracker column values.
ColumnPtr tracker_holder = block_with_nested_cols.unsafeGetByPosition(tracker).column;
ColumnPtr null_map;
if (auto col = typeid_cast<ColumnConstUInt16 *>(tracker_holder.get()))
{
auto pos = col->getData();
const IColumn & origin = *block.unsafeGetByPosition(pos).column;
if (origin.isNull())
null_map = std::make_shared<ColumnUInt8>(row_count, 1);
else if (origin.isNullable())
{
const ColumnNullable & origin_nullable = static_cast<const ColumnNullable &>(origin);
null_map = origin_nullable.getNullValuesByteMap();
}
else
null_map = std::make_shared<ColumnUInt8>(row_count, 0);
}
else if (auto col = typeid_cast<ColumnUInt16 *>(tracker_holder.get()))
{
/// Remember which columns are nullable. This spares us many costly
/// calls to virtual functions.
std::vector<UInt8> nullable_cols_map;
nullable_cols_map.resize(args.size());
for (const auto & arg : args)
{
const auto & col = block.unsafeGetByPosition(arg).column;
nullable_cols_map[arg] = col->isNullable() ? 1 : 0;
}
/// Remember which columns are null. The same remark as above applies.
std::vector<UInt8> null_cols_map;
null_cols_map.resize(args.size());
for (const auto & arg : args)
{
const auto & col = block.unsafeGetByPosition(arg).column;
null_cols_map[arg] = col->isNull() ? 1 : 0;
}
null_map = std::make_shared<ColumnUInt8>(row_count);
auto & null_map_data = static_cast<ColumnUInt8 &>(*null_map).getData();
const auto & data = col->getData();
for (size_t row = 0; row < row_count; ++row)
{
size_t pos = data[row];
bool is_null;
if (null_cols_map[pos] != 0)
is_null = true;
else if (nullable_cols_map[pos] != 0)
{
const IColumn & origin = *block.unsafeGetByPosition(pos).column;
const auto & nullable_col = static_cast<const ColumnNullable &>(origin);
is_null = nullable_col.isNullAt(row);
}
else
is_null = false;
null_map_data[row] = is_null ? 1 : 0;
}
}
else
throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR};
/// Store the result.
dest_col.column = std::make_shared<ColumnNullable>(source_col.column, null_map);
dest_col.column = std::make_shared<ColumnNullable>(source_col.column, builder.getNullMap());
}
catch (const Conditional::CondException & ex)
{
@ -407,15 +325,15 @@ DataTypePtr FunctionMultiIf::getReturnTypeInternal(const DataTypes & args) const
}
}
void FunctionMultiIf::perform(Block & block, const ColumnNumbers & args, size_t result, size_t tracker)
void FunctionMultiIf::perform(Block & block, const ColumnNumbers & args, size_t result, Conditional::NullMapBuilder & builder)
{
if (performTrivialCase(block, args, result, tracker))
if (performTrivialCase(block, args, result, builder))
return;
if (Conditional::NumericPerformer::perform(block, args, result, tracker))
if (Conditional::NumericPerformer::perform(block, args, result, builder))
return;
if (Conditional::StringEvaluator::perform(block, args, result, tracker))
if (Conditional::StringEvaluator::perform(block, args, result, builder))
return;
if (Conditional::StringArrayEvaluator::perform(block, args, result, tracker))
if (Conditional::StringArrayEvaluator::perform(block, args, result, builder))
return;
if (is_case_mode)
@ -427,13 +345,13 @@ void FunctionMultiIf::perform(Block & block, const ColumnNumbers & args, size_t
ErrorCodes::ILLEGAL_COLUMN};
}
bool FunctionMultiIf::performTrivialCase(Block & block, const ColumnNumbers & args, size_t result, size_t tracker)
bool FunctionMultiIf::performTrivialCase(Block & block, const ColumnNumbers & args,
size_t result, Conditional::NullMapBuilder & builder)
{
/// Check that all the branches have the same type. Moreover,
/// some or all of these branches may be null.
std::string first_type_name;
DataTypePtr type;
Field sample;
size_t else_arg = Conditional::elseArg(args);
for (size_t i = Conditional::firstThen(); i < else_arg; i = Conditional::nextThen(i))
@ -445,7 +363,6 @@ bool FunctionMultiIf::performTrivialCase(Block & block, const ColumnNumbers & ar
{
first_type_name = name;
type = block.getByPosition(args[i]).type;
block.getByPosition(args[i]).column->get(0, sample);
}
else
{
@ -458,10 +375,7 @@ bool FunctionMultiIf::performTrivialCase(Block & block, const ColumnNumbers & ar
if (!block.getByPosition(args[else_arg]).type->isNull())
{
if (first_type_name.empty())
{
type = block.getByPosition(args[else_arg]).type;
block.getByPosition(args[else_arg]).column->get(0, sample);
}
else
{
const auto & name = block.getByPosition(args[else_arg]).type->getName();
@ -476,7 +390,7 @@ bool FunctionMultiIf::performTrivialCase(Block & block, const ColumnNumbers & ar
if (!type)
{
/// Degenerate case: all the branches are null.
res_col = DataTypeNull{}.createConstColumn(row_count, Field{});
res_col = std::make_shared<ColumnNull>(row_count, Null());
return true;
}
@ -501,12 +415,14 @@ bool FunctionMultiIf::performTrivialCase(Block & block, const ColumnNumbers & ar
{
res_col = block.getByPosition(index).column;
if (res_col->isNull())
res_col = type->createConstColumn(row_count, sample);
if (tracker != result)
{
ColumnPtr & col = block.getByPosition(tracker).column;
col = std::make_shared<ColumnConstUInt16>(row_count, index);
/// The return type of multiIf is Nullable(T). Therefore we create
/// a constant column whose type is T with a default value.
/// Subsequently the null map builder will mark it as null.
res_col = type->createConstColumn(row_count, type->getDefault());
}
if (builder)
builder.build(index);
};
size_t i = Conditional::firstCond();