This commit is contained in:
Alexander Kuzmenkov 2020-04-01 15:16:39 +03:00
parent ebb5cc0542
commit 5b22a9596d
7 changed files with 115 additions and 25 deletions

View File

@ -9,6 +9,11 @@
namespace DB
{
/// Named counterpart of operator[]: fetches the n-th element as a Field
/// by forwarding to the (virtual) subscript operator of this column.
Field IColumn::get(size_t n) const
{
    const IColumn & self = *this;
    return self[n];
}
String IColumn::dumpStructure() const
{
WriteBufferFromOwnString res;

View File

@ -70,6 +70,7 @@ public:
/// Returns value of n-th element in universal Field representation.
/// Is used in rare cases, since creation of Field instance is expensive usually.
virtual Field operator[](size_t n) const = 0;
/// Non-virtual named equivalent of operator[]: returns the n-th element as a Field.
/// NOTE(review): a derived class that overrides get(size_t, Field &) hides this overload
/// unless it adds `using IColumn::get` - confirm callers only use it through IColumn.
Field get(size_t n) const;
/// Like operator[], but the result is returned through `res`, which avoids extra copying
/// if Field is in a container, for example.
virtual void get(size_t n, Field & res) const = 0;

View File

@ -181,25 +181,25 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
}
ColumnWithTypeAndName & Block::getByName(const std::string & name)
const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
{
auto it = index_by_name.find(name);
if (index_by_name.end() == it)
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
return data[it->second];
{
return nullptr;
}
return &data[it->second];
}
const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
{
auto it = index_by_name.find(name);
if (index_by_name.end() == it)
auto * result = findByName(name);
if (!result)
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
return data[it->second];
return *result;
}

View File

@ -28,7 +28,7 @@ class Block
{
private:
using Container = ColumnsWithTypeAndName;
using IndexByName = std::map<String, size_t>;
using IndexByName = std::unordered_map<String, size_t>;
Container data;
IndexByName index_by_name;
@ -64,7 +64,20 @@ public:
ColumnWithTypeAndName & safeGetByPosition(size_t position);
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;
ColumnWithTypeAndName & getByName(const std::string & name);
/// Mutable overload of findByName(): runs the const lookup on this block and
/// removes constness from the pointer it yields (a null result stays null).
ColumnWithTypeAndName* findByName(const std::string & name)
{
    const Block & self = *this;
    const ColumnWithTypeAndName * found = self.findByName(name);
    return const_cast<ColumnWithTypeAndName *>(found);
}
const ColumnWithTypeAndName* findByName(const std::string & name) const;
/// Mutable overload of getByName(): delegates to the const overload (including
/// whatever it throws) and removes constness from the returned reference.
ColumnWithTypeAndName & getByName(const std::string & name)
{
    const Block & self = *this;
    const ColumnWithTypeAndName & found = self.getByName(name);
    return const_cast<ColumnWithTypeAndName &>(found);
}
const ColumnWithTypeAndName & getByName(const std::string & name) const;
Container::iterator begin() { return data.begin(); }

View File

@ -195,17 +195,17 @@ SetPtr makeExplicitSet(
return set;
}
static String getUniqueName(const Block & block, const String & prefix)
static String getUniqueName(ActionsVisitor::Data & data, const String & prefix)
{
auto & block = data.getSampleBlock();
auto result = prefix;
if (block.has(result))
{
int i = 1;
do
{
result = prefix + "_" + toString(i);
++i;
result = prefix + "_" + toString(data.next_unique_suffix);
++data.next_unique_suffix;
}
while (block.has(result));
}
@ -468,7 +468,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
/// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
/// so that sets with the same literal representation do not fuse together (they can have different types).
if (!prepared_set->empty())
column.name = getUniqueName(data.getSampleBlock(), "__set");
column.name = getUniqueName(data, "__set");
else
column.name = child->getColumnName();
@ -496,7 +496,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
ColumnWithTypeAndName column(
ColumnConst::create(std::move(column_string), 1),
std::make_shared<DataTypeString>(),
getUniqueName(data.getSampleBlock(), "__joinGet"));
getUniqueName(data, "__joinGet"));
data.addAction(ExpressionAction::addColumn(column));
argument_types.push_back(column.type);
argument_names.push_back(column.name);
@ -577,7 +577,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
/// We can not name `getColumnName()`,
/// because it does not uniquely define the expression (the types of arguments can be different).
String lambda_name = getUniqueName(data.getSampleBlock(), "__lambda");
String lambda_name = getUniqueName(data, "__lambda");
auto function_capture = std::make_unique<FunctionCaptureOverloadResolver>(
lambda_actions, captured, lambda_arguments, result_type, result_name);
@ -612,16 +612,50 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
Data & data)
{
DataTypePtr type = applyVisitor(FieldToDataType(), literal.value);
const auto value = convertFieldToType(literal.value, *type);
// FIXME why do we have a second pass with a clean sample block over the same
// AST here? Anyway, do not modify the column name if it is set already.
if (literal.unique_column_name.empty())
{
const auto default_name = literal.getColumnName();
auto & block = data.getSampleBlock();
auto * existing_column = block.findByName(default_name);
/*
* To approximate CSE, build all identical literals into a single temporary
* column. We try to find the column by its default name, but after that
* we have to check that it contains the correct data. This might not be
* the case if it is a user-supplied column, or it is from under a join,
* etc.
* Overall, this is a hack around a generally poor name-based notion of
* column identity we currently use.
*/
if (existing_column
&& existing_column->column
&& isColumnConst(*existing_column->column)
&& existing_column->column->size() == 1
&& existing_column->column->get(0) == value)
{
const_cast<ASTLiteral &>(literal).unique_column_name = default_name;
}
else
{
const_cast<ASTLiteral &>(literal).unique_column_name
= getUniqueName(data, default_name);
}
}
if (data.hasColumn(literal.unique_column_name))
{
return;
}
ColumnWithTypeAndName column;
column.column = type->createColumnConst(1, convertFieldToType(literal.value, *type));
column.name = literal.unique_column_name;
column.column = type->createColumnConst(1, value);
column.type = type;
// Always create columns for literals with a unique name. Otherwise, there
// may be some weird clashes, see 01101_literal_column_clash.
column.name = getUniqueName(data.getSampleBlock(), literal.getColumnName());
const_cast<ASTLiteral &>(literal).unique_column_name = column.name;
data.addAction(ExpressionAction::addColumn(column));
}

View File

@ -42,6 +42,7 @@ struct ScopeStack
const Context & context;
public:
ScopeStack(const ExpressionActionsPtr & actions, const Context & context_);
void pushLevel(const NamesAndTypesList & input_columns);
@ -80,6 +81,13 @@ public:
size_t visit_depth;
ScopeStack actions_stack;
/*
* Remember the last unique column suffix to avoid quadratic behavior
* when we add lots of columns with the same prefix. One counter for all
* prefixes is good enough.
*/
int next_unique_suffix;
Data(const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_,
const NamesAndTypesList & source_columns_, const ExpressionActionsPtr & actions,
PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_,
@ -95,7 +103,8 @@ public:
only_consts(only_consts_),
no_storage_or_local(no_storage_or_local_),
visit_depth(0),
actions_stack(actions, context)
actions_stack(actions, context),
next_unique_suffix(actions_stack.getSampleBlock().columns() + 1)
{}
void updateActions(ExpressionActionsPtr & actions)

View File

@ -855,7 +855,35 @@ void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain &
String result_name = ast->getAliasOrColumnName();
if (required_result_columns.empty() || required_result_columns.count(result_name))
{
result_columns.emplace_back(ast->getColumnName(), result_name);
std::string source_name = ast->getColumnName();
/*
* For temporary columns created by ExpressionAnalyzer for literals,
* use the correct source column. Using the default display name
* returned by getColumnName is not enough, and we have to use the
* column id set by EA. In principle, this logic applies to all kinds
* of columns, not only literals. Literals are especially problematic
* for two reasons:
* 1) confusing different literal columns leads to weird side
* effects (see 01101_literal_column_clash);
* 2) the disambiguation mechanism in SyntaxAnalyzer, that, among
* other things, creates unique aliases for columns with same
* names from different tables, is applied before these temporary
* columns are created by ExpressionAnalyzer.
* Similar problems should also manifest for function columns, which
* are likewise created at a later stage by EA.
* In general, we need to have explicit separation between display
* names and identifiers for columns. This code is a workaround for
* a particular subclass of problems, and not a proper solution.
*/
if (auto as_literal = dynamic_cast<ASTLiteral *>(ast.get());
as_literal)
{
source_name = as_literal->unique_column_name;
assert(!source_name.empty());
}
result_columns.emplace_back(source_name, result_name);
step.required_output.push_back(result_columns.back().second);
}
}