mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-01 20:12:02 +00:00
fixup
This commit is contained in:
parent
ebb5cc0542
commit
5b22a9596d
@ -9,6 +9,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
Field IColumn::get(size_t n) const
|
||||
{
|
||||
return (*this)[n];
|
||||
}
|
||||
|
||||
String IColumn::dumpStructure() const
|
||||
{
|
||||
WriteBufferFromOwnString res;
|
||||
|
@ -70,6 +70,7 @@ public:
|
||||
/// Returns value of n-th element in universal Field representation.
|
||||
/// Is used in rare cases, since creation of Field instance is expensive usually.
|
||||
virtual Field operator[](size_t n) const = 0;
|
||||
Field get(size_t n) const;
|
||||
|
||||
/// Like the previous one, but avoids extra copying if Field is in a container, for example.
|
||||
virtual void get(size_t n, Field & res) const = 0;
|
||||
|
@ -181,25 +181,25 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
|
||||
}
|
||||
|
||||
|
||||
ColumnWithTypeAndName & Block::getByName(const std::string & name)
|
||||
const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
|
||||
{
|
||||
auto it = index_by_name.find(name);
|
||||
if (index_by_name.end() == it)
|
||||
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
|
||||
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
|
||||
return data[it->second];
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
return &data[it->second];
|
||||
}
|
||||
|
||||
|
||||
const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
|
||||
{
|
||||
auto it = index_by_name.find(name);
|
||||
if (index_by_name.end() == it)
|
||||
auto * result = findByName(name);
|
||||
if (!result)
|
||||
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
|
||||
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
|
||||
return data[it->second];
|
||||
return *result;
|
||||
}
|
||||
|
||||
|
||||
|
@ -28,7 +28,7 @@ class Block
|
||||
{
|
||||
private:
|
||||
using Container = ColumnsWithTypeAndName;
|
||||
using IndexByName = std::map<String, size_t>;
|
||||
using IndexByName = std::unordered_map<String, size_t>;
|
||||
|
||||
Container data;
|
||||
IndexByName index_by_name;
|
||||
@ -64,7 +64,20 @@ public:
|
||||
ColumnWithTypeAndName & safeGetByPosition(size_t position);
|
||||
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;
|
||||
|
||||
ColumnWithTypeAndName & getByName(const std::string & name);
|
||||
ColumnWithTypeAndName* findByName(const std::string & name)
|
||||
{
|
||||
return const_cast<ColumnWithTypeAndName *>(
|
||||
const_cast<const Block *>(this)->findByName(name));
|
||||
}
|
||||
|
||||
const ColumnWithTypeAndName* findByName(const std::string & name) const;
|
||||
|
||||
ColumnWithTypeAndName & getByName(const std::string & name)
|
||||
{
|
||||
return const_cast<ColumnWithTypeAndName &>(
|
||||
const_cast<const Block *>(this)->getByName(name));
|
||||
}
|
||||
|
||||
const ColumnWithTypeAndName & getByName(const std::string & name) const;
|
||||
|
||||
Container::iterator begin() { return data.begin(); }
|
||||
|
@ -195,17 +195,17 @@ SetPtr makeExplicitSet(
|
||||
return set;
|
||||
}
|
||||
|
||||
static String getUniqueName(const Block & block, const String & prefix)
|
||||
static String getUniqueName(ActionsVisitor::Data & data, const String & prefix)
|
||||
{
|
||||
auto & block = data.getSampleBlock();
|
||||
auto result = prefix;
|
||||
|
||||
if (block.has(result))
|
||||
{
|
||||
int i = 1;
|
||||
do
|
||||
{
|
||||
result = prefix + "_" + toString(i);
|
||||
++i;
|
||||
result = prefix + "_" + toString(data.next_unique_suffix);
|
||||
++data.next_unique_suffix;
|
||||
}
|
||||
while (block.has(result));
|
||||
}
|
||||
@ -468,7 +468,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
/// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
|
||||
/// so that sets with the same literal representation do not fuse together (they can have different types).
|
||||
if (!prepared_set->empty())
|
||||
column.name = getUniqueName(data.getSampleBlock(), "__set");
|
||||
column.name = getUniqueName(data, "__set");
|
||||
else
|
||||
column.name = child->getColumnName();
|
||||
|
||||
@ -496,7 +496,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
ColumnWithTypeAndName column(
|
||||
ColumnConst::create(std::move(column_string), 1),
|
||||
std::make_shared<DataTypeString>(),
|
||||
getUniqueName(data.getSampleBlock(), "__joinGet"));
|
||||
getUniqueName(data, "__joinGet"));
|
||||
data.addAction(ExpressionAction::addColumn(column));
|
||||
argument_types.push_back(column.type);
|
||||
argument_names.push_back(column.name);
|
||||
@ -577,7 +577,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
|
||||
/// We can not name `getColumnName()`,
|
||||
/// because it does not uniquely define the expression (the types of arguments can be different).
|
||||
String lambda_name = getUniqueName(data.getSampleBlock(), "__lambda");
|
||||
String lambda_name = getUniqueName(data, "__lambda");
|
||||
|
||||
auto function_capture = std::make_unique<FunctionCaptureOverloadResolver>(
|
||||
lambda_actions, captured, lambda_arguments, result_type, result_name);
|
||||
@ -612,16 +612,50 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
|
||||
Data & data)
|
||||
{
|
||||
DataTypePtr type = applyVisitor(FieldToDataType(), literal.value);
|
||||
const auto value = convertFieldToType(literal.value, *type);
|
||||
|
||||
// FIXME why do we have a second pass with a clean sample block over the same
|
||||
// AST here? Anyway, do not modify the column name if it is set already.
|
||||
if (literal.unique_column_name.empty())
|
||||
{
|
||||
const auto default_name = literal.getColumnName();
|
||||
auto & block = data.getSampleBlock();
|
||||
auto * existing_column = block.findByName(default_name);
|
||||
|
||||
/*
|
||||
* To approximate CSE, build all identical literals to a single temporary
|
||||
* columns. We try to find the column by its default name, but after that
|
||||
* we have to check that it contains the correct data. This might not be
|
||||
* the case if it is a user-supplied column, or it is from under a join,
|
||||
* etc.
|
||||
* Overall, this is a hack around a generally poor name-based notion of
|
||||
* column identity we currently use.
|
||||
*/
|
||||
if (existing_column
|
||||
&& existing_column->column
|
||||
&& isColumnConst(*existing_column->column)
|
||||
&& existing_column->column->size() == 1
|
||||
&& existing_column->column->get(0) == value)
|
||||
{
|
||||
const_cast<ASTLiteral &>(literal).unique_column_name = default_name;
|
||||
}
|
||||
else
|
||||
{
|
||||
const_cast<ASTLiteral &>(literal).unique_column_name
|
||||
= getUniqueName(data, default_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (data.hasColumn(literal.unique_column_name))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
ColumnWithTypeAndName column;
|
||||
column.column = type->createColumnConst(1, convertFieldToType(literal.value, *type));
|
||||
column.name = literal.unique_column_name;
|
||||
column.column = type->createColumnConst(1, value);
|
||||
column.type = type;
|
||||
|
||||
// Always create columns for literals with a unique name. Otherwise, there
|
||||
// may be some weird clashes, see 01101_literal_column_clash.
|
||||
column.name = getUniqueName(data.getSampleBlock(), literal.getColumnName());
|
||||
const_cast<ASTLiteral &>(literal).unique_column_name = column.name;
|
||||
|
||||
data.addAction(ExpressionAction::addColumn(column));
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,7 @@ struct ScopeStack
|
||||
|
||||
const Context & context;
|
||||
|
||||
public:
|
||||
ScopeStack(const ExpressionActionsPtr & actions, const Context & context_);
|
||||
|
||||
void pushLevel(const NamesAndTypesList & input_columns);
|
||||
@ -80,6 +81,13 @@ public:
|
||||
size_t visit_depth;
|
||||
ScopeStack actions_stack;
|
||||
|
||||
/*
|
||||
* Remember the last unique column suffix to avoid quadratic behavior
|
||||
* when we add lots of column with same prefix. One counter for all
|
||||
* prefixes is good enough.
|
||||
*/
|
||||
int next_unique_suffix;
|
||||
|
||||
Data(const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_,
|
||||
const NamesAndTypesList & source_columns_, const ExpressionActionsPtr & actions,
|
||||
PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_,
|
||||
@ -95,7 +103,8 @@ public:
|
||||
only_consts(only_consts_),
|
||||
no_storage_or_local(no_storage_or_local_),
|
||||
visit_depth(0),
|
||||
actions_stack(actions, context)
|
||||
actions_stack(actions, context),
|
||||
next_unique_suffix(actions_stack.getSampleBlock().columns() + 1)
|
||||
{}
|
||||
|
||||
void updateActions(ExpressionActionsPtr & actions)
|
||||
|
@ -855,7 +855,35 @@ void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain &
|
||||
String result_name = ast->getAliasOrColumnName();
|
||||
if (required_result_columns.empty() || required_result_columns.count(result_name))
|
||||
{
|
||||
result_columns.emplace_back(ast->getColumnName(), result_name);
|
||||
std::string source_name = ast->getColumnName();
|
||||
|
||||
/*
|
||||
* For temporary columns created by ExpressionAnalyzer for literals,
|
||||
* use the correct source column. Using the default display name
|
||||
* returned by getColumnName is not enough, and we have to use the
|
||||
* column id set by EA. In principle, this logic applies to all kinds
|
||||
* of columns, not only literals. Literals are especially problematic
|
||||
* for two reasons:
|
||||
* 1) confusing different literal columns leads to weird side
|
||||
* effects (see 01101_literal_columns_clash);
|
||||
* 2) the disambiguation mechanism in SyntaxAnalyzer, that, among
|
||||
* other things, creates unique aliases for columns with same
|
||||
* names from different tables, is applied before these temporary
|
||||
* columns are created by ExpressionAnalyzer.
|
||||
* Similar problems should also manifest for function columns, which
|
||||
* are likewise created at a later stage by EA.
|
||||
* In general, we need to have explicit separation between display
|
||||
* names and identifiers for columns. This code is a workaround for
|
||||
* a particular subclass of problems, and not a proper solution.
|
||||
*/
|
||||
if (auto as_literal = dynamic_cast<ASTLiteral *>(ast.get());
|
||||
as_literal)
|
||||
{
|
||||
source_name = as_literal->unique_column_name;
|
||||
assert(!source_name.empty());
|
||||
}
|
||||
|
||||
result_columns.emplace_back(source_name, result_name);
|
||||
step.required_output.push_back(result_columns.back().second);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user