Merge pull request #19371 from kssenii/test-coverage-with-factories

test coverage with factories
This commit is contained in:
alexey-milovidov 2021-01-25 07:11:09 +03:00 committed by GitHub
commit ba3e064a63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 316 additions and 14 deletions

View File

@ -14,6 +14,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Common/typeid_cast.h>
#include <Common/CurrentThread.h>
#include <Poco/String.h>
#include "registerAggregateFunctions.h"
@ -108,10 +109,17 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
found = jt->second;
const Context * query_context = nullptr;
if (CurrentThread::isInitialized())
query_context = CurrentThread::get().getQueryContext();
if (found.creator)
{
out_properties = found.properties;
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunction, name);
/// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
if (!out_properties.returns_default_when_only_null && has_null_arguments)
return nullptr;
@ -128,6 +136,9 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
if (combinator->isForInternalUsageOnly())
throw Exception("Aggregate function combinator '" + combinator->getName() + "' is only for internal usage", ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION);
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunctionCombinator, combinator->getName());
String nested_name = name.substr(0, name.size() - combinator->getName().size());
DataTypes nested_types = combinator->transformArguments(argument_types);
Array nested_parameters = combinator->transformParameters(parameters);

View File

@ -178,6 +178,11 @@ public:
return query_id;
}
/// Returns the `query_context` pointer held by this object, unchanged.
/// NOTE(review): the factory call sites null-check the result before using it,
/// so it is presumably null when no query is attached to the thread — confirm.
const Context * getQueryContext() const
{
return query_context;
}
/// Starts new query and create new thread group for it, current thread becomes master thread of the query
void initializeQuery();

View File

@ -10,6 +10,8 @@
#include <Common/StringUtils/StringUtils.h>
#include <IO/WriteHelpers.h>
#include <Core/Defines.h>
#include <Common/CurrentThread.h>
#include <Interpreters/Context.h>
namespace DB
@ -76,7 +78,16 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr
return get("LowCardinality", low_cardinality_params);
}
return findCreatorByName(family_name)(parameters);
DataTypePtr res = findCreatorByName(family_name)(parameters);
if (CurrentThread::isInitialized())
{
const auto * query_context = CurrentThread::get().getQueryContext();
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::DataType, family_name);
}
return res;
}
DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) const

View File

@ -12,6 +12,7 @@
#include <Parsers/formatAST.h>
#include <Poco/File.h>
#include <Poco/Path.h>
#include <Interpreters/Context.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
@ -59,7 +60,14 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m
/// In this case Ordinary database is created on server startup if the corresponding metadata directory exists.
/// So we should remove metadata directory if database creation failed.
created = Poco::File(metadata_path).createDirectory();
return getImpl(create, metadata_path, context);
DatabasePtr impl = getImpl(create, metadata_path, context);
if (impl && context.hasQueryContext() && context.getSettingsRef().log_queries)
context.getQueryContext().addQueryFactoriesInfo(Context::QueryLogFactories::Database, impl->getEngineName());
return impl;
}
catch (...)
{

View File

@ -4,9 +4,10 @@
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
#include "getDictionaryConfigurationFromAST.h"
#include <Interpreters/Context.h>
#include <common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
@ -46,6 +47,9 @@ DictionaryPtr DictionaryFactory::create(
name, config, config_prefix + ".source", dict_struct, context, config.getString(config_prefix + ".database", ""), check_source_config);
LOG_TRACE(&Poco::Logger::get("DictionaryFactory"), "Created dictionary source '{}' for dictionary '{}'", source_ptr->toString(), name);
if (context.hasQueryContext() && context.getSettingsRef().log_queries)
context.getQueryContext().addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, name);
const auto & layout_type = keys.front();
{

View File

@ -278,6 +278,9 @@ InputFormatPtr FormatFactory::getInputFormat(
const Settings & settings = context.getSettingsRef();
if (context.hasQueryContext() && settings.log_queries)
context.getQueryContext().addQueryFactoriesInfo(Context::QueryLogFactories::Format, name);
auto format_settings = _format_settings
? *_format_settings : getFormatSettings(context);
@ -320,6 +323,9 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible(
ParallelFormattingOutputFormat::Params builder{buf, sample, formatter_creator, settings.max_threads};
if (context.hasQueryContext() && settings.log_queries)
context.getQueryContext().addQueryFactoriesInfo(Context::QueryLogFactories::Format, name);
return std::make_shared<ParallelFormattingOutputFormat>(builder);
}
@ -336,6 +342,9 @@ OutputFormatPtr FormatFactory::getOutputFormat(
if (!output_getter)
throw Exception("Format " + name + " is not suitable for output (with processors)", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
if (context.hasQueryContext() && context.getSettingsRef().log_queries)
context.getQueryContext().addQueryFactoriesInfo(Context::QueryLogFactories::Format, name);
RowOutputFormatParams params;
params.callback = std::move(callback);

View File

@ -3,6 +3,7 @@
#include <Interpreters/Context.h>
#include <Common/Exception.h>
#include <Common/CurrentThread.h>
#include <Poco/String.h>
@ -10,6 +11,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
@ -58,6 +60,7 @@ FunctionOverloadResolverImplPtr FunctionFactory::getImpl(
else
throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown function {}{}", name, extra_info);
}
return res;
}
@ -82,16 +85,29 @@ FunctionOverloadResolverImplPtr FunctionFactory::tryGetImpl(
const Context & context) const
{
String name = getAliasToOrName(name_param);
FunctionOverloadResolverImplPtr res;
auto it = functions.find(name);
if (functions.end() != it)
return it->second(context);
res = it->second(context);
else
{
it = case_insensitive_functions.find(Poco::toLower(name));
if (case_insensitive_functions.end() != it)
res = it->second(context);
}
it = case_insensitive_functions.find(Poco::toLower(name));
if (case_insensitive_functions.end() != it)
return it->second(context);
if (!res)
return nullptr;
return {};
if (CurrentThread::isInitialized())
{
const auto * query_context = CurrentThread::get().getQueryContext();
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::Function, name);
}
return res;
}
FunctionOverloadResolverPtr FunctionFactory::tryGet(

View File

@ -954,6 +954,43 @@ void Context::addQueryAccessInfo(const String & quoted_database_name, const Stri
}
/// Remembers that the factory identified by `factory_type` produced an object called `created_object`.
/// The name is added to the matching set inside `query_factories_info`; a repeated name leaves the set unchanged.
/// The method is const because factories only see a const Context; the storage it writes to is a mutable member.
void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const
{
    assert(global_context != this || getApplicationType() == ApplicationType::LOCAL);
    auto lock = getLock();

    /// Pick the destination set first; it stays null for a value outside the enumerators listed below.
    std::unordered_set<std::string> * destination = nullptr;
    switch (factory_type)
    {
        case QueryLogFactories::AggregateFunction:
            destination = &query_factories_info.aggregate_functions;
            break;
        case QueryLogFactories::AggregateFunctionCombinator:
            destination = &query_factories_info.aggregate_function_combinators;
            break;
        case QueryLogFactories::Database:
            destination = &query_factories_info.database_engines;
            break;
        case QueryLogFactories::DataType:
            destination = &query_factories_info.data_type_families;
            break;
        case QueryLogFactories::Dictionary:
            destination = &query_factories_info.dictionaries;
            break;
        case QueryLogFactories::Format:
            destination = &query_factories_info.formats;
            break;
        case QueryLogFactories::Function:
            destination = &query_factories_info.functions;
            break;
        case QueryLogFactories::Storage:
            destination = &query_factories_info.storages;
            break;
        case QueryLogFactories::TableFunction:
            destination = &query_factories_info.table_functions;
            break;
    }

    if (destination)
        destination->emplace(created_object);
}
StoragePtr Context::executeTableFunction(const ASTPtr & table_expression)
{
/// Slightly suboptimal.

View File

@ -228,6 +228,23 @@ private:
QueryAccessInfo query_access_info;
/// Record names of created objects of factories (for testing, etc)
/// There is one set per factory kind. Context::addQueryFactoriesInfo() inserts a name into the set
/// selected by its QueryLogFactories argument, and executeQueryImpl() copies every set into the
/// corresponding `used_*` field of QueryLogElement.
struct QueryFactoriesInfo
{
/// Filled for QueryLogFactories::AggregateFunction.
std::unordered_set<std::string> aggregate_functions;
/// Filled for QueryLogFactories::AggregateFunctionCombinator.
std::unordered_set<std::string> aggregate_function_combinators;
/// Filled for QueryLogFactories::Database.
std::unordered_set<std::string> database_engines;
/// Filled for QueryLogFactories::DataType.
std::unordered_set<std::string> data_type_families;
/// Filled for QueryLogFactories::Dictionary.
std::unordered_set<std::string> dictionaries;
/// Filled for QueryLogFactories::Format.
std::unordered_set<std::string> formats;
/// Filled for QueryLogFactories::Function.
std::unordered_set<std::string> functions;
/// Filled for QueryLogFactories::Storage.
std::unordered_set<std::string> storages;
/// Filled for QueryLogFactories::TableFunction.
std::unordered_set<std::string> table_functions;
};
/// Needs to be changed while having only a const context in factory methods, hence mutable
mutable QueryFactoriesInfo query_factories_info;
//TODO maybe replace with temporary tables?
StoragePtr view_source; /// Temporary StorageValues used to generate alias columns for materialized views
Tables table_function_results; /// Temporary tables obtained by execution of table functions. Keyed by AST tree id.
@ -396,6 +413,23 @@ public:
const QueryAccessInfo & getQueryAccessInfo() const { return query_access_info; }
void addQueryAccessInfo(const String & quoted_database_name, const String & full_quoted_table_name, const Names & column_names);
/// Supported factories for records in query_log
/// The value is passed to addQueryFactoriesInfo() and selects which set of QueryFactoriesInfo
/// the created object's name is stored in.
enum class QueryLogFactories
{
AggregateFunction, /// -> QueryFactoriesInfo::aggregate_functions
AggregateFunctionCombinator, /// -> QueryFactoriesInfo::aggregate_function_combinators
Database, /// -> QueryFactoriesInfo::database_engines
DataType, /// -> QueryFactoriesInfo::data_type_families
Dictionary, /// -> QueryFactoriesInfo::dictionaries
Format, /// -> QueryFactoriesInfo::formats
Function, /// -> QueryFactoriesInfo::functions
Storage, /// -> QueryFactoriesInfo::storages
TableFunction /// -> QueryFactoriesInfo::table_functions
};
const QueryFactoriesInfo & getQueryFactoriesInfo() const { return query_factories_info; }
void addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const;
StoragePtr executeTableFunction(const ASTPtr & table_expression);
void addViewSource(const StoragePtr & storage);

View File

@ -96,7 +96,17 @@ Block QueryLogElement::createBlock()
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "ProfileEvents.Names"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "ProfileEvents.Values"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "Settings.Names"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "Settings.Values"}
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "Settings.Values"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_aggregate_functions"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_aggregate_function_combinators"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_database_engines"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_data_type_families"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_dictionaries"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_formats"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_functions"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_storages"},
{std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "used_table_functions"}
};
}
@ -132,6 +142,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
auto & column_databases = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_tables = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_columns = typeid_cast<ColumnArray &>(*columns[i++]);
auto fill_column = [](const std::set<String> & data, ColumnArray & column)
{
size_t size = 0;
@ -143,6 +154,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
auto & offsets = column.getOffsets();
offsets.push_back(offsets.back() + size);
};
fill_column(query_databases, column_databases);
fill_column(query_tables, column_tables);
fill_column(query_columns, column_columns);
@ -187,6 +199,40 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insertDefault();
columns[i++]->insertDefault();
}
{
auto & column_aggregate_function_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_aggregate_function_combinator_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_database_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_data_type_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_dictionary_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_format_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_function_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_storage_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_table_function_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto fill_column = [](const std::unordered_set<String> & data, ColumnArray & column)
{
size_t size = 0;
for (const auto & name : data)
{
column.getData().insertData(name.data(), name.size());
++size;
}
auto & offsets = column.getOffsets();
offsets.push_back(offsets.back() + size);
};
fill_column(used_aggregate_functions, column_aggregate_function_factory_objects);
fill_column(used_aggregate_function_combinators, column_aggregate_function_combinator_factory_objects);
fill_column(used_database_engines, column_database_factory_objects);
fill_column(used_data_type_families, column_data_type_factory_objects);
fill_column(used_dictionaries, column_dictionary_factory_objects);
fill_column(used_formats, column_format_factory_objects);
fill_column(used_functions, column_function_factory_objects);
fill_column(used_storages, column_storage_factory_objects);
fill_column(used_table_functions, column_table_function_factory_objects);
}
}
void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i)

View File

@ -58,6 +58,16 @@ struct QueryLogElement
std::set<String> query_tables;
std::set<String> query_columns;
std::unordered_set<String> used_aggregate_functions;
std::unordered_set<String> used_aggregate_function_combinators;
std::unordered_set<String> used_database_engines;
std::unordered_set<String> used_data_type_families;
std::unordered_set<String> used_dictionaries;
std::unordered_set<String> used_formats;
std::unordered_set<String> used_functions;
std::unordered_set<String> used_storages;
std::unordered_set<String> used_table_functions;
Int32 exception_code{}; // because ErrorCodes are int
String exception;
String stack_trace;

View File

@ -733,6 +733,17 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
elem.thread_ids = std::move(info.thread_ids);
elem.profile_counters = std::move(info.profile_counters);
const auto & factories_info = context.getQueryFactoriesInfo();
elem.used_aggregate_functions = factories_info.aggregate_functions;
elem.used_aggregate_function_combinators = factories_info.aggregate_function_combinators;
elem.used_database_engines = factories_info.database_engines;
elem.used_data_type_families = factories_info.data_type_families;
elem.used_dictionaries = factories_info.dictionaries;
elem.used_formats = factories_info.formats;
elem.used_functions = factories_info.functions;
elem.used_storages = factories_info.storages;
elem.used_table_functions = factories_info.table_functions;
if (log_queries && elem.type >= log_queries_min_type && Int64(elem.query_duration_ms) >= log_queries_min_query_duration_ms)
{
if (auto query_log = context.getQueryLog())

View File

@ -189,6 +189,10 @@ StoragePtr StorageFactory::get(
storage_def->engine->children.push_back(storage_def->engine->arguments);
storage_def->engine->arguments->children = empty_engine_args;
}
if (local_context.hasQueryContext() && context.getSettingsRef().log_queries)
local_context.getQueryContext().addQueryFactoriesInfo(Context::QueryLogFactories::Storage, name);
return res;
}

View File

@ -1,6 +1,7 @@
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/Context.h>
#include <Common/CurrentThread.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTFunction.h>
@ -52,16 +53,29 @@ TableFunctionPtr TableFunctionFactory::tryGet(
const Context &) const
{
String name = getAliasToOrName(name_param);
TableFunctionPtr res;
auto it = table_functions.find(name);
if (table_functions.end() != it)
return it->second();
res = it->second();
else
{
it = case_insensitive_table_functions.find(Poco::toLower(name));
if (case_insensitive_table_functions.end() != it)
res = it->second();
}
it = case_insensitive_table_functions.find(Poco::toLower(name));
if (case_insensitive_table_functions.end() != it)
return it->second();
if (!res)
return nullptr;
return {};
if (CurrentThread::isInitialized())
{
const auto * query_context = CurrentThread::get().getQueryContext();
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::TableFunction, name);
}
return res;
}
bool TableFunctionFactory::isTableFunctionName(const std::string & name) const

View File

@ -0,0 +1,24 @@
2 worl [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50
2 worl [123,1,1] 49 \N 50 4950 Nullable(UInt64) 50
arraySort(used_aggregate_functions)
['avg','count','groupBitAnd','sum','uniq']
arraySort(used_aggregate_function_combinators)
['Array','If','OrDefault','OrNull']
arraySort(used_table_functions)
['numbers']
arraySort(used_functions)
['addDays','array','arrayFlatten','cast','modulo','plus','substring','toDate','toDayOfYear','toTypeName','toWeek']
arraySort(used_data_type_families)
['Array','Int32','Nullable','String']
used_database_engines
['Atomic']
arraySort(used_data_type_families) used_storages
['DateTime','Int64'] ['Memory']

View File

@ -0,0 +1,58 @@
-- Checks the used_* columns of system.query_log that list the factory-created objects of a query.

-- Query under test. Several names are written through aliases or in non-canonical case
-- (SUBSTRING, BIT_AND, mod, flatten, week, INTEGER, TEXT); the reference output expects the
-- names the log records for them (substring, groupBitAnd, modulo, arrayFlatten, toWeek, Int32, String).
SELECT uniqArray([1, 1, 2]),
SUBSTRING('Hello, world', 7, 5),
flatten([[[BIT_AND(123)]], [[mod(3, 2)], [CAST('1' AS INTEGER)]]]),
week(toDate('2000-12-05')),
CAST(arrayJoin([NULL, NULL]) AS Nullable(TEXT)),
avgOrDefaultIf(number, number % 2),
sumOrNull(number),
toTypeName(sumOrNull(number)),
countIf(toDate('2000-12-05') + number as d,
toDayOfYear(d) % 2)
FROM numbers(100);
SELECT '';
-- Flush logs before reading system.query_log.
SYSTEM FLUSH LOGS;
-- Each lookup below reads the most recent QueryFinish row whose text contains toDate('2000-12-05').
-- arraySort makes the output order fixed: the arrays are written from unordered sets.
SELECT arraySort(used_aggregate_functions)
FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%')
ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames;
SELECT '';
SELECT arraySort(used_aggregate_function_combinators)
FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%')
ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames;
SELECT '';
SELECT arraySort(used_table_functions)
FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%')
ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames;
SELECT '';
SELECT arraySort(used_functions)
FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%')
ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames;
SELECT '';
SELECT arraySort(used_data_type_families)
FROM system.query_log WHERE type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%')
ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames;
SELECT '';
-- Database engine: create a database with an explicit engine and read used_database_engines
-- from the most recent finished query whose text matches the CREATE statement.
DROP database IF EXISTS test_query_log_factories_info1;
CREATE database test_query_log_factories_info1 ENGINE=Atomic;
SYSTEM FLUSH LOGS;
SELECT used_database_engines
FROM system.query_log
WHERE type == 'QueryFinish' AND (query LIKE '%database test_query_log_factories_info%')
ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames;
SELECT '';
-- Storage engine and column types: create a Memory table and read both columns for that query.
CREATE OR REPLACE TABLE test_query_log_factories_info1.memory_table (id BIGINT, date DateTime) ENGINE=Memory();
SYSTEM FLUSH LOGS;
SELECT arraySort(used_data_type_families), used_storages
FROM system.query_log
WHERE type == 'QueryFinish' AND (query LIKE '%TABLE test%')
ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames;
SELECT '';