Compare commits

...

12 Commits

Author SHA1 Message Date
Alexey Milovidov
31f84e0a32
Merge bf7ec7d639 into 1c6165f6ee 2024-09-19 10:55:58 +02:00
vdimir
1c6165f6ee
Merge pull request #69203 from Avogar/json-dynamic-hash
Fix uniq and GROUP BY for JSON/Dynamic types
2024-09-19 08:24:03 +00:00
Robert Schulze
71dd3d5cf6
Merge pull request #69746 from rschu1ze/sparse-pg
CI: Include PostgreSQL in sparse checkout script
2024-09-19 08:23:41 +00:00
Yakov Olkhovskiy
c0c83236b6
Merge pull request #69570 from alexkats/fix-azure
Mask azure connection string sensitive info
2024-09-19 05:40:47 +00:00
Kruglov Pavel
228ac44a92
Fix asan issue 2024-09-18 21:27:38 +02:00
Robert Schulze
d2de15871c
Include postgres in sparse checkout script 2024-09-18 19:15:13 +00:00
Alex Katsman
b88cd79959 Mask azure connection string sensitive info 2024-09-18 18:32:22 +00:00
Alexey Milovidov
bf7ec7d639
Revert "Revert "Fix unexpected behavior with FORMAT and SETTINGS parsing"" 2024-09-11 13:22:27 +02:00
avogar
4ece895b41 Merge branch 'master' of github.com:ClickHouse/ClickHouse into json-dynamic-hash 2024-09-09 10:54:18 +00:00
avogar
f495a4f431 Merge branch 'master' of github.com:ClickHouse/ClickHouse into json-dynamic-hash 2024-09-04 11:21:21 +00:00
avogar
a44b3d0268 Fix sorted typed paths 2024-09-03 17:31:07 +00:00
avogar
f1377b0b4a Fix uniq and GROUP BY for JSON/Dynamic types 2024-09-03 14:10:28 +00:00
30 changed files with 493 additions and 316 deletions

View File

@ -14,5 +14,6 @@ git config submodule."contrib/icu".update '!../sparse-checkout/update-icu.sh'
git config submodule."contrib/boost".update '!../sparse-checkout/update-boost.sh'
git config submodule."contrib/aws-s2n-tls".update '!../sparse-checkout/update-aws-s2n-tls.sh'
git config submodule."contrib/protobuf".update '!../sparse-checkout/update-protobuf.sh'
git config submodule."contrib/postgres".update '!../sparse-checkout/update-postgres.sh'
git config submodule."contrib/libxml2".update '!../sparse-checkout/update-libxml2.sh'
git config submodule."contrib/brotli".update '!../sparse-checkout/update-brotli.sh'

View File

@ -0,0 +1,16 @@
#!/bin/sh
echo "Using sparse checkout for postgres"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '!/*' > $FILES_TO_CHECKOUT
echo '/src/interfaces/libpq/*' >> $FILES_TO_CHECKOUT
echo '!/src/interfaces/libpq/*/*' >> $FILES_TO_CHECKOUT
echo '/src/common/*' >> $FILES_TO_CHECKOUT
echo '!/src/port/*/*' >> $FILES_TO_CHECKOUT
echo '/src/port/*' >> $FILES_TO_CHECKOUT
echo '/src/include/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD

View File

@ -1164,6 +1164,9 @@ void Client::processOptions(const OptionsDescription & options_description,
/// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.)
client_context = global_context;
initClientContext();
/// Allow to pass-through unknown settings to the server.
client_context->getAccessControl().allowAllSettings();
}

View File

@ -1924,7 +1924,7 @@ try
auto & access_control = global_context->getAccessControl();
try
{
access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
access_control.setupFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
}
catch (...)
{

View File

@ -280,7 +280,7 @@ void AccessControl::shutdown()
}
void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
void AccessControl::setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
if (config_.has("custom_settings_prefixes"))
@ -868,4 +868,10 @@ const ExternalAuthenticators & AccessControl::getExternalAuthenticators() const
return *external_authenticators;
}
void AccessControl::allowAllSettings()
{
custom_settings_prefixes->registerPrefixes({""});
}
}

View File

@ -57,7 +57,7 @@ public:
void shutdown() override;
/// Initializes access storage (user directories).
void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
void setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_);
/// Parses access entities from a configuration loaded from users.xml.
@ -238,6 +238,9 @@ public:
/// Gets manager of notifications.
AccessChangesNotifier & getChangesNotifier();
/// Allow all setting names - this can be used in clients to pass-through unknown settings to the server.
void allowAllSettings();
private:
class ContextAccessCache;
class CustomSettingsPrefixes;

View File

@ -219,8 +219,8 @@ void SettingsConstraints::clamp(const Settings & current_settings, SettingsChang
});
}
template <class T>
bool getNewValueToCheck(const T & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure)
template <typename SettingsT>
bool getNewValueToCheck(const SettingsT & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure)
{
Field current_value;
bool has_current_value = current_settings.tryGet(change.name, current_value);
@ -230,12 +230,12 @@ bool getNewValueToCheck(const T & current_settings, SettingChange & change, Fiel
return false;
if (throw_on_failure)
new_value = T::castValueUtil(change.name, change.value);
new_value = SettingsT::castValueUtil(change.name, change.value);
else
{
try
{
new_value = T::castValueUtil(change.name, change.value);
new_value = SettingsT::castValueUtil(change.name, change.value);
}
catch (...)
{

View File

@ -59,6 +59,7 @@
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Interpreters/ProfileEventsExt.h>
#include <Interpreters/InterpreterSetQuery.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/CompressionMethod.h>
@ -1609,14 +1610,14 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
auto metadata = storage->getInMemoryMetadataPtr();
QueryPlan plan;
storage->read(
plan,
sample.getNames(),
storage->getStorageSnapshot(metadata, client_context),
query_info,
client_context,
{},
client_context->getSettingsRef().max_block_size,
getNumberOfPhysicalCPUCores());
plan,
sample.getNames(),
storage->getStorageSnapshot(metadata, client_context),
query_info,
client_context,
{},
client_context->getSettingsRef().max_block_size,
getNumberOfPhysicalCPUCores());
auto builder = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(client_context),
@ -1914,42 +1915,13 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (old_settings)
client_context->setSettings(*old_settings);
});
auto apply_query_settings = [&](const IAST & settings_ast)
{
if (!old_settings)
old_settings.emplace(client_context->getSettingsRef());
client_context->applySettingsChanges(settings_ast.as<ASTSetQuery>()->changes);
client_context->resetSettingsToDefaultValue(settings_ast.as<ASTSetQuery>()->default_settings);
};
const auto * insert = parsed_query->as<ASTInsertQuery>();
if (const auto * select = parsed_query->as<ASTSelectQuery>(); select && select->settings())
apply_query_settings(*select->settings());
else if (const auto * select_with_union = parsed_query->as<ASTSelectWithUnionQuery>())
{
const ASTs & children = select_with_union->list_of_selects->children;
if (!children.empty())
{
// On the client it is enough to apply settings only for the
// last SELECT, since the only thing that is important to apply
// on the client is format settings.
const auto * last_select = children.back()->as<ASTSelectQuery>();
if (last_select && last_select->settings())
{
apply_query_settings(*last_select->settings());
}
}
}
else if (const auto * query_with_output = parsed_query->as<ASTQueryWithOutput>(); query_with_output && query_with_output->settings_ast)
apply_query_settings(*query_with_output->settings_ast);
else if (insert && insert->settings_ast)
apply_query_settings(*insert->settings_ast);
InterpreterSetQuery::applySettingsFromQuery(parsed_query, client_context);
if (!connection->checkConnected(connection_parameters.timeouts))
connect();
ASTPtr input_function;
const auto * insert = parsed_query->as<ASTInsertQuery>();
if (insert && insert->select)
insert->tryFindInputFunction(input_function);

View File

@ -816,6 +816,22 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const
return;
}
/// If it's not null we update hash with the type name and the actual value.
/// If value in this row is in shared variant, deserialize type and value and
/// update hash with it.
if (discr == getSharedVariantDiscriminator())
{
auto value = getSharedVariant().getDataAt(variant_col.offsetAt(n));
ReadBufferFromMemory buf(value.data, value.size);
auto type = decodeDataType(buf);
hash.update(type->getName());
auto tmp_column = type->createColumn();
type->getDefaultSerialization()->deserializeBinary(*tmp_column, buf, getFormatSettings());
tmp_column->updateHashWithValue(0, hash);
return;
}
hash.update(variant_info.variant_names[discr]);
variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash);
}

View File

@ -47,15 +47,21 @@ ColumnObject::ColumnObject(
, statistics(statistics_)
{
typed_paths.reserve(typed_paths_.size());
sorted_typed_paths.reserve(typed_paths_.size());
for (auto & [path, column] : typed_paths_)
typed_paths[path] = std::move(column);
{
auto it = typed_paths.emplace(path, std::move(column)).first;
sorted_typed_paths.push_back(it->first);
}
std::sort(sorted_typed_paths.begin(), sorted_typed_paths.end());
dynamic_paths.reserve(dynamic_paths_.size());
dynamic_paths_ptrs.reserve(dynamic_paths_.size());
for (auto & [path, column] : dynamic_paths_)
{
dynamic_paths[path] = std::move(column);
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(dynamic_paths[path].get());
auto it = dynamic_paths.emplace(path, std::move(column)).first;
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(it->second.get());
sorted_dynamic_paths.insert(it->first);
}
}
@ -64,13 +70,17 @@ ColumnObject::ColumnObject(
: max_dynamic_paths(max_dynamic_paths_), global_max_dynamic_paths(max_dynamic_paths_), max_dynamic_types(max_dynamic_types_)
{
typed_paths.reserve(typed_paths_.size());
sorted_typed_paths.reserve(typed_paths_.size());
for (auto & [path, column] : typed_paths_)
{
if (!column->empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected non-empty typed path column in ColumnObject constructor");
typed_paths[path] = std::move(column);
auto it = typed_paths.emplace(path, std::move(column)).first;
sorted_typed_paths.push_back(it->first);
}
std::sort(sorted_typed_paths.begin(), sorted_typed_paths.end());
MutableColumns paths_and_values;
paths_and_values.emplace_back(ColumnString::create());
paths_and_values.emplace_back(ColumnString::create());
@ -129,13 +139,8 @@ std::string ColumnObject::getName() const
ss << "Object(";
ss << "max_dynamic_paths=" << global_max_dynamic_paths;
ss << ", max_dynamic_types=" << max_dynamic_types;
std::vector<String> sorted_typed_paths;
sorted_typed_paths.reserve(typed_paths.size());
for (const auto & [path, column] : typed_paths)
sorted_typed_paths.push_back(path);
std::sort(sorted_typed_paths.begin(), sorted_typed_paths.end());
for (const auto & path : sorted_typed_paths)
ss << ", " << path << " " << typed_paths.at(path)->getName();
ss << ", " << path << " " << typed_paths.find(path)->second->getName();
ss << ")";
return ss.str();
}
@ -260,6 +265,7 @@ ColumnDynamic * ColumnObject::tryToAddNewDynamicPath(std::string_view path)
new_dynamic_column->insertManyDefaults(size());
auto it = dynamic_paths.emplace(path, std::move(new_dynamic_column)).first;
auto it_ptr = dynamic_paths_ptrs.emplace(path, assert_cast<ColumnDynamic *>(it->second.get())).first;
sorted_dynamic_paths.insert(it->first);
return it_ptr->second;
}
@ -288,8 +294,9 @@ void ColumnObject::setDynamicPaths(const std::vector<String> & paths)
auto new_dynamic_column = ColumnDynamic::create(max_dynamic_types);
if (size)
new_dynamic_column->insertManyDefaults(size);
dynamic_paths[path] = std::move(new_dynamic_column);
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(dynamic_paths[path].get());
auto it = dynamic_paths.emplace(path, std::move(new_dynamic_column)).first;
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(it->second.get());
sorted_dynamic_paths.insert(it->first);
}
}
@ -658,39 +665,61 @@ void ColumnObject::popBack(size_t n)
StringRef ColumnObject::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const
{
StringRef res(begin, 0);
// Serialize all paths and values in binary format.
/// First serialize values from typed paths in sorted order. They are the same for all instances of this column.
for (auto path : sorted_typed_paths)
{
auto data_ref = typed_paths.find(path)->second->serializeValueIntoArena(n, arena, begin);
res.data = data_ref.data - res.size;
res.size += data_ref.size;
}
/// Second, serialize paths and values in bunary format from dynamic paths and shared data in sorted by path order.
/// Calculate total number of paths to serialize and write it.
const auto & shared_data_offsets = getSharedDataOffsets();
size_t offset = shared_data_offsets[static_cast<ssize_t>(n) - 1];
size_t end = shared_data_offsets[static_cast<ssize_t>(n)];
size_t num_paths = typed_paths.size() + dynamic_paths.size() + (end - offset);
size_t num_paths = (end - offset);
/// Don't serialize Nulls from dynamic paths.
for (const auto & [_, column] : dynamic_paths)
num_paths += !column->isNullAt(n);
char * pos = arena.allocContinue(sizeof(size_t), begin);
memcpy(pos, &num_paths, sizeof(size_t));
res.data = pos - res.size;
res.size += sizeof(size_t);
/// Serialize paths and values from typed paths.
for (const auto & [path, column] : typed_paths)
{
size_t path_size = path.size();
pos = arena.allocContinue(sizeof(size_t) + path_size, begin);
memcpy(pos, &path_size, sizeof(size_t));
memcpy(pos + sizeof(size_t), path.data(), path_size);
auto data_ref = column->serializeValueIntoArena(n, arena, begin);
res.data = data_ref.data - res.size - sizeof(size_t) - path_size;
res.size += data_ref.size + sizeof(size_t) + path_size;
}
/// Serialize paths and values from dynamic paths.
for (const auto & [path, column] : dynamic_paths)
{
WriteBufferFromOwnString buf;
getDynamicSerialization()->serializeBinary(*column, n, buf, getFormatSettings());
serializePathAndValueIntoArena(arena, begin, path, buf.str(), res);
}
/// Serialize paths and values from shared data.
auto dynamic_paths_it = sorted_dynamic_paths.begin();
auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues();
for (size_t i = offset; i != end; ++i)
serializePathAndValueIntoArena(arena, begin, shared_data_paths->getDataAt(i), shared_data_values->getDataAt(i), res);
{
auto path = shared_data_paths->getDataAt(i).toView();
/// Paths in shared data are sorted. Serialize all paths from dynamic paths that go before this path in sorted order.
while (dynamic_paths_it != sorted_dynamic_paths.end() && *dynamic_paths_it < path)
{
const auto * dynamic_column = dynamic_paths_ptrs.find(*dynamic_paths_it)->second;
/// Don't serialize Nulls.
if (!dynamic_column->isNullAt(n))
{
WriteBufferFromOwnString buf;
getDynamicSerialization()->serializeBinary(*dynamic_column, n, buf, getFormatSettings());
serializePathAndValueIntoArena(arena, begin, StringRef(*dynamic_paths_it), buf.str(), res);
}
++dynamic_paths_it;
}
serializePathAndValueIntoArena(arena, begin, StringRef(path), shared_data_values->getDataAt(i), res);
}
/// Serialize all remaining paths in dynamic paths.
for (; dynamic_paths_it != sorted_dynamic_paths.end(); ++dynamic_paths_it)
{
const auto * dynamic_column = dynamic_paths_ptrs.find(*dynamic_paths_it)->second;
if (!dynamic_column->isNullAt(n))
{
WriteBufferFromOwnString buf;
getDynamicSerialization()->serializeBinary(*dynamic_column, n, buf, getFormatSettings());
serializePathAndValueIntoArena(arena, begin, StringRef(*dynamic_paths_it), buf.str(), res);
}
}
return res;
}
@ -711,70 +740,49 @@ void ColumnObject::serializePathAndValueIntoArena(DB::Arena & arena, const char
const char * ColumnObject::deserializeAndInsertFromArena(const char * pos)
{
size_t current_size = size();
/// Deserialize paths and values and insert them into typed paths, dynamic paths or shared data.
/// Serialized paths could be unsorted, so we will have to sort all paths that will be inserted into shared data.
std::vector<std::pair<std::string_view, std::string_view>> paths_and_values_for_shared_data;
/// First deserialize typed paths. They come first.
for (auto path : sorted_typed_paths)
pos = typed_paths.find(path)->second->deserializeAndInsertFromArena(pos);
/// Second deserialize all other paths and values and insert them into dynamic paths or shared data.
auto num_paths = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
const auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues();
for (size_t i = 0; i != num_paths; ++i)
{
auto path_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
std::string_view path(pos, path_size);
pos += path_size;
/// Check if it's a typed path. In this case we should use
/// deserializeAndInsertFromArena of corresponding column.
if (auto typed_it = typed_paths.find(path); typed_it != typed_paths.end())
/// Deserialize binary value and try to insert it to dynamic paths or shared data.
auto value_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
std::string_view value(pos, value_size);
pos += value_size;
/// Check if we have this path in dynamic paths.
if (auto dynamic_it = dynamic_paths.find(path); dynamic_it != dynamic_paths.end())
{
pos = typed_it->second->deserializeAndInsertFromArena(pos);
ReadBufferFromMemory buf(value.data(), value.size());
getDynamicSerialization()->deserializeBinary(*dynamic_it->second, buf, getFormatSettings());
}
/// If it's not a typed path, deserialize binary value and try to insert it
/// to dynamic paths or shared data.
/// Try to add a new dynamic path.
else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path))
{
ReadBufferFromMemory buf(value.data(), value.size());
getDynamicSerialization()->deserializeBinary(*dynamic_path_column, buf, getFormatSettings());
}
/// Limit on dynamic paths is reached, add this path to shared data.
/// Serialized paths are sorted, so we can insert right away.
else
{
auto value_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
std::string_view value(pos, value_size);
pos += value_size;
/// Check if we have this path in dynamic paths.
if (auto dynamic_it = dynamic_paths.find(path); dynamic_it != dynamic_paths.end())
{
ReadBufferFromMemory buf(value.data(), value.size());
getDynamicSerialization()->deserializeBinary(*dynamic_it->second, buf, getFormatSettings());
}
/// Try to add a new dynamic path.
else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path))
{
ReadBufferFromMemory buf(value.data(), value.size());
getDynamicSerialization()->deserializeBinary(*dynamic_path_column, buf, getFormatSettings());
}
/// Limit on dynamic paths is reached, add this path to shared data later.
else
{
paths_and_values_for_shared_data.emplace_back(path, value);
}
shared_data_paths->insertData(path.data(), path.size());
shared_data_values->insertData(value.data(), value.size());
}
}
/// Sort and insert all paths from paths_and_values_for_shared_data into shared data.
std::sort(paths_and_values_for_shared_data.begin(), paths_and_values_for_shared_data.end());
const auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues();
for (const auto & [path, value] : paths_and_values_for_shared_data)
{
shared_data_paths->insertData(path.data(), path.size());
shared_data_values->insertData(value.data(), value.size());
}
getSharedDataOffsets().push_back(shared_data_paths->size());
/// Insert default value in all remaining typed and dynamic paths.
for (auto & [_, column] : typed_paths)
{
if (column->size() == current_size)
column->insertDefault();
}
/// Insert default value in all remaining dynamic paths.
for (auto & [_, column] : dynamic_paths_ptrs)
{
if (column->size() == current_size)
@ -786,6 +794,11 @@ const char * ColumnObject::deserializeAndInsertFromArena(const char * pos)
const char * ColumnObject::skipSerializedInArena(const char * pos) const
{
/// First, skip all values of typed paths;
for (auto path : sorted_typed_paths)
pos = typed_paths.find(path)->second->skipSerializedInArena(pos);
/// Second, skip all other paths and values.
auto num_paths = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
for (size_t i = 0; i != num_paths; ++i)
@ -794,15 +807,8 @@ const char * ColumnObject::skipSerializedInArena(const char * pos) const
pos += sizeof(size_t);
std::string_view path(pos, path_size);
pos += path_size;
if (auto typed_it = typed_paths.find(path); typed_it != typed_paths.end())
{
pos = typed_it->second->skipSerializedInArena(pos);
}
else
{
auto value_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t) + value_size;
}
auto value_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t) + value_size;
}
return pos;
@ -810,11 +816,51 @@ const char * ColumnObject::skipSerializedInArena(const char * pos) const
void ColumnObject::updateHashWithValue(size_t n, SipHash & hash) const
{
for (const auto & [_, column] : typed_paths)
column->updateHashWithValue(n, hash);
for (const auto & [_, column] : dynamic_paths_ptrs)
column->updateHashWithValue(n, hash);
shared_data->updateHashWithValue(n, hash);
for (auto path : sorted_typed_paths)
typed_paths.find(path)->second->updateHashWithValue(n, hash);
/// The hash of the object in row should not depend on the way we store paths (in dynamic paths or in shared data)
/// and should be the same for the same objects. To support it we update hash with path and its value (if not null) in
/// sorted by path order from both dynamic paths and shared data.
const auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues();
const auto & shared_data_offsets = getSharedDataOffsets();
size_t start = shared_data_offsets[static_cast<ssize_t>(n) - 1];
size_t end = shared_data_offsets[static_cast<ssize_t>(n)];
auto dynamic_paths_it = sorted_dynamic_paths.begin();
for (size_t i = start; i != end; ++i)
{
auto path = shared_data_paths->getDataAt(i).toView();
/// Paths in shared data are sorted. Update hash with all paths from dynamic paths that go before this path in sorted order.
while (dynamic_paths_it != sorted_dynamic_paths.end() && *dynamic_paths_it < path)
{
const auto * dynamic_column = dynamic_paths_ptrs.find(*dynamic_paths_it)->second;
if (!dynamic_column->isNullAt(n))
{
hash.update(*dynamic_paths_it);
dynamic_column->updateHashWithValue(n, hash);
}
++dynamic_paths_it;
}
/// Deserialize value in temporary column to get its hash.
auto value = shared_data_values->getDataAt(i);
ReadBufferFromMemory buf(value.data, value.size);
auto tmp_column = ColumnDynamic::create();
getDynamicSerialization()->deserializeBinary(*tmp_column, buf, getFormatSettings());
hash.update(path);
tmp_column->updateHashWithValue(0, hash);
}
/// Iterate over all remaining paths in dynamic paths.
for (; dynamic_paths_it != sorted_dynamic_paths.end(); ++dynamic_paths_it)
{
const auto * dynamic_column = dynamic_paths_ptrs.find(*dynamic_paths_it)->second;
if (!dynamic_column->isNullAt(n))
{
hash.update(*dynamic_paths_it);
dynamic_column->updateHashWithValue(n, hash);
}
}
}
WeakHash32 ColumnObject::getWeakHash32() const
@ -1310,6 +1356,7 @@ void ColumnObject::takeDynamicStructureFromSourceColumns(const DB::Columns & sou
/// Reset current state.
dynamic_paths.clear();
dynamic_paths_ptrs.clear();
sorted_dynamic_paths.clear();
max_dynamic_paths = global_max_dynamic_paths;
Statistics new_statistics(Statistics::Source::MERGE);
@ -1328,8 +1375,9 @@ void ColumnObject::takeDynamicStructureFromSourceColumns(const DB::Columns & sou
{
if (dynamic_paths.size() < max_dynamic_paths)
{
dynamic_paths.emplace(path, ColumnDynamic::create(max_dynamic_types));
dynamic_paths_ptrs.emplace(path, assert_cast<ColumnDynamic *>(dynamic_paths.find(path)->second.get()));
auto it = dynamic_paths.emplace(path, ColumnDynamic::create(max_dynamic_types)).first;
dynamic_paths_ptrs.emplace(path, assert_cast<ColumnDynamic *>(it->second.get()));
sorted_dynamic_paths.insert(it->first);
}
/// Add all remaining paths into shared data statistics until we reach its max size;
else if (new_statistics.shared_data_paths_statistics.size() < Statistics::MAX_SHARED_DATA_STATISTICS_SIZE)
@ -1343,8 +1391,9 @@ void ColumnObject::takeDynamicStructureFromSourceColumns(const DB::Columns & sou
{
for (const auto & [path, _] : path_to_total_number_of_non_null_values)
{
dynamic_paths[path] = ColumnDynamic::create(max_dynamic_types);
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(dynamic_paths[path].get());
auto it = dynamic_paths.emplace(path, ColumnDynamic::create(max_dynamic_types)).first;
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(it->second.get());
sorted_dynamic_paths.insert(it->first);
}
}

View File

@ -238,10 +238,15 @@ private:
/// Map path -> column for paths with explicitly specified types.
/// This set of paths is constant and cannot be changed.
PathToColumnMap typed_paths;
/// Sorted list of typed paths. Used to avoid sorting paths every time in some methods.
std::vector<std::string_view> sorted_typed_paths;
/// Map path -> column for dynamically added paths. All columns
/// here are Dynamic columns. This set of paths can be extended
/// during inerts into the column.
PathToColumnMap dynamic_paths;
/// Sorted list of dynamic paths. Used to avoid sorting paths every time in some methods.
std::set<std::string_view> sorted_dynamic_paths;
/// Store and use pointers to ColumnDynamic to avoid virtual calls.
/// With hundreds of dynamic paths these virtual calls are noticeable.
PathToDynamicColumnPtrMap dynamic_paths_ptrs;

View File

@ -9,6 +9,7 @@
#include <Parsers/ASTQueryWithOutput.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
namespace DB
{
@ -45,9 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel
// It is flattened later, when we process UNION ALL/DISTINCT.
const auto * last_select = children.back()->as<ASTSelectQuery>();
if (last_select && last_select->settings())
{
InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext();
}
InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(/* ignore_setting_constraints= */ false);
}
void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_)
@ -55,10 +54,20 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
if (!ast)
return;
/// First apply the outermost settings. Then they could be overridden by deeper settings.
if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))
{
if (query_with_output->settings_ast)
InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false);
if (const auto * create_query = ast->as<ASTCreateQuery>(); create_query && create_query->select)
applySettingsFromSelectWithUnion(create_query->select->as<ASTSelectWithUnionQuery &>(), context_);
}
if (const auto * select_query = ast->as<ASTSelectQuery>())
{
if (auto new_settings = select_query->settings())
InterpreterSetQuery(new_settings, context_).executeForCurrentContext();
InterpreterSetQuery(new_settings, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false);
}
else if (const auto * select_with_union_query = ast->as<ASTSelectWithUnionQuery>())
{
@ -67,28 +76,15 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
else if (const auto * explain_query = ast->as<ASTExplainQuery>())
{
if (explain_query->settings_ast)
InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext();
InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false);
applySettingsFromQuery(explain_query->getExplainedQuery(), context_);
}
else if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))
{
if (query_with_output->settings_ast)
InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext();
if (const auto * create_query = ast->as<ASTCreateQuery>())
{
if (create_query->select)
{
applySettingsFromSelectWithUnion(create_query->select->as<ASTSelectWithUnionQuery &>(), context_);
}
}
}
else if (auto * insert_query = ast->as<ASTInsertQuery>())
{
context_->setInsertFormat(insert_query->format);
if (insert_query->settings_ast)
InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext();
InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false);
}
}

View File

@ -23,7 +23,7 @@ public:
/** Set setting for current context (query context).
* It is used for interpretation of SETTINGS clause in SELECT query.
*/
void executeForCurrentContext(bool ignore_setting_constraints = false);
void executeForCurrentContext(bool ignore_setting_constraints);
bool supportsTransactions() const override { return true; }

View File

@ -722,7 +722,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
assert_cast<const ASTFunction *>(argument.get())->arguments->children[0]->formatImpl(settings, state, nested_dont_need_parens);
settings.ostr << (settings.hilite ? hilite_operator : "") << " = " << (settings.hilite ? hilite_none : "");
}
settings.ostr << "'[HIDDEN]'";
if (!secret_arguments.replacement.empty())
{
settings.ostr << "'" << secret_arguments.replacement << "'";
}
else
{
settings.ostr << "'[HIDDEN]'";
}
if (size <= secret_arguments.start + secret_arguments.count && !secret_arguments.are_named)
break; /// All other arguments should also be hidden.
continue;

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/KnownObjectNames.h>
#include <Common/re2.h>
#include <Core/QualifiedTableName.h>
#include <base/defines.h>
#include <boost/algorithm/string/predicate.hpp>
@ -49,6 +50,11 @@ public:
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
std::vector<std::string> nested_maps;
/// Full replacement of an argument. Only supported when count is 1, otherwise all arguments will be replaced with this string.
/// It's needed in cases when we don't want to hide the entire parameter, but some part of it, e.g. "connection_string" in
/// `azureBlobStorage('DefaultEndpointsProtocol=https;AccountKey=secretkey;...', ...)` should be replaced with
/// `azureBlobStorage('DefaultEndpointsProtocol=https;AccountKey=[HIDDEN];...', ...)`.
std::string replacement;
bool hasSecrets() const
{
@ -74,6 +80,7 @@ protected:
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
chassert(result.replacement.empty()); /// We shouldn't use replacement with masking other arguments
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
@ -199,32 +206,39 @@ protected:
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
/// azureBlobStorageCluster('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
if (maskAzureConnectionString(-1, true, 1))
return;
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
if (maskAzureConnectionString(-1, true, 2))
return;
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
if (maskAzureConnectionString(url_arg_idx))
return;
/// We should check other arguments first because we don't need to do any replacement in case of
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
size_t count = function->arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
String fourth_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &fourth_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
if (fourth_arg == "auto" || KnownFormatNames::instance().exists(fourth_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
@ -234,6 +248,40 @@ protected:
markSecretArgument(url_arg_idx + 4);
}
bool maskAzureConnectionString(ssize_t url_arg_idx, bool argument_is_named = false, size_t start = 0)
{
String url_arg;
if (argument_is_named)
{
url_arg_idx = findNamedArgument(&url_arg, "connection_string", start);
if (url_arg_idx == -1 || url_arg.empty())
url_arg_idx = findNamedArgument(&url_arg, "storage_account_url", start);
if (url_arg_idx == -1 || url_arg.empty())
return false;
}
else
{
if (!tryGetStringFromArgument(url_arg_idx, &url_arg))
return false;
}
if (!url_arg.starts_with("http"))
{
static re2::RE2 account_key_pattern = "AccountKey=.*?(;|$)";
if (RE2::Replace(&url_arg, account_key_pattern, "AccountKey=[HIDDEN]\\1"))
{
chassert(result.count == 0); /// We shouldn't use replacement with masking other arguments
result.start = url_arg_idx;
result.are_named = argument_is_named;
result.count = 1;
result.replacement = url_arg;
return true;
}
}
return false;
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
@ -513,8 +561,9 @@ protected:
return function->arguments->at(arg_idx)->isIdentifier();
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
/// Looks for an argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
/// Returns -1 if no argument was found.
ssize_t findNamedArgument(String * res, const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < function->arguments->size(); ++i)
{
@ -531,8 +580,22 @@ protected:
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
{
tryGetStringFromArgument(*equals_func->arguments->at(1), res);
return i;
}
}
return -1;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
/// If the argument is found, it is marked as a secret.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
ssize_t arg_idx = findNamedArgument(nullptr, key, start);
if (arg_idx >= 0)
markSecretArgument(arg_idx, /* argument_is_named= */ true);
}
};

View File

@ -25,7 +25,6 @@
#include <Parsers/ParserTablePropertiesQuery.h>
#include <Parsers/ParserWatchQuery.h>
#include <Parsers/ParserDescribeCacheQuery.h>
#include <Parsers/QueryWithOutputSettingsPushDownVisitor.h>
#include <Parsers/Access/ParserShowAccessEntitiesQuery.h>
#include <Parsers/Access/ParserShowAccessQuery.h>
#include <Parsers/Access/ParserShowCreateAccessEntityQuery.h>
@ -152,37 +151,55 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
}
/// These two sections are allowed in an arbitrary order.
ParserKeyword s_format(Keyword::FORMAT);
if (s_format.ignore(pos, expected))
{
ParserIdentifier format_p;
if (!format_p.parse(pos, query_with_output.format, expected))
return false;
setIdentifierSpecial(query_with_output.format);
query_with_output.children.push_back(query_with_output.format);
}
// SETTINGS key1 = value1, key2 = value2, ...
ParserKeyword s_settings(Keyword::SETTINGS);
if (!query_with_output.settings_ast && s_settings.ignore(pos, expected))
{
ParserSetQuery parser_settings(true);
if (!parser_settings.parse(pos, query_with_output.settings_ast, expected))
return false;
query_with_output.children.push_back(query_with_output.settings_ast);
// SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery)
// Pass them manually, to apply in InterpreterSelectQuery::initSettings()
if (query->as<ASTSelectWithUnionQuery>())
/** Why: let's take the following example:
* SELECT 1 UNION ALL SELECT 2 FORMAT TSV
* Each subquery can be put in parentheses and have its own settings:
* (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV
* And the whole query can have settings:
* (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV SETTINGS e=f
* A single query with output is parsed in the same way as the UNION ALL chain:
* SELECT 1 SETTINGS a=b FORMAT TSV SETTINGS e=f
* So while these forms have a slightly different meaning, they both exist:
* SELECT 1 SETTINGS a=b FORMAT TSV
* SELECT 1 FORMAT TSV SETTINGS e=f
* And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order.
* But while this work:
* (SELECT 1) UNION ALL (SELECT 2) FORMAT TSV SETTINGS d=f
* This does not work automatically, unless we explicitly allow different orders:
* (SELECT 1) UNION ALL (SELECT 2) SETTINGS d=f FORMAT TSV
* Inevitably, we also allow this:
* SELECT 1 SETTINGS a=b SETTINGS d=f FORMAT TSV
* ^^^^^^^^^^^^^^^^^^^^^
* Because this part is consumed into ASTSelectWithUnionQuery
* and the rest into ASTQueryWithOutput.
*/
for (size_t i = 0; i < 2; ++i)
{
if (!query_with_output.format && s_format.ignore(pos, expected))
{
auto settings = query_with_output.settings_ast->clone();
assert_cast<ASTSetQuery *>(settings.get())->print_in_format = false;
QueryWithOutputSettingsPushDownVisitor::Data data{settings};
QueryWithOutputSettingsPushDownVisitor(data).visit(query);
ParserIdentifier format_p;
if (!format_p.parse(pos, query_with_output.format, expected))
return false;
setIdentifierSpecial(query_with_output.format);
query_with_output.children.push_back(query_with_output.format);
}
else if (!query_with_output.settings_ast && s_settings.ignore(pos, expected))
{
// SETTINGS key1 = value1, key2 = value2, ...
ParserSetQuery parser_settings(true);
if (!parser_settings.parse(pos, query_with_output.settings_ast, expected))
return false;
query_with_output.children.push_back(query_with_output.settings_ast);
}
else
break;
}
node = std::move(query);

View File

@ -1,56 +0,0 @@
#include <Common/SettingsChanges.h>
#include <Parsers/QueryWithOutputSettingsPushDownVisitor.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTSubquery.h>
#include <iterator>
#include <algorithm>
namespace DB
{
bool QueryWithOutputSettingsPushDownMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child)
{
if (node->as<ASTSelectWithUnionQuery>())
return true;
if (node->as<ASTSubquery>())
return true;
if (child->as<ASTSelectQuery>())
return true;
return false;
}
void QueryWithOutputSettingsPushDownMatcher::visit(ASTPtr & ast, Data & data)
{
if (auto * select_query = ast->as<ASTSelectQuery>())
visit(*select_query, ast, data);
}
void QueryWithOutputSettingsPushDownMatcher::visit(ASTSelectQuery & select_query, ASTPtr &, Data & data)
{
ASTPtr select_settings_ast = select_query.settings();
if (!select_settings_ast)
{
select_query.setExpression(ASTSelectQuery::Expression::SETTINGS, data.settings_ast->clone());
return;
}
SettingsChanges & select_settings = select_settings_ast->as<ASTSetQuery &>().changes;
SettingsChanges & settings = data.settings_ast->as<ASTSetQuery &>().changes;
for (auto & setting : settings)
{
auto it = std::find_if(select_settings.begin(), select_settings.end(), [&](auto & select_setting)
{
return select_setting.name == setting.name;
});
if (it == select_settings.end())
select_settings.push_back(setting);
else
it->value = setting.value;
}
}
}

View File

@ -1,39 +0,0 @@
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
class ASTSelectQuery;
struct SettingChange;
class SettingsChanges;
/// Pushdown SETTINGS clause that goes after FORMAT to the SELECT query:
/// (since settings after FORMAT parsed separately not in the ParserSelectQuery but in ParserQueryWithOutput)
///
/// SELECT 1 FORMAT Null SETTINGS max_block_size = 1 ->
/// SELECT 1 SETTINGS max_block_size = 1 FORMAT Null SETTINGS max_block_size = 1
///
/// Otherwise settings after FORMAT will not be applied.
class QueryWithOutputSettingsPushDownMatcher
{
public:
using Visitor = InDepthNodeVisitor<QueryWithOutputSettingsPushDownMatcher, true>;
struct Data
{
const ASTPtr & settings_ast;
};
static bool needChildVisit(ASTPtr & node, const ASTPtr & child);
static void visit(ASTPtr & ast, Data & data);
private:
static void visit(ASTSelectQuery &, ASTPtr &, Data &);
};
using QueryWithOutputSettingsPushDownVisitor = QueryWithOutputSettingsPushDownMatcher::Visitor;
}

View File

@ -223,7 +223,7 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context,
{
account_name = fourth_arg;
account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/structure");
if (is_format_arg(sixth_arg))
{
format = sixth_arg;
@ -257,10 +257,10 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context,
}
else if (with_structure && engine_args.size() == 8)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "account_name");
account_name = fourth_arg;
account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format");
if (!is_format_arg(sixth_arg))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg);
format = sixth_arg;

View File

@ -1,5 +1,6 @@
import pytest
import random, string
import re
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
@ -336,6 +337,10 @@ def test_create_database():
def test_table_functions():
password = new_password()
azure_conn_string = cluster.env_variables["AZURITE_CONNECTION_STRING"]
account_key_pattern = re.compile("AccountKey=.*?(;|$)")
masked_azure_conn_string = re.sub(
account_key_pattern, "AccountKey=[HIDDEN]\\1", azure_conn_string
)
azure_storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
azure_account_name = "devstoreaccount1"
azure_account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
@ -467,23 +472,23 @@ def test_table_functions():
"CREATE TABLE tablefunc30 (x int) AS s3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV')",
"CREATE TABLE tablefunc31 (`x` int) AS s3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '[HIDDEN]')",
"CREATE TABLE tablefunc32 (`x` int) AS deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
f"CREATE TABLE tablefunc33 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')",
f"CREATE TABLE tablefunc34 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc35 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc33 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')",
f"CREATE TABLE tablefunc34 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc35 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc36 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_3.csv', '{azure_account_name}', '[HIDDEN]')",
f"CREATE TABLE tablefunc37 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')",
f"CREATE TABLE tablefunc38 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')",
f"CREATE TABLE tablefunc39 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_6.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc40 (x int) AS azureBlobStorage(named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_7.csv', format = 'CSV')",
f"CREATE TABLE tablefunc40 (`x` int) AS azureBlobStorage(named_collection_2, connection_string = '{masked_azure_conn_string}', container = 'cont', blob_path = 'test_simple_7.csv', format = 'CSV')",
f"CREATE TABLE tablefunc41 (`x` int) AS azureBlobStorage(named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_8.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')",
f"CREATE TABLE tablefunc42 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV')",
f"CREATE TABLE tablefunc43 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_10.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc44 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_11.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc42 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV')",
f"CREATE TABLE tablefunc43 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_10.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc44 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_11.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc45 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_12.csv', '{azure_account_name}', '[HIDDEN]')",
f"CREATE TABLE tablefunc46 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_13.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')",
f"CREATE TABLE tablefunc47 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_14.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')",
f"CREATE TABLE tablefunc48 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc49 (x int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
f"CREATE TABLE tablefunc49 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{masked_azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
f"CREATE TABLE tablefunc50 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')",
"CREATE TABLE tablefunc51 (`x` int) AS iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
],

View File

@ -1,4 +1,3 @@
DROP TABLE IF EXISTS local_table;
DROP TABLE IF EXISTS other_table;

View File

@ -1,7 +1,7 @@
1
1
1
1
1
2
1
2
2

View File

@ -13,7 +13,7 @@ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) FORMAT CSV SETTINGS max_block_size = 1'
# push down append
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_compress_block_size = 1 FORMAT CSV SETTINGS max_block_size = 1'
# overwrite on push down (since these settings goes latest)
# not overwrite on push down
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 2 FORMAT CSV SETTINGS max_block_size = 1'
# on push-down
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 1 FORMAT CSV'

View File

@ -1,2 +1 @@
select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS}
select 42 settings compatibility=NULL; -- {clientError BAD_GET}

View File

@ -0,0 +1,14 @@
1
2
1
2
1
2
1
1
3
3
3
3
3
1

View File

@ -0,0 +1,30 @@
SET max_block_size = 10, max_threads = 1;
-- Take the following example:
SELECT 1 UNION ALL SELECT 2 FORMAT TSV;
-- Each subquery can be put in parentheses and have its own settings:
(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV;
-- And the whole query can have settings:
(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV SETTINGS max_block_size = 3;
-- A single query with output is parsed in the same way as the UNION ALL chain:
SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV SETTINGS max_block_size = 3;
-- So while these forms have a slightly different meaning, they both exist:
SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV;
SELECT getSetting('max_block_size') FORMAT TSV SETTINGS max_block_size = 3;
-- And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order.
-- But while this work:
(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) FORMAT TSV SETTINGS max_block_size = 3;
-- This does not work automatically, unless we explicitly allow different orders:
(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) SETTINGS max_block_size = 3 FORMAT TSV;
-- Inevitably, we allow this:
SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 SETTINGS max_block_size = 3 FORMAT TSV;
/*^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^*/
-- Because this part is consumed into ASTSelectWithUnionQuery
-- and the rest into ASTQueryWithOutput.

View File

@ -0,0 +1,5 @@
4
5 [1,2,3]
5 2020-01-01
5 42
5 Hello

View File

@ -0,0 +1,15 @@
set allow_experimental_dynamic_type = 1;
drop table if exists test;
create table test (d Dynamic(max_types=2)) engine=Memory;
insert into test values (42), ('Hello'), ([1,2,3]), ('2020-01-01');
insert into test values ('Hello'), ([1,2,3]), ('2020-01-01'), (42);
insert into test values ([1,2,3]), ('2020-01-01'), (42), ('Hello');
insert into test values ('2020-01-01'), (42), ('Hello'), ([1,2,3]);
insert into test values (42);
insert into test values ('Hello');
insert into test values ([1,2,3]);
insert into test values ('2020-01-01');
select uniqExact(d) from test;
select count(), d from test group by d order by d;
drop table test;

View File

@ -0,0 +1,12 @@
11
6 {"a":0,"b":"Hello"}
6 {"a":0,"b":[{"f":"42"}]}
6 {"a":0,"c":"Hello"}
6 {"a":0,"c":["1","2","3"]}
6 {"a":0,"d":"2020-01-01"}
6 {"a":0,"d":["1","2","3"]}
6 {"a":0,"e":"2020-01-01"}
6 {"a":0,"e":[{"f":"42"}]}
5 {"a":42,"b":"Hello","c":["1","2","3"],"d":"2020-01-01","e":[{"f":"42"}]}
5 {"a":42,"b":[{"f":"42"}],"c":"Hello","d":["1","2","3"],"e":"2020-01-01"}
12 {"a":42}

View File

@ -0,0 +1,39 @@
set allow_experimental_json_type = 1;
drop table if exists test;
create table test (json JSON(a UInt32, max_dynamic_paths=2)) engine=Memory;
insert into test values ('{"a" : 42, "b" : "Hello", "c" : [1, 2, 3], "d" : "2020-01-01", "e" : [{"f" : 42}]}');
insert into test values ('{"b" : "Hello", "c" : [1, 2, 3], "d" : "2020-01-01", "e" : [{"f" : 42}], "a" : 42}');
insert into test values ('{"c" : [1, 2, 3], "d" : "2020-01-01", "e" : [{"f" : 42}], "a" : 42, "b" : "Hello"}');
insert into test values ('{"d" : "2020-01-01", "e" : [{"f" : 42}], "a" : 42, "b" : "Hello", "c" : [1, 2, 3]}');
insert into test values ('{"e" : [{"f" : 42}], "a" : 42, "b" : "Hello", "c" : [1, 2, 3], "d" : "2020-01-01"}');
insert into test values ('{"a" : 42}'), ('{"b" : "Hello"}'), ('{"c" : [1, 2, 3]}'), ('{"d" : "2020-01-01"}'), ('{"e" : [{"f" : 42}]}');
insert into test values ('{"b" : "Hello"}'), ('{"c" : [1, 2, 3]}'), ('{"d" : "2020-01-01"}'), ('{"e" : [{"f" : 42}]}'), ('{"a" : 42}');
insert into test values ('{"c" : [1, 2, 3]}'), ('{"d" : "2020-01-01"}'), ('{"e" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"b" : "Hello"}');
insert into test values ('{"d" : "2020-01-01"}'), ('{"e" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"b" : "Hello"}'), ('{"c" : [1, 2, 3]}');
insert into test values ('{"e" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"b" : "Hello"}'), ('{"c" : [1, 2, 3]}'), ('{"d" : "2020-01-01"}');
insert into test values ('{"a" : 42}');
insert into test values ('{"b" : "Hello"}');
insert into test values ('{"c" : [1, 2, 3]}');
insert into test values ('{"d" : "2020-01-01"}');
insert into test values ('{"e" : [{"f" : 42}]}');
insert into test values ('{"a" : 42, "c" : "Hello", "d" : [1, 2, 3], "e" : "2020-01-01", "b" : [{"f" : 42}]}');
insert into test values ('{"c" : "Hello", "d" : [1, 2, 3], "e" : "2020-01-01", "b" : [{"f" : 42}], "a" : 42}');
insert into test values ('{"d" : [1, 2, 3], "e" : "2020-01-01", "b" : [{"f" : 42}], "a" : 42, "c" : "Hello"}');
insert into test values ('{"e" : "2020-01-01", "b" : [{"f" : 42}], "a" : 42, "c" : "Hello", "d" : [1, 2, 3]}');
insert into test values ('{"b" : [{"f" : 42}], "a" : 42, "c" : "Hello", "d" : [1, 2, 3], "e" : "2020-01-01"}');
insert into test values ('{"a" : 42}'), ('{"c" : "Hello"}'), ('{"d" : [1, 2, 3]}'), ('{"e" : "2020-01-01"}'), ('{"b" : [{"f" : 42}]}');
insert into test values ('{"c" : "Hello"}'), ('{"d" : [1, 2, 3]}'), ('{"e" : "2020-01-01"}'), ('{"b" : [{"f" : 42}]}'), ('{"a" : 42}');
insert into test values ('{"d" : [1, 2, 3]}'), ('{"e" : "2020-01-01"}'), ('{"b" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"c" : "Hello"}');
insert into test values ('{"e" : "2020-01-01"}'), ('{"b" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"c" : "Hello"}'), ('{"d" : [1, 2, 3]}');
insert into test values ('{"b" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"c" : "Hello"}'), ('{"d" : [1, 2, 3]}'), ('{"e" : "2020-01-01"}');
insert into test values ('{"a" : 42}');
insert into test values ('{"c" : "Hello"}');
insert into test values ('{"d" : [1, 2, 3]}');
insert into test values ('{"e" : "2020-01-01"}');
insert into test values ('{"b" : [{"f" : 42}]}');
select uniqExact(json) from test;
select count(), json from test group by json order by toString(json);
drop table test;