Merge remote-tracking branch 'origin' into improve-integration-tests-3

Yatsishin Ilya 2024-07-24 20:29:09 +00:00
commit 5f5eb0a3f2
137 changed files with 2524 additions and 1712 deletions

View File

@ -269,7 +269,7 @@ jobs:
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -135,7 +135,7 @@ jobs:
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -108,7 +108,7 @@ jobs:
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -54,7 +54,7 @@ jobs:
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -152,8 +152,9 @@ jobs:
CheckReadyForMerge:
if: ${{ !cancelled() }}
# Test_2 or Test_3 must not have jobs required for Mergeable check
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1]
# Test_2 or Test_3 do not have the jobs required for Mergeable check,
# however, set them as "needs" to get all check results before the automatic merge occurs.
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -168,7 +169,7 @@ jobs:
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -489,7 +489,7 @@ jobs:
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF

View File

@ -9,6 +9,7 @@ set(DATASKETCHES_LIBRARY theta)
add_library(_datasketches INTERFACE)
target_include_directories(_datasketches SYSTEM BEFORE INTERFACE
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include"
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/count/include"
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include")
add_library(ch_contrib::datasketches ALIAS _datasketches)

View File

@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update --yes \
&& env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends \
&& env DEBIAN_FRONTEND=noninteractive apt-get install wget git python3 default-jdk maven --yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

View File

@ -999,6 +999,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
[HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation of how many distinct values a column contains.
- `count_min`
[Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column.
## Column-level Settings {#column-level-settings}
Certain MergeTree settings can be overridden at the column level:
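To build intuition for the count_min sketches mentioned above, here is a minimal Python sketch of the idea (illustrative only, not the datasketches library ClickHouse links against): a few rows of counters indexed by independent hashes, where the frequency estimate is the minimum counter across rows, so it can overestimate but never underestimate.

import hashlib

class CountMinSketch:
    """Toy count-min sketch: num_hashes rows of num_buckets counters."""

    def __init__(self, num_hashes: int = 7, num_buckets: int = 2718):
        self.num_hashes = num_hashes
        self.num_buckets = num_buckets
        self.table = [[0] * num_buckets for _ in range(num_hashes)]

    def _bucket(self, row: int, value: str) -> int:
        # Derive an independent hash per row by mixing in the row index.
        digest = hashlib.blake2b(f"{row}:{value}".encode()).digest()
        return int.from_bytes(digest[:8], "little") % self.num_buckets

    def update(self, value: str, count: int = 1) -> None:
        for row in range(self.num_hashes):
            self.table[row][self._bucket(row, value)] += count

    def estimate(self, value: str) -> int:
        # Minimum over rows: collisions can only inflate counters, never deflate them.
        return min(self.table[row][self._bucket(row, value)] for row in range(self.num_hashes))

cms = CountMinSketch()
for v in ["a", "b", "a", "c", "a"]:
    cms.update(v)
print(cms.estimate("a"))  # 3 (an upper bound; exact here because there are no collisions)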

View File

@ -55,7 +55,7 @@ CMPLNT_FR_TM Nullable(String)
```
:::tip
Most of the time the above command will let you know which fields in the input data are numeric, which are strings, and which are tuples. This is not always the case. Because ClickHouse is routinely used with datasets containing billions of records, there is a default number (100) of rows examined to [infer the schema](/docs/en/integrations/data-ingestion/data-formats/json.md#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
Most of the time the above command will let you know which fields in the input data are numeric, which are strings, and which are tuples. This is not always the case. Because ClickHouse is routinely used with datasets containing billions of records, there is a default number (100) of rows examined to [infer the schema](/en/integrations/data-formats/json/inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
you can get a better idea of the content.
Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled.
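As a hedged illustration of that tip (the file name and the clickhouse-connect driver usage below are assumptions for the example, not part of this commit), overriding the sampled row count from Python could look like:

import clickhouse_connect

client = clickhouse_connect.get_client(host="localhost")
# Ask the server to sample 2000 rows instead of the default when inferring the schema.
result = client.query(
    "DESCRIBE TABLE file('complaints.tsv') "
    "SETTINGS input_format_max_rows_to_read_for_schema_inference = 2000"
)
for name, type_name, *_ in result.result_rows:
    print(name, type_name)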

View File

@ -7,7 +7,7 @@ keywords: [object, data type]
# Object Data Type (deprecated)
**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-formats/json/overview) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
<hr />

View File

@ -49,7 +49,7 @@ enum class QueryTreeNodeType : uint8_t
/// Convert query tree node type to string
const char * toString(QueryTreeNodeType type);
/** Query tree is semantical representation of query.
/** Query tree is a semantic representation of a query.
* Query tree node represent node in query tree.
* IQueryTreeNode is base class for all query tree nodes.
*

View File

@ -543,7 +543,7 @@ if (TARGET ch_contrib::libpqxx)
endif()
if (TARGET ch_contrib::datasketches)
target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches)
dbms_target_link_libraries(PUBLIC ch_contrib::datasketches)
endif ()
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)

View File

@ -33,7 +33,7 @@ size_t toMilliseconds(auto duration)
return std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
}
const auto epsilon = 500us;
const auto epsilon = 1ms;
class ResolvePoolMock : public DB::HostResolver
{
@ -358,53 +358,59 @@ void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses
TEST_F(ResolvePoolTest, BannedForConsiquenceFail)
{
auto history = 5ms;
auto history = 10ms;
auto resolver = make_resolver(toMilliseconds(history));
auto failed_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*failed_addr));
auto start_at = now();
failed_addr.setFail();
auto start_at = now();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
sleep_until(start_at + history + epsilon);
start_at = now();
resolver->update();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));
failed_addr.setFail();
start_at = now();
check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
sleep_until(start_at + history + epsilon);
start_at = now();
resolver->update();
// too much time has passed
if (now() > start_at + 2*history - epsilon)
return;
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
// ip is still banned after history_ms + update, because it was its second consecutive fail
check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + 2*history - epsilon);
}
TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
{
auto history = 5ms;
auto history = 10ms;
auto resolver = make_resolver(toMilliseconds(history));
auto failed_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*failed_addr));
auto start_at = now();
failed_addr.setFail();
failed_addr.setFail();
failed_addr.setFail();
failed_addr.setFail();
failed_addr.setFail();
failed_addr.setFail();
auto start_at = now();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
@ -413,6 +419,7 @@ TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
sleep_until(start_at + history + epsilon);
resolver->update();
// ip is cleared after just 1 history_ms interval.
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));

View File

@ -383,7 +383,10 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
LockMemoryExceptionInThread::removeUniqueLock();
};
asio_opts.thread_pool_size_ = getNumberOfPhysicalCPUCores();
/// At least 16 threads for network communication in asio.
/// asio is an async framework, so even with 1 thread it should be ok, but
/// still, as a safeguard, it's better to have some redundant capacity here
asio_opts.thread_pool_size_ = std::max(16U, getNumberOfPhysicalCPUCores());
if (state_manager->isSecure())
{

View File

@ -125,23 +125,6 @@ DataTypePtr DataTypeFactory::getImpl(const String & family_name_param, const AST
{
String family_name = getAliasToOrName(family_name_param);
if (endsWith(family_name, "WithDictionary"))
{
ASTPtr low_cardinality_params = std::make_shared<ASTExpressionList>();
String param_name = family_name.substr(0, family_name.size() - strlen("WithDictionary"));
if (parameters)
{
auto func = std::make_shared<ASTFunction>();
func->name = param_name;
func->arguments = parameters;
low_cardinality_params->children.push_back(func);
}
else
low_cardinality_params->children.push_back(std::make_shared<ASTIdentifier>(param_name));
return getImpl<nullptr_on_error>("LowCardinality", low_cardinality_params);
}
const auto * creator = findCreatorByName<nullptr_on_error>(family_name);
if constexpr (nullptr_on_error)
{

View File

@ -739,7 +739,8 @@ public:
{
NumberType value;
tryGetNumericValueFromJSONElement<JSONParser, NumberType>(value, element, convert_bool_to_integer, error);
if (!tryGetNumericValueFromJSONElement<JSONParser, NumberType>(value, element, convert_bool_to_integer, error))
return false;
auto & col_vec = assert_cast<ColumnVector<NumberType> &>(dest);
col_vec.insertValue(value);
return true;

View File

@ -17,13 +17,19 @@
namespace DB
{
IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const DB::ASTPtr& query_ptr_,
const DB::ContextMutablePtr& context_, const DB::SelectQueryOptions& options_)
IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_,
const ContextMutablePtr & context_, const SelectQueryOptions & options_)
: query_ptr(query_ptr_)
, context(context_)
, options(options_)
, max_streams(context->getSettingsRef().max_threads)
{
/// FIXME All code here will work with the old analyzer, however for views over Distributed tables
/// it's possible that new analyzer will be enabled in ::getQueryProcessingStage method
/// of the underlying storage when all other parts of infrastructure are not ready for it
/// (built with old analyzer).
context->setSetting("allow_experimental_analyzer", false);
if (options.shard_num)
context->addSpecialScalar(
"_shard_num",

View File

@ -75,7 +75,6 @@
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Storages/StorageDistributed.h>
#include <Storages/StorageDummy.h>
#include <Storages/StorageMerge.h>
#include <Storages/StorageValues.h>
#include <Storages/StorageView.h>
@ -227,18 +226,15 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const StoragePtr & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const SelectQueryOptions & options_)
: InterpreterSelectQuery(
query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
{
}
: InterpreterSelectQuery(query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
{}
InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const ContextPtr & context_,
const SelectQueryOptions & options_,
PreparedSetsPtr prepared_sets_)
: InterpreterSelectQuery(
query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_)
: InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_)
{}
InterpreterSelectQuery::~InterpreterSelectQuery() = default;

View File

@ -26,7 +26,6 @@ class Logger;
namespace DB
{
class SubqueryForSet;
class InterpreterSelectWithUnionQuery;
class Context;
class QueryPlan;

View File

@ -545,7 +545,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE)
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get<String>(), type.getName());
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get<String>(), type.getName());
e.addMessage(fmt::format("while converting '{}' to {}", src.get<String>(), type.getName()));
throw;

View File

@ -147,7 +147,7 @@ INSTANTIATE_TEST_SUITE_P(
DecimalField(DateTime64(123 * Day * 1'000'000), 6)
}
})
);
);
INSTANTIATE_TEST_SUITE_P(
DateTimeToDateTime64,
@ -179,3 +179,84 @@ INSTANTIATE_TEST_SUITE_P(
},
})
);
INSTANTIATE_TEST_SUITE_P(
StringToNumber,
ConvertFieldToTypeTest,
::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
{
"String",
Field("1"),
"Int8",
Field(1)
},
{
"String",
Field("256"),
"Int8",
Field()
},
{
"String",
Field("not a number"),
"Int8",
{}
},
{
"String",
Field("1.1"),
"Int8",
{} /// we can not convert '1.1' to Int8
},
{
"String",
Field("1.1"),
"Float64",
Field(1.1)
},
})
);
INSTANTIATE_TEST_SUITE_P(
NumberToString,
ConvertFieldToTypeTest,
::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
{
"Int8",
Field(1),
"String",
Field("1")
},
{
"Int8",
Field(-1),
"String",
Field("-1")
},
{
"Float64",
Field(1.1),
"String",
Field("1.1")
},
})
);
INSTANTIATE_TEST_SUITE_P(
StringToDate,
ConvertFieldToTypeTest,
::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
{
"String",
Field("2024-07-12"),
"Date",
Field(static_cast<UInt16>(19916))
},
{
"String",
Field("not a date"),
"Date",
{}
},
})
);

View File

@ -9,7 +9,7 @@ namespace DB
{
/** The SELECT subquery is in parenthesis.
/** The SELECT subquery, in parentheses.
*/
class ParserSubquery : public IParserBase
{

View File

@ -11,15 +11,12 @@
namespace DB
{
bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKeyword s_describe(Keyword::DESCRIBE);
ParserKeyword s_desc(Keyword::DESC);
ParserKeyword s_table(Keyword::TABLE);
ParserKeyword s_settings(Keyword::SETTINGS);
ParserToken s_dot(TokenType::Dot);
ParserIdentifier name_p;
ParserSetQuery parser_settings(true);
ASTPtr database;
@ -53,5 +50,4 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
return true;
}
}

View File

@ -1,48 +1,48 @@
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Interpreters/Squashing.h>
#include <Parsers/ASTInsertQuery.h>
#include <algorithm>
#include <exception>
#include <memory>
#include <mutex>
#include <vector>
#include <string_view>
#include <Poco/Net/NetException.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <Common/CurrentThread.h>
#include <Common/Stopwatch.h>
#include <Common/NetException.h>
#include <Common/setThreadName.h>
#include <Common/OpenSSLHelpers.h>
#include <IO/Progress.h>
#include <vector>
#include <Access/AccessControl.h>
#include <Access/Credentials.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <IO/LimitReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Compression/CompressionFactory.h>
#include <Core/ExternalTable.h>
#include <Core/ServerSettings.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/TablesStatus.h>
#include <IO/LimitReadBuffer.h>
#include <IO/Progress.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Interpreters/InternalTextLogsQueue.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/Session.h>
#include <Interpreters/Squashing.h>
#include <Interpreters/TablesStatus.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Server/TCPServer.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
#include <Storages/ObjectStorage/StorageObjectStorageCluster.h>
#include <Core/ExternalTable.h>
#include <Core/ServerSettings.h>
#include <Access/AccessControl.h>
#include <Access/Credentials.h>
#include <Compression/CompressionFactory.h>
#include <Common/logger_useful.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Poco/Net/NetException.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <Common/CurrentMetrics.h>
#include <Common/CurrentThread.h>
#include <Common/NetException.h>
#include <Common/OpenSSLHelpers.h>
#include <Common/Stopwatch.h>
#include <Common/logger_useful.h>
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>
#include <Common/thread_local_rng.h>
#include <fmt/format.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/Executors/PushingPipelineExecutor.h>
@ -61,6 +61,8 @@
#include <Common/config_version.h>
#include <fmt/format.h>
using namespace std::literals;
using namespace DB;
@ -1036,6 +1038,17 @@ void TCPHandler::processOrdinaryQuery()
PullingAsyncPipelineExecutor executor(pipeline);
CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread};
/// The following may happen:
/// * current thread is holding the lock
/// * because of the exception we unwind the stack and call the destructor of `executor`
/// * the destructor calls cancel() and waits for all query threads to finish
/// * at the same time one of the query threads is trying to acquire the lock, e.g. inside `merge_tree_read_task_callback`
/// * deadlock
SCOPE_EXIT({
if (out_lock.owns_lock())
out_lock.unlock();
});
Block block;
while (executor.pull(block, interactive_delay / 1000))
{
@ -1079,8 +1092,7 @@ void TCPHandler::processOrdinaryQuery()
}
/// This lock wasn't acquired before and we make .lock() call here
/// so everything under this line is covered even together
/// with sendProgress() out of the scope
/// so everything under this line is covered.
out_lock.lock();
/** If data has run out, we will send the profiling data and total values to
@ -1107,6 +1119,7 @@ void TCPHandler::processOrdinaryQuery()
last_sent_snapshots.clear();
}
out_lock.lock();
sendProgress();
}

View File

@ -304,7 +304,7 @@ void RefreshTask::refreshTask()
{
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
auto text = message.text;
message.text = fmt::format("Refresh failed: {}", message.text);
message.text = fmt::format("Refresh view {} failed: {}", view->getStorageID().getFullTableName(), message.text);
LOG_ERROR(log, message);
exception = text;
}

View File

@ -16,6 +16,7 @@
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Common/CurrentMetrics.h>
#include <Common/NetException.h>
#include <Common/randomDelay.h>
@ -224,13 +225,17 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::ABORTED && e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM)
if (e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM
&& !isRetryableException(std::current_exception()))
{
report_broken_part();
}
throw;
}
catch (...)
{
if (!isRetryableException(std::current_exception()))
report_broken_part();
throw;
}

View File

@ -499,8 +499,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
{
auto stats = part->loadStatistics();
/// TODO: We only have one stats file for every part.
result.addRows(part->rows_count);
for (const auto & stat : stats)
result.merge(part->info.getPartNameV1(), part->rows_count, stat);
result.merge(part->info.getPartNameV1(), stat);
}
catch (...)
{
@ -515,8 +516,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
if (!partition_pruner.canBePruned(*part))
{
auto stats = part->loadStatistics();
result.addRows(part->rows_count);
for (const auto & stat : stats)
result.merge(part->info.getPartNameV1(), part->rows_count, stat);
result.merge(part->info.getPartNameV1(), stat);
}
}
catch (...)

View File

@ -15,16 +15,11 @@
#include <Processors/QueryPlan/FilterStep.h>
#include <Common/logger_useful.h>
#include <Processors/Merges/Algorithms/MergeTreePartLevelInfo.h>
#include <Storages/MergeTree/checkDataPart.h>
namespace DB
{
namespace ErrorCodes
{
extern const int MEMORY_LIMIT_EXCEEDED;
}
/// Lightweight (in terms of logic) stream for reading single part from
/// MergeTree, used for merges and mutations.
///
@ -281,7 +276,7 @@ try
catch (...)
{
/// Suspicion of the broken part. A part is added to the queue for verification.
if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
if (!isRetryableException(std::current_exception()))
storage.reportBrokenPart(data_part);
throw;
}

View File

@ -36,11 +36,13 @@ namespace ErrorCodes
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
extern const int CANNOT_SCHEDULE_TASK;
extern const int UNEXPECTED_FILE_IN_DATA_PART;
extern const int NO_FILE_IN_DATA_PART;
extern const int NETWORK_ERROR;
extern const int SOCKET_TIMEOUT;
extern const int BROKEN_PROJECTION;
extern const int ABORTED;
}
@ -85,7 +87,9 @@ bool isRetryableException(std::exception_ptr exception_ptr)
{
return isNotEnoughMemoryErrorCode(e.code())
|| e.code() == ErrorCodes::NETWORK_ERROR
|| e.code() == ErrorCodes::SOCKET_TIMEOUT;
|| e.code() == ErrorCodes::SOCKET_TIMEOUT
|| e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK
|| e.code() == ErrorCodes::ABORTED;
}
catch (const Poco::Net::NetException &)
{
@ -329,17 +333,22 @@ static IMergeTreeDataPart::Checksums checkDataPart(
projections_on_disk.erase(projection_file);
}
if (throw_on_broken_projection && !broken_projections_message.empty())
if (throw_on_broken_projection)
{
if (!broken_projections_message.empty())
{
throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
}
/// This one is actually not broken, just redundant files on disk which
/// MergeTree will never use.
if (require_checksums && !projections_on_disk.empty())
{
throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
"Found unexpected projection directories: {}",
fmt::join(projections_on_disk, ","));
}
}
if (is_cancelled())
return {};

View File

@ -163,7 +163,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
{
for (const auto & object_info : read_keys)
{
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName()))
auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName());
/// Use this format only if we have a schema reader for it.
if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
{
format = format_from_file_name;
break;
@ -221,7 +223,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
{
for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it)
{
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName()))
auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName());
/// Use this format only if we have a schema reader for it.
if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
{
format = format_from_file_name;
break;

View File

@ -16,7 +16,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par
part_statistics[part_name] = stats;
}
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const
{
if (part_statistics.empty())
return default_normal_cond_factor * rows;
@ -30,16 +30,19 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(
return result * rows / part_rows;
}
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(const Field & val, Float64 rows) const
{
return rows - estimateLess(val, rows);
}
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(const Field & val, Float64 rows) const
{
if (part_statistics.empty())
{
if (val < - threshold || val > threshold)
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (!float_val)
return default_unknown_cond_factor * rows;
else if (float_val.value() < - threshold || float_val.value() > threshold)
return default_normal_cond_factor * rows;
else
return default_good_cond_factor * rows;
@ -87,7 +90,7 @@ static std::pair<String, Int32> tryToExtractSingleColumn(const RPNBuilderTreeNod
return result;
}
std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
std::pair<String, Field> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
{
if (!node.isFunction())
return {};
@ -123,48 +126,35 @@ std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const
DataTypePtr output_type;
if (!constant_node->tryGetConstant(output_value, output_type))
return {};
const auto type = output_value.getType();
Float64 value;
if (type == Field::Types::Int64)
value = output_value.get<Int64>();
else if (type == Field::Types::UInt64)
value = output_value.get<UInt64>();
else if (type == Field::Types::Float64)
value = output_value.get<Float64>();
else
return {};
return std::make_pair(function_name, value);
return std::make_pair(function_name, output_value);
}
Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode & node) const
{
auto result = tryToExtractSingleColumn(node);
if (result.second != 1)
{
return default_unknown_cond_factor;
}
return default_unknown_cond_factor * total_rows;
String col = result.first;
auto it = column_estimators.find(col);
/// If the estimator of the column is not found or there is no data at all,
/// we use a dummy estimation.
bool dummy = total_rows == 0;
bool dummy = false;
ColumnSelectivityEstimator estimator;
if (it != column_estimators.end())
{
estimator = it->second;
}
else
{
dummy = true;
}
auto [op, val] = extractBinaryOp(node, col);
if (op == "equals")
{
if (dummy)
{
if (val < - threshold || val > threshold)
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (!float_val || (float_val < - threshold || float_val > threshold))
return default_normal_cond_factor * total_rows;
else
return default_good_cond_factor * total_rows;
@ -187,13 +177,8 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode
return default_unknown_cond_factor * total_rows;
}
void ConditionSelectivityEstimator::merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat)
void ConditionSelectivityEstimator::merge(String part_name, ColumnStatisticsPtr column_stat)
{
if (!part_names.contains(part_name))
{
total_rows += part_rows;
part_names.insert(part_name);
}
if (column_stat != nullptr)
column_estimators[column_stat->columnName()].merge(part_name, column_stat);
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Storages/Statistics/Statistics.h>
#include <Core/Field.h>
namespace DB
{
@ -10,6 +11,14 @@ class RPNBuilderTreeNode;
/// It estimates the selectivity of a condition.
class ConditionSelectivityEstimator
{
public:
/// TODO: Support conditions consisting of CNF/DNF like (cond1 and cond2) or (cond3) ...
/// Right now we only support simple conditions like col = val / col < val
Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
void merge(String part_name, ColumnStatisticsPtr column_stat);
void addRows(UInt64 part_rows) { total_rows += part_rows; }
private:
friend class ColumnStatistics;
struct ColumnSelectivityEstimator
@ -20,13 +29,15 @@ private:
void merge(String part_name, ColumnStatisticsPtr stats);
Float64 estimateLess(Float64 val, Float64 rows) const;
Float64 estimateLess(const Field & val, Float64 rows) const;
Float64 estimateGreater(Float64 val, Float64 rows) const;
Float64 estimateGreater(const Field & val, Float64 rows) const;
Float64 estimateEqual(Float64 val, Float64 rows) const;
Float64 estimateEqual(const Field & val, Float64 rows) const;
};
std::pair<String, Field> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
static constexpr auto default_good_cond_factor = 0.1;
static constexpr auto default_normal_cond_factor = 0.5;
static constexpr auto default_unknown_cond_factor = 1.0;
@ -35,16 +46,7 @@ private:
static constexpr auto threshold = 2;
UInt64 total_rows = 0;
std::set<String> part_names;
std::map<String, ColumnSelectivityEstimator> column_estimators;
std::pair<String, Float64> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
public:
/// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
/// Right now we only support simple condition like col = val / col < val
Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
void merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat);
};
}

View File

@ -1,15 +1,18 @@
#include <Storages/Statistics/Statistics.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
#include <Storages/Statistics/StatisticsCountMinSketch.h>
#include <Storages/Statistics/StatisticsTDigest.h>
#include <Storages/Statistics/StatisticsUniq.h>
#include <Storages/StatisticsDescription.h>
#include <Storages/ColumnsDescription.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include "config.h" /// USE_DATASKETCHES
namespace DB
{
@ -24,6 +27,36 @@ enum StatisticsFileVersion : UInt16
V0 = 0,
};
std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & field)
{
switch (field.getType())
{
case Field::Types::Int64:
return field.get<Int64>();
case Field::Types::UInt64:
return field.get<UInt64>();
case Field::Types::Float64:
return field.get<Float64>();
case Field::Types::Int128:
return field.get<Int128>();
case Field::Types::UInt128:
return field.get<UInt128>();
case Field::Types::Int256:
return field.get<Int256>();
case Field::Types::UInt256:
return field.get<UInt256>();
default:
return {};
}
}
std::optional<String> StatisticsUtils::tryConvertToString(const DB::Field & field)
{
if (field.getType() == Field::Types::String)
return field.get<String>();
return {};
}
IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
: stat(stat_)
{
@ -46,12 +79,12 @@ UInt64 IStatistics::estimateCardinality() const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics");
}
Float64 IStatistics::estimateEqual(Float64 /*val*/) const
Float64 IStatistics::estimateEqual(const Field & /*val*/) const
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics");
}
Float64 IStatistics::estimateLess(Float64 /*val*/) const
Float64 IStatistics::estimateLess(const Field & /*val*/) const
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
}
@ -66,27 +99,32 @@ Float64 IStatistics::estimateLess(Float64 /*val*/) const
/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics
/// object that supports it natively.
Float64 ColumnStatistics::estimateLess(Float64 val) const
Float64 ColumnStatistics::estimateLess(const Field & val) const
{
if (stats.contains(StatisticsType::TDigest))
return stats.at(StatisticsType::TDigest)->estimateLess(val);
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
}
Float64 ColumnStatistics::estimateGreater(Float64 val) const
Float64 ColumnStatistics::estimateGreater(const Field & val) const
{
return rows - estimateLess(val);
}
Float64 ColumnStatistics::estimateEqual(Float64 val) const
Float64 ColumnStatistics::estimateEqual(const Field & val) const
{
if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
{
/// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket.
if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048)
return stats.at(StatisticsType::TDigest)->estimateEqual(val);
}
if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold)
#if USE_DATASKETCHES
if (stats.contains(StatisticsType::CountMinSketch))
return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val);
#endif
if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold))
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
else
return rows * ConditionSelectivityEstimator::default_good_cond_factor;
@ -166,11 +204,16 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va
MergeTreeStatisticsFactory::MergeTreeStatisticsFactory()
{
registerValidator(StatisticsType::TDigest, TDigestValidator);
registerCreator(StatisticsType::TDigest, TDigestCreator);
registerValidator(StatisticsType::TDigest, tdigestValidator);
registerCreator(StatisticsType::TDigest, tdigestCreator);
registerValidator(StatisticsType::Uniq, UniqValidator);
registerCreator(StatisticsType::Uniq, UniqCreator);
registerValidator(StatisticsType::Uniq, uniqValidator);
registerCreator(StatisticsType::Uniq, uniqCreator);
#if USE_DATASKETCHES
registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator);
registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator);
#endif
}
MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance()
@ -197,7 +240,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri
{
auto it = creators.find(type);
if (it == creators.end())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type);
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type);
auto stat_ptr = (it->second)(desc, stats.data_type);
column_stat->stats[type] = stat_ptr;
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Core/Block.h>
#include <Core/Field.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <Storages/StatisticsDescription.h>
@ -13,6 +14,14 @@ namespace DB
constexpr auto STATS_FILE_PREFIX = "statistics_";
constexpr auto STATS_FILE_SUFFIX = ".stats";
struct StatisticsUtils
{
/// Returns std::nullopt if input Field cannot be converted to a concrete value
static std::optional<Float64> tryConvertToFloat64(const Field & field);
static std::optional<String> tryConvertToString(const Field & field);
};
/// Statistics describe properties of the values in the column,
/// e.g. how many unique values exist,
/// what are the N most frequent values,
@ -34,8 +43,8 @@ public:
/// Per-value estimations.
/// Throws if the statistics object is not able to do a meaningful estimation.
virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column
virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column
virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column
virtual Float64 estimateLess(const Field & val) const; /// summarized cardinality of values < val in the column
protected:
SingleStatisticsDescription stat;
@ -58,9 +67,9 @@ public:
void update(const ColumnPtr & column);
Float64 estimateLess(Float64 val) const;
Float64 estimateGreater(Float64 val) const;
Float64 estimateEqual(Float64 val) const;
Float64 estimateLess(const Field & val) const;
Float64 estimateGreater(const Field & val) const;
Float64 estimateEqual(const Field & val) const;
private:
friend class MergeTreeStatisticsFactory;

View File

@ -0,0 +1,102 @@
#include <Storages/Statistics/StatisticsCountMinSketch.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/convertFieldToType.h>
#if USE_DATASKETCHES
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_STATISTICS;
}
/// Constants chosen based on rolling dice.
/// The values provide:
/// 1. an error tolerance of 0.1% (ε = 0.001)
/// 2. a confidence level of 99.9% (δ = 0.001).
/// The resulting sketch size is about 152 KB.
static constexpr auto num_hashes = 7uz;
static constexpr auto num_buckets = 2718uz;
StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_)
: IStatistics(stat_)
, sketch(num_hashes, num_buckets)
, data_type(data_type_)
{
}
Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const
{
/// Try to convert field to data_type. Converting string to proper data types such as: number, date, datetime, IPv4, Decimal etc.
/// Return null if val is larger than the range of data_type
///
/// For example: if data_type is Int32:
/// 1. For 1.0, 1, '1', return Field(1)
/// 2. For 1.1, max_value_int64, return null
Field val_converted = convertFieldToType(val, *data_type);
if (val_converted.isNull())
return 0;
if (data_type->isValueRepresentedByNumber())
return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory());
if (isStringOrFixedString(data_type))
return sketch.get_estimate(val.get<String>());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName());
}
void StatisticsCountMinSketch::update(const ColumnPtr & column)
{
for (size_t row = 0; row < column->size(); ++row)
{
if (column->isNullAt(row))
continue;
auto data = column->getDataAt(row);
sketch.update(data.data, data.size, 1);
}
}
void StatisticsCountMinSketch::serialize(WriteBuffer & buf)
{
Sketch::vector_bytes bytes = sketch.serialize();
writeIntBinary(static_cast<UInt64>(bytes.size()), buf);
buf.write(reinterpret_cast<const char *>(bytes.data()), bytes.size());
}
void StatisticsCountMinSketch::deserialize(ReadBuffer & buf)
{
UInt64 size;
readIntBinary(size, buf);
Sketch::vector_bytes bytes;
bytes.resize(size); /// To avoid 'container-overflow' in AddressSanitizer checking
buf.readStrict(reinterpret_cast<char *>(bytes.data()), size);
sketch = Sketch::deserialize(bytes.data(), size);
}
void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type))
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName());
}
StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
{
return std::make_shared<StatisticsCountMinSketch>(stat, data_type);
}
}
#endif
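A quick back-of-the-envelope check of the constants above (my own arithmetic, assuming the usual count-min sizing heuristics w ≈ e/ε, d ≈ ln(1/δ), and 8-byte UInt64 counters):

import math

eps = delta = 0.001
print(round(math.e / eps))          # 2718 buckets for a ~0.1% relative error
print(round(math.log(1 / delta)))   # 7 hash rows for ~99.9% confidence
print(7 * 2718 * 8)                 # 152208 bytes of counters, i.e. roughly 152 KB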

View File

@ -0,0 +1,39 @@
#pragma once
#include <Storages/Statistics/Statistics.h>
#include "config.h"
#if USE_DATASKETCHES
#include <count_min.hpp>
namespace DB
{
class StatisticsCountMinSketch : public IStatistics
{
public:
StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_);
Float64 estimateEqual(const Field & val) const override;
void update(const ColumnPtr & column) override;
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
private:
using Sketch = datasketches::count_min_sketch<UInt64>;
Sketch sketch;
DataTypePtr data_type;
};
void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr);
}
#endif

View File

@ -1,11 +1,13 @@
#include <Storages/Statistics/StatisticsTDigest.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_STATISTICS;
extern const int ILLEGAL_STATISTICS;
extern const int LOGICAL_ERROR;
}
StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
@ -16,12 +18,16 @@ StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
void StatisticsTDigest::update(const ColumnPtr & column)
{
size_t rows = column->size();
for (size_t row = 0; row < rows; ++row)
{
/// TODO: support more types.
Float64 value = column->getFloat64(row);
t_digest.add(value, 1);
Field field;
column->get(row, field);
if (field.isNull())
continue;
if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field))
t_digest.add(*field_as_float, 1);
}
}
@ -35,24 +41,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf)
t_digest.deserialize(buf);
}
Float64 StatisticsTDigest::estimateLess(Float64 val) const
Float64 StatisticsTDigest::estimateLess(const Field & val) const
{
return t_digest.getCountLessThan(val);
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
if (val_as_float)
return t_digest.getCountLessThan(*val_as_float);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
}
Float64 StatisticsTDigest::estimateEqual(Float64 val) const
Float64 StatisticsTDigest::estimateEqual(const Field & val) const
{
return t_digest.getCountEqual(val);
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
if (val_as_float)
return t_digest.getCountEqual(*val_as_float);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
}
void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
}
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
{
return std::make_shared<StatisticsTDigest>(stat);
}

View File

@ -16,14 +16,14 @@ public:
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
Float64 estimateLess(Float64 val) const override;
Float64 estimateEqual(Float64 val) const override;
Float64 estimateLess(const Field & val) const override;
Float64 estimateEqual(const Field & val) const override;
private:
QuantileTDigest<Float64> t_digest;
};
void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
}

View File

@ -1,6 +1,7 @@
#include <Storages/Statistics/StatisticsUniq.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
{
@ -51,14 +52,15 @@ UInt64 StatisticsUniq::estimateCardinality() const
return column->getUInt(0);
}
void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
}
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
{
return std::make_shared<StatisticsUniq>(stat, data_type);
}

View File

@ -27,7 +27,7 @@ private:
};
void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
}

View File

@ -1,6 +1,10 @@
#include <gtest/gtest.h>
#include <Storages/Statistics/StatisticsTDigest.h>
#include <Interpreters/convertFieldToType.h>
#include <DataTypes/DataTypeFactory.h>
using namespace DB;
TEST(Statistics, TDigestLessThan)
{
@ -39,6 +43,4 @@ TEST(Statistics, TDigestLessThan)
std::reverse(data.begin(), data.end());
test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001});
}

View File

@ -1,19 +1,14 @@
#include <Storages/StatisticsDescription.h>
#include <base/defines.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTStatisticsDeclaration.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/ParserCreateQuery.h>
#include <Poco/Logger.h>
#include <Storages/extractKeyExpressionList.h>
#include <Storages/ColumnsDescription.h>
#include <Common/logger_useful.h>
namespace DB
{
@ -54,7 +49,9 @@ static StatisticsType stringToStatisticsType(String type)
return StatisticsType::TDigest;
if (type == "uniq")
return StatisticsType::Uniq;
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type);
if (type == "count_min")
return StatisticsType::CountMinSketch;
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type);
}
String SingleStatisticsDescription::getTypeName() const
@ -65,8 +62,10 @@ String SingleStatisticsDescription::getTypeName() const
return "TDigest";
case StatisticsType::Uniq:
return "Uniq";
case StatisticsType::CountMinSketch:
return "count_min";
default:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type);
}
}
@ -99,10 +98,9 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
chassert(merging_column_type);
if (column_name.empty())
{
column_name = merging_column_name;
data_type = merging_column_type;
}
for (const auto & [stats_type, stats_desc]: other.types_to_desc)
{
@ -121,6 +119,7 @@ void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & oth
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name);
types_to_desc = other.types_to_desc;
data_type = other.data_type;
}
void ColumnStatisticsDescription::clear()
@ -159,6 +158,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
const auto & column = columns.getPhysical(physical_column_name);
stats.column_name = column.name;
stats.data_type = column.type;
stats.types_to_desc = statistics_types;
result.push_back(stats);
}

View File

@ -13,6 +13,7 @@ enum class StatisticsType : UInt8
{
TDigest = 0,
Uniq = 1,
CountMinSketch = 2,
Max = 63,
};

View File

@ -43,7 +43,6 @@
#include <Parsers/parseQuery.h>
#include <Parsers/IAST.h>
#include <Analyzer/Utils.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/TableNode.h>
@ -61,26 +60,20 @@
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
#include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/JoinedTables.h>
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/AddDefaultDatabaseVisitor.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/Context.h>
#include <Interpreters/createBlockSelector.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/getClusterName.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
#include <Interpreters/getHeaderForProcessingStage.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <TableFunctions/TableFunctionView.h>
#include <TableFunctions/TableFunctionFactory.h>
@ -90,7 +83,6 @@
#include <Processors/Executors/PushingPipelineExecutor.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
@ -496,7 +488,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
}
std::optional<QueryProcessingStage::Enum> optimized_stage;
if (settings.allow_experimental_analyzer)
if (query_info.query_tree)
optimized_stage = getOptimizedQueryProcessingStageAnalyzer(query_info, settings);
else
optimized_stage = getOptimizedQueryProcessingStage(query_info, settings);
@ -860,25 +852,21 @@ void StorageDistributed::read(
modified_query_info.query = queryNodeToDistributedSelectQuery(query_tree_distributed);
modified_query_info.query_tree = std::move(query_tree_distributed);
/// Return directly (with correct header) if no shard to query.
if (modified_query_info.getCluster()->getShardsInfo().empty())
return;
}
else
{
header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
}
if (!settings.allow_experimental_analyzer)
{
modified_query_info.query = ClusterProxy::rewriteSelectQuery(
local_context, modified_query_info.query,
remote_database, remote_table, remote_table_function_ptr);
}
/// Return directly (with correct header) if no shard to query.
if (modified_query_info.getCluster()->getShardsInfo().empty())
{
if (settings.allow_experimental_analyzer)
return;
Pipe pipe(std::make_shared<NullSource>(header));
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
read_from_pipe->setStepDescription("Read from NullSource (Distributed)");
@ -886,6 +874,7 @@ void StorageDistributed::read(
return;
}
}
const auto & snapshot_data = assert_cast<const SnapshotData &>(*storage_snapshot->data);
ClusterProxy::SelectStreamFactory select_stream_factory =

View File

@ -427,7 +427,9 @@ namespace
{
for (const auto & path : paths)
{
if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path))
auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path);
/// Use this format only if we have a schema reader for it.
if (format_from_path && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_path))
{
format = format_from_path;
break;
@ -716,7 +718,9 @@ namespace
/// If format is unknown we can try to determine it by the file name.
if (!format)
{
if (auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename))
auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename);
/// Use this format only if we have a schema reader for it.
if (format_from_file && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file))
format = format_from_file;
}

View File

@ -505,18 +505,18 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context
additional_info = fmt::format(" (TID: {}; TIDH: {})", current_tid, current_tid.getHash());
}
Int64 version;
{
std::lock_guard lock(currently_processing_in_background_mutex);
MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings());
version = increment.get();
Int64 version = increment.get();
entry.commit(version);
String mutation_id = entry.file_name;
if (txn)
txn->addMutation(shared_from_this(), mutation_id);
bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false);
{
std::lock_guard lock(currently_processing_in_background_mutex);
bool inserted = current_mutations_by_version.try_emplace(version, std::move(entry)).second;
if (!inserted)
{
@ -527,9 +527,9 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version);
}
}
LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info);
}
background_operations_assignee.trigger();
return version;
}

View File

@ -737,7 +737,9 @@ namespace
{
for (const auto & url : options)
{
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url))
auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url);
/// Use this format only if we have a schema reader for it.
if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
{
format = format_from_file_name;
break;

View File

@ -172,7 +172,7 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType
/// with subqueries it's possible that new analyzer will be enabled in ::read method
/// of underlying storage when all other parts of infra are not ready for it
/// (built with old analyzer).
context_copy->setSetting("allow_experimental_analyzer", Field{0});
context_copy->setSetting("allow_experimental_analyzer", false);
auto syntax_analyzer_result = TreeRewriter(context_copy).analyze(ast, columns);
ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context_copy);
auto dag = analyzer.getActionsDAG(false);

View File

@ -52,6 +52,7 @@ from helpers.client import QueryRuntimeException
import docker
from .client import Client
from .retry_decorator import retry
from .config_cluster import *
@ -2690,15 +2691,12 @@ class ClickHouseCluster:
images_pull_cmd = self.base_cmd + ["pull"]
# sometimes dockerhub/proxy can be flaky
for i in range(5):
try:
run_and_check(images_pull_cmd)
break
except Exception as ex:
if i == 4:
raise ex
logging.info("Got exception pulling images: %s", ex)
time.sleep(i * 3)
retry(
log_function=lambda retry, exception: logging.info(
"Got exception pulling images: %s", exception
),
)(run_and_check)(images_pull_cmd)
if self.with_zookeeper_secure and self.base_zookeeper_cmd:
logging.debug("Setup ZooKeeper Secure")
@ -2971,7 +2969,11 @@ class ClickHouseCluster:
"Trying to create Azurite instance by command %s",
" ".join(map(str, azurite_start_cmd)),
)
run_and_check(azurite_start_cmd)
retry(
log_function=lambda retry, exception: logging.info(
f"Azurite initialization failed with error: {exception}"
),
)(run_and_check)(azurite_start_cmd)
self.up_called = True
logging.info("Trying to connect to Azurite")
self.wait_azurite_to_start()

View File

@ -0,0 +1,36 @@
import time
import random
from typing import Type, List
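# Generic retry helper for flaky integration-test operations: it calls the wrapped
# function up to `retries` times, sleeping `delay` seconds (multiplied by `backoff`
# after each attempt, plus up to `jitter` seconds of random noise) between attempts,
# and re-raises the exception if it is not retriable or the attempts are exhausted.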
def retry(
retries: int = 5,
delay: float = 1,
backoff: float = 1.5,
jitter: float = 2,
log_function=lambda *args, **kwargs: None,
retriable_exceptions_list: List[Type[BaseException]] = [Exception],
):
def inner(func):
def wrapper(*args, **kwargs):
current_delay = delay
for retry in range(retries):
try:
func(*args, **kwargs)
break
except Exception as e:
should_retry = False
for retriable_exception in retriable_exceptions_list:
if isinstance(e, retriable_exception):
should_retry = True
break
if not should_retry or (retry == retries - 1):
raise e
log_function(retry=retry, exception=e)
sleep_time = current_delay + random.uniform(0, jitter)
time.sleep(sleep_time)
current_delay *= backoff
return wrapper
return inner
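A minimal usage sketch of the decorator above, mirroring the call style used for the image pulls and the Azurite start-up earlier in this diff; flaky_operation and its fail-twice-then-succeed behaviour are illustrative assumptions, and the import assumes the new helpers/retry_decorator.py module is on the path:
import logging

from retry_decorator import retry  # assumption: the new helpers module is importable

attempts = 0

def flaky_operation():
    # Illustrative flaky call: fails on the first two attempts, then succeeds.
    global attempts
    attempts += 1
    if attempts < 3:
        raise RuntimeError("transient failure")

# Same call pattern as used for `run_and_check(images_pull_cmd)` above.
retry(
    retries=5,
    delay=0.1,
    log_function=lambda retry, exception: logging.info(
        "Attempt %s failed: %s", retry, exception
    ),
)(flaky_operation)()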

View File

@ -1,7 +1,7 @@
<clickhouse>
<query_cache>
<max_entries>1</max_entries>
<max_entries>0</max_entries>
</query_cache>
</clickhouse>

View File

@ -94,54 +94,61 @@ CONFIG_DIR = os.path.join(SCRIPT_DIR, "configs")
def test_query_cache_size_is_runtime_configurable(start_cluster):
# the initial config specifies the maximum query cache size as 2, run 3 queries, expect 2 cache entries
node.query("SYSTEM DROP QUERY CACHE")
# The initial config allows at most two query cache entries but we don't mind
node.query("SELECT 1 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
time.sleep(2)
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
res = node.query(
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'",
)
assert res == "2\n"
# At this point, the query cache contains one entry and it is stale
# switch to a config with a maximum query cache size of 1
res = node.query(
"SELECT count(*) FROM system.query_cache",
)
assert res == "1\n"
# switch to a config with a maximum query cache size of _0_
node.copy_file_to_container(
os.path.join(CONFIG_DIR, "smaller_query_cache.xml"),
os.path.join(CONFIG_DIR, "empty_query_cache.xml"),
"/etc/clickhouse-server/config.d/default.xml",
)
node.query("SYSTEM RELOAD CONFIG")
# check that eviction worked as expected
time.sleep(2)
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
res = node.query(
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'",
)
assert (
res == "2\n"
) # "Why not 1?", you think. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert.
# Not a real issue, can be changed later, at least there's a test now.
# Also, you may also wonder "why query_cache_ttl = 1"? Reason is that TTLCachePolicy only removes *stale* entries. With the default TTL
# (60 sec), no entries would be removed at all. Again: not a real issue, can be changed later and there's at least a test now.
# check that the new query cache maximum size is respected when more queries run
node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
node.query("SELECT 5 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
time.sleep(2)
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
res = node.query(
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'",
"SELECT count(*) FROM system.query_cache",
)
assert res == "1\n"
# "Why not 0?", I hear you say. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert.
# Not a real issue, can be changed later, at least there's a test now.
# restore the original config
# The next SELECT will find a single stale entry which is one entry too much according to the new config.
# This triggers the eviction of all stale entries, in this case the 'SELECT 1' result.
# Then, it tries to insert the 'SELECT 2' result but it also cannot be added according to the config.
node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
res = node.query(
"SELECT count(*) FROM system.query_cache",
)
assert res == "0\n"
# The new maximum cache size is respected when more queries run
node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
res = node.query(
"SELECT count(*) FROM system.query_cache",
)
assert res == "0\n"
# Restore the original config
node.copy_file_to_container(
os.path.join(CONFIG_DIR, "default.xml"),
"/etc/clickhouse-server/config.d/default.xml",
)
node.query("SYSTEM RELOAD CONFIG")
# It is possible to insert entries again
node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
res = node.query(
"SELECT count(*) FROM system.query_cache",
)
assert res == "1\n"

View File

@ -7,7 +7,7 @@
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Asia/Istanbul\')', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 100000000);</query>

View File

@ -1,5 +1,5 @@
drop table if exists lc_dict_reading;
create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
create table lc_dict_reading (val UInt64, str LowCardinality(String), pat String) engine = MergeTree order by val SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 1000000;
select sum(toUInt64(str)), sum(toUInt64(pat)) from lc_dict_reading where val < 8129 or val > 8192 * 4;
drop table if exists lc_dict_reading;

View File

@ -1,6 +1,6 @@
set allow_suspicious_low_cardinality_types = 1;
drop table if exists lc_00688;
create table lc_00688 (str StringWithDictionary, val UInt8WithDictionary) engine = MergeTree order by tuple();
create table lc_00688 (str LowCardinality(String), val LowCardinality(UInt8)) engine = MergeTree order by tuple();
insert into lc_00688 values ('a', 1), ('b', 2);
select str, str in ('a', 'd') from lc_00688;
select val, val in (1, 3) from lc_00688;

View File

@ -1,5 +1,5 @@
drop table if exists lc_prewhere;
create table lc_prewhere (key UInt64, val UInt64, str StringWithDictionary, s String) engine = MergeTree order by key settings index_granularity = 8192;
create table lc_prewhere (key UInt64, val UInt64, str LowCardinality(String), s String) engine = MergeTree order by key settings index_granularity = 8192;
insert into lc_prewhere select number, if(number < 10 or number > 8192 * 9, 1, 0), toString(number) as s, s from system.numbers limit 100000;
select sum(toUInt64(str)), sum(toUInt64(s)) from lc_prewhere prewhere val == 1;
drop table if exists lc_prewhere;

View File

@ -8,8 +8,8 @@ select 'MergeTree';
drop table if exists lc_small_dict;
drop table if exists lc_big_dict;
create table lc_small_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
create table lc_big_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
create table lc_small_dict (str LowCardinality(String)) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
create table lc_big_dict (str LowCardinality(String)) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into lc_small_dict select toString(number % 1000) from system.numbers limit 1000000;
insert into lc_big_dict select toString(number) from system.numbers limit 1000000;

View File

@ -1,13 +1,7 @@
a
a
a
a
1
1
1
1
ab
ab
ab
ab
-

View File

@ -13,56 +13,32 @@ drop table if exists lc_null_fix_str_0;
drop table if exists lc_null_fix_str_1;
create table lc_str_0 (str LowCardinality(String)) engine = Memory;
create table lc_str_1 (str StringWithDictionary) engine = Memory;
create table lc_null_str_0 (str LowCardinality(Nullable(String))) engine = Memory;
create table lc_null_str_1 (str NullableWithDictionary(String)) engine = Memory;
create table lc_int8_0 (val LowCardinality(Int8)) engine = Memory;
create table lc_int8_1 (val Int8WithDictionary) engine = Memory;
create table lc_null_int8_0 (val LowCardinality(Nullable(Int8))) engine = Memory;
create table lc_null_int8_1 (val NullableWithDictionary(Int8)) engine = Memory;
create table lc_fix_str_0 (str LowCardinality(FixedString(2))) engine = Memory;
create table lc_fix_str_1 (str FixedStringWithDictionary(2)) engine = Memory;
create table lc_null_fix_str_0 (str LowCardinality(Nullable(FixedString(2)))) engine = Memory;
create table lc_null_fix_str_1 (str NullableWithDictionary(FixedString(2))) engine = Memory;
insert into lc_str_0 select 'a';
insert into lc_str_1 select 'a';
insert into lc_null_str_0 select 'a';
insert into lc_null_str_1 select 'a';
insert into lc_int8_0 select 1;
insert into lc_int8_1 select 1;
insert into lc_null_int8_0 select 1;
insert into lc_null_int8_1 select 1;
insert into lc_fix_str_0 select 'ab';
insert into lc_fix_str_1 select 'ab';
insert into lc_null_fix_str_0 select 'ab';
insert into lc_null_fix_str_1 select 'ab';
select str from lc_str_0;
select str from lc_str_1;
select str from lc_null_str_0;
select str from lc_null_str_1;
select val from lc_int8_0;
select val from lc_int8_1;
select val from lc_null_int8_0;
select val from lc_null_int8_1;
select str from lc_fix_str_0;
select str from lc_fix_str_1;
select str from lc_null_fix_str_0;
select str from lc_null_fix_str_1;
drop table if exists lc_str_0;
drop table if exists lc_str_1;
drop table if exists lc_null_str_0;
drop table if exists lc_null_str_1;
drop table if exists lc_int8_0;
drop table if exists lc_int8_1;
drop table if exists lc_null_int8_0;
drop table if exists lc_null_int8_1;
drop table if exists lc_fix_str_0;
drop table if exists lc_fix_str_1;
drop table if exists lc_null_fix_str_0;
drop table if exists lc_null_fix_str_1;
select '-';
SELECT toLowCardinality('a') AS s, toTypeName(s), toTypeName(length(s)) from system.one;
@ -73,7 +49,7 @@ select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) a
select '-';
drop table if exists lc_str_uuid;
create table lc_str_uuid(str1 String, str2 LowCardinality(String), str3 StringWithDictionary) ENGINE=Memory;
create table lc_str_uuid(str1 String, str2 LowCardinality(String), str3 LowCardinality(String)) ENGINE=Memory;
select toUUID(str1), toUUID(str2), toUUID(str3) from lc_str_uuid;
select toUUID(str1, '', NULL), toUUID(str2, '', NULL), toUUID(str3, '', NULL) from lc_str_uuid;
insert into lc_str_uuid values ('61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0');

View File

@ -1,5 +1,5 @@
drop table if exists tab_00717;
create table tab_00717 (a String, b StringWithDictionary) engine = MergeTree order by a;
create table tab_00717 (a String, b LowCardinality(String)) engine = MergeTree order by a;
insert into tab_00717 values ('a_1', 'b_1'), ('a_2', 'b_2');
select count() from tab_00717;
select a from tab_00717 group by a order by a;

View File

@ -7,7 +7,7 @@ alter table tab_00718 modify column b UInt32;
select *, toTypeName(b) from tab_00718;
alter table tab_00718 modify column b LowCardinality(UInt32);
select *, toTypeName(b) from tab_00718;
alter table tab_00718 modify column b StringWithDictionary;
alter table tab_00718 modify column b LowCardinality(String);
select *, toTypeName(b) from tab_00718;
alter table tab_00718 modify column b LowCardinality(UInt32);
select *, toTypeName(b) from tab_00718;

View File

@ -1,7 +1,7 @@
drop table if exists lc_00752;
drop table if exists lc_mv_00752;
create table lc_00752 (str StringWithDictionary) engine = MergeTree order by tuple();
create table lc_00752 (str LowCardinality(String)) engine = MergeTree order by tuple();
insert into lc_00752 values ('a'), ('bbb'), ('ab'), ('accccc'), ('baasddas'), ('bcde');
@ -12,4 +12,3 @@ select * from lc_mv_00752 order by letter;
drop table if exists lc_00752;
drop table if exists lc_mv_00752;

View File

@ -11,12 +11,17 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS mem"
$CLICKHOUSE_CLIENT -q "CREATE TABLE mem (x UInt64) engine = Memory"
function f {
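# Reader: repeatedly runs a two-thread SELECT over the Memory table until the time limit expires.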
local TIMELIMIT=$((SECONDS+$1))
for _ in $(seq 1 300); do
$CLICKHOUSE_CLIENT -q "SELECT count() FROM (SELECT * FROM mem SETTINGS max_threads=2) FORMAT Null;"
if [ $SECONDS -ge "$TIMELIMIT" ]; then
break
fi
done
}
function g {
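# Writer: repeatedly inserts into and truncates the Memory table until the time limit expires.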
local TIMELIMIT=$((SECONDS+$1))
for _ in $(seq 1 100); do
$CLICKHOUSE_CLIENT -n -q "
INSERT INTO mem SELECT number FROM numbers(1000000);
@ -30,14 +35,18 @@ function g {
INSERT INTO mem VALUES (1);
TRUNCATE TABLE mem;
"
if [ $SECONDS -ge "$TIMELIMIT" ]; then
break
fi
done
}
export -f f;
export -f g;
timeout 20 bash -c f > /dev/null &
timeout 20 bash -c g > /dev/null &
TIMEOUT=20
f $TIMEOUT &
g $TIMEOUT &
wait
$CLICKHOUSE_CLIENT -q "DROP TABLE mem"

View File

@ -28,7 +28,7 @@ ORDER BY tuple();
INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(100000);
create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val;
create table lc_dict_reading (val UInt64, str LowCardinality(String), pat String) engine = MergeTree order by val;
insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 100000;
"""

View File

@ -0,0 +1,13 @@
-- Tags: no-parallel
-- Tag no-parallel: Messes with internal cache
SYSTEM DROP QUERY CACHE;
-- Create an entry in the query cache
SELECT 1 SETTINGS use_query_cache = true;
-- Asynchronous metrics must know about the entry
SYSTEM RELOAD ASYNCHRONOUS METRICS;
SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries';
SYSTEM DROP QUERY CACHE;

View File

@ -3,7 +3,11 @@ all_1_1_0
all_2_2_0
all_3_3_0
all_4_4_0
40000
5000 all_1_1_0_9
5000 all_2_2_0_9
5000 all_3_3_0_9
5000 all_4_4_0_9
mutation_version has_parts_for_which_set_was_built has_parts_that_shared_set
8 1 1
9 1 1

View File

@ -18,12 +18,35 @@ SELECT name FROM system.parts WHERE database=currentDatabase() AND table = '0258
-- Start multiple mutations simultaneously
SYSTEM STOP MERGES 02581_trips;
ALTER TABLE 02581_trips UPDATE description='5' WHERE id IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0;
ALTER TABLE 02581_trips UPDATE description='6' WHERE id IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0;
ALTER TABLE 02581_trips DELETE WHERE id IN (SELECT (number*10 + 7)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0;
ALTER TABLE 02581_trips UPDATE description='8' WHERE id IN (SELECT (number*10 + 8)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0;
ALTER TABLE 02581_trips UPDATE description='5' WHERE id IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0;
ALTER TABLE 02581_trips UPDATE description='6' WHERE id IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0;
ALTER TABLE 02581_trips DELETE WHERE id IN (SELECT (number*10 + 7)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0;
ALTER TABLE 02581_trips UPDATE description='8' WHERE id IN (SELECT (number*10 + 8)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0;
SYSTEM START MERGES 02581_trips;
DELETE FROM 02581_trips WHERE id IN (SELECT (number*10 + 9)::UInt32 FROM numbers(200000000));
SELECT count(), _part from 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part;
-- Wait for mutations to finish
SELECT count() FROM 02581_trips SETTINGS select_sequential_consistency = 1;
DELETE FROM 02581_trips WHERE id IN (SELECT (number*10 + 9)::UInt32 FROM numbers(10000000)) SETTINGS lightweight_deletes_sync = 2;
SELECT count(), _part from 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part SETTINGS select_sequential_consistency=1;
SYSTEM FLUSH LOGS;
-- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' )
-- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' )
WITH (
SELECT uuid
FROM system.tables
WHERE (database = currentDatabase()) AND (name = '02581_trips')
) AS table_uuid
SELECT
CAST(splitByChar('_', query_id)[5], 'UInt64') AS mutation_version, -- '5521485f-8a40-4aba-87a2-00342c369563::all_3_3_0_6'
sum(message LIKE 'Created Set with % entries%') >= 1 AS has_parts_for_which_set_was_built,
sum(message LIKE 'Got set from cache%') >= 1 AS has_parts_that_shared_set
FROM system.text_log
WHERE
query_id LIKE concat(CAST(table_uuid, 'String'), '::all\\_%')
AND (event_date >= yesterday())
AND (message LIKE 'Created Set with % entries%' OR message LIKE 'Got set from cache%')
GROUP BY mutation_version ORDER BY mutation_version FORMAT TSVWithNames;
DROP TABLE 02581_trips;

View File

@ -10,3 +10,11 @@ all_4_4_0
20000
16000
12000
mutation_version has_parts_for_which_set_was_built has_parts_that_shared_set
5 1 1
6 1 1
7 1 1
8 1 1
9 1 1
10 1 1
11 1 1

View File

@ -18,42 +18,63 @@ SELECT count() from 02581_trips WHERE description = '';
SELECT name FROM system.parts WHERE database=currentDatabase() AND table = '02581_trips' AND active ORDER BY name;
-- Run mutation with `id` a 'IN big subquery'
ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2;
ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2;
SELECT count() from 02581_trips WHERE description = '';
ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000;
ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000;
SELECT count() from 02581_trips WHERE description = '';
-- Run mutation with func(`id`) IN big subquery
ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2;
ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2;
SELECT count() from 02581_trips WHERE description = '';
-- Run mutation with non-PK `id2` IN big subquery
ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2;
--SELECT count(), _part FROM 02581_trips WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) GROUP BY _part ORDER BY _part;
--EXPLAIN SELECT (), _part FROM 02581_trips WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000));
ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2;
SELECT count() from 02581_trips WHERE description = '';
-- Run mutation with PK and non-PK IN big subquery
ALTER TABLE 02581_trips UPDATE description='c'
WHERE
(id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(200000000))) OR
(id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(200000000)))
(id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) OR
(id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000)))
SETTINGS mutations_sync=2;
SELECT count() from 02581_trips WHERE description = '';
-- Run mutation with PK and non-PK IN big subquery
ALTER TABLE 02581_trips UPDATE description='c'
WHERE
(id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000))) OR
(id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000)))
(id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) OR
(id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000)))
SETTINGS mutations_sync=2;
SELECT count() from 02581_trips WHERE description = '';
-- Run mutation with PK and non-PK IN big subquery
ALTER TABLE 02581_trips UPDATE description='c'
WHERE
(id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000))) OR
((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000)))
(id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) OR
((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000)))
SETTINGS mutations_sync=2;
SELECT count() from 02581_trips WHERE description = '';
SYSTEM FLUSH LOGS;
-- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' )
-- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' )
WITH (
SELECT uuid
FROM system.tables
WHERE (database = currentDatabase()) AND (name = '02581_trips')
) AS table_uuid
SELECT
CAST(splitByChar('_', query_id)[5], 'UInt64') AS mutation_version, -- '5521485f-8a40-4aba-87a2-00342c369563::all_3_3_0_6'
sum(message LIKE 'Created Set with % entries%') >= 1 AS has_parts_for_which_set_was_built,
sum(message LIKE 'Got set from cache%') >= 1 AS has_parts_that_shared_set
FROM system.text_log
WHERE
query_id LIKE concat(CAST(table_uuid, 'String'), '::all\\_%')
AND (event_date >= yesterday())
AND (message LIKE 'Created Set with % entries%' OR message LIKE 'Got set from cache%')
GROUP BY mutation_version ORDER BY mutation_version FORMAT TSVWithNames;
DROP TABLE 02581_trips;

View File

@ -0,0 +1,14 @@
CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
Test statistics count_min:
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed)
Test statistics multi-types:
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed)
Test LowCardinality and Nullable data type:
tab2

View File

@ -0,0 +1,70 @@
-- Tags: no-fasttest
DROP TABLE IF EXISTS tab SYNC;
SET allow_experimental_statistics = 1;
SET allow_statistics_optimize = 1;
SET allow_suspicious_low_cardinality_types=1;
SET mutations_sync = 2;
CREATE TABLE tab
(
a String,
b UInt64,
c Int64,
pk String,
) Engine = MergeTree() ORDER BY pk
SETTINGS min_bytes_for_wide_part = 0;
SHOW CREATE TABLE tab;
INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000;
SELECT 'Test statistics count_min:';
ALTER TABLE tab ADD STATISTICS a TYPE count_min;
ALTER TABLE tab ADD STATISTICS b TYPE count_min;
ALTER TABLE tab ADD STATISTICS c TYPE count_min;
ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
ALTER TABLE tab DROP STATISTICS a, b, c;
SELECT 'Test statistics multi-types:';
ALTER TABLE tab ADD STATISTICS a TYPE count_min;
ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest;
ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest;
ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/)
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/)
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
ALTER TABLE tab DROP STATISTICS a, b, c;
DROP TABLE IF EXISTS tab SYNC;
SELECT 'Test LowCardinality and Nullable data type:';
DROP TABLE IF EXISTS tab2 SYNC;
SET allow_suspicious_low_cardinality_types=1;
CREATE TABLE tab2
(
a LowCardinality(Int64) STATISTICS(count_min),
b Nullable(Int64) STATISTICS(count_min),
c LowCardinality(Nullable(Int64)) STATISTICS(count_min),
pk String,
) Engine = MergeTree() ORDER BY pk;
select name from system.tables where name = 'tab2' and database = currentDatabase();
DROP TABLE IF EXISTS tab2 SYNC;

View File

@ -70,3 +70,4 @@ SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000;
DROP TABLE IF EXISTS t3;

View File

@ -2,8 +2,6 @@
# Tags: atomic-database
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
@ -134,7 +132,7 @@ while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshe
do
sleep 0.1
done
sleep 1
$CLICKHOUSE_CLIENT -nq "
select '<14: waiting for next cycle>', view, status, remaining_dependencies, next_refresh_time from refreshes;
truncate src;
@ -172,13 +170,13 @@ $CLICKHOUSE_CLIENT -nq "
drop table b;
create materialized view c refresh every 1 second (x Int64) engine Memory empty as select * from src;
drop table src;"
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Exception' ]
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'c' -- $LINENO" | xargs`" != 'Exception' ]
do
sleep 0.1
done
# Check exception, create src, expect successful refresh.
$CLICKHOUSE_CLIENT -nq "
select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' from refreshes;
select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' ? '1' : exception from refreshes where view = 'c';
create table src (x Int64) engine Memory as select 1;
system refresh view c;"
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ]
@ -224,22 +222,27 @@ done
$CLICKHOUSE_CLIENT -nq "
rename table e to f;
select '<24: rename during refresh>', * from f;
select '<25: rename during refresh>', view, status from refreshes;
select '<25: rename during refresh>', view, status from refreshes where view = 'f';
alter table f modify refresh after 10 year;"
sleep 2 # make it likely that at least one row was processed
# Cancel.
$CLICKHOUSE_CLIENT -nq "
system cancel view f;"
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Cancelled' ]
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Cancelled' ]
do
sleep 0.1
done
while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" = 'Running' ]
do
sleep 0.1
done
# Check that another refresh doesn't immediately start after the cancelled one.
sleep 1
$CLICKHOUSE_CLIENT -nq "
select '<27: cancelled>', view, status from refreshes;
select '<27: cancelled>', view, status from refreshes where view = 'f';
system refresh view f;"
while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Running' ]
while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Running' ]
do
sleep 0.1
done

View File

@ -0,0 +1,10 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
touch $CLICKHOUSE_TEST_UNIQUE_NAME.xml
$CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.*')" 2>&1 | grep -c "CANNOT_DETECT_FORMAT"
rm $CLICKHOUSE_TEST_UNIQUE_NAME.xml

View File

@ -1,14 +1,4 @@
#!/usr/bin/env bash
# Tags: long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1"
function test()
{
@ -43,20 +33,3 @@ function test()
$CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null"
$CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null"
}
$CH_CLIENT -q "drop table if exists test;"
echo "Memory"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory"
test
$CH_CLIENT -q "drop table test;"
echo "MergeTree compact"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;"
test
$CH_CLIENT -q "drop table test;"
echo "MergeTree wide"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;"
test
$CH_CLIENT -q "drop table test;"

View File

@ -1,57 +0,0 @@
Memory
test
Array(Array(Dynamic))
Array(Variant(String, UInt64))
None
String
UInt64
200000
200000
200000
200000
0
0
200000
200000
100000
100000
200000
0
MergeTree compact
test
Array(Array(Dynamic))
Array(Variant(String, UInt64))
None
String
UInt64
200000
200000
200000
200000
0
0
200000
200000
100000
100000
200000
0
MergeTree wide
test
Array(Array(Dynamic))
Array(Variant(String, UInt64))
None
String
UInt64
200000
200000
200000
200000
0
0
200000
200000
100000
100000
200000
0

View File

@ -0,0 +1,19 @@
Memory
test
Array(Array(Dynamic))
Array(Variant(String, UInt64))
None
String
UInt64
200000
200000
200000
200000
0
0
200000
200000
100000
100000
200000
0

View File

@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Tags: long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# shellcheck source=./03036_dynamic_read_subcolumns.lib
. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1"
$CH_CLIENT -q "drop table if exists test;"
echo "Memory"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory"
test
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,19 @@
MergeTree compact
test
Array(Array(Dynamic))
Array(Variant(String, UInt64))
None
String
UInt64
200000
200000
200000
200000
0
0
200000
200000
100000
100000
200000
0

View File

@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Tags: long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# shellcheck source=./03036_dynamic_read_subcolumns.lib
. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1"
$CH_CLIENT -q "drop table if exists test;"
echo "MergeTree compact"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;"
test
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,19 @@
MergeTree wide
test
Array(Array(Dynamic))
Array(Variant(String, UInt64))
None
String
UInt64
200000
200000
200000
200000
0
0
200000
200000
100000
100000
200000
0

View File

@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Tags: long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# shellcheck source=./03036_dynamic_read_subcolumns.lib
. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1"
$CH_CLIENT -q "drop table if exists test;"
echo "MergeTree wide"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;"
test
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,17 @@
Array(Array(Dynamic))
Array(Variant(String, UInt64))
None
String
UInt64
200000
200000
200000
200000
0
0
200000
200000
100000
100000
200000
0

View File

@ -0,0 +1,40 @@
-- Tags: long
set allow_experimental_variant_type = 1;
set use_variant_as_common_type = 1;
set allow_experimental_dynamic_type = 1;
drop table if exists test;
create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;
insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000;
insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000;
insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000;
insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000;
insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
select distinct dynamicType(d) as type from test order by type;
select count() from test where dynamicType(d) == 'UInt64';
select count() from test where d.UInt64 is not NULL;
select count() from test where dynamicType(d) == 'String';
select count() from test where d.String is not NULL;
select count() from test where dynamicType(d) == 'Date';
select count() from test where d.Date is not NULL;
select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))';
select count() from test where not empty(d.`Array(Variant(String, UInt64))`);
select count() from test where dynamicType(d) == 'Array(Array(Dynamic))';
select count() from test where not empty(d.`Array(Array(Dynamic))`);
select count() from test where d is NULL;
select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String);
select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
select d.Int8, d.Date, d.`Array(String)` from test format Null;
select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null;
select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null;
drop table test;

View File

@ -0,0 +1,17 @@
Array(Array(Dynamic))
Array(Variant(String, UInt64))
None
String
UInt64
200000
200000
200000
200000
0
0
200000
200000
100000
100000
200000
0

View File

@ -0,0 +1,40 @@
-- Tags: long
set allow_experimental_variant_type = 1;
set use_variant_as_common_type = 1;
set allow_experimental_dynamic_type = 1;
drop table if exists test;
create table test (id UInt64, d Dynamic) engine=Memory;
insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000;
insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000;
insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000;
insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000;
insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
select distinct dynamicType(d) as type from test order by type;
select count() from test where dynamicType(d) == 'UInt64';
select count() from test where d.UInt64 is not NULL;
select count() from test where dynamicType(d) == 'String';
select count() from test where d.String is not NULL;
select count() from test where dynamicType(d) == 'Date';
select count() from test where d.Date is not NULL;
select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))';
select count() from test where not empty(d.`Array(Variant(String, UInt64))`);
select count() from test where dynamicType(d) == 'Array(Array(Dynamic))';
select count() from test where not empty(d.`Array(Array(Dynamic))`);
select count() from test where d is NULL;
select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String);
select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
select d.Int8, d.Date, d.`Array(String)` from test format Null;
select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null;
select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null;
drop table test;

View File

@ -0,0 +1,17 @@
Array(Array(Dynamic))
Array(Variant(String, UInt64))
None
String
UInt64
200000
200000
200000
200000
0
0
200000
200000
100000
100000
200000
0

View File

@ -0,0 +1,40 @@
-- Tags: long
set allow_experimental_variant_type = 1;
set use_variant_as_common_type = 1;
set allow_experimental_dynamic_type = 1;
drop table if exists test;
create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;
insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000;
insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000;
insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000;
insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000;
insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
select distinct dynamicType(d) as type from test order by type;
select count() from test where dynamicType(d) == 'UInt64';
select count() from test where d.UInt64 is not NULL;
select count() from test where dynamicType(d) == 'String';
select count() from test where d.String is not NULL;
select count() from test where dynamicType(d) == 'Date';
select count() from test where d.Date is not NULL;
select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))';
select count() from test where not empty(d.`Array(Variant(String, UInt64))`);
select count() from test where dynamicType(d) == 'Array(Array(Dynamic))';
select count() from test where not empty(d.`Array(Array(Dynamic))`);
select count() from test where d is NULL;
select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String);
select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
select d.Int8, d.Date, d.`Array(String)` from test format Null;
select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null;
select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null;
drop table test;

View File

@ -1,60 +0,0 @@
MergeTree compact
test
50000 DateTime
60000 Date
70000 Array(UInt16)
80000 String
100000 None
100000 UInt64
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
200000 Map(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
10000 Tuple(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
270000 String
MergeTree wide
test
50000 DateTime
60000 Date
70000 Array(UInt16)
80000 String
100000 None
100000 UInt64
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
200000 Map(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
10000 Tuple(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
270000 String

View File

@ -1,52 +0,0 @@
#!/usr/bin/env bash
# Tags: long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1"
function test()
{
echo "test"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, number from numbers(100000)"
$CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)"
$CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)"
$CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)"
$CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)"
$CH_CLIENT -q "insert into test select number, NULL from numbers(100000)"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
}
$CH_CLIENT -q "drop table if exists test;"
echo "MergeTree compact"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;"
test
$CH_CLIENT -q "drop table test;"
echo "MergeTree wide"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;"
test
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,28 @@
50000 DateTime
60000 Date
70000 Array(UInt16)
80000 String
100000 None
100000 UInt64
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
200000 Map(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
10000 Tuple(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
270000 String

View File

@ -0,0 +1,33 @@
-- Tags: long
set allow_experimental_dynamic_type=1;
drop table if exists test;
create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;
system stop merges test;
insert into test select number, number from numbers(100000);
insert into test select number, 'str_' || toString(number) from numbers(80000);
insert into test select number, range(number % 10 + 1) from numbers(70000);
insert into test select number, toDate(number) from numbers(60000);
insert into test select number, toDateTime(number) from numbers(50000);
insert into test select number, NULL from numbers(100000);
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system start merges test; optimize table test final;
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system stop merges test;
insert into test select number, map(number, number) from numbers(200000);
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system start merges test;
optimize table test final;
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system stop merges test;
insert into test select number, tuple(number, number) from numbers(10000);
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system start merges test;
optimize table test final;
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
drop table test;

View File

@ -0,0 +1,28 @@
50000 DateTime
60000 Date
70000 Array(UInt16)
80000 String
100000 None
100000 UInt64
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
200000 Map(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
10000 Tuple(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
270000 String

View File

@ -0,0 +1,33 @@
-- Tags: long
set allow_experimental_dynamic_type=1;
drop table if exists test;
create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;
system stop merges test;
insert into test select number, number from numbers(100000);
insert into test select number, 'str_' || toString(number) from numbers(80000);
insert into test select number, range(number % 10 + 1) from numbers(70000);
insert into test select number, toDate(number) from numbers(60000);
insert into test select number, toDateTime(number) from numbers(50000);
insert into test select number, NULL from numbers(100000);
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system start merges test; optimize table test final;
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system stop merges test;
insert into test select number, map(number, number) from numbers(200000);
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system start merges test;
optimize table test final;
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system stop merges test;
insert into test select number, tuple(number, number) from numbers(10000);
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
system start merges test;
optimize table test final;
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
drop table test;

View File

@ -1,60 +0,0 @@
MergeTree compact
test
50000 DateTime
60000 Date
70000 Array(UInt16)
80000 String
100000 None
100000 UInt64
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
200000 Map(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
10000 Tuple(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
270000 String
MergeTree wide
test
50000 DateTime
60000 Date
70000 Array(UInt16)
80000 String
100000 None
100000 UInt64
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
200000 Map(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
10000 Tuple(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
270000 String

View File

@ -1,51 +0,0 @@
#!/usr/bin/env bash
# Tags: long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1"
function test()
{
echo "test"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, number from numbers(100000)"
$CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)"
$CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)"
$CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)"
$CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)"
$CH_CLIENT -q "insert into test select number, NULL from numbers(100000)"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
}
$CH_CLIENT -q "drop table if exists test;"
echo "MergeTree compact"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;"
test
$CH_CLIENT -q "drop table test;"
echo "MergeTree wide"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;"
test
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,28 @@
50000 DateTime
60000 Date
70000 Array(UInt16)
80000 String
100000 None
100000 UInt64
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
70000 Array(UInt16)
100000 None
100000 UInt64
190000 String
200000 Map(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
10000 Tuple(UInt64, UInt64)
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
260000 String
100000 None
100000 UInt64
200000 Map(UInt64, UInt64)
270000 String

Some files were not shown because too many files have changed in this diff