Merge remote-tracking branch 'origin' into improve-integration-tests-3

commit 5f5eb0a3f2

.github/workflows/backport_branches.yml (vendored, 2 changed lines)
@@ -269,7 +269,7 @@ jobs:
      - name: Check Workflow results
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
-         cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+         cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          python3 ./tests/ci/ci_buddy.py --check-wf-status
.github/workflows/master.yml (vendored, 2 changed lines)
@@ -135,7 +135,7 @@ jobs:
      - name: Check Workflow results
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
-         cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+         cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          python3 ./tests/ci/ci_buddy.py --check-wf-status
.github/workflows/merge_queue.yml (vendored, 2 changed lines)
@@ -108,7 +108,7 @@ jobs:
      - name: Check Workflow results
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
-         cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+         cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          python3 ./tests/ci/ci_buddy.py --check-wf-status
.github/workflows/nightly.yml (vendored, 2 changed lines)
@@ -54,7 +54,7 @@ jobs:
      - name: Check Workflow results
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
-         cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+         cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          python3 ./tests/ci/ci_buddy.py --check-wf-status
.github/workflows/pull_request.yml (vendored, 7 changed lines)
@@ -152,8 +152,9 @@ jobs:

  CheckReadyForMerge:
    if: ${{ !cancelled() }}
-   # Test_2 or Test_3 must not have jobs required for Mergeable check
-   needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1]
+   # Test_2 or Test_3 do not have the jobs required for Mergeable check,
+   # however, set them as "needs" to get all checks results before the automatic merge occurs.
+   needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
    runs-on: [self-hosted, style-checker-aarch64]
    steps:
      - name: Check out repository code
@@ -168,7 +169,7 @@ jobs:
      - name: Check Workflow results
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
-         cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+         cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
          python3 ./tests/ci/ci_buddy.py --check-wf-status
.github/workflows/release_branches.yml (vendored, 2 changed lines)
@@ -489,7 +489,7 @@ jobs:
      - name: Check Workflow results
        run: |
          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
-         cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+         cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
          ${{ toJson(needs) }}
          EOF
@@ -9,6 +9,7 @@ set(DATASKETCHES_LIBRARY theta)
add_library(_datasketches INTERFACE)
target_include_directories(_datasketches SYSTEM BEFORE INTERFACE
    "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include"
+   "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/count/include"
    "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include")

add_library(ch_contrib::datasketches ALIAS _datasketches)
@@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

RUN apt-get update --yes \
-    && env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends \
+    && env DEBIAN_FRONTEND=noninteractive apt-get install wget git python3 default-jdk maven --yes --no-install-recommends \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
@@ -999,6 +999,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti

    [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation of how many distinct values a column contains.

+- `count_min`
+
+    [Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column.

## Column-level Settings {#column-level-settings}

Certain MergeTree settings can be overridden at column level:
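As a brief aside to make the `count_min` entry above concrete: the snippet below is a hedged, standalone sketch (not part of this commit) that uses the Apache DataSketches `count_min_sketch` wired in by this commit, with the same shape it uses later in `StatisticsCountMinSketch.cpp` (7 hash functions, 2718 buckets); it assumes the DataSketches headers are on the include path.

    #include <count_min.hpp>

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
        /// Same parameters as StatisticsCountMinSketch in this commit: 7 hashes, 2718 buckets.
        datasketches::count_min_sketch<uint64_t> sketch(7, 2718);

        const std::vector<std::string> values{"alpha", "beta", "alpha", "gamma", "alpha"};
        for (const auto & value : values)
            sketch.update(value.data(), value.size(), 1);

        /// Approximate frequency of "alpha"; exact here because the input is tiny.
        std::cout << sketch.get_estimate(std::string("alpha")) << '\n'; /// prints 3
    }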
@@ -55,7 +55,7 @@ CMPLNT_FR_TM Nullable(String)
```

:::tip
-Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routinely used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](/docs/en/integrations/data-ingestion/data-formats/json.md#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
+Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routinely used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](/en/integrations/data-formats/json/inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
you can get a better idea of the content.

Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled.
@@ -7,7 +7,7 @@ keywords: [object, data type]

# Object Data Type (deprecated)

-**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
+**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-formats/json/overview) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).

<hr />
@@ -49,7 +49,7 @@ enum class QueryTreeNodeType : uint8_t
/// Convert query tree node type to string
const char * toString(QueryTreeNodeType type);

-/** Query tree is semantical representation of query.
+/** Query tree is a semantic representation of query.
  * Query tree node represent node in query tree.
  * IQueryTreeNode is base class for all query tree nodes.
  *
@@ -543,7 +543,7 @@ if (TARGET ch_contrib::libpqxx)
endif()

if (TARGET ch_contrib::datasketches)
-    target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches)
+    dbms_target_link_libraries(PUBLIC ch_contrib::datasketches)
endif ()

target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)
@@ -33,7 +33,7 @@ size_t toMilliseconds(auto duration)
    return std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
}

-const auto epsilon = 500us;
+const auto epsilon = 1ms;

class ResolvePoolMock : public DB::HostResolver
{
@@ -358,53 +358,59 @@ void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses

TEST_F(ResolvePoolTest, BannedForConsiquenceFail)
{
-    auto history = 5ms;
+    auto history = 10ms;
    auto resolver = make_resolver(toMilliseconds(history));

    auto failed_addr = resolver->resolve();
    ASSERT_TRUE(addresses.contains(*failed_addr));

-    auto start_at = now();
    failed_addr.setFail();
+    auto start_at = now();

    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
    check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);

    sleep_until(start_at + history + epsilon);
+    start_at = now();

    resolver->update();
    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
    ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));

    failed_addr.setFail();
    start_at = now();

    check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);

    sleep_until(start_at + history + epsilon);
+    start_at = now();

    resolver->update();

+    // too much time has passed
+    if (now() > start_at + 2*history - epsilon)
+        return;

    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));

    // ip still banned adter history_ms + update, because it was his second consiquent fail
-    check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
+    check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + 2*history - epsilon);
}

TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
{
-    auto history = 5ms;
+    auto history = 10ms;
    auto resolver = make_resolver(toMilliseconds(history));

    auto failed_addr = resolver->resolve();
    ASSERT_TRUE(addresses.contains(*failed_addr));

-    auto start_at = now();
    failed_addr.setFail();
    failed_addr.setFail();
    failed_addr.setFail();
+    auto start_at = now();

    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
@@ -413,6 +419,7 @@ TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
    sleep_until(start_at + history + epsilon);

    resolver->update();

+    // ip is cleared after just 1 history_ms interval.
    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
    ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));
@@ -383,7 +383,10 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
        LockMemoryExceptionInThread::removeUniqueLock();
    };

-    asio_opts.thread_pool_size_ = getNumberOfPhysicalCPUCores();
+    /// At least 16 threads for network communication in asio.
+    /// asio is async framework, so even with 1 thread it should be ok, but
+    /// still as safeguard it's better to have some redundant capacity here
+    asio_opts.thread_pool_size_ = std::max(16U, getNumberOfPhysicalCPUCores());

    if (state_manager->isSecure())
    {
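A tiny, hedged illustration of the new sizing rule (the helper below is a stand-in for ClickHouse's real `getNumberOfPhysicalCPUCores`, hard-coded here just to show the clamping):

    #include <algorithm>
    #include <iostream>

    /// Stand-in for the real helper; pretend this is a small 4-core machine.
    unsigned int getNumberOfPhysicalCPUCores() { return 4; }

    int main()
    {
        /// Never fewer than 16 asio worker threads; larger machines get one thread per physical core.
        unsigned int thread_pool_size = std::max(16U, getNumberOfPhysicalCPUCores());
        std::cout << thread_pool_size << '\n'; /// prints 16 for the 4-core stand-in, 64 on a 64-core box
    }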
@@ -125,23 +125,6 @@ DataTypePtr DataTypeFactory::getImpl(const String & family_name_param, const AST
{
    String family_name = getAliasToOrName(family_name_param);

-    if (endsWith(family_name, "WithDictionary"))
-    {
-        ASTPtr low_cardinality_params = std::make_shared<ASTExpressionList>();
-        String param_name = family_name.substr(0, family_name.size() - strlen("WithDictionary"));
-        if (parameters)
-        {
-            auto func = std::make_shared<ASTFunction>();
-            func->name = param_name;
-            func->arguments = parameters;
-            low_cardinality_params->children.push_back(func);
-        }
-        else
-            low_cardinality_params->children.push_back(std::make_shared<ASTIdentifier>(param_name));
-
-        return getImpl<nullptr_on_error>("LowCardinality", low_cardinality_params);
-    }
-
    const auto * creator = findCreatorByName<nullptr_on_error>(family_name);
    if constexpr (nullptr_on_error)
    {
@@ -739,7 +739,8 @@ public:
    {
        NumberType value;

-        tryGetNumericValueFromJSONElement<JSONParser, NumberType>(value, element, convert_bool_to_integer, error);
+        if (!tryGetNumericValueFromJSONElement<JSONParser, NumberType>(value, element, convert_bool_to_integer, error))
+            return false;
        auto & col_vec = assert_cast<ColumnVector<NumberType> &>(dest);
        col_vec.insertValue(value);
        return true;
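The fix above is an instance of the "try to extract, bail out on failure" pattern; the sketch below shows the same idea in a self-contained form (it uses std::from_chars purely for illustration, not ClickHouse's JSON helpers):

    #include <charconv>
    #include <cstdint>
    #include <optional>
    #include <string_view>
    #include <vector>

    std::optional<int64_t> tryParseInt(std::string_view s)
    {
        int64_t value = 0;
        auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), value);
        if (ec != std::errc() || ptr != s.data() + s.size())
            return std::nullopt; /// not a (complete) integer
        return value;
    }

    bool insertIfNumeric(std::string_view s, std::vector<int64_t> & dest)
    {
        auto value = tryParseInt(s);
        if (!value)
            return false; /// before the fix, an unchecked value could be inserted here
        dest.push_back(*value);
        return true;
    }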
@@ -17,13 +17,19 @@
namespace DB
{

-IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const DB::ASTPtr& query_ptr_,
-    const DB::ContextMutablePtr& context_, const DB::SelectQueryOptions& options_)
+IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_,
+    const ContextMutablePtr & context_, const SelectQueryOptions & options_)
    : query_ptr(query_ptr_)
    , context(context_)
    , options(options_)
    , max_streams(context->getSettingsRef().max_threads)
{
+    /// FIXME All code here will work with the old analyzer, however for views over Distributed tables
+    /// it's possible that new analyzer will be enabled in ::getQueryProcessingStage method
+    /// of the underlying storage when all other parts of infrastructure are not ready for it
+    /// (built with old analyzer).
+    context->setSetting("allow_experimental_analyzer", false);
+
    if (options.shard_num)
        context->addSpecialScalar(
            "_shard_num",
@@ -75,7 +75,6 @@

#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Storages/StorageDistributed.h>
#include <Storages/StorageDummy.h>
#include <Storages/StorageMerge.h>
#include <Storages/StorageValues.h>
#include <Storages/StorageView.h>
@@ -227,18 +226,15 @@ InterpreterSelectQuery::InterpreterSelectQuery(
    const StoragePtr & storage_,
    const StorageMetadataPtr & metadata_snapshot_,
    const SelectQueryOptions & options_)
-    : InterpreterSelectQuery(
-        query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
-{
-}
+    : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
+{}

InterpreterSelectQuery::InterpreterSelectQuery(
    const ASTPtr & query_ptr_,
    const ContextPtr & context_,
    const SelectQueryOptions & options_,
    PreparedSetsPtr prepared_sets_)
-    : InterpreterSelectQuery(
-        query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_)
+    : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_)
{}

InterpreterSelectQuery::~InterpreterSelectQuery() = default;
@@ -26,7 +26,6 @@ class Logger;
namespace DB
{

class SubqueryForSet;
class InterpreterSelectWithUnionQuery;
class Context;
class QueryPlan;
@@ -545,7 +545,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
    catch (Exception & e)
    {
        if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE)
-            throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get<String>(), type.getName());
+            throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get<String>(), type.getName());

        e.addMessage(fmt::format("while converting '{}' to {}", src.get<String>(), type.getName()));
        throw;
@@ -147,7 +147,7 @@ INSTANTIATE_TEST_SUITE_P(
            DecimalField(DateTime64(123 * Day * 1'000'000), 6)
        }
    })
);

INSTANTIATE_TEST_SUITE_P(
    DateTimeToDateTime64,
@@ -179,3 +179,84 @@ INSTANTIATE_TEST_SUITE_P(
        },
    })
);

INSTANTIATE_TEST_SUITE_P(
    StringToNumber,
    ConvertFieldToTypeTest,
    ::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
        {
            "String",
            Field("1"),
            "Int8",
            Field(1)
        },
        {
            "String",
            Field("256"),
            "Int8",
            Field()
        },
        {
            "String",
            Field("not a number"),
            "Int8",
            {}
        },
        {
            "String",
            Field("1.1"),
            "Int8",
            {} /// we can not convert '1.1' to Int8
        },
        {
            "String",
            Field("1.1"),
            "Float64",
            Field(1.1)
        },
    })
);

INSTANTIATE_TEST_SUITE_P(
    NumberToString,
    ConvertFieldToTypeTest,
    ::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
        {
            "Int8",
            Field(1),
            "String",
            Field("1")
        },
        {
            "Int8",
            Field(-1),
            "String",
            Field("-1")
        },
        {
            "Float64",
            Field(1.1),
            "String",
            Field("1.1")
        },
    })
);

INSTANTIATE_TEST_SUITE_P(
    StringToDate,
    ConvertFieldToTypeTest,
    ::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
        {
            "String",
            Field("2024-07-12"),
            "Date",
            Field(static_cast<UInt16>(19916))
        },
        {
            "String",
            Field("not a date"),
            "Date",
            {}
        },
    })
);
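As a quick cross-check of the expected value in the StringToDate case above (assuming C++20 <chrono> calendar support; this snippet is not part of the test file):

    #include <chrono>
    #include <iostream>

    int main()
    {
        using namespace std::chrono;
        /// Days between the Unix epoch and 2024-07-12 — the UInt16 the test expects for Date.
        constexpr auto days_since_epoch = sys_days{year{2024}/July/12} - sys_days{year{1970}/January/1};
        std::cout << days_since_epoch.count() << '\n'; /// prints 19916
    }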
@@ -9,7 +9,7 @@ namespace DB
{


-/** The SELECT subquery is in parenthesis.
+/** The SELECT subquery, in parentheses.
  */
class ParserSubquery : public IParserBase
{
@@ -11,15 +11,12 @@
namespace DB
{

bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserKeyword s_describe(Keyword::DESCRIBE);
    ParserKeyword s_desc(Keyword::DESC);
    ParserKeyword s_table(Keyword::TABLE);
    ParserKeyword s_settings(Keyword::SETTINGS);
    ParserToken s_dot(TokenType::Dot);
    ParserIdentifier name_p;
    ParserSetQuery parser_settings(true);

    ASTPtr database;
@@ -53,5 +50,4 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
    return true;
}

}
@@ -1,48 +1,48 @@
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Interpreters/Squashing.h>
#include <Parsers/ASTInsertQuery.h>
#include <algorithm>
#include <exception>
#include <memory>
#include <mutex>
#include <vector>
#include <string_view>
#include <Poco/Net/NetException.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <Common/CurrentThread.h>
#include <Common/Stopwatch.h>
#include <Common/NetException.h>
#include <Common/setThreadName.h>
#include <Common/OpenSSLHelpers.h>
#include <IO/Progress.h>
#include <vector>
#include <Access/AccessControl.h>
#include <Access/Credentials.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <IO/LimitReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Compression/CompressionFactory.h>
#include <Core/ExternalTable.h>
#include <Core/ServerSettings.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/TablesStatus.h>
#include <IO/LimitReadBuffer.h>
#include <IO/Progress.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Interpreters/InternalTextLogsQueue.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/Session.h>
#include <Interpreters/Squashing.h>
#include <Interpreters/TablesStatus.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Server/TCPServer.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
#include <Storages/ObjectStorage/StorageObjectStorageCluster.h>
#include <Core/ExternalTable.h>
#include <Core/ServerSettings.h>
#include <Access/AccessControl.h>
#include <Access/Credentials.h>
#include <Compression/CompressionFactory.h>
#include <Common/logger_useful.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Poco/Net/NetException.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <Common/CurrentMetrics.h>
#include <Common/CurrentThread.h>
#include <Common/NetException.h>
#include <Common/OpenSSLHelpers.h>
#include <Common/Stopwatch.h>
#include <Common/logger_useful.h>
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>
#include <Common/thread_local_rng.h>
#include <fmt/format.h>

#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/Executors/PushingPipelineExecutor.h>
@@ -61,6 +61,8 @@

#include <Common/config_version.h>

+#include <fmt/format.h>

using namespace std::literals;
using namespace DB;
@@ -1036,6 +1038,17 @@ void TCPHandler::processOrdinaryQuery()
        PullingAsyncPipelineExecutor executor(pipeline);
        CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread};

+        /// The following may happen:
+        /// * current thread is holding the lock
+        /// * because of the exception we unwind the stack and call the destructor of `executor`
+        /// * the destructor calls cancel() and waits for all query threads to finish
+        /// * at the same time one of the query threads is trying to acquire the lock, e.g. inside `merge_tree_read_task_callback`
+        /// * deadlock
+        SCOPE_EXIT({
+            if (out_lock.owns_lock())
+                out_lock.unlock();
+        });
+
        Block block;
        while (executor.pull(block, interactive_delay / 1000))
        {
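A hedged, self-contained sketch of the idea behind that SCOPE_EXIT block (the guard class below is a simplified stand-in for ClickHouse's SCOPE_EXIT macro; the function names are illustrative only):

    #include <mutex>
    #include <utility>

    /// Minimal RAII guard: runs a callable when the scope is left, including during stack unwinding.
    template <typename F>
    class ScopeGuard
    {
    public:
        explicit ScopeGuard(F fn_) : fn(std::move(fn_)) {}
        ~ScopeGuard() { fn(); }
    private:
        F fn;
    };

    void processQuery(std::mutex & out_mutex)
    {
        std::unique_lock<std::mutex> out_lock(out_mutex, std::defer_lock);

        /// If an exception unwinds this frame while out_lock is held, the guard releases the lock
        /// before objects declared earlier (e.g. an executor whose destructor joins worker threads)
        /// are destroyed, so a worker blocked on the same mutex cannot deadlock the shutdown.
        ScopeGuard unlock_on_exit([&]
        {
            if (out_lock.owns_lock())
                out_lock.unlock();
        });

        out_lock.lock();
        /// ... send data under the lock; any throw from here on is now safe ...
    }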
@@ -1079,8 +1092,7 @@ void TCPHandler::processOrdinaryQuery()
        }

        /// This lock wasn't acquired before and we make .lock() call here
-        /// so everything under this line is covered even together
-        /// with sendProgress() out of the scope
+        /// so everything under this line is covered.
        out_lock.lock();

        /** If data has run out, we will send the profiling data and total values to
@@ -1107,6 +1119,7 @@ void TCPHandler::processOrdinaryQuery()
            last_sent_snapshots.clear();
        }

+        out_lock.lock();
        sendProgress();
    }
@@ -304,7 +304,7 @@ void RefreshTask::refreshTask()
            {
                PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
                auto text = message.text;
-                message.text = fmt::format("Refresh failed: {}", message.text);
+                message.text = fmt::format("Refresh view {} failed: {}", view->getStorageID().getFullTableName(), message.text);
                LOG_ERROR(log, message);
                exception = text;
            }
@@ -16,6 +16,7 @@
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Common/CurrentMetrics.h>
#include <Common/NetException.h>
#include <Common/randomDelay.h>
@@ -224,13 +225,17 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
    }
    catch (const Exception & e)
    {
-        if (e.code() != ErrorCodes::ABORTED && e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM)
+        if (e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM
+            && !isRetryableException(std::current_exception()))
+        {
            report_broken_part();
+        }

        throw;
    }
    catch (...)
    {
        if (!isRetryableException(std::current_exception()))
            report_broken_part();
        throw;
    }
@@ -499,8 +499,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
        {
            auto stats = part->loadStatistics();
            /// TODO: We only have one stats file for every part.
+            result.addRows(part->rows_count);
            for (const auto & stat : stats)
-                result.merge(part->info.getPartNameV1(), part->rows_count, stat);
+                result.merge(part->info.getPartNameV1(), stat);
        }
        catch (...)
        {
@@ -515,8 +516,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
            if (!partition_pruner.canBePruned(*part))
            {
                auto stats = part->loadStatistics();
+                result.addRows(part->rows_count);
                for (const auto & stat : stats)
-                    result.merge(part->info.getPartNameV1(), part->rows_count, stat);
+                    result.merge(part->info.getPartNameV1(), stat);
            }
        }
        catch (...)
@@ -15,16 +15,11 @@
#include <Processors/QueryPlan/FilterStep.h>
#include <Common/logger_useful.h>
#include <Processors/Merges/Algorithms/MergeTreePartLevelInfo.h>
+#include <Storages/MergeTree/checkDataPart.h>

namespace DB
{

-namespace ErrorCodes
-{
-    extern const int MEMORY_LIMIT_EXCEEDED;
-}
-
/// Lightweight (in terms of logic) stream for reading single part from
/// MergeTree, used for merges and mutations.
///
@@ -281,7 +276,7 @@ try
catch (...)
{
    /// Suspicion of the broken part. A part is added to the queue for verification.
-    if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
+    if (!isRetryableException(std::current_exception()))
        storage.reportBrokenPart(data_part);
    throw;
}
@@ -36,11 +36,13 @@ namespace ErrorCodes
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int CANNOT_MUNMAP;
    extern const int CANNOT_MREMAP;
+    extern const int CANNOT_SCHEDULE_TASK;
    extern const int UNEXPECTED_FILE_IN_DATA_PART;
    extern const int NO_FILE_IN_DATA_PART;
    extern const int NETWORK_ERROR;
    extern const int SOCKET_TIMEOUT;
    extern const int BROKEN_PROJECTION;
+    extern const int ABORTED;
}

@@ -85,7 +87,9 @@ bool isRetryableException(std::exception_ptr exception_ptr)
    {
        return isNotEnoughMemoryErrorCode(e.code())
            || e.code() == ErrorCodes::NETWORK_ERROR
-            || e.code() == ErrorCodes::SOCKET_TIMEOUT;
+            || e.code() == ErrorCodes::SOCKET_TIMEOUT
+            || e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK
+            || e.code() == ErrorCodes::ABORTED;
    }
    catch (const Poco::Net::NetException &)
    {
@@ -329,17 +333,22 @@ static IMergeTreeDataPart::Checksums checkDataPart(
        projections_on_disk.erase(projection_file);
    }

-    if (throw_on_broken_projection && !broken_projections_message.empty())
+    if (throw_on_broken_projection)
    {
+        if (!broken_projections_message.empty())
+        {
            throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
+        }

        /// This one is actually not broken, just redundant files on disk which
        /// MergeTree will never use.
        if (require_checksums && !projections_on_disk.empty())
        {
            throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
                "Found unexpected projection directories: {}",
                fmt::join(projections_on_disk, ","));
        }
    }

    if (is_cancelled())
        return {};
@@ -163,7 +163,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
        {
            for (const auto & object_info : read_keys)
            {
-                if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName()))
+                auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName());
+                /// Use this format only if we have a schema reader for it.
+                if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
                {
                    format = format_from_file_name;
                    break;
@@ -221,7 +223,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
        {
            for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it)
            {
-                if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName()))
+                auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName());
+                /// Use this format only if we have a schema reader for it.
+                if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
                {
                    format = format_from_file_name;
                    break;
@@ -16,7 +16,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par
    part_statistics[part_name] = stats;
}

-Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(Float64 val, Float64 rows) const
+Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const
{
    if (part_statistics.empty())
        return default_normal_cond_factor * rows;
@@ -30,16 +30,19 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(
    return result * rows / part_rows;
}

-Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(Float64 val, Float64 rows) const
+Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(const Field & val, Float64 rows) const
{
    return rows - estimateLess(val, rows);
}

-Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(Float64 val, Float64 rows) const
+Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(const Field & val, Float64 rows) const
{
    if (part_statistics.empty())
    {
-        if (val < - threshold || val > threshold)
+        auto float_val = StatisticsUtils::tryConvertToFloat64(val);
+        if (!float_val)
+            return default_unknown_cond_factor * rows;
+        else if (float_val.value() < - threshold || float_val.value() > threshold)
            return default_normal_cond_factor * rows;
        else
            return default_good_cond_factor * rows;
@@ -87,7 +90,7 @@ static std::pair<String, Int32> tryToExtractSingleColumn(const RPNBuilderTreeNod
    return result;
}

-std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
+std::pair<String, Field> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
{
    if (!node.isFunction())
        return {};
@@ -123,48 +126,35 @@ std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const
    DataTypePtr output_type;
    if (!constant_node->tryGetConstant(output_value, output_type))
        return {};

-    const auto type = output_value.getType();
-    Float64 value;
-    if (type == Field::Types::Int64)
-        value = output_value.get<Int64>();
-    else if (type == Field::Types::UInt64)
-        value = output_value.get<UInt64>();
-    else if (type == Field::Types::Float64)
-        value = output_value.get<Float64>();
-    else
-        return {};
-    return std::make_pair(function_name, value);
+    return std::make_pair(function_name, output_value);
}

Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode & node) const
{
    auto result = tryToExtractSingleColumn(node);
    if (result.second != 1)
-    {
-        return default_unknown_cond_factor;
-    }
+        return default_unknown_cond_factor * total_rows;

    String col = result.first;
    auto it = column_estimators.find(col);

    /// If there the estimator of the column is not found or there are no data at all,
    /// we use dummy estimation.
-    bool dummy = total_rows == 0;
+    bool dummy = false;
    ColumnSelectivityEstimator estimator;
    if (it != column_estimators.end())
    {
        estimator = it->second;
    }
    else
    {
        dummy = true;
    }

    auto [op, val] = extractBinaryOp(node, col);

    if (op == "equals")
    {
        if (dummy)
        {
-            if (val < - threshold || val > threshold)
+            auto float_val = StatisticsUtils::tryConvertToFloat64(val);
+            if (!float_val || (float_val < - threshold || float_val > threshold))
                return default_normal_cond_factor * total_rows;
            else
                return default_good_cond_factor * total_rows;
@@ -187,13 +177,8 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode
    return default_unknown_cond_factor * total_rows;
}

-void ConditionSelectivityEstimator::merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat)
+void ConditionSelectivityEstimator::merge(String part_name, ColumnStatisticsPtr column_stat)
{
-    if (!part_names.contains(part_name))
-    {
-        total_rows += part_rows;
-        part_names.insert(part_name);
-    }
    if (column_stat != nullptr)
        column_estimators[column_stat->columnName()].merge(part_name, column_stat);
}
@@ -1,6 +1,7 @@
#pragma once

#include <Storages/Statistics/Statistics.h>
+#include <Core/Field.h>

namespace DB
{
@@ -10,6 +11,14 @@ class RPNBuilderTreeNode;
/// It estimates the selectivity of a condition.
class ConditionSelectivityEstimator
{
+public:
+    /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
+    /// Right now we only support simple condition like col = val / col < val
+    Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
+
+    void merge(String part_name, ColumnStatisticsPtr column_stat);
+    void addRows(UInt64 part_rows) { total_rows += part_rows; }
+
private:
    friend class ColumnStatistics;
    struct ColumnSelectivityEstimator
@@ -20,13 +29,15 @@ private:

        void merge(String part_name, ColumnStatisticsPtr stats);

-        Float64 estimateLess(Float64 val, Float64 rows) const;
+        Float64 estimateLess(const Field & val, Float64 rows) const;

-        Float64 estimateGreater(Float64 val, Float64 rows) const;
+        Float64 estimateGreater(const Field & val, Float64 rows) const;

-        Float64 estimateEqual(Float64 val, Float64 rows) const;
+        Float64 estimateEqual(const Field & val, Float64 rows) const;
    };

+    std::pair<String, Field> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
+
    static constexpr auto default_good_cond_factor = 0.1;
    static constexpr auto default_normal_cond_factor = 0.5;
    static constexpr auto default_unknown_cond_factor = 1.0;
@@ -35,16 +46,7 @@ private:
    static constexpr auto threshold = 2;

    UInt64 total_rows = 0;
-    std::set<String> part_names;
    std::map<String, ColumnSelectivityEstimator> column_estimators;
-    std::pair<String, Float64> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
-
-public:
-    /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
-    /// Right now we only support simple condition like col = val / col < val
-    Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
-
-    void merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat);
};

}
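To make the default factors above concrete, here is a hedged toy model (not ClickHouse code) of the fallback path used when a column has no statistics; the constants are copied from the header:

    #include <iostream>
    #include <optional>

    /// Toy model: mirrors good = 0.1, normal = 0.5, unknown = 1.0, threshold = 2 from the header.
    double estimateEqualWithoutStatistics(std::optional<double> val, double rows)
    {
        constexpr double default_good_cond_factor = 0.1;
        constexpr double default_normal_cond_factor = 0.5;
        constexpr double default_unknown_cond_factor = 1.0;
        constexpr double threshold = 2;

        if (!val)                                    /// value not convertible to a number
            return default_unknown_cond_factor * rows;
        if (*val < -threshold || *val > threshold)   /// "large" constant, assume ordinary selectivity
            return default_normal_cond_factor * rows;
        return default_good_cond_factor * rows;      /// small constant, assume it is selective
    }

    int main()
    {
        std::cout << estimateEqualWithoutStatistics(1.0, 1e6) << '\n';          /// 100000
        std::cout << estimateEqualWithoutStatistics(1000.0, 1e6) << '\n';       /// 500000
        std::cout << estimateEqualWithoutStatistics(std::nullopt, 1e6) << '\n'; /// 1e+06
    }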
@@ -1,15 +1,18 @@
#include <Storages/Statistics/Statistics.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
#include <Storages/Statistics/StatisticsCountMinSketch.h>
#include <Storages/Statistics/StatisticsTDigest.h>
#include <Storages/Statistics/StatisticsUniq.h>
#include <Storages/StatisticsDescription.h>
#include <Storages/ColumnsDescription.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>

+#include "config.h" /// USE_DATASKETCHES

namespace DB
{

@@ -24,6 +27,36 @@ enum StatisticsFileVersion : UInt16
    V0 = 0,
};

+std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & field)
+{
+    switch (field.getType())
+    {
+        case Field::Types::Int64:
+            return field.get<Int64>();
+        case Field::Types::UInt64:
+            return field.get<UInt64>();
+        case Field::Types::Float64:
+            return field.get<Float64>();
+        case Field::Types::Int128:
+            return field.get<Int128>();
+        case Field::Types::UInt128:
+            return field.get<UInt128>();
+        case Field::Types::Int256:
+            return field.get<Int256>();
+        case Field::Types::UInt256:
+            return field.get<UInt256>();
+        default:
+            return {};
+    }
+}
+
+std::optional<String> StatisticsUtils::tryConvertToString(const DB::Field & field)
+{
+    if (field.getType() == Field::Types::String)
+        return field.get<String>();
+    return {};
+}
+
IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
    : stat(stat_)
{
@@ -46,12 +79,12 @@ UInt64 IStatistics::estimateCardinality() const
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics");
}

-Float64 IStatistics::estimateEqual(Float64 /*val*/) const
+Float64 IStatistics::estimateEqual(const Field & /*val*/) const
{
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics");
}

-Float64 IStatistics::estimateLess(Float64 /*val*/) const
+Float64 IStatistics::estimateLess(const Field & /*val*/) const
{
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
}
@@ -66,27 +99,32 @@ Float64 IStatistics::estimateLess(Float64 /*val*/) const
/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics
/// object that supports it natively.

-Float64 ColumnStatistics::estimateLess(Float64 val) const
+Float64 ColumnStatistics::estimateLess(const Field & val) const
{
    if (stats.contains(StatisticsType::TDigest))
        return stats.at(StatisticsType::TDigest)->estimateLess(val);
    return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
}

-Float64 ColumnStatistics::estimateGreater(Float64 val) const
+Float64 ColumnStatistics::estimateGreater(const Field & val) const
{
    return rows - estimateLess(val);
}

-Float64 ColumnStatistics::estimateEqual(Float64 val) const
+Float64 ColumnStatistics::estimateEqual(const Field & val) const
{
-    if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
+    auto float_val = StatisticsUtils::tryConvertToFloat64(val);
+    if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
    {
        /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket.
        if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048)
            return stats.at(StatisticsType::TDigest)->estimateEqual(val);
    }
-    if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold)
+#if USE_DATASKETCHES
+    if (stats.contains(StatisticsType::CountMinSketch))
+        return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val);
+#endif
+    if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold))
        return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
    else
        return rows * ConditionSelectivityEstimator::default_good_cond_factor;
@@ -166,11 +204,16 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va

MergeTreeStatisticsFactory::MergeTreeStatisticsFactory()
{
-    registerValidator(StatisticsType::TDigest, TDigestValidator);
-    registerCreator(StatisticsType::TDigest, TDigestCreator);
+    registerValidator(StatisticsType::TDigest, tdigestValidator);
+    registerCreator(StatisticsType::TDigest, tdigestCreator);

-    registerValidator(StatisticsType::Uniq, UniqValidator);
-    registerCreator(StatisticsType::Uniq, UniqCreator);
+    registerValidator(StatisticsType::Uniq, uniqValidator);
+    registerCreator(StatisticsType::Uniq, uniqCreator);

+#if USE_DATASKETCHES
+    registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator);
+    registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator);
+#endif
}

MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance()
@@ -197,7 +240,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri
    {
        auto it = creators.find(type);
        if (it == creators.end())
-            throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type);
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type);
        auto stat_ptr = (it->second)(desc, stats.data_type);
        column_stat->stats[type] = stat_ptr;
    }
@@ -1,6 +1,7 @@
#pragma once

#include <Core/Block.h>
+#include <Core/Field.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <Storages/StatisticsDescription.h>
@@ -13,6 +14,14 @@ namespace DB
constexpr auto STATS_FILE_PREFIX = "statistics_";
constexpr auto STATS_FILE_SUFFIX = ".stats";

+struct StatisticsUtils
+{
+    /// Returns std::nullopt if input Field cannot be converted to a concrete value
+    static std::optional<Float64> tryConvertToFloat64(const Field & field);
+    static std::optional<String> tryConvertToString(const Field & field);
+};
+
/// Statistics describe properties of the values in the column,
/// e.g. how many unique values exist,
/// what are the N most frequent values,
@@ -34,8 +43,8 @@ public:

    /// Per-value estimations.
    /// Throws if the statistics object is not able to do a meaningful estimation.
-    virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column
-    virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column
+    virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column
+    virtual Float64 estimateLess(const Field & val) const; /// summarized cardinality of values < val in the column

protected:
    SingleStatisticsDescription stat;
@@ -58,9 +67,9 @@ public:

    void update(const ColumnPtr & column);

-    Float64 estimateLess(Float64 val) const;
-    Float64 estimateGreater(Float64 val) const;
-    Float64 estimateEqual(Float64 val) const;
+    Float64 estimateLess(const Field & val) const;
+    Float64 estimateGreater(const Field & val) const;
+    Float64 estimateEqual(const Field & val) const;

private:
    friend class MergeTreeStatisticsFactory;
src/Storages/Statistics/StatisticsCountMinSketch.cpp (new file, 102 lines)
@@ -0,0 +1,102 @@
#include <Storages/Statistics/StatisticsCountMinSketch.h>

#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/convertFieldToType.h>

#if USE_DATASKETCHES

namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_STATISTICS;
}

/// Constants chosen based on rolling dices.
/// The values provides:
/// 1. an error tolerance of 0.1% (ε = 0.001)
/// 2. a confidence level of 99.9% (δ = 0.001).
/// And sketch the size is 152kb.
static constexpr auto num_hashes = 7uz;
static constexpr auto num_buckets = 2718uz;
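/// (Aside, not part of the original file: assuming the usual count-min sketch bounds, depth d = ceil(ln(1/δ))
/// and width w = ceil(e/ε); with ε = δ = 0.001 this gives d = ceil(ln 1000) = 7 and w ≈ e * 1000 ≈ 2718,
/// which is where num_hashes = 7 and num_buckets = 2718 above come from.)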

StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_)
    : IStatistics(stat_)
    , sketch(num_hashes, num_buckets)
    , data_type(data_type_)
{
}

Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const
{
    /// Try to convert field to data_type. Converting string to proper data types such as: number, date, datetime, IPv4, Decimal etc.
    /// Return null if val larger than the range of data_type
    ///
    /// For example: if data_type is Int32:
    /// 1. For 1.0, 1, '1', return Field(1)
    /// 2. For 1.1, max_value_int64, return null
    Field val_converted = convertFieldToType(val, *data_type);
    if (val_converted.isNull())
        return 0;

    if (data_type->isValueRepresentedByNumber())
        return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory());

    if (isStringOrFixedString(data_type))
        return sketch.get_estimate(val.get<String>());

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName());
}

void StatisticsCountMinSketch::update(const ColumnPtr & column)
{
    for (size_t row = 0; row < column->size(); ++row)
    {
        if (column->isNullAt(row))
            continue;
        auto data = column->getDataAt(row);
        sketch.update(data.data, data.size, 1);
    }
}

void StatisticsCountMinSketch::serialize(WriteBuffer & buf)
{
    Sketch::vector_bytes bytes = sketch.serialize();
    writeIntBinary(static_cast<UInt64>(bytes.size()), buf);
    buf.write(reinterpret_cast<const char *>(bytes.data()), bytes.size());
}

void StatisticsCountMinSketch::deserialize(ReadBuffer & buf)
{
    UInt64 size;
    readIntBinary(size, buf);

    Sketch::vector_bytes bytes;
    bytes.resize(size); /// To avoid 'container-overflow' in AddressSanitizer checking
    buf.readStrict(reinterpret_cast<char *>(bytes.data()), size);

    sketch = Sketch::deserialize(bytes.data(), size);
}


void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
    data_type = removeNullable(data_type);
    data_type = removeLowCardinalityAndNullable(data_type);
    if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type))
        throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName());
}

StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
{
    return std::make_shared<StatisticsCountMinSketch>(stat, data_type);
}

}

#endif
src/Storages/Statistics/StatisticsCountMinSketch.h (new file, 39 lines)
@@ -0,0 +1,39 @@
#pragma once

#include <Storages/Statistics/Statistics.h>

#include "config.h"

#if USE_DATASKETCHES

#include <count_min.hpp>

namespace DB
{

class StatisticsCountMinSketch : public IStatistics
{
public:
    StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_);

    Float64 estimateEqual(const Field & val) const override;

    void update(const ColumnPtr & column) override;

    void serialize(WriteBuffer & buf) override;
    void deserialize(ReadBuffer & buf) override;

private:
    using Sketch = datasketches::count_min_sketch<UInt64>;
    Sketch sketch;

    DataTypePtr data_type;
};


void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr);

}

#endif
@@ -1,11 +1,13 @@
#include <Storages/Statistics/StatisticsTDigest.h>
#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int ILLEGAL_STATISTICS;
+    extern const int LOGICAL_ERROR;
}

StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
@@ -16,12 +18,16 @@ StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
void StatisticsTDigest::update(const ColumnPtr & column)
{
    size_t rows = column->size();
    for (size_t row = 0; row < rows; ++row)
    {
-        /// TODO: support more types.
-        Float64 value = column->getFloat64(row);
-        t_digest.add(value, 1);
+        Field field;
+        column->get(row, field);
+
+        if (field.isNull())
+            continue;
+
+        if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field))
+            t_digest.add(*field_as_float, 1);
    }
}
@@ -35,24 +41,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf)
    t_digest.deserialize(buf);
}

-Float64 StatisticsTDigest::estimateLess(Float64 val) const
+Float64 StatisticsTDigest::estimateLess(const Field & val) const
{
-    return t_digest.getCountLessThan(val);
+    auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
+    if (val_as_float)
+        return t_digest.getCountLessThan(*val_as_float);
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
}

-Float64 StatisticsTDigest::estimateEqual(Float64 val) const
+Float64 StatisticsTDigest::estimateEqual(const Field & val) const
{
-    return t_digest.getCountEqual(val);
+    auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
+    if (val_as_float)
+        return t_digest.getCountEqual(*val_as_float);
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
}

-void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
+void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
    data_type = removeNullable(data_type);
+    data_type = removeLowCardinalityAndNullable(data_type);
    if (!data_type->isValueRepresentedByNumber())
        throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
}

-StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
+StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
{
    return std::make_shared<StatisticsTDigest>(stat);
}
@@ -16,14 +16,14 @@ public:
    void serialize(WriteBuffer & buf) override;
    void deserialize(ReadBuffer & buf) override;

-    Float64 estimateLess(Float64 val) const override;
-    Float64 estimateEqual(Float64 val) const override;
+    Float64 estimateLess(const Field & val) const override;
+    Float64 estimateEqual(const Field & val) const override;

private:
    QuantileTDigest<Float64> t_digest;
};

-void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
-StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
+void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
+StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);

}
@@ -1,6 +1,7 @@
#include <Storages/Statistics/StatisticsUniq.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>

namespace DB
{
@@ -51,14 +52,15 @@ UInt64 StatisticsUniq::estimateCardinality() const
    return column->getUInt(0);
}

-void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
+void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
    data_type = removeNullable(data_type);
+    data_type = removeLowCardinalityAndNullable(data_type);
    if (!data_type->isValueRepresentedByNumber())
        throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
}

-StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
+StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
{
    return std::make_shared<StatisticsUniq>(stat, data_type);
}
@@ -27,7 +27,7 @@ private:

};

-void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
-StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
+void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
+StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);

}
@ -1,6 +1,10 @@
#include <gtest/gtest.h>

#include <Storages/Statistics/StatisticsTDigest.h>
#include <Interpreters/convertFieldToType.h>
#include <DataTypes/DataTypeFactory.h>

using namespace DB;

TEST(Statistics, TDigestLessThan)
{
@ -39,6 +43,4 @@ TEST(Statistics, TDigestLessThan)

std::reverse(data.begin(), data.end());
test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001});

}
@ -1,19 +1,14 @@
#include <Storages/StatisticsDescription.h>

#include <base/defines.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTStatisticsDeclaration.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/ParserCreateQuery.h>
#include <Poco/Logger.h>
#include <Storages/extractKeyExpressionList.h>
#include <Storages/ColumnsDescription.h>

#include <Common/logger_useful.h>

namespace DB
{
@ -54,7 +49,9 @@ static StatisticsType stringToStatisticsType(String type)
return StatisticsType::TDigest;
if (type == "uniq")
return StatisticsType::Uniq;
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type);
if (type == "count_min")
return StatisticsType::CountMinSketch;
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type);
}

String SingleStatisticsDescription::getTypeName() const
@ -65,8 +62,10 @@ String SingleStatisticsDescription::getTypeName() const
return "TDigest";
case StatisticsType::Uniq:
return "Uniq";
case StatisticsType::CountMinSketch:
return "count_min";
default:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type);
}
}

@ -99,10 +98,9 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
chassert(merging_column_type);

if (column_name.empty())
{
column_name = merging_column_name;

data_type = merging_column_type;
}

for (const auto & [stats_type, stats_desc]: other.types_to_desc)
{
@ -121,6 +119,7 @@ void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & oth
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name);

types_to_desc = other.types_to_desc;
data_type = other.data_type;
}

void ColumnStatisticsDescription::clear()
@ -159,6 +158,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co

const auto & column = columns.getPhysical(physical_column_name);
stats.column_name = column.name;
stats.data_type = column.type;
stats.types_to_desc = statistics_types;
result.push_back(stats);
}

@ -13,6 +13,7 @@ enum class StatisticsType : UInt8
{
TDigest = 0,
Uniq = 1,
CountMinSketch = 2,

Max = 63,
};
@ -43,7 +43,6 @@
#include <Parsers/parseQuery.h>
#include <Parsers/IAST.h>

#include <Analyzer/Utils.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/TableNode.h>
@ -61,26 +60,20 @@
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
#include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/JoinedTables.h>
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/AddDefaultDatabaseVisitor.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/Context.h>
#include <Interpreters/createBlockSelector.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/getClusterName.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
#include <Interpreters/getHeaderForProcessingStage.h>

#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <TableFunctions/TableFunctionView.h>
#include <TableFunctions/TableFunctionFactory.h>

@ -90,7 +83,6 @@
#include <Processors/Executors/PushingPipelineExecutor.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
@ -496,7 +488,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
}

std::optional<QueryProcessingStage::Enum> optimized_stage;
if (settings.allow_experimental_analyzer)
if (query_info.query_tree)
optimized_stage = getOptimizedQueryProcessingStageAnalyzer(query_info, settings);
else
optimized_stage = getOptimizedQueryProcessingStage(query_info, settings);
@ -860,25 +852,21 @@ void StorageDistributed::read(
modified_query_info.query = queryNodeToDistributedSelectQuery(query_tree_distributed);

modified_query_info.query_tree = std::move(query_tree_distributed);

/// Return directly (with correct header) if no shard to query.
if (modified_query_info.getCluster()->getShardsInfo().empty())
return;
}
else
{
header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
}

if (!settings.allow_experimental_analyzer)
{
modified_query_info.query = ClusterProxy::rewriteSelectQuery(
local_context, modified_query_info.query,
remote_database, remote_table, remote_table_function_ptr);
}

/// Return directly (with correct header) if no shard to query.
if (modified_query_info.getCluster()->getShardsInfo().empty())
{
if (settings.allow_experimental_analyzer)
return;

Pipe pipe(std::make_shared<NullSource>(header));
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
read_from_pipe->setStepDescription("Read from NullSource (Distributed)");
@ -886,6 +874,7 @@ void StorageDistributed::read(

return;
}
}

const auto & snapshot_data = assert_cast<const SnapshotData &>(*storage_snapshot->data);
ClusterProxy::SelectStreamFactory select_stream_factory =
@ -427,7 +427,9 @@ namespace
{
for (const auto & path : paths)
{
if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path))
auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path);
/// Use this format only if we have a schema reader for it.
if (format_from_path && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_path))
{
format = format_from_path;
break;
@ -716,7 +718,9 @@ namespace
/// If format is unknown we can try to determine it by the file name.
if (!format)
{
if (auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename))
auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename);
/// Use this format only if we have a schema reader for it.
if (format_from_file && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file))
format = format_from_file;
}
@ -505,18 +505,18 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context
additional_info = fmt::format(" (TID: {}; TIDH: {})", current_tid, current_tid.getHash());
}

Int64 version;
{
std::lock_guard lock(currently_processing_in_background_mutex);

MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings());
version = increment.get();
Int64 version = increment.get();
entry.commit(version);
String mutation_id = entry.file_name;
if (txn)
txn->addMutation(shared_from_this(), mutation_id);

bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false);

{
std::lock_guard lock(currently_processing_in_background_mutex);

bool inserted = current_mutations_by_version.try_emplace(version, std::move(entry)).second;
if (!inserted)
{
@ -527,9 +527,9 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version);
}
}

LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info);
}
background_operations_assignee.trigger();
return version;
}
@ -737,7 +737,9 @@ namespace
{
for (const auto & url : options)
{
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url))
auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url);
/// Use this format only if we have a schema reader for it.
if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
{
format = format_from_file_name;
break;
@ -172,7 +172,7 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType
/// with subqueries it's possible that new analyzer will be enabled in ::read method
/// of underlying storage when all other parts of infra are not ready for it
/// (built with old analyzer).
context_copy->setSetting("allow_experimental_analyzer", Field{0});
context_copy->setSetting("allow_experimental_analyzer", false);
auto syntax_analyzer_result = TreeRewriter(context_copy).analyze(ast, columns);
ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context_copy);
auto dag = analyzer.getActionsDAG(false);
@ -52,6 +52,7 @@ from helpers.client import QueryRuntimeException
import docker

from .client import Client
from .retry_decorator import retry

from .config_cluster import *

@ -2690,15 +2691,12 @@ class ClickHouseCluster:

images_pull_cmd = self.base_cmd + ["pull"]
# sometimes dockerhub/proxy can be flaky
for i in range(5):
try:
run_and_check(images_pull_cmd)
break
except Exception as ex:
if i == 4:
raise ex
logging.info("Got exception pulling images: %s", ex)
time.sleep(i * 3)

retry(
log_function=lambda exception: logging.info(
"Got exception pulling images: %s", exception
),
)(run_and_check)(images_pull_cmd)

if self.with_zookeeper_secure and self.base_zookeeper_cmd:
logging.debug("Setup ZooKeeper Secure")
@ -2971,7 +2969,11 @@ class ClickHouseCluster:
"Trying to create Azurite instance by command %s",
" ".join(map(str, azurite_start_cmd)),
)
run_and_check(azurite_start_cmd)
retry(
log_function=lambda exception: logging.info(
f"Azurite initialization failed with error: {exception}"
),
)(run_and_check)(azurite_start_cmd)
self.up_called = True
logging.info("Trying to connect to Azurite")
self.wait_azurite_to_start()
36
tests/integration/helpers/retry_decorator.py
Normal file
@ -0,0 +1,36 @@
import time
import random
from typing import Type, List


def retry(
    retries: int = 5,
    delay: float = 1,
    backoff: float = 1.5,
    jitter: float = 2,
    log_function=lambda *args, **kwargs: None,
    retriable_expections_list: List[Type[BaseException]] = [Exception],
):
    def inner(func):
        def wrapper(*args, **kwargs):
            current_delay = delay
            for retry in range(retries):
                try:
                    func(*args, **kwargs)
                    break
                except Exception as e:
                    should_retry = False
                    for retriable_exception in retriable_expections_list:
                        if isinstance(e, retriable_exception):
                            should_retry = True
                            break
                    if not should_retry or (retry == retries - 1):
                        raise e
                    log_function(retry=retry, exception=e)
                    sleep_time = current_delay + random.uniform(0, jitter)
                    time.sleep(sleep_time)
                    current_delay *= backoff

        return wrapper

    return inner
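The new helpers/retry_decorator.py module is used in cluster.py by wrapping a callable and invoking it in a single expression, e.g. retry(...)(run_and_check)(cmd). Below is a minimal usage sketch; the function name pull_images, the parameter values, and the top-level import path are illustrative assumptions and not part of this commit. Note that log_function is called with the keyword arguments retry and exception, so a custom logger should accept both.

import logging

from helpers.retry_decorator import retry  # assumed import path in test code


def pull_images():
    ...  # some flaky operation, e.g. pulling docker images


# Wrap and invoke in one expression, as cluster.py does with run_and_check.
retry(
    retries=3,
    delay=0.5,
    log_function=lambda retry, exception: logging.info(
        "attempt %d failed: %s", retry, exception
    ),
)(pull_images)()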
@ -1,7 +1,7 @@
<clickhouse>

<query_cache>
<max_entries>1</max_entries>
<max_entries>0</max_entries>
</query_cache>

</clickhouse>
@ -94,54 +94,61 @@ CONFIG_DIR = os.path.join(SCRIPT_DIR, "configs")
|
||||
|
||||
|
||||
def test_query_cache_size_is_runtime_configurable(start_cluster):
|
||||
# the initial config specifies the maximum query cache size as 2, run 3 queries, expect 2 cache entries
|
||||
node.query("SYSTEM DROP QUERY CACHE")
|
||||
|
||||
# The initial config allows at most two query cache entries but we don't mind
|
||||
node.query("SELECT 1 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
|
||||
time.sleep(2)
|
||||
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
|
||||
res = node.query(
|
||||
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'",
|
||||
)
|
||||
assert res == "2\n"
|
||||
# At this point, the query cache contains one entry and it is stale
|
||||
|
||||
# switch to a config with a maximum query cache size of 1
|
||||
res = node.query(
|
||||
"SELECT count(*) FROM system.query_cache",
|
||||
)
|
||||
assert res == "1\n"
|
||||
|
||||
# switch to a config with a maximum query cache size of _0_
|
||||
node.copy_file_to_container(
|
||||
os.path.join(CONFIG_DIR, "smaller_query_cache.xml"),
|
||||
os.path.join(CONFIG_DIR, "empty_query_cache.xml"),
|
||||
"/etc/clickhouse-server/config.d/default.xml",
|
||||
)
|
||||
|
||||
node.query("SYSTEM RELOAD CONFIG")
|
||||
|
||||
# check that eviction worked as expected
|
||||
time.sleep(2)
|
||||
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
|
||||
res = node.query(
|
||||
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'",
|
||||
)
|
||||
assert (
|
||||
res == "2\n"
|
||||
) # "Why not 1?", you think. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert.
|
||||
# Not a real issue, can be changed later, at least there's a test now.
|
||||
|
||||
# Also, you may also wonder "why query_cache_ttl = 1"? Reason is that TTLCachePolicy only removes *stale* entries. With the default TTL
|
||||
# (60 sec), no entries would be removed at all. Again: not a real issue, can be changed later and there's at least a test now.
|
||||
|
||||
# check that the new query cache maximum size is respected when more queries run
|
||||
node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
node.query("SELECT 5 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
|
||||
time.sleep(2)
|
||||
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
|
||||
res = node.query(
|
||||
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'",
|
||||
"SELECT count(*) FROM system.query_cache",
|
||||
)
|
||||
assert res == "1\n"
|
||||
# "Why not 0?", I hear you say. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert.
|
||||
# Not a real issue, can be changed later, at least there's a test now.
|
||||
|
||||
# restore the original config
|
||||
# The next SELECT will find a single stale entry which is one entry too much according to the new config.
|
||||
# This triggers the eviction of all stale entries, in this case the 'SELECT 1' result.
|
||||
# Then, it tries to insert the 'SELECT 2' result but it also cannot be added according to the config.
|
||||
node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
res = node.query(
|
||||
"SELECT count(*) FROM system.query_cache",
|
||||
)
|
||||
assert res == "0\n"
|
||||
|
||||
# The new maximum cache size is respected when more queries run
|
||||
node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
res = node.query(
|
||||
"SELECT count(*) FROM system.query_cache",
|
||||
)
|
||||
assert res == "0\n"
|
||||
|
||||
# Restore the original config
|
||||
node.copy_file_to_container(
|
||||
os.path.join(CONFIG_DIR, "default.xml"),
|
||||
"/etc/clickhouse-server/config.d/default.xml",
|
||||
)
|
||||
|
||||
node.query("SYSTEM RELOAD CONFIG")
|
||||
|
||||
# It is possible to insert entries again
|
||||
node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1")
|
||||
res = node.query(
|
||||
"SELECT count(*) FROM system.query_cache",
|
||||
)
|
||||
assert res == "1\n"
|
||||
|
@ -7,7 +7,7 @@
|
||||
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Asia/Istanbul\')', 0, 10, 10) LIMIT 1000000000);</query>
|
||||
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 0, 10, 10) LIMIT 100000000);</query>
|
||||
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 1000000000);</query>
|
||||
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 1000000000);</query>
|
||||
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 100000000);</query>
|
||||
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 1000000000);</query>
|
||||
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 100000000);</query>
|
||||
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 100000000);</query>
|
||||
|
@ -1,5 +1,5 @@
|
||||
drop table if exists lc_dict_reading;
|
||||
create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
create table lc_dict_reading (val UInt64, str LowCardinality(String), pat String) engine = MergeTree order by val SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 1000000;
|
||||
select sum(toUInt64(str)), sum(toUInt64(pat)) from lc_dict_reading where val < 8129 or val > 8192 * 4;
|
||||
drop table if exists lc_dict_reading;
|
||||
|
@ -1,6 +1,6 @@
|
||||
set allow_suspicious_low_cardinality_types = 1;
|
||||
drop table if exists lc_00688;
|
||||
create table lc_00688 (str StringWithDictionary, val UInt8WithDictionary) engine = MergeTree order by tuple();
|
||||
create table lc_00688 (str LowCardinality(String), val LowCardinality(UInt8)) engine = MergeTree order by tuple();
|
||||
insert into lc_00688 values ('a', 1), ('b', 2);
|
||||
select str, str in ('a', 'd') from lc_00688;
|
||||
select val, val in (1, 3) from lc_00688;
|
||||
|
@ -1,5 +1,5 @@
|
||||
drop table if exists lc_prewhere;
|
||||
create table lc_prewhere (key UInt64, val UInt64, str StringWithDictionary, s String) engine = MergeTree order by key settings index_granularity = 8192;
|
||||
create table lc_prewhere (key UInt64, val UInt64, str LowCardinality(String), s String) engine = MergeTree order by key settings index_granularity = 8192;
|
||||
insert into lc_prewhere select number, if(number < 10 or number > 8192 * 9, 1, 0), toString(number) as s, s from system.numbers limit 100000;
|
||||
select sum(toUInt64(str)), sum(toUInt64(s)) from lc_prewhere prewhere val == 1;
|
||||
drop table if exists lc_prewhere;
|
||||
|
@ -8,8 +8,8 @@ select 'MergeTree';
|
||||
drop table if exists lc_small_dict;
|
||||
drop table if exists lc_big_dict;
|
||||
|
||||
create table lc_small_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
create table lc_big_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
create table lc_small_dict (str LowCardinality(String)) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
create table lc_big_dict (str LowCardinality(String)) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
|
||||
insert into lc_small_dict select toString(number % 1000) from system.numbers limit 1000000;
|
||||
insert into lc_big_dict select toString(number) from system.numbers limit 1000000;
|
||||
|
@ -1,13 +1,7 @@
|
||||
a
|
||||
a
|
||||
a
|
||||
a
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
ab
|
||||
ab
|
||||
ab
|
||||
ab
|
||||
-
|
||||
|
@ -13,56 +13,32 @@ drop table if exists lc_null_fix_str_0;
|
||||
drop table if exists lc_null_fix_str_1;
|
||||
|
||||
create table lc_str_0 (str LowCardinality(String)) engine = Memory;
|
||||
create table lc_str_1 (str StringWithDictionary) engine = Memory;
|
||||
create table lc_null_str_0 (str LowCardinality(Nullable(String))) engine = Memory;
|
||||
create table lc_null_str_1 (str NullableWithDictionary(String)) engine = Memory;
|
||||
create table lc_int8_0 (val LowCardinality(Int8)) engine = Memory;
|
||||
create table lc_int8_1 (val Int8WithDictionary) engine = Memory;
|
||||
create table lc_null_int8_0 (val LowCardinality(Nullable(Int8))) engine = Memory;
|
||||
create table lc_null_int8_1 (val NullableWithDictionary(Int8)) engine = Memory;
|
||||
create table lc_fix_str_0 (str LowCardinality(FixedString(2))) engine = Memory;
|
||||
create table lc_fix_str_1 (str FixedStringWithDictionary(2)) engine = Memory;
|
||||
create table lc_null_fix_str_0 (str LowCardinality(Nullable(FixedString(2)))) engine = Memory;
|
||||
create table lc_null_fix_str_1 (str NullableWithDictionary(FixedString(2))) engine = Memory;
|
||||
|
||||
insert into lc_str_0 select 'a';
|
||||
insert into lc_str_1 select 'a';
|
||||
insert into lc_null_str_0 select 'a';
|
||||
insert into lc_null_str_1 select 'a';
|
||||
insert into lc_int8_0 select 1;
|
||||
insert into lc_int8_1 select 1;
|
||||
insert into lc_null_int8_0 select 1;
|
||||
insert into lc_null_int8_1 select 1;
|
||||
insert into lc_fix_str_0 select 'ab';
|
||||
insert into lc_fix_str_1 select 'ab';
|
||||
insert into lc_null_fix_str_0 select 'ab';
|
||||
insert into lc_null_fix_str_1 select 'ab';
|
||||
|
||||
select str from lc_str_0;
|
||||
select str from lc_str_1;
|
||||
select str from lc_null_str_0;
|
||||
select str from lc_null_str_1;
|
||||
select val from lc_int8_0;
|
||||
select val from lc_int8_1;
|
||||
select val from lc_null_int8_0;
|
||||
select val from lc_null_int8_1;
|
||||
select str from lc_fix_str_0;
|
||||
select str from lc_fix_str_1;
|
||||
select str from lc_null_fix_str_0;
|
||||
select str from lc_null_fix_str_1;
|
||||
|
||||
drop table if exists lc_str_0;
|
||||
drop table if exists lc_str_1;
|
||||
drop table if exists lc_null_str_0;
|
||||
drop table if exists lc_null_str_1;
|
||||
drop table if exists lc_int8_0;
|
||||
drop table if exists lc_int8_1;
|
||||
drop table if exists lc_null_int8_0;
|
||||
drop table if exists lc_null_int8_1;
|
||||
drop table if exists lc_fix_str_0;
|
||||
drop table if exists lc_fix_str_1;
|
||||
drop table if exists lc_null_fix_str_0;
|
||||
drop table if exists lc_null_fix_str_1;
|
||||
|
||||
select '-';
|
||||
SELECT toLowCardinality('a') AS s, toTypeName(s), toTypeName(length(s)) from system.one;
|
||||
@ -73,7 +49,7 @@ select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) a
|
||||
|
||||
select '-';
|
||||
drop table if exists lc_str_uuid;
|
||||
create table lc_str_uuid(str1 String, str2 LowCardinality(String), str3 StringWithDictionary) ENGINE=Memory;
|
||||
create table lc_str_uuid(str1 String, str2 LowCardinality(String), str3 LowCardinality(String)) ENGINE=Memory;
|
||||
select toUUID(str1), toUUID(str2), toUUID(str3) from lc_str_uuid;
|
||||
select toUUID(str1, '', NULL), toUUID(str2, '', NULL), toUUID(str3, '', NULL) from lc_str_uuid;
|
||||
insert into lc_str_uuid values ('61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0');
|
||||
|
@ -1,5 +1,5 @@
|
||||
drop table if exists tab_00717;
|
||||
create table tab_00717 (a String, b StringWithDictionary) engine = MergeTree order by a;
|
||||
create table tab_00717 (a String, b LowCardinality(String)) engine = MergeTree order by a;
|
||||
insert into tab_00717 values ('a_1', 'b_1'), ('a_2', 'b_2');
|
||||
select count() from tab_00717;
|
||||
select a from tab_00717 group by a order by a;
|
||||
|
@ -7,7 +7,7 @@ alter table tab_00718 modify column b UInt32;
|
||||
select *, toTypeName(b) from tab_00718;
|
||||
alter table tab_00718 modify column b LowCardinality(UInt32);
|
||||
select *, toTypeName(b) from tab_00718;
|
||||
alter table tab_00718 modify column b StringWithDictionary;
|
||||
alter table tab_00718 modify column b LowCardinality(String);
|
||||
select *, toTypeName(b) from tab_00718;
|
||||
alter table tab_00718 modify column b LowCardinality(UInt32);
|
||||
select *, toTypeName(b) from tab_00718;
|
||||
|
@ -1,7 +1,7 @@
|
||||
drop table if exists lc_00752;
|
||||
drop table if exists lc_mv_00752;
|
||||
|
||||
create table lc_00752 (str StringWithDictionary) engine = MergeTree order by tuple();
|
||||
create table lc_00752 (str LowCardinality(String)) engine = MergeTree order by tuple();
|
||||
|
||||
insert into lc_00752 values ('a'), ('bbb'), ('ab'), ('accccc'), ('baasddas'), ('bcde');
|
||||
|
||||
@ -12,4 +12,3 @@ select * from lc_mv_00752 order by letter;
|
||||
|
||||
drop table if exists lc_00752;
|
||||
drop table if exists lc_mv_00752;
|
||||
|
||||
|
@ -11,12 +11,17 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS mem"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE mem (x UInt64) engine = Memory"
|
||||
|
||||
function f {
|
||||
local TIMELIMIT=$((SECONDS+$1))
|
||||
for _ in $(seq 1 300); do
|
||||
$CLICKHOUSE_CLIENT -q "SELECT count() FROM (SELECT * FROM mem SETTINGS max_threads=2) FORMAT Null;"
|
||||
if [ $SECONDS -ge "$TIMELIMIT" ]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
function g {
|
||||
local TIMELIMIT=$((SECONDS+$1))
|
||||
for _ in $(seq 1 100); do
|
||||
$CLICKHOUSE_CLIENT -n -q "
|
||||
INSERT INTO mem SELECT number FROM numbers(1000000);
|
||||
@ -30,14 +35,18 @@ function g {
|
||||
INSERT INTO mem VALUES (1);
|
||||
TRUNCATE TABLE mem;
|
||||
"
|
||||
if [ $SECONDS -ge "$TIMELIMIT" ]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
export -f f;
|
||||
export -f g;
|
||||
|
||||
timeout 20 bash -c f > /dev/null &
|
||||
timeout 20 bash -c g > /dev/null &
|
||||
TIMEOUT=20
|
||||
f $TIMEOUT &
|
||||
g $TIMEOUT &
|
||||
wait
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE mem"
|
||||
|
@ -28,7 +28,7 @@ ORDER BY tuple();
|
||||
|
||||
INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(100000);
|
||||
|
||||
create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val;
|
||||
create table lc_dict_reading (val UInt64, str LowCardinality(String), pat String) engine = MergeTree order by val;
|
||||
insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 100000;
|
||||
"""
|
||||
|
||||
|
@ -0,0 +1,2 @@
|
||||
1
|
||||
1
|
@ -0,0 +1,13 @@
|
||||
-- Tags: no-parallel
|
||||
-- Tag no-parallel: Messes with internal cache
|
||||
|
||||
SYSTEM DROP QUERY CACHE;
|
||||
|
||||
-- Create an entry in the query cache
|
||||
SELECT 1 SETTINGS use_query_cache = true;
|
||||
|
||||
-- Asynchronous metrics must know about the entry
|
||||
SYSTEM RELOAD ASYNCHRONOUS METRICS;
|
||||
SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries';
|
||||
|
||||
SYSTEM DROP QUERY CACHE;
|
@ -3,7 +3,11 @@ all_1_1_0
|
||||
all_2_2_0
|
||||
all_3_3_0
|
||||
all_4_4_0
|
||||
40000
|
||||
5000 all_1_1_0_9
|
||||
5000 all_2_2_0_9
|
||||
5000 all_3_3_0_9
|
||||
5000 all_4_4_0_9
|
||||
mutation_version has_parts_for_which_set_was_built has_parts_that_shared_set
|
||||
8 1 1
|
||||
9 1 1
|
||||
|
@ -18,12 +18,35 @@ SELECT name FROM system.parts WHERE database=currentDatabase() AND table = '0258
|
||||
|
||||
-- Start multiple mutations simultaneously
|
||||
SYSTEM STOP MERGES 02581_trips;
|
||||
ALTER TABLE 02581_trips UPDATE description='5' WHERE id IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0;
|
||||
ALTER TABLE 02581_trips UPDATE description='6' WHERE id IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0;
|
||||
ALTER TABLE 02581_trips DELETE WHERE id IN (SELECT (number*10 + 7)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0;
|
||||
ALTER TABLE 02581_trips UPDATE description='8' WHERE id IN (SELECT (number*10 + 8)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0;
|
||||
ALTER TABLE 02581_trips UPDATE description='5' WHERE id IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0;
|
||||
ALTER TABLE 02581_trips UPDATE description='6' WHERE id IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0;
|
||||
ALTER TABLE 02581_trips DELETE WHERE id IN (SELECT (number*10 + 7)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0;
|
||||
ALTER TABLE 02581_trips UPDATE description='8' WHERE id IN (SELECT (number*10 + 8)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0;
|
||||
SYSTEM START MERGES 02581_trips;
|
||||
DELETE FROM 02581_trips WHERE id IN (SELECT (number*10 + 9)::UInt32 FROM numbers(200000000));
|
||||
SELECT count(), _part from 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part;
|
||||
|
||||
-- Wait for mutations to finish
|
||||
SELECT count() FROM 02581_trips SETTINGS select_sequential_consistency = 1;
|
||||
|
||||
DELETE FROM 02581_trips WHERE id IN (SELECT (number*10 + 9)::UInt32 FROM numbers(10000000)) SETTINGS lightweight_deletes_sync = 2;
|
||||
SELECT count(), _part from 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part SETTINGS select_sequential_consistency=1;
|
||||
|
||||
SYSTEM FLUSH LOGS;
|
||||
-- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' )
|
||||
-- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' )
|
||||
WITH (
|
||||
SELECT uuid
|
||||
FROM system.tables
|
||||
WHERE (database = currentDatabase()) AND (name = '02581_trips')
|
||||
) AS table_uuid
|
||||
SELECT
|
||||
CAST(splitByChar('_', query_id)[5], 'UInt64') AS mutation_version, -- '5521485f-8a40-4aba-87a2-00342c369563::all_3_3_0_6'
|
||||
sum(message LIKE 'Created Set with % entries%') >= 1 AS has_parts_for_which_set_was_built,
|
||||
sum(message LIKE 'Got set from cache%') >= 1 AS has_parts_that_shared_set
|
||||
FROM system.text_log
|
||||
WHERE
|
||||
query_id LIKE concat(CAST(table_uuid, 'String'), '::all\\_%')
|
||||
AND (event_date >= yesterday())
|
||||
AND (message LIKE 'Created Set with % entries%' OR message LIKE 'Got set from cache%')
|
||||
GROUP BY mutation_version ORDER BY mutation_version FORMAT TSVWithNames;
|
||||
|
||||
DROP TABLE 02581_trips;
|
||||
|
@ -10,3 +10,11 @@ all_4_4_0
|
||||
20000
|
||||
16000
|
||||
12000
|
||||
mutation_version has_parts_for_which_set_was_built has_parts_that_shared_set
|
||||
5 1 1
|
||||
6 1 1
|
||||
7 1 1
|
||||
8 1 1
|
||||
9 1 1
|
||||
10 1 1
|
||||
11 1 1
|
||||
|
@ -18,42 +18,63 @@ SELECT count() from 02581_trips WHERE description = '';
|
||||
SELECT name FROM system.parts WHERE database=currentDatabase() AND table = '02581_trips' AND active ORDER BY name;
|
||||
|
||||
-- Run mutation with `id` a 'IN big subquery'
|
||||
ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2;
|
||||
ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2;
|
||||
SELECT count() from 02581_trips WHERE description = '';
|
||||
|
||||
ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000;
|
||||
ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000;
|
||||
SELECT count() from 02581_trips WHERE description = '';
|
||||
|
||||
-- Run mutation with func(`id`) IN big subquery
|
||||
ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2;
|
||||
ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2;
|
||||
SELECT count() from 02581_trips WHERE description = '';
|
||||
|
||||
-- Run mutation with non-PK `id2` IN big subquery
|
||||
ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2;
|
||||
--SELECT count(), _part FROM 02581_trips WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) GROUP BY _part ORDER BY _part;
|
||||
--EXPLAIN SELECT (), _part FROM 02581_trips WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000));
|
||||
ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2;
|
||||
SELECT count() from 02581_trips WHERE description = '';
|
||||
|
||||
-- Run mutation with PK and non-PK IN big subquery
|
||||
ALTER TABLE 02581_trips UPDATE description='c'
|
||||
WHERE
|
||||
(id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(200000000))) OR
|
||||
(id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(200000000)))
|
||||
(id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) OR
|
||||
(id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000)))
|
||||
SETTINGS mutations_sync=2;
|
||||
SELECT count() from 02581_trips WHERE description = '';
|
||||
|
||||
-- Run mutation with PK and non-PK IN big subquery
|
||||
ALTER TABLE 02581_trips UPDATE description='c'
|
||||
WHERE
|
||||
(id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000))) OR
|
||||
(id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000)))
|
||||
(id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) OR
|
||||
(id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000)))
|
||||
SETTINGS mutations_sync=2;
|
||||
SELECT count() from 02581_trips WHERE description = '';
|
||||
|
||||
-- Run mutation with PK and non-PK IN big subquery
|
||||
ALTER TABLE 02581_trips UPDATE description='c'
|
||||
WHERE
|
||||
(id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000))) OR
|
||||
((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000)))
|
||||
(id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) OR
|
||||
((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000)))
|
||||
SETTINGS mutations_sync=2;
|
||||
SELECT count() from 02581_trips WHERE description = '';
|
||||
|
||||
SYSTEM FLUSH LOGS;
|
||||
-- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' )
|
||||
-- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' )
|
||||
WITH (
|
||||
SELECT uuid
|
||||
FROM system.tables
|
||||
WHERE (database = currentDatabase()) AND (name = '02581_trips')
|
||||
) AS table_uuid
|
||||
SELECT
|
||||
CAST(splitByChar('_', query_id)[5], 'UInt64') AS mutation_version, -- '5521485f-8a40-4aba-87a2-00342c369563::all_3_3_0_6'
|
||||
sum(message LIKE 'Created Set with % entries%') >= 1 AS has_parts_for_which_set_was_built,
|
||||
sum(message LIKE 'Got set from cache%') >= 1 AS has_parts_that_shared_set
|
||||
FROM system.text_log
|
||||
WHERE
|
||||
query_id LIKE concat(CAST(table_uuid, 'String'), '::all\\_%')
|
||||
AND (event_date >= yesterday())
|
||||
AND (message LIKE 'Created Set with % entries%' OR message LIKE 'Got set from cache%')
|
||||
GROUP BY mutation_version ORDER BY mutation_version FORMAT TSVWithNames;
|
||||
|
||||
DROP TABLE 02581_trips;
|
||||
|
@ -0,0 +1,14 @@
|
||||
CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
|
||||
Test statistics count_min:
|
||||
Prewhere info
|
||||
Prewhere filter
|
||||
Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed)
|
||||
Test statistics multi-types:
|
||||
Prewhere info
|
||||
Prewhere filter
|
||||
Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed)
|
||||
Prewhere info
|
||||
Prewhere filter
|
||||
Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed)
|
||||
Test LowCardinality and Nullable data type:
|
||||
tab2
|
@ -0,0 +1,70 @@
|
||||
-- Tags: no-fasttest
|
||||
|
||||
DROP TABLE IF EXISTS tab SYNC;
|
||||
|
||||
SET allow_experimental_statistics = 1;
|
||||
SET allow_statistics_optimize = 1;
|
||||
SET allow_suspicious_low_cardinality_types=1;
|
||||
SET mutations_sync = 2;
|
||||
|
||||
CREATE TABLE tab
|
||||
(
|
||||
a String,
|
||||
b UInt64,
|
||||
c Int64,
|
||||
pk String,
|
||||
) Engine = MergeTree() ORDER BY pk
|
||||
SETTINGS min_bytes_for_wide_part = 0;
|
||||
|
||||
SHOW CREATE TABLE tab;
|
||||
|
||||
INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000;
|
||||
|
||||
SELECT 'Test statistics count_min:';
|
||||
|
||||
ALTER TABLE tab ADD STATISTICS a TYPE count_min;
|
||||
ALTER TABLE tab ADD STATISTICS b TYPE count_min;
|
||||
ALTER TABLE tab ADD STATISTICS c TYPE count_min;
|
||||
ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
|
||||
|
||||
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
|
||||
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx
|
||||
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
|
||||
|
||||
ALTER TABLE tab DROP STATISTICS a, b, c;
|
||||
|
||||
|
||||
SELECT 'Test statistics multi-types:';
|
||||
|
||||
ALTER TABLE tab ADD STATISTICS a TYPE count_min;
|
||||
ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest;
|
||||
ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest;
|
||||
ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
|
||||
|
||||
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
|
||||
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/)
|
||||
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
|
||||
|
||||
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
|
||||
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/)
|
||||
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
|
||||
|
||||
ALTER TABLE tab DROP STATISTICS a, b, c;
|
||||
|
||||
DROP TABLE IF EXISTS tab SYNC;
|
||||
|
||||
|
||||
SELECT 'Test LowCardinality and Nullable data type:';
|
||||
DROP TABLE IF EXISTS tab2 SYNC;
|
||||
SET allow_suspicious_low_cardinality_types=1;
|
||||
CREATE TABLE tab2
|
||||
(
|
||||
a LowCardinality(Int64) STATISTICS(count_min),
|
||||
b Nullable(Int64) STATISTICS(count_min),
|
||||
c LowCardinality(Nullable(Int64)) STATISTICS(count_min),
|
||||
pk String,
|
||||
) Engine = MergeTree() ORDER BY pk;
|
||||
|
||||
select name from system.tables where name = 'tab2' and database = currentDatabase();
|
||||
|
||||
DROP TABLE IF EXISTS tab2 SYNC;
|
@ -70,3 +70,4 @@ SETTINGS min_bytes_for_wide_part = 0;
|
||||
INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000;
|
||||
|
||||
DROP TABLE IF EXISTS t3;
|
||||
|
||||
|
@ -2,8 +2,6 @@
|
||||
# Tags: atomic-database
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# reset --log_comment
|
||||
CLICKHOUSE_LOG_COMMENT=
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
@ -134,7 +132,7 @@ while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshe
|
||||
do
|
||||
sleep 0.1
|
||||
done
|
||||
sleep 1
|
||||
|
||||
$CLICKHOUSE_CLIENT -nq "
|
||||
select '<14: waiting for next cycle>', view, status, remaining_dependencies, next_refresh_time from refreshes;
|
||||
truncate src;
|
||||
@ -172,13 +170,13 @@ $CLICKHOUSE_CLIENT -nq "
|
||||
drop table b;
|
||||
create materialized view c refresh every 1 second (x Int64) engine Memory empty as select * from src;
|
||||
drop table src;"
|
||||
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Exception' ]
|
||||
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'c' -- $LINENO" | xargs`" != 'Exception' ]
|
||||
do
|
||||
sleep 0.1
|
||||
done
|
||||
# Check exception, create src, expect successful refresh.
|
||||
$CLICKHOUSE_CLIENT -nq "
|
||||
select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' from refreshes;
|
||||
select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' ? '1' : exception from refreshes where view = 'c';
|
||||
create table src (x Int64) engine Memory as select 1;
|
||||
system refresh view c;"
|
||||
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ]
|
||||
@ -224,22 +222,27 @@ done
|
||||
$CLICKHOUSE_CLIENT -nq "
|
||||
rename table e to f;
|
||||
select '<24: rename during refresh>', * from f;
|
||||
select '<25: rename during refresh>', view, status from refreshes;
|
||||
select '<25: rename during refresh>', view, status from refreshes where view = 'f';
|
||||
alter table f modify refresh after 10 year;"
|
||||
sleep 2 # make it likely that at least one row was processed
|
||||
|
||||
# Cancel.
|
||||
$CLICKHOUSE_CLIENT -nq "
|
||||
system cancel view f;"
|
||||
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Cancelled' ]
|
||||
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Cancelled' ]
|
||||
do
|
||||
sleep 0.1
|
||||
done
|
||||
|
||||
while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" = 'Running' ]
|
||||
do
|
||||
sleep 0.1
|
||||
done
|
||||
|
||||
# Check that another refresh doesn't immediately start after the cancelled one.
|
||||
sleep 1
|
||||
$CLICKHOUSE_CLIENT -nq "
|
||||
select '<27: cancelled>', view, status from refreshes;
|
||||
select '<27: cancelled>', view, status from refreshes where view = 'f';
|
||||
system refresh view f;"
|
||||
while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Running' ]
|
||||
while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Running' ]
|
||||
do
|
||||
sleep 0.1
|
||||
done
|
||||
|
@ -0,0 +1 @@
|
||||
1
|
10
tests/queries/0_stateless/03023_invalid_format_detection.sh
Executable file
@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
touch $CLICKHOUSE_TEST_UNIQUE_NAME.xml
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.*')" 2>&1 | grep -c "CANNOT_DETECT_FORMAT"
|
||||
rm $CLICKHOUSE_TEST_UNIQUE_NAME.xml
|
||||
|
@ -1,14 +1,4 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: long
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# reset --log_comment
|
||||
CLICKHOUSE_LOG_COMMENT=
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1"
|
||||
|
||||
|
||||
function test()
|
||||
{
|
||||
@ -43,20 +33,3 @@ function test()
|
||||
$CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null"
|
||||
$CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null"
|
||||
}
|
||||
|
||||
$CH_CLIENT -q "drop table if exists test;"
|
||||
|
||||
echo "Memory"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
||||
|
||||
echo "MergeTree compact"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
||||
|
||||
echo "MergeTree wide"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
@ -1,57 +0,0 @@
|
||||
Memory
|
||||
test
|
||||
Array(Array(Dynamic))
|
||||
Array(Variant(String, UInt64))
|
||||
None
|
||||
String
|
||||
UInt64
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
0
|
||||
0
|
||||
200000
|
||||
200000
|
||||
100000
|
||||
100000
|
||||
200000
|
||||
0
|
||||
MergeTree compact
|
||||
test
|
||||
Array(Array(Dynamic))
|
||||
Array(Variant(String, UInt64))
|
||||
None
|
||||
String
|
||||
UInt64
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
0
|
||||
0
|
||||
200000
|
||||
200000
|
||||
100000
|
||||
100000
|
||||
200000
|
||||
0
|
||||
MergeTree wide
|
||||
test
|
||||
Array(Array(Dynamic))
|
||||
Array(Variant(String, UInt64))
|
||||
None
|
||||
String
|
||||
UInt64
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
0
|
||||
0
|
||||
200000
|
||||
200000
|
||||
100000
|
||||
100000
|
||||
200000
|
||||
0
|
@ -0,0 +1,19 @@
|
||||
Memory
|
||||
test
|
||||
Array(Array(Dynamic))
|
||||
Array(Variant(String, UInt64))
|
||||
None
|
||||
String
|
||||
UInt64
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
0
|
||||
0
|
||||
200000
|
||||
200000
|
||||
100000
|
||||
100000
|
||||
200000
|
||||
0
|
21
tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh
Executable file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: long
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# reset --log_comment
|
||||
CLICKHOUSE_LOG_COMMENT=
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
|
||||
# shellcheck source=./03036_dynamic_read_subcolumns.lib
|
||||
. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib
|
||||
|
||||
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1"
|
||||
|
||||
$CH_CLIENT -q "drop table if exists test;"
|
||||
|
||||
echo "Memory"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
@ -0,0 +1,19 @@
|
||||
MergeTree compact
|
||||
test
|
||||
Array(Array(Dynamic))
|
||||
Array(Variant(String, UInt64))
|
||||
None
|
||||
String
|
||||
UInt64
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
0
|
||||
0
|
||||
200000
|
||||
200000
|
||||
100000
|
||||
100000
|
||||
200000
|
||||
0
|
21
tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh
Executable file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: long
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# reset --log_comment
|
||||
CLICKHOUSE_LOG_COMMENT=
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
|
||||
# shellcheck source=./03036_dynamic_read_subcolumns.lib
|
||||
. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib
|
||||
|
||||
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1"
|
||||
|
||||
$CH_CLIENT -q "drop table if exists test;"
|
||||
|
||||
echo "MergeTree compact"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
@ -0,0 +1,19 @@
|
||||
MergeTree wide
|
||||
test
|
||||
Array(Array(Dynamic))
|
||||
Array(Variant(String, UInt64))
|
||||
None
|
||||
String
|
||||
UInt64
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
0
|
||||
0
|
||||
200000
|
||||
200000
|
||||
100000
|
||||
100000
|
||||
200000
|
||||
0
|
21
tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh
Executable file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: long
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# reset --log_comment
|
||||
CLICKHOUSE_LOG_COMMENT=
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
|
||||
# shellcheck source=./03036_dynamic_read_subcolumns.lib
|
||||
. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib
|
||||
|
||||
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1"
|
||||
|
||||
$CH_CLIENT -q "drop table if exists test;"
|
||||
|
||||
echo "MergeTree wide"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
@ -0,0 +1,17 @@
|
||||
Array(Array(Dynamic))
|
||||
Array(Variant(String, UInt64))
|
||||
None
|
||||
String
|
||||
UInt64
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
0
|
||||
0
|
||||
200000
|
||||
200000
|
||||
100000
|
||||
100000
|
||||
200000
|
||||
0
|
@ -0,0 +1,40 @@
|
||||
-- Tags: long
|
||||
|
||||
set allow_experimental_variant_type = 1;
|
||||
set use_variant_as_common_type = 1;
|
||||
set allow_experimental_dynamic_type = 1;
|
||||
|
||||
drop table if exists test;
|
||||
create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;
|
||||
|
||||
insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
|
||||
|
||||
select distinct dynamicType(d) as type from test order by type;
|
||||
select count() from test where dynamicType(d) == 'UInt64';
|
||||
select count() from test where d.UInt64 is not NULL;
|
||||
select count() from test where dynamicType(d) == 'String';
|
||||
select count() from test where d.String is not NULL;
|
||||
select count() from test where dynamicType(d) == 'Date';
|
||||
select count() from test where d.Date is not NULL;
|
||||
select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))';
|
||||
select count() from test where not empty(d.`Array(Variant(String, UInt64))`);
|
||||
select count() from test where dynamicType(d) == 'Array(Array(Dynamic))';
|
||||
select count() from test where not empty(d.`Array(Array(Dynamic))`);
|
||||
select count() from test where d is NULL;
|
||||
select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String);
|
||||
|
||||
select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
|
||||
select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
|
||||
select d.Int8, d.Date, d.`Array(String)` from test format Null;
|
||||
select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
|
||||
select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null;
|
||||
select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
|
||||
select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
|
||||
select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null;
|
||||
|
||||
drop table test;
|
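The create table above pins min_rows_for_wide_part and min_bytes_for_wide_part to very large values, which keeps every data part in the Compact format; the sibling test further down sets both thresholds to 1 to force Wide parts instead. A minimal sketch (not part of the test itself) of how one could confirm which format the parts actually got, assuming the table was created in the current database:

select part_type, count()
from system.parts
where database = currentDatabase() and table = 'test' and active
group by part_type;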
@ -0,0 +1,17 @@
|
||||
Array(Array(Dynamic))
|
||||
Array(Variant(String, UInt64))
|
||||
None
|
||||
String
|
||||
UInt64
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
0
|
||||
0
|
||||
200000
|
||||
200000
|
||||
100000
|
||||
100000
|
||||
200000
|
||||
0
|
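How the expected counts in this reference file appear to follow from the inserts in the accompanying test: each dynamic type combines its dedicated 100000-row insert with the matching branch of the 400000-row multiIf insert, which splits evenly four ways over number % 4.

UInt64:                          100000 + 400000 / 4 = 200000
String:                          100000 + 400000 / 4 = 200000
Array(Variant(String, UInt64)):  100000 + 400000 / 4 = 200000
NULL (None):                     100000 + 400000 / 4 = 200000
Array(Array(Dynamic)):           100000 (single insert, no multiIf branch)
Date and the Tuple subcolumn:    0 (never inserted)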
@ -0,0 +1,40 @@
|
||||
-- Tags: long
|
||||
|
||||
set allow_experimental_variant_type = 1;
|
||||
set use_variant_as_common_type = 1;
|
||||
set allow_experimental_dynamic_type = 1;
|
||||
|
||||
drop table if exists test;
|
||||
create table test (id UInt64, d Dynamic) engine=Memory;
|
||||
|
||||
insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
|
||||
|
||||
select distinct dynamicType(d) as type from test order by type;
|
||||
select count() from test where dynamicType(d) == 'UInt64';
|
||||
select count() from test where d.UInt64 is not NULL;
|
||||
select count() from test where dynamicType(d) == 'String';
|
||||
select count() from test where d.String is not NULL;
|
||||
select count() from test where dynamicType(d) == 'Date';
|
||||
select count() from test where d.Date is not NULL;
|
||||
select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))';
|
||||
select count() from test where not empty(d.`Array(Variant(String, UInt64))`);
|
||||
select count() from test where dynamicType(d) == 'Array(Array(Dynamic))';
|
||||
select count() from test where not empty(d.`Array(Array(Dynamic))`);
|
||||
select count() from test where d is NULL;
|
||||
select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String);
|
||||
|
||||
select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
|
||||
select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
|
||||
select d.Int8, d.Date, d.`Array(String)` from test format Null;
|
||||
select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
|
||||
select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null;
|
||||
select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
|
||||
select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
|
||||
select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null;
|
||||
|
||||
drop table test;
|
@ -0,0 +1,17 @@
|
||||
Array(Array(Dynamic))
|
||||
Array(Variant(String, UInt64))
|
||||
None
|
||||
String
|
||||
UInt64
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
200000
|
||||
0
|
||||
0
|
||||
200000
|
||||
200000
|
||||
100000
|
||||
100000
|
||||
200000
|
||||
0
|
@ -0,0 +1,40 @@
|
||||
-- Tags: long
|
||||
|
||||
set allow_experimental_variant_type = 1;
|
||||
set use_variant_as_common_type = 1;
|
||||
set allow_experimental_dynamic_type = 1;
|
||||
|
||||
drop table if exists test;
|
||||
create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;
|
||||
|
||||
insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000;
|
||||
insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000;
|
||||
|
||||
select distinct dynamicType(d) as type from test order by type;
|
||||
select count() from test where dynamicType(d) == 'UInt64';
|
||||
select count() from test where d.UInt64 is not NULL;
|
||||
select count() from test where dynamicType(d) == 'String';
|
||||
select count() from test where d.String is not NULL;
|
||||
select count() from test where dynamicType(d) == 'Date';
|
||||
select count() from test where d.Date is not NULL;
|
||||
select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))';
|
||||
select count() from test where not empty(d.`Array(Variant(String, UInt64))`);
|
||||
select count() from test where dynamicType(d) == 'Array(Array(Dynamic))';
|
||||
select count() from test where not empty(d.`Array(Array(Dynamic))`);
|
||||
select count() from test where d is NULL;
|
||||
select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String);
|
||||
|
||||
select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
|
||||
select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null;
|
||||
select d.Int8, d.Date, d.`Array(String)` from test format Null;
|
||||
select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
|
||||
select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null;
|
||||
select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
|
||||
select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null;
|
||||
select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null;
|
||||
|
||||
drop table test;
|
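The three copies of this test above differ only in the storage line: MergeTree forced into Compact parts, plain Memory, and MergeTree forced into Wide parts, so the same reference output has to hold for each layout. A small, self-contained illustration (hypothetical table name, assuming the experimental Dynamic settings above are enabled in the session) of the subcolumn semantics the count queries rely on, where d.SomeType is NULL, or empty for array types, whenever a row holds a different dynamic type:

create table dyn_example (d Dynamic) engine=Memory;
insert into dyn_example values (42::UInt64), ('hello'), (NULL);
select d, dynamicType(d), d.UInt64, d.String from dyn_example;
drop table dyn_example;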
@ -1,60 +0,0 @@
|
||||
MergeTree compact
|
||||
test
|
||||
50000 DateTime
|
||||
60000 Date
|
||||
70000 Array(UInt16)
|
||||
80000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
200000 Map(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
10000 Tuple(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
270000 String
|
||||
MergeTree wide
|
||||
test
|
||||
50000 DateTime
|
||||
60000 Date
|
||||
70000 Array(UInt16)
|
||||
80000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
200000 Map(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
10000 Tuple(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
270000 String
|
@ -1,52 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: long
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# reset --log_comment
|
||||
CLICKHOUSE_LOG_COMMENT=
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
|
||||
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1"
|
||||
|
||||
function test()
|
||||
{
|
||||
echo "test"
|
||||
$CH_CLIENT -q "system stop merges test"
|
||||
$CH_CLIENT -q "insert into test select number, number from numbers(100000)"
|
||||
$CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)"
|
||||
$CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)"
|
||||
$CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)"
|
||||
$CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)"
|
||||
$CH_CLIENT -q "insert into test select number, NULL from numbers(100000)"
|
||||
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
|
||||
$CH_CLIENT -q "system stop merges test"
|
||||
$CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
|
||||
$CH_CLIENT -q "system stop merges test"
|
||||
$CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
}
|
||||
|
||||
$CH_CLIENT -q "drop table if exists test;"
|
||||
|
||||
echo "MergeTree compact"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
||||
|
||||
echo "MergeTree wide"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
||||
|
@ -0,0 +1,28 @@
|
||||
50000 DateTime
|
||||
60000 Date
|
||||
70000 Array(UInt16)
|
||||
80000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
200000 Map(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
10000 Tuple(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
270000 String
|
@ -0,0 +1,33 @@
|
||||
-- Tags: long
|
||||
set allow_experimental_dynamic_type=1;
|
||||
|
||||
drop table if exists test;
|
||||
create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;
|
||||
|
||||
system stop merges test;
|
||||
insert into test select number, number from numbers(100000);
|
||||
insert into test select number, 'str_' || toString(number) from numbers(80000);
|
||||
insert into test select number, range(number % 10 + 1) from numbers(70000);
|
||||
insert into test select number, toDate(number) from numbers(60000);
|
||||
insert into test select number, toDateTime(number) from numbers(50000);
|
||||
insert into test select number, NULL from numbers(100000);
|
||||
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
system start merges test; optimize table test final;
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
|
||||
system stop merges test;
|
||||
insert into test select number, map(number, number) from numbers(200000);
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
system start merges test;
|
||||
optimize table test final;
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
|
||||
system stop merges test;
|
||||
insert into test select number, tuple(number, number) from numbers(10000);
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
system start merges test;
|
||||
optimize table test final;
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
|
||||
drop table test;
|
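Reading this test together with the reference output in the hunk just above: every time merges are re-enabled and OPTIMIZE ... FINAL runs, the dynamic types that no longer fit the Dynamic(max_types=3) budget stop being listed separately and the String bucket grows by exactly their row counts, which is consistent with the overflow types being rewritten as String during the merge:

after the initial inserts:               80000 String + 50000 DateTime + 60000 Date  -> 190000 String
after the Map(UInt64, UInt64) insert:   190000 String + 70000 Array(UInt16)          -> 260000 String
after the Tuple(UInt64, UInt64) insert: 260000 String + 10000 Tuple(UInt64, UInt64)  -> 270000 String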
@ -0,0 +1,28 @@
|
||||
50000 DateTime
|
||||
60000 Date
|
||||
70000 Array(UInt16)
|
||||
80000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
200000 Map(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
10000 Tuple(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
270000 String
|
@ -0,0 +1,33 @@
|
||||
-- Tags: long
|
||||
set allow_experimental_dynamic_type=1;
|
||||
|
||||
drop table if exists test;
|
||||
create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;
|
||||
|
||||
system stop merges test;
|
||||
insert into test select number, number from numbers(100000);
|
||||
insert into test select number, 'str_' || toString(number) from numbers(80000);
|
||||
insert into test select number, range(number % 10 + 1) from numbers(70000);
|
||||
insert into test select number, toDate(number) from numbers(60000);
|
||||
insert into test select number, toDateTime(number) from numbers(50000);
|
||||
insert into test select number, NULL from numbers(100000);
|
||||
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
system start merges test; optimize table test final;
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
|
||||
system stop merges test;
|
||||
insert into test select number, map(number, number) from numbers(200000);
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
system start merges test;
|
||||
optimize table test final;
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
|
||||
system stop merges test;
|
||||
insert into test select number, tuple(number, number) from numbers(10000);
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
system start merges test;
|
||||
optimize table test final;
|
||||
select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d);
|
||||
|
||||
drop table test;
|
@ -1,60 +0,0 @@
|
||||
MergeTree compact
|
||||
test
|
||||
50000 DateTime
|
||||
60000 Date
|
||||
70000 Array(UInt16)
|
||||
80000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
200000 Map(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
10000 Tuple(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
270000 String
|
||||
MergeTree wide
|
||||
test
|
||||
50000 DateTime
|
||||
60000 Date
|
||||
70000 Array(UInt16)
|
||||
80000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
200000 Map(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
10000 Tuple(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
270000 String
|
@ -1,51 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: long
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# reset --log_comment
|
||||
CLICKHOUSE_LOG_COMMENT=
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
|
||||
|
||||
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1"
|
||||
function test()
|
||||
{
|
||||
echo "test"
|
||||
$CH_CLIENT -q "system stop merges test"
|
||||
$CH_CLIENT -q "insert into test select number, number from numbers(100000)"
|
||||
$CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)"
|
||||
$CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)"
|
||||
$CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)"
|
||||
$CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)"
|
||||
$CH_CLIENT -q "insert into test select number, NULL from numbers(100000)"
|
||||
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
|
||||
$CH_CLIENT -q "system stop merges test"
|
||||
$CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
|
||||
$CH_CLIENT -q "system stop merges test"
|
||||
$CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
|
||||
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)"
|
||||
}
|
||||
|
||||
$CH_CLIENT -q "drop table if exists test;"
|
||||
|
||||
echo "MergeTree compact"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
||||
|
||||
echo "MergeTree wide"
|
||||
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;"
|
||||
test
|
||||
$CH_CLIENT -q "drop table test;"
|
@ -0,0 +1,28 @@
|
||||
50000 DateTime
|
||||
60000 Date
|
||||
70000 Array(UInt16)
|
||||
80000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
70000 Array(UInt16)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
190000 String
|
||||
200000 Map(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
10000 Tuple(UInt64, UInt64)
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
260000 String
|
||||
100000 None
|
||||
100000 UInt64
|
||||
200000 Map(UInt64, UInt64)
|
||||
270000 String
|
Some files were not shown because too many files have changed in this diff.