Compare commits

..

1 Commits

Author SHA1 Message Date
李扬
9637a5d32e
Merge a2f9329e18 into 733c57dae7 2024-09-16 06:05:55 +00:00
32 changed files with 51 additions and 328 deletions

View File

@ -4,31 +4,15 @@ description: Prints workflow debug info
runs:
using: "composite"
steps:
- name: Envs, event.json and contexts
- name: Print envs
shell: bash
run: |
echo '::group::Environment variables'
env | sort
echo '::endgroup::'
echo '::group::event.json'
echo "::group::Envs"
env
echo "::endgroup::"
- name: Print Event.json
shell: bash
run: |
echo "::group::Event.json"
python3 -m json.tool "$GITHUB_EVENT_PATH"
echo '::endgroup::'
cat << 'EOF'
::group::github context
${{ toJSON(github) }}
::endgroup::
::group::env context
${{ toJSON(env) }}
::endgroup::
::group::runner context
${{ toJSON(runner) }}
::endgroup::
::group::job context
${{ toJSON(job) }}
::endgroup::
EOF
echo "::endgroup::"

View File

@ -27,8 +27,6 @@ jobs:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version
filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Labels check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -33,8 +33,6 @@ jobs:
clear-repository: true
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Debug Info
uses: ./.github/actions/debug
- name: Cherry pick
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -56,13 +56,13 @@ jobs:
GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
runs-on: [self-hosted, release-maker]
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Debug Info
uses: ./.github/actions/debug
- name: Prepare Release Info
shell: bash
run: |

View File

@ -11,7 +11,6 @@ name: Build docker images
required: false
type: boolean
default: false
jobs:
DockerBuildAarch64:
runs-on: [self-hosted, style-checker-aarch64]

View File

@ -8,28 +8,27 @@ on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */6 * * *'
workflow_dispatch:
jobs:
RunConfig:
runs-on: [self-hosted, style-checker-aarch64]
outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }}
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version
filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: PrepareRunConfig
id: runconfig
run: |
echo "::group::configure CI run"
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --workflow "$GITHUB_WORKFLOW" --outfile ${{ runner.temp }}/ci_run_data.json
echo "::endgroup::"
echo "::group::CI run configure results"
python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
echo "::endgroup::"

View File

@ -15,14 +15,14 @@ jobs:
outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }}
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version
filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Merge sync PR
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -14,14 +14,14 @@ jobs:
outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }}
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get a version
filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Cancel PR workflow
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run

View File

@ -15,14 +15,14 @@ jobs:
outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }}
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version
filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: PrepareRunConfig
id: runconfig
run: |

View File

@ -25,14 +25,14 @@ jobs:
outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }}
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get a version
filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Cancel previous Sync PR workflow
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run

View File

@ -24,8 +24,6 @@ jobs:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version
filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Labels check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -62,6 +62,8 @@ jobs:
env:
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
@ -70,8 +72,6 @@ jobs:
submodules: ${{inputs.submodules}}
fetch-depth: ${{inputs.checkout_depth}}
filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Set build envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'

View File

@ -13,17 +13,16 @@ Here is a complete list of available database engines. Follow the links for more
- [Atomic](../../engines/database-engines/atomic.md)
- [Lazy](../../engines/database-engines/lazy.md)
- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md)
- [MySQL](../../engines/database-engines/mysql.md)
- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md)
- [MySQL](../../engines/database-engines/mysql.md)
- [Lazy](../../engines/database-engines/lazy.md)
- [PostgreSQL](../../engines/database-engines/postgresql.md)
- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md)
- [Replicated](../../engines/database-engines/replicated.md)
- [SQLite](../../engines/database-engines/sqlite.md)

View File

@ -107,10 +107,6 @@ The vector similarity index currently does not work with per-table, non-default
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
:::
Vector index creation is known to be slow. To speed the process up, index creation can be parallelized. The maximum number of threads can be
configured using server configuration
setting [max_build_vector_similarity_index_thread_pool_size](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters_max_build_vector_similarity_index_thread_pool_size).
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write
requests.

View File

@ -491,14 +491,6 @@ Type: Double
Default: 0.9
## max_build_vector_similarity_index_thread_pool_size {#server_configuration_parameters_max_build_vector_similarity_index_thread_pool_size}
The maximum number of threads to use for building vector indexes. 0 means all cores.
Type: UInt64
Default: 16
## cgroups_memory_usage_observer_wait_time
Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see

View File

@ -178,9 +178,6 @@
M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \
M(BuildVectorSimilarityIndexThreads, "Number of threads in the build vector similarity index thread pool.") \
M(BuildVectorSimilarityIndexThreadsActive, "Number of threads in the build vector similarity index thread pool running a task.") \
M(BuildVectorSimilarityIndexThreadsScheduled, "Number of queued or active jobs in the build vector similarity index thread pool.") \
\
M(DiskPlainRewritableAzureDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
M(DiskPlainRewritableLocalDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \

View File

@ -63,7 +63,6 @@ static struct InitFiu
REGULAR(keepermap_fail_drop_data) \
REGULAR(lazy_pipe_fds_fail_close) \
PAUSEABLE(infinite_sleep) \
PAUSEABLE(stop_moving_part_before_swap_with_active) \
namespace FailPoints

View File

@ -50,7 +50,7 @@ namespace DB
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
M(String, default_database, "default", "Default database name.", 0) \
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \
M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
@ -65,7 +65,6 @@ namespace DB
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \
M(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \
\
/* Database Catalog */ \
M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \

View File

@ -50,6 +50,13 @@ private:
return executeNonconstant(input);
}
[[maybe_unused]] String toString() const
{
WriteBufferFromOwnString buf;
buf << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() << "\n";
return buf.str();
}
private:
ColumnWithTypeAndName executeLiteral(std::string_view literal) const
{
@ -224,7 +231,9 @@ public:
const auto & instruction = instructions[i];
try
{
// std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl;
concat_args[i] = instruction.execute();
// std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl;
}
catch (const fmt::v9::format_error & e)
{
@ -349,14 +358,7 @@ private:
REGISTER_FUNCTION(Printf)
{
factory.registerFunction<FunctionPrintf>(
FunctionDocumentation{.description=R"(
The `printf` function formats the given string with the values (strings, integers, floating-points etc.) listed in the arguments, similar to printf function in C++.
The format string can contain format specifiers starting with `%` character.
Anything not contained in `%` and the following format specifier is considered literal text and copied verbatim into the output.
Literal `%` character can be escaped by `%%`.)", .examples{{"sum", "select printf('%%%s %s %d', 'Hello', 'World', 2024);", "%Hello World 2024"}}, .categories{"String"}
});
factory.registerFunction<FunctionPrintf>();
}
}

View File

@ -10,7 +10,6 @@
#include <Common/SensitiveDataMasker.h>
#include <Common/Macros.h>
#include <Common/EventNotifier.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/Stopwatch.h>
#include <Common/formatReadable.h>
#include <Common/Throttler.h>
@ -122,6 +121,7 @@
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <base/defines.h>
namespace fs = std::filesystem;
namespace ProfileEvents
@ -164,9 +164,6 @@ namespace CurrentMetrics
extern const Metric TablesLoaderForegroundThreadsActive;
extern const Metric TablesLoaderForegroundThreadsScheduled;
extern const Metric IOWriterThreadsScheduled;
extern const Metric BuildVectorSimilarityIndexThreads;
extern const Metric BuildVectorSimilarityIndexThreadsActive;
extern const Metric BuildVectorSimilarityIndexThreadsScheduled;
extern const Metric AttachedTable;
extern const Metric AttachedView;
extern const Metric AttachedDictionary;
@ -300,8 +297,6 @@ struct ContextSharedPart : boost::noncopyable
mutable std::unique_ptr<ThreadPool> load_marks_threadpool; /// Threadpool for loading marks cache.
mutable OnceFlag prefetch_threadpool_initialized;
mutable std::unique_ptr<ThreadPool> prefetch_threadpool; /// Threadpool for loading marks cache.
mutable OnceFlag build_vector_similarity_index_threadpool_initialized;
mutable std::unique_ptr<ThreadPool> build_vector_similarity_index_threadpool; /// Threadpool for vector-similarity index creation.
mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks for MergeTree indices.
mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex); /// Cache of query results.
mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices.
@ -3302,21 +3297,6 @@ size_t Context::getPrefetchThreadpoolSize() const
return config.getUInt(".prefetch_threadpool_pool_size", 100);
}
ThreadPool & Context::getBuildVectorSimilarityIndexThreadPool() const
{
callOnce(shared->build_vector_similarity_index_threadpool_initialized, [&] {
size_t pool_size = shared->server_settings.max_build_vector_similarity_index_thread_pool_size > 0
? shared->server_settings.max_build_vector_similarity_index_thread_pool_size
: getNumberOfPhysicalCPUCores();
shared->build_vector_similarity_index_threadpool = std::make_unique<ThreadPool>(
CurrentMetrics::BuildVectorSimilarityIndexThreads,
CurrentMetrics::BuildVectorSimilarityIndexThreadsActive,
CurrentMetrics::BuildVectorSimilarityIndexThreadsScheduled,
pool_size);
});
return *shared->build_vector_similarity_index_threadpool;
}
BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
{
callOnce(shared->buffer_flush_schedule_pool_initialized, [&] {

View File

@ -1097,8 +1097,6 @@ public:
/// and make a prefetch by putting a read task to threadpoolReader.
size_t getPrefetchThreadpoolSize() const;
ThreadPool & getBuildVectorSimilarityIndexThreadPool() const;
/// Settings for MergeTree background tasks stored in config.xml
BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const;
BackgroundTaskSchedulingSettings getBackgroundMoveTaskSchedulingSettings() const;

View File

@ -74,8 +74,7 @@ private:
findMySQLFunctionSecretArguments();
}
else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") ||
(function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg") ||
(function.name == "gcs"))
(function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);

View File

@ -5,11 +5,9 @@
#include <Columns/ColumnArray.h>
#include <Common/BitHelpers.h>
#include <Common/formatReadable.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/logger_useful.h>
#include <Common/typeid_cast.h>
#include <Core/Field.h>
#include <Core/ServerSettings.h>
#include <DataTypes/DataTypeArray.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -31,6 +29,7 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int FORMAT_VERSION_TOO_OLD;
extern const int ILLEGAL_COLUMN;
extern const int INCORRECT_DATA;
@ -132,7 +131,8 @@ void USearchIndexWithSerialization::deserialize(ReadBuffer & istr)
/// See the comment in MergeTreeIndexGranuleVectorSimilarity::deserializeBinary why we throw here
throw Exception(ErrorCodes::INCORRECT_DATA, "Could not load vector similarity index. Please drop the index and create it again. Error: {}", String(result.error.release()));
try_reserve(limits());
if (!try_reserve(limits()))
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index");
}
USearchIndexWithSerialization::Statistics USearchIndexWithSerialization::getStatistics() const
@ -270,49 +270,20 @@ void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & c
throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length");
/// Reserving space is mandatory
size_t max_thread_pool_size = Context::getGlobalContextInstance()->getServerSettings().max_build_vector_similarity_index_thread_pool_size;
if (max_thread_pool_size == 0)
max_thread_pool_size = getNumberOfPhysicalCPUCores();
unum::usearch::index_limits_t limits(roundUpToPowerOfTwoOrZero(index->size() + rows), max_thread_pool_size);
index->reserve(limits);
if (!index->try_reserve(roundUpToPowerOfTwoOrZero(index->size() + rows)))
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index");
/// Vector index creation is slooooow. Add the new rows in parallel. The threadpool is global to avoid oversubscription when multiple
/// indexes are build simultaneously (e.g. multiple merges run at the same time).
auto & thread_pool = Context::getGlobalContextInstance()->getBuildVectorSimilarityIndexThreadPool();
auto add_vector_to_index = [&](USearchIndex::vector_key_t key, size_t row, ThreadGroupPtr thread_group)
for (size_t row = 0; row < rows; ++row)
{
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachFromGroupIfNotDetached();
);
if (thread_group)
CurrentThread::attachToGroupIfDetached(thread_group);
/// add is thread-safe
if (auto result = index->add(key, &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
{
if (auto result = index->add(static_cast<USearchIndex::vector_key_t>(index->size()), &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release()));
}
else
{
ProfileEvents::increment(ProfileEvents::USearchAddCount);
ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members);
ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances);
}
};
size_t index_size = index->size();
for (size_t row = 0; row < rows; ++row)
{
auto key = static_cast<USearchIndex::vector_key_t>(index_size + row);
auto task = [group = CurrentThread::getGroup(), &add_vector_to_index, key, row] { add_vector_to_index(key, row, group); };
thread_pool.scheduleOrThrowOnError(task);
}
thread_pool.wait();
}
}

View File

@ -1,7 +1,6 @@
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreePartsMover.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Common/FailPoint.h>
#include <Common/logger_useful.h>
#include <set>
@ -16,11 +15,6 @@ namespace ErrorCodes
extern const int DIRECTORY_ALREADY_EXISTS;
}
namespace FailPoints
{
extern const char stop_moving_part_before_swap_with_active[];
}
namespace
{
@ -232,7 +226,6 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
cloned_part.temporary_directory_lock = data->getTemporaryPartDirectoryHolder(part->name);
MutableDataPartStoragePtr cloned_part_storage;
bool preserve_blobs = false;
if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
{
/// Try zero-copy replication and fallback to default copy if it's not possible
@ -260,7 +253,6 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
if (zero_copy_part)
{
/// FIXME for some reason we cannot just use this part, we have to re-create it through MergeTreeDataPartBuilder
preserve_blobs = true;
zero_copy_part->is_temp = false; /// Do not remove it in dtor
cloned_part_storage = zero_copy_part->getDataPartStoragePtr();
}
@ -280,17 +272,7 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
cloned_part.part = std::move(builder).withPartFormatFromDisk().build();
LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part.part->getDataPartStorage().getFullPath());
cloned_part.part->is_temp = false;
if (data->allowRemoveStaleMovingParts())
{
cloned_part.part->is_temp = true;
/// Setting it in case connection to zookeeper is lost while moving
/// Otherwise part might be stuck in the moving directory due to the KEEPER_EXCEPTION in part's destructor
if (preserve_blobs)
cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::PRESERVE_BLOBS;
else
cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS;
}
cloned_part.part->is_temp = data->allowRemoveStaleMovingParts();
cloned_part.part->loadColumnsChecksumsIndexes(true, true);
cloned_part.part->loadVersionMetadata();
cloned_part.part->modification_time = cloned_part.part->getDataPartStorage().getLastModified().epochTime();
@ -300,8 +282,6 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) const
{
/// Used to get some stuck parts in the moving directory by stopping moves while pause is active
FailPointInjection::pauseFailPoint(FailPoints::stop_moving_part_before_swap_with_active);
if (moves_blocker.isCancelled())
throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts.");

View File

@ -42,7 +42,6 @@
<multi_read>1</multi_read>
<check_not_exists>1</check_not_exists>
<create_if_not_exists>1</create_if_not_exists>
<remove_recursive>1</remove_recursive>
</feature_flags>
</keeper_server>
</clickhouse>

View File

@ -64,7 +64,6 @@ function configure()
randomize_config_boolean_value multi_read keeper_port
randomize_config_boolean_value check_not_exists keeper_port
randomize_config_boolean_value create_if_not_exists keeper_port
randomize_config_boolean_value remove_recursive keeper_port
fi
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml

View File

@ -393,7 +393,6 @@ def test_table_functions():
f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '{azure_account_key}')",
f"iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
f"gcs('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
]
def make_test_case(i):

View File

@ -1,46 +0,0 @@
<clickhouse>
<remote_servers>
<cluster>
<shard>
<replica>
<host>ch1</host>
<port>9000</port>
</replica>
</shard>
</cluster>
</remote_servers>
<macros>
<shard>01</shard>
</macros>
<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>http://minio1:9001/root/data/</endpoint>
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
</s3>
</disks>
<policies>
<s3>
<volumes>
<default>
<disk>default</disk>
<perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
</default>
<s3>
<disk>s3</disk>
<perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
</s3>
</volumes>
<move_factor>0.0</move_factor>
</s3>
</policies>
</storage_configuration>
<merge_tree>
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
<storage_policy>s3</storage_policy>
</merge_tree>
<allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>
</clickhouse>

View File

@ -1,117 +0,0 @@
from pathlib import Path
import time
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
ch1 = cluster.add_instance(
"ch1",
main_configs=[
"config.xml",
],
macros={"replica": "node1"},
with_zookeeper=True,
with_minio=True,
)
DATABASE_NAME = "stale_moving_parts"
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def q(node, query):
return node.query(database=DATABASE_NAME, sql=query)
# .../disks/s3/store/
def get_table_path(node, table):
return (
node.query(
sql=f"SELECT data_paths FROM system.tables WHERE table = '{table}' and database = '{DATABASE_NAME}' LIMIT 1"
)
.strip('"\n[]')
.split(",")[1]
.strip("'")
)
def exec(node, cmd, path):
return node.exec_in_container(
[
"bash",
"-c",
f"{cmd} {path}",
]
)
def wait_part_is_stuck(node, table_moving_path, moving_part):
num_tries = 5
while q(node, "SELECT part_name FROM system.moves").strip() != moving_part:
if num_tries == 0:
raise Exception("Part has not started to move")
num_tries -= 1
time.sleep(1)
num_tries = 5
while exec(node, "ls", table_moving_path).strip() != moving_part:
if num_tries == 0:
raise Exception("Part is not stuck in the moving directory")
num_tries -= 1
time.sleep(1)
def wait_zookeeper_node_to_start(zk_nodes, timeout=60):
start = time.time()
while time.time() - start < timeout:
try:
for instance in zk_nodes:
conn = cluster.get_kazoo_client(instance)
conn.get_children("/")
print("All instances of ZooKeeper started")
return
except Exception as ex:
print(("Can't connect to ZooKeeper " + str(ex)))
time.sleep(0.5)
def test_remove_stale_moving_parts_without_zookeeper(started_cluster):
ch1.query(f"CREATE DATABASE IF NOT EXISTS {DATABASE_NAME}")
q(
ch1,
"CREATE TABLE test_remove ON CLUSTER cluster ( id UInt32 ) ENGINE ReplicatedMergeTree() ORDER BY id;",
)
table_moving_path = Path(get_table_path(ch1, "test_remove")) / "moving"
q(ch1, "SYSTEM ENABLE FAILPOINT stop_moving_part_before_swap_with_active")
q(ch1, "INSERT INTO test_remove SELECT number FROM numbers(100);")
moving_part = "all_0_0_0"
move_response = ch1.get_query_request(
sql=f"ALTER TABLE test_remove MOVE PART '{moving_part}' TO DISK 's3'",
database=DATABASE_NAME,
)
wait_part_is_stuck(ch1, table_moving_path, moving_part)
cluster.stop_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
# Stop moves in case table is not read-only yet
q(ch1, "SYSTEM STOP MOVES")
q(ch1, "SYSTEM DISABLE FAILPOINT stop_moving_part_before_swap_with_active")
assert "Cancelled moving parts" in move_response.get_error()
assert exec(ch1, "ls", table_moving_path).strip() == ""
cluster.start_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
wait_zookeeper_node_to_start(["zoo1", "zoo2", "zoo3"])
q(ch1, "SYSTEM START MOVES")
q(ch1, f"DROP TABLE test_remove")

View File

@ -560,6 +560,7 @@ positionCaseInsensitive
positionCaseInsensitiveUTF8
positionUTF8
pow
printf
proportionsZTest
protocol
queryID

View File

@ -1,2 +1,2 @@
default 127.0.0.1 9181 0 0 0 1 1 ['FILTERED_LIST','MULTI_READ','CHECK_NOT_EXISTS','CREATE_IF_NOT_EXISTS','REMOVE_RECURSIVE']
default 127.0.0.1 9181 0 0 0 1 1 ['FILTERED_LIST','MULTI_READ','CHECK_NOT_EXISTS','CREATE_IF_NOT_EXISTS']
zookeeper2 localhost 9181 0 0 0 1