2020-03-19 16:37:55 +00:00
# include <Compression/CompressedReadBuffer.h>
2017-04-01 09:19:00 +00:00
# include <DataStreams/copyData.h>
2020-03-19 16:37:55 +00:00
# include <DataTypes/DataTypeArray.h>
2017-04-01 09:19:00 +00:00
# include <DataTypes/DataTypeDate.h>
# include <DataTypes/DataTypeDateTime.h>
# include <DataTypes/DataTypeEnum.h>
# include <DataTypes/DataTypeNullable.h>
2020-03-19 16:37:55 +00:00
# include <DataTypes/NestedUtils.h>
# include <Formats/FormatFactory.h>
2017-04-01 09:19:00 +00:00
# include <Functions/FunctionFactory.h>
# include <Functions/IFunction.h>
2020-03-19 16:37:55 +00:00
# include <IO/ConcatReadBuffer.h>
# include <IO/HexWriteBuffer.h>
# include <IO/Operators.h>
# include <IO/ReadBufferFromMemory.h>
# include <IO/WriteBufferFromString.h>
# include <Interpreters/ExpressionAnalyzer.h>
# include <Interpreters/PartLog.h>
2020-07-22 17:13:05 +00:00
# include <Interpreters/TreeRewriter.h>
2020-05-20 20:16:32 +00:00
# include <Interpreters/Context.h>
2020-03-19 16:37:55 +00:00
# include <Parsers/ASTFunction.h>
# include <Parsers/ASTLiteral.h>
# include <Parsers/ASTNameTypePair.h>
# include <Parsers/ASTPartition.h>
# include <Parsers/ASTSetQuery.h>
# include <Parsers/ExpressionListParsers.h>
# include <Parsers/parseQuery.h>
# include <Parsers/queryToString.h>
# include <Storages/AlterCommands.h>
# include <Storages/MergeTree/MergeTreeData.h>
2020-04-14 19:47:19 +00:00
# include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
2020-03-19 16:37:55 +00:00
# include <Storages/MergeTree/MergeTreeDataPartCompact.h>
# include <Storages/MergeTree/MergeTreeDataPartWide.h>
2020-04-02 16:28:50 +00:00
# include <Storages/MergeTree/MergeTreeSequentialSource.h>
2020-03-19 16:37:55 +00:00
# include <Storages/MergeTree/MergedBlockOutputStream.h>
# include <Storages/MergeTree/MergedColumnOnlyOutputStream.h>
# include <Storages/MergeTree/checkDataPart.h>
# include <Storages/MergeTree/localBackup.h>
# include <Storages/StorageMergeTree.h>
# include <Storages/StorageReplicatedMergeTree.h>
2017-05-10 08:08:32 +00:00
# include <Common/Increment.h>
2017-05-10 06:49:19 +00:00
# include <Common/SimpleIncrement.h>
2020-03-19 16:37:55 +00:00
# include <Common/Stopwatch.h>
# include <Common/StringUtils/StringUtils.h>
2017-04-01 09:19:00 +00:00
# include <Common/escapeForFileName.h>
2019-11-28 10:13:53 +00:00
# include <Common/quoteString.h>
2017-07-13 20:58:19 +00:00
# include <Common/typeid_cast.h>
2017-09-05 12:12:55 +00:00
# include <Poco/DirectoryIterator.h>
2014-10-16 01:21:03 +00:00
2017-09-11 22:40:51 +00:00
# include <boost/range/adaptor/filtered.hpp>
2020-09-17 14:38:06 +00:00
# include <boost/algorithm/string/join.hpp>
2017-09-11 22:40:51 +00:00
2014-06-10 14:24:33 +00:00
# include <algorithm>
2014-10-16 01:21:03 +00:00
# include <iomanip>
2019-12-10 09:31:26 +00:00
# include <optional>
2019-02-05 14:50:25 +00:00
# include <set>
2015-04-16 07:22:29 +00:00
# include <thread>
2017-02-09 17:29:36 +00:00
# include <typeinfo>
# include <typeindex>
2019-12-10 09:31:26 +00:00
# include <unordered_set>
2014-03-09 17:36:01 +00:00
2016-10-24 02:02:37 +00:00
/// Profile event identifiers referenced in this file. They are only declared here (`extern`);
/// the definitions live in another translation unit.
namespace ProfileEvents
{
    extern const Event DelayedInserts;
    extern const Event DelayedInsertsMilliseconds;
    extern const Event RejectedInserts;
}
2016-10-27 22:50:02 +00:00
namespace CurrentMetrics
{
2017-04-01 07:20:54 +00:00
extern const Metric DelayedInserts ;
2020-10-14 07:22:48 +00:00
extern const Metric BackgroundMovePoolTask ;
2016-10-27 22:50:02 +00:00
}
2014-03-09 17:36:01 +00:00
2019-10-31 10:40:11 +00:00
namespace
{
    /// 1 MiB (1024 * 1024), file-local.
    constexpr UInt64 RESERVATION_MIN_ESTIMATION_SIZE = 1024u * 1024u;
}
2014-03-09 17:36:01 +00:00
namespace DB
{
2016-11-20 12:43:20 +00:00
/// Error codes referenced in this file (declarations only, kept in alphabetical order).
namespace ErrorCodes
{
    extern const int ABORTED;
    extern const int ALTER_OF_COLUMN_IS_FORBIDDEN;
    extern const int BAD_ARGUMENTS;
    extern const int BAD_DATA_PART_NAME;
    extern const int BAD_TTL_EXPRESSION;
    extern const int BAD_TYPE_OF_FIELD;
    extern const int CORRUPTED_DATA;
    extern const int DIRECTORY_ALREADY_EXISTS;
    extern const int DUPLICATE_DATA_PART;
    extern const int ILLEGAL_COLUMN;
    extern const int INCOMPATIBLE_COLUMNS;
    extern const int INCORRECT_FILE_NAME;
    extern const int INVALID_PARTITION_VALUE;
    extern const int LOGICAL_ERROR;
    extern const int METADATA_MISMATCH;
    extern const int NOT_ENOUGH_SPACE;
    extern const int NOT_IMPLEMENTED;
    extern const int NO_SUCH_COLUMN_IN_TABLE;
    extern const int NO_SUCH_DATA_PART;
    extern const int PART_IS_TEMPORARILY_LOCKED;
    extern const int READONLY_SETTING;
    extern const int SUPPORT_IS_DISABLED;
    extern const int TOO_MANY_PARTS;
    extern const int TOO_MANY_UNEXPECTED_DATA_PARTS;
    extern const int UNKNOWN_DISK;
    extern const int UNKNOWN_PART_TYPE;
}
2020-08-27 13:10:10 +00:00
static void checkSampleExpression ( const StorageInMemoryMetadata & metadata , bool allow_sampling_expression_not_in_primary_key )
{
const auto & pk_sample_block = metadata . getPrimaryKey ( ) . sample_block ;
if ( ! pk_sample_block . has ( metadata . sampling_key . column_names [ 0 ] ) & & ! allow_sampling_expression_not_in_primary_key )
throw Exception ( " Sampling expression must be present in the primary key " , ErrorCodes : : BAD_ARGUMENTS ) ;
}
2019-11-18 08:42:46 +00:00
2014-03-09 17:36:01 +00:00
/// Constructor. In order, it:
///  1. validates the settings (only when the table is created, not attached) and the data path;
///  2. checks the partition key and initialises the min-max index description;
///  3. validates and stores the metadata, merging parameters, sampling key and TTL expressions;
///  4. creates the data directories on every disk and locates `format_version.txt`;
///  5. writes the format version file (new table or missing file) or reads and validates the existing one.
///
/// An empty `date_column_name` selects custom partitioning; otherwise the partition key must contain
/// a Date column that can be identified unambiguously.
MergeTreeData::MergeTreeData(
    const StorageID & table_id_,
    const String & relative_data_path_,
    const StorageInMemoryMetadata & metadata_,
    Context & context_,
    const String & date_column_name,
    const MergingParams & merging_params_,
    std::unique_ptr<MergeTreeSettings> storage_settings_,
    bool require_part_metadata_,
    bool attach,
    BrokenPartCallback broken_part_callback_)
    : IStorage(table_id_)
    , global_context(context_)
    , merging_params(merging_params_)
    , require_part_metadata(require_part_metadata_)
    , relative_data_path(relative_data_path_)
    , broken_part_callback(broken_part_callback_)
    , log_name(table_id_.getNameForLogs())
    , log(&Poco::Logger::get(log_name))
    , storage_settings(std::move(storage_settings_))
    , data_parts_by_info(data_parts_indexes.get<TagByInfo>())
    , data_parts_by_state_and_info(data_parts_indexes.get<TagByStateAndInfo>())
    , parts_mover(this)
{
    const auto settings = getSettings();

    /// Nullable keys are always accepted on attach; on create it depends on the setting.
    allow_nullable_key = attach || settings->allow_nullable_key;

    if (relative_data_path.empty())
        throw Exception(" MergeTree storages require data path ", ErrorCodes::INCORRECT_FILE_NAME);

    /// Sanity of MergeTreeSettings is checked only when the table is created.
    if (!attach)
        settings->sanityCheck(global_context.getSettingsRef());

    MergeTreeDataFormatVersion min_format_version(0);
    if (date_column_name.empty())
    {
        /// Custom partitioning: requires a newer on-disk format version.
        is_custom_partitioned = true;
        checkPartitionKeyAndInitMinMax(metadata_.partition_key);
        min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING;
    }
    else
    {
        try
        {
            checkPartitionKeyAndInitMinMax(metadata_.partition_key);

            if (minmax_idx_date_column_pos == -1)
                throw Exception(" Could not find Date column ", ErrorCodes::BAD_TYPE_OF_FIELD);
        }
        catch (Exception & e)
        {
            /// Add the date column name to the error to make it easier to understand.
            e.addMessage(" (while initializing MergeTree partition key from date column " + backQuote(date_column_name) + " ) ");
            throw;
        }
    }

    setProperties(metadata_, metadata_, attach);

    /// NOTE: using the same columns list as is read when performing actual merges.
    merging_params.check(metadata_);

    /// The relaxed mode (on attach or via the compatibility setting) exists for backward compatibility.
    if (metadata_.sampling_key.definition_ast != nullptr)
        checkSampleExpression(metadata_, attach || settings->compatibility_allow_sampling_expression_not_in_primary_key);

    checkTTLExpressions(metadata_, metadata_);

    /// (path, disk) of the format version file; at most one may exist across all data paths.
    PathWithDisk format_version_file;

    /// Create the directories if they do not exist and look for an already present version file.
    for (const auto & [path, disk] : getRelativeDataPathsWithDisks())
    {
        disk->createDirectories(path);
        disk->createDirectories(path + " detached ");

        auto candidate_path = path + " format_version.txt ";
        if (!disk->exists(candidate_path))
            continue;

        if (!format_version_file.first.empty())
        {
            LOG_ERROR(log, " Duplication of version file {} and {} ", fullPath(format_version_file.second, format_version_file.first), candidate_path);
            throw Exception(" Multiple format_version.txt file ", ErrorCodes::CORRUPTED_DATA);
        }

        format_version_file = {candidate_path, disk};
    }

    /// No version file was found on any disk: pick a location on any disk of the storage policy.
    if (format_version_file.first.empty())
        format_version_file = {relative_data_path + " format_version.txt ", getStoragePolicy()->getAnyDisk()};

    const bool version_file_exists = format_version_file.second->exists(format_version_file.first);

    if (attach && version_file_exists)
    {
        /// Existing table with a version file: read the version and require that nothing follows it.
        auto in = format_version_file.second->readFile(format_version_file.first);
        UInt32 read_format_version;
        readIntText(read_format_version, *in);
        format_version = read_format_version;

        if (!in->eof())
            throw Exception(" Bad version file: " + fullPath(format_version_file.second, format_version_file.first), ErrorCodes::CORRUPTED_DATA);
    }
    else
    {
        /// New table, or the file does not exist: skip the format version check and write the minimal version.
        format_version = min_format_version;
        auto out = format_version_file.second->writeFile(format_version_file.first);
        writeIntText(format_version.toUnderType(), *out);

        if (global_context.getSettingsRef().fsync_metadata)
            out->sync();
    }

    if (format_version < min_format_version
        && min_format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING.toUnderType())
        throw Exception(
            " MergeTree data format version on disk doesn't support custom partitioning ",
            ErrorCodes::METADATA_MISMATCH);

    /// Warn when polymorphic parts cannot be used and a reason was reported.
    String reason;
    if (!canUsePolymorphicParts(*settings, &reason) && !reason.empty())
        LOG_WARNING(log, " {} Settings 'min_rows_for_wide_part', 'min_bytes_for_wide_part', "
            " 'min_rows_for_compact_part' and 'min_bytes_for_compact_part' will be ignored. ", reason);
}
2020-01-09 14:50:34 +00:00
/// Returns the storage policy that the global context resolves for the `storage_policy` table setting.
StoragePolicyPtr MergeTreeData::getStoragePolicy() const
{
    const auto settings = getSettings();
    return global_context.getStoragePolicy(settings->storage_policy);
}
2020-07-12 12:58:17 +00:00
static void checkKeyExpression ( const ExpressionActions & expr , const Block & sample_block , const String & key_name , bool allow_nullable_key )
2017-12-09 10:14:45 +00:00
{
2020-11-03 11:28:28 +00:00
for ( const auto & action : expr . getActions ( ) )
2018-02-21 17:05:21 +00:00
{
2020-11-10 14:54:59 +00:00
if ( action . node - > type = = ActionsDAG : : ActionType : : ARRAY_JOIN )
2018-11-22 21:19:58 +00:00
throw Exception ( key_name + " key cannot contain array joins " , ErrorCodes : : ILLEGAL_COLUMN ) ;
2018-02-21 17:05:21 +00:00
2020-11-10 14:54:59 +00:00
if ( action . node - > type = = ActionsDAG : : ActionType : : FUNCTION )
2018-02-21 17:05:21 +00:00
{
2020-11-03 11:28:28 +00:00
IFunctionBase & func = * action . node - > function_base ;
2018-02-21 17:05:21 +00:00
if ( ! func . isDeterministic ( ) )
throw Exception ( key_name + " key cannot contain non-deterministic functions, "
" but contains function " + func . getName ( ) ,
ErrorCodes : : BAD_ARGUMENTS ) ;
}
}
2017-12-09 10:14:45 +00:00
2018-02-21 17:05:21 +00:00
for ( const ColumnWithTypeAndName & element : sample_block )
{
const ColumnPtr & column = element . column ;
2019-06-27 19:28:52 +00:00
if ( column & & ( isColumnConst ( * column ) | | column - > isDummy ( ) ) )
2018-02-21 17:05:21 +00:00
throw Exception { key_name + " key cannot contain constants " , ErrorCodes : : ILLEGAL_COLUMN } ;
2020-07-12 12:58:17 +00:00
if ( ! allow_nullable_key & & element . type - > isNullable ( ) )
2018-02-21 17:05:21 +00:00
throw Exception { key_name + " key cannot contain nullable columns " , ErrorCodes : : ILLEGAL_COLUMN } ;
}
2017-12-09 10:14:45 +00:00
}
2020-06-25 23:21:04 +00:00
void MergeTreeData : : checkProperties (
const StorageInMemoryMetadata & new_metadata , const StorageInMemoryMetadata & old_metadata , bool attach ) const
2016-05-16 23:04:03 +00:00
{
2020-06-09 17:21:38 +00:00
if ( ! new_metadata . sorting_key . definition_ast )
2020-06-05 17:29:40 +00:00
throw Exception ( " ORDER BY cannot be empty " , ErrorCodes : : BAD_ARGUMENTS ) ;
2018-10-23 13:34:04 +00:00
2020-06-10 11:16:31 +00:00
KeyDescription new_sorting_key = new_metadata . sorting_key ;
KeyDescription new_primary_key = new_metadata . primary_key ;
2018-02-09 10:53:50 +00:00
2020-06-05 17:29:40 +00:00
size_t sorting_key_size = new_sorting_key . column_names . size ( ) ;
size_t primary_key_size = new_primary_key . column_names . size ( ) ;
2018-10-12 19:00:43 +00:00
if ( primary_key_size > sorting_key_size )
2018-10-11 17:06:55 +00:00
throw Exception ( " Primary key must be a prefix of the sorting key, but its length: "
2018-10-12 19:00:43 +00:00
+ toString ( primary_key_size ) + " is greater than the sorting key length: " + toString ( sorting_key_size ) ,
2018-10-11 17:06:55 +00:00
ErrorCodes : : BAD_ARGUMENTS ) ;
2017-04-01 07:20:54 +00:00
2019-12-19 14:05:26 +00:00
NameSet primary_key_columns_set ;
2017-04-01 07:20:54 +00:00
2018-10-12 19:00:43 +00:00
for ( size_t i = 0 ; i < sorting_key_size ; + + i )
2017-04-01 07:20:54 +00:00
{
2020-06-05 17:29:40 +00:00
const String & sorting_key_column = new_sorting_key . column_names [ i ] ;
2018-10-11 17:06:55 +00:00
if ( i < primary_key_size )
{
2020-06-05 17:29:40 +00:00
const String & pk_column = new_primary_key . column_names [ i ] ;
2018-10-12 19:00:43 +00:00
if ( pk_column ! = sorting_key_column )
2018-10-11 17:06:55 +00:00
throw Exception ( " Primary key must be a prefix of the sorting key, but in position "
2018-10-12 19:00:43 +00:00
+ toString ( i ) + " its column is " + pk_column + " , not " + sorting_key_column ,
2018-10-11 17:06:55 +00:00
ErrorCodes : : BAD_ARGUMENTS ) ;
2019-12-19 14:05:26 +00:00
if ( ! primary_key_columns_set . emplace ( pk_column ) . second )
throw Exception ( " Primary key contains duplicate columns " , ErrorCodes : : BAD_ARGUMENTS ) ;
2018-10-11 17:06:55 +00:00
}
2018-02-09 10:53:50 +00:00
}
2020-06-09 17:21:38 +00:00
auto all_columns = new_metadata . columns . getAllPhysical ( ) ;
2017-04-01 07:20:54 +00:00
2019-12-26 18:17:05 +00:00
/// Order by check AST
2020-06-17 11:05:11 +00:00
if ( old_metadata . hasSortingKey ( ) )
2018-02-09 10:53:50 +00:00
{
2018-11-13 12:51:55 +00:00
/// This is ALTER, not CREATE/ATTACH TABLE. Let us check that all new columns used in the sorting key
/// expression have just been added (so that the sorting order is guaranteed to be valid with the new key).
2020-06-05 17:29:40 +00:00
Names new_primary_key_columns = new_primary_key . column_names ;
Names new_sorting_key_columns = new_sorting_key . column_names ;
2018-11-13 12:51:55 +00:00
ASTPtr added_key_column_expr_list = std : : make_shared < ASTExpressionList > ( ) ;
2020-06-17 11:05:11 +00:00
const auto & old_sorting_key_columns = old_metadata . getSortingKeyColumns ( ) ;
2018-11-13 12:51:55 +00:00
for ( size_t new_i = 0 , old_i = 0 ; new_i < sorting_key_size ; + + new_i )
{
2020-05-20 18:11:38 +00:00
if ( old_i < old_sorting_key_columns . size ( ) )
2018-11-13 12:51:55 +00:00
{
2020-05-20 18:11:38 +00:00
if ( new_sorting_key_columns [ new_i ] ! = old_sorting_key_columns [ old_i ] )
2020-06-05 17:29:40 +00:00
added_key_column_expr_list - > children . push_back ( new_sorting_key . expression_list_ast - > children [ new_i ] ) ;
2018-11-13 12:51:55 +00:00
else
+ + old_i ;
}
else
2020-06-05 17:29:40 +00:00
added_key_column_expr_list - > children . push_back ( new_sorting_key . expression_list_ast - > children [ new_i ] ) ;
2018-11-13 12:51:55 +00:00
}
if ( ! added_key_column_expr_list - > children . empty ( ) )
{
2020-07-22 17:13:05 +00:00
auto syntax = TreeRewriter ( global_context ) . analyze ( added_key_column_expr_list , all_columns ) ;
2019-08-09 14:50:04 +00:00
Names used_columns = syntax - > requiredSourceColumns ( ) ;
2018-11-13 12:51:55 +00:00
NamesAndTypesList deleted_columns ;
NamesAndTypesList added_columns ;
2020-06-17 11:05:11 +00:00
old_metadata . getColumns ( ) . getAllPhysical ( ) . getDifference ( all_columns , deleted_columns , added_columns ) ;
2018-11-08 15:43:14 +00:00
2018-11-13 12:51:55 +00:00
for ( const String & col : used_columns )
{
if ( ! added_columns . contains ( col ) | | deleted_columns . contains ( col ) )
2020-06-25 23:21:04 +00:00
throw Exception ( " Existing column " + backQuoteIfNeed ( col ) + " is used in the expression that was "
2018-11-13 12:51:55 +00:00
" added to the sorting key. You can add expressions that use only the newly added columns " ,
ErrorCodes : : BAD_ARGUMENTS ) ;
2020-06-09 17:21:38 +00:00
if ( new_metadata . columns . getDefaults ( ) . count ( col ) )
2020-06-25 23:21:04 +00:00
throw Exception ( " Newly added column " + backQuoteIfNeed ( col ) + " has a default expression, so adding "
2018-11-13 12:51:55 +00:00
" expressions that use it to the sorting key is forbidden " ,
ErrorCodes : : BAD_ARGUMENTS ) ;
}
}
2018-02-09 10:53:50 +00:00
}
2017-04-01 07:20:54 +00:00
2020-06-09 17:21:38 +00:00
if ( ! new_metadata . secondary_indices . empty ( ) )
2019-02-05 14:50:25 +00:00
{
2020-06-05 17:29:40 +00:00
std : : unordered_set < String > indices_names ;
2019-02-05 14:50:25 +00:00
2020-06-09 17:21:38 +00:00
for ( const auto & index : new_metadata . secondary_indices )
2019-02-05 14:50:25 +00:00
{
2020-05-28 12:37:05 +00:00
MergeTreeIndexFactory : : instance ( ) . validate ( index , attach ) ;
2019-02-05 14:50:25 +00:00
2020-05-28 12:37:05 +00:00
if ( indices_names . find ( index . name ) ! = indices_names . end ( ) )
2019-02-05 14:50:25 +00:00
throw Exception (
2020-08-08 00:47:03 +00:00
" Index with name " + backQuote ( index . name ) + " already exists " ,
2019-02-05 14:50:25 +00:00
ErrorCodes : : LOGICAL_ERROR ) ;
2020-05-28 12:37:05 +00:00
indices_names . insert ( index . name ) ;
2019-02-05 14:50:25 +00:00
}
}
2020-07-12 12:58:17 +00:00
checkKeyExpression ( * new_sorting_key . expression , new_sorting_key . sample_block , " Sorting " , allow_nullable_key ) ;
2020-06-08 14:18:38 +00:00
2020-06-10 11:16:31 +00:00
}
2020-06-17 10:34:23 +00:00
void MergeTreeData : : setProperties ( const StorageInMemoryMetadata & new_metadata , const StorageInMemoryMetadata & old_metadata , bool attach )
2020-06-10 11:16:31 +00:00
{
2020-06-17 10:34:23 +00:00
checkProperties ( new_metadata , old_metadata , attach ) ;
2020-06-15 16:55:33 +00:00
setInMemoryMetadata ( new_metadata ) ;
2014-03-13 12:48:07 +00:00
}
2014-03-09 17:36:01 +00:00
2020-06-01 12:39:20 +00:00
namespace
{

/// Builds the expression actions for the expressions of `key` followed by the expressions of every index
/// in `indices`, analysed against the physical columns of `columns`.
ExpressionActionsPtr getCombinedIndicesExpression(
    const KeyDescription & key,
    const IndicesDescription & indices,
    const ColumnsDescription & columns,
    const Context & context)
{
    /// Work on clones so that the ASTs stored in the descriptions are left untouched.
    ASTPtr combined_expr_list = key.expression_list_ast->clone();
    auto & combined_children = combined_expr_list->children;

    for (const auto & index : indices)
    {
        const auto & index_children = index.expression_list_ast->children;
        for (const auto & index_expr : index_children)
            combined_children.push_back(index_expr->clone());
    }

    auto syntax_result = TreeRewriter(context).analyze(combined_expr_list, columns.getAllPhysical());
    return ExpressionAnalyzer(combined_expr_list, syntax_result, context).getActions(false);
}

}
2020-06-17 09:38:47 +00:00
/// Returns the combined expression of the primary key and all secondary indices of the given metadata snapshot.
ExpressionActionsPtr MergeTreeData::getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const
{
    const auto & snapshot = *metadata_snapshot;
    return getCombinedIndicesExpression(
        snapshot.getPrimaryKey(), snapshot.getSecondaryIndices(), snapshot.getColumns(), global_context);
}
2020-06-17 09:38:47 +00:00
/// Returns the combined expression of the sorting key and all secondary indices of the given metadata snapshot.
ExpressionActionsPtr MergeTreeData::getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const
{
    const auto & snapshot = *metadata_snapshot;
    return getCombinedIndicesExpression(
        snapshot.getSortingKey(), snapshot.getSecondaryIndices(), snapshot.getColumns(), global_context);
}
2018-11-13 12:51:55 +00:00
2016-04-15 17:42:51 +00:00
2020-06-19 15:21:48 +00:00
void MergeTreeData : : checkPartitionKeyAndInitMinMax ( const KeyDescription & new_partition_key )
2017-08-19 18:11:20 +00:00
{
2020-05-21 19:46:03 +00:00
if ( new_partition_key . expression_list_ast - > children . empty ( ) )
2017-09-01 20:33:17 +00:00
return ;
2020-07-12 12:58:17 +00:00
checkKeyExpression ( * new_partition_key . expression , new_partition_key . sample_block , " Partition " , allow_nullable_key ) ;
2018-02-21 17:05:21 +00:00
2017-09-01 20:33:17 +00:00
/// Add all columns used in the partition key to the min-max index.
2020-05-21 19:46:03 +00:00
const NamesAndTypesList & minmax_idx_columns_with_types = new_partition_key . expression - > getRequiredColumnsWithTypes ( ) ;
2020-11-10 16:27:55 +00:00
minmax_idx_expr = std : : make_shared < ExpressionActions > ( std : : make_shared < ActionsDAG > ( minmax_idx_columns_with_types ) ) ;
2017-12-25 21:57:29 +00:00
for ( const NameAndTypePair & column : minmax_idx_columns_with_types )
2017-08-21 15:35:29 +00:00
{
minmax_idx_columns . emplace_back ( column . name ) ;
minmax_idx_column_types . emplace_back ( column . type ) ;
}
2017-09-01 20:33:17 +00:00
/// Try to find the date column in columns used by the partition key (a common case).
2017-08-21 15:35:29 +00:00
bool encountered_date_column = false ;
for ( size_t i = 0 ; i < minmax_idx_column_types . size ( ) ; + + i )
{
if ( typeid_cast < const DataTypeDate * > ( minmax_idx_column_types [ i ] . get ( ) ) )
{
if ( ! encountered_date_column )
{
minmax_idx_date_column_pos = i ;
encountered_date_column = true ;
}
else
{
/// There is more than one Date column in partition key and we don't know which one to choose.
minmax_idx_date_column_pos = - 1 ;
}
}
}
2018-12-10 10:20:19 +00:00
if ( ! encountered_date_column )
{
for ( size_t i = 0 ; i < minmax_idx_column_types . size ( ) ; + + i )
{
if ( typeid_cast < const DataTypeDateTime * > ( minmax_idx_column_types [ i ] . get ( ) ) )
{
if ( ! encountered_date_column )
{
minmax_idx_time_column_pos = i ;
encountered_date_column = true ;
}
else
{
/// There is more than one DateTime column in partition key and we don't know which one to choose.
minmax_idx_time_column_pos = - 1 ;
}
}
}
}
2017-08-19 18:11:20 +00:00
}
2019-04-15 09:30:45 +00:00
2020-06-17 10:34:23 +00:00
void MergeTreeData : : checkTTLExpressions ( const StorageInMemoryMetadata & new_metadata , const StorageInMemoryMetadata & old_metadata ) const
2019-04-15 09:30:45 +00:00
{
2020-06-10 09:09:51 +00:00
auto new_column_ttls = new_metadata . column_ttls_by_name ;
2019-04-15 09:30:45 +00:00
2020-06-10 09:09:51 +00:00
if ( ! new_column_ttls . empty ( ) )
2019-04-15 09:30:45 +00:00
{
NameSet columns_ttl_forbidden ;
2020-06-17 10:34:23 +00:00
if ( old_metadata . hasPartitionKey ( ) )
for ( const auto & col : old_metadata . getColumnsRequiredForPartitionKey ( ) )
2019-04-15 09:30:45 +00:00
columns_ttl_forbidden . insert ( col ) ;
2020-06-17 11:05:11 +00:00
if ( old_metadata . hasSortingKey ( ) )
for ( const auto & col : old_metadata . getColumnsRequiredForSortingKey ( ) )
2019-04-15 09:30:45 +00:00
columns_ttl_forbidden . insert ( col ) ;
2020-06-10 09:09:51 +00:00
for ( const auto & [ name , ttl_description ] : new_column_ttls )
2019-04-15 09:30:45 +00:00
{
if ( columns_ttl_forbidden . count ( name ) )
2019-11-26 08:02:48 +00:00
throw Exception ( " Trying to set TTL for key column " + name , ErrorCodes : : ILLEGAL_COLUMN ) ;
2019-04-15 09:30:45 +00:00
}
}
2020-06-10 09:09:51 +00:00
auto new_table_ttl = new_metadata . table_ttl ;
2020-06-05 17:29:40 +00:00
if ( new_table_ttl . definition_ast )
2019-04-15 09:30:45 +00:00
{
2020-06-05 17:29:40 +00:00
for ( const auto & move_ttl : new_table_ttl . move_ttl )
2019-04-15 09:30:45 +00:00
{
2020-09-18 15:41:14 +00:00
if ( ! getDestinationForMoveTTL ( move_ttl ) )
2019-10-16 07:32:37 +00:00
{
2020-06-05 17:29:40 +00:00
String message ;
if ( move_ttl . destination_type = = DataDestinationType : : DISK )
message = " No such disk " + backQuote ( move_ttl . destination_name ) + " for given storage policy. " ;
else
message = " No such volume " + backQuote ( move_ttl . destination_name ) + " for given storage policy. " ;
throw Exception ( message , ErrorCodes : : BAD_TTL_EXPRESSION ) ;
2019-10-16 07:32:37 +00:00
}
2019-04-15 09:30:45 +00:00
}
}
}
2017-08-19 18:11:20 +00:00
2020-04-22 06:22:14 +00:00
void MergeTreeData : : checkStoragePolicy ( const StoragePolicyPtr & new_storage_policy ) const
2020-01-09 13:52:37 +00:00
{
const auto old_storage_policy = getStoragePolicy ( ) ;
2020-01-09 14:50:34 +00:00
old_storage_policy - > checkCompatibleWith ( new_storage_policy ) ;
2020-01-09 13:52:37 +00:00
}
2020-07-06 14:33:31 +00:00
void MergeTreeData : : MergingParams : : check ( const StorageInMemoryMetadata & metadata ) const
2016-04-15 17:42:51 +00:00
{
2020-07-06 14:33:31 +00:00
const auto columns = metadata . getColumns ( ) . getAllPhysical ( ) ;
2018-02-02 09:46:54 +00:00
if ( ! sign_column . empty ( ) & & mode ! = MergingParams : : Collapsing & & mode ! = MergingParams : : VersionedCollapsing )
throw Exception ( " Sign column for MergeTree cannot be specified in modes except Collapsing or VersionedCollapsing. " ,
2018-01-29 17:42:19 +00:00
ErrorCodes : : LOGICAL_ERROR ) ;
2018-02-02 09:46:54 +00:00
if ( ! version_column . empty ( ) & & mode ! = MergingParams : : Replacing & & mode ! = MergingParams : : VersionedCollapsing )
throw Exception ( " Version column for MergeTree cannot be specified in modes except Replacing or VersionedCollapsing. " ,
2018-01-29 17:42:19 +00:00
ErrorCodes : : LOGICAL_ERROR ) ;
if ( ! columns_to_sum . empty ( ) & & mode ! = MergingParams : : Summing )
throw Exception ( " List of columns to sum for MergeTree cannot be specified in all modes except Summing. " ,
ErrorCodes : : LOGICAL_ERROR ) ;
2017-04-01 07:20:54 +00:00
/// Check that if the sign column is needed, it exists and is of type Int8.
2018-01-29 17:42:19 +00:00
auto check_sign_column = [ this , & columns ] ( bool is_optional , const std : : string & storage )
2017-04-01 07:20:54 +00:00
{
if ( sign_column . empty ( ) )
2018-01-29 17:42:19 +00:00
{
if ( is_optional )
return ;
throw Exception ( " Logical error: Sign column for storage " + storage + " is empty " , ErrorCodes : : LOGICAL_ERROR ) ;
}
2017-04-01 07:20:54 +00:00
2018-01-01 12:00:05 +00:00
bool miss_column = true ;
2017-04-01 07:20:54 +00:00
for ( const auto & column : columns )
{
if ( column . name = = sign_column )
{
if ( ! typeid_cast < const DataTypeInt8 * > ( column . type . get ( ) ) )
2018-01-29 17:42:19 +00:00
throw Exception ( " Sign column ( " + sign_column + " ) for storage " + storage + " must have type Int8. "
" Provided column of type " + column . type - > getName ( ) + " . " , ErrorCodes : : BAD_TYPE_OF_FIELD ) ;
2018-01-01 12:00:05 +00:00
miss_column = false ;
2017-04-01 07:20:54 +00:00
break ;
}
}
2018-01-09 18:00:19 +00:00
if ( miss_column )
2018-11-22 21:19:58 +00:00
throw Exception ( " Sign column " + sign_column + " does not exist in table declaration. " , ErrorCodes : : NO_SUCH_COLUMN_IN_TABLE ) ;
2018-01-29 17:42:19 +00:00
} ;
2017-04-01 07:20:54 +00:00
2018-01-29 17:42:19 +00:00
/// that if the version_column column is needed, it exists and is of unsigned integer type.
auto check_version_column = [ this , & columns ] ( bool is_optional , const std : : string & storage )
2017-04-01 07:20:54 +00:00
{
2018-01-29 17:42:19 +00:00
if ( version_column . empty ( ) )
{
if ( is_optional )
return ;
2017-04-01 07:20:54 +00:00
2018-01-29 17:42:19 +00:00
throw Exception ( " Logical error: Version column for storage " + storage + " is empty " , ErrorCodes : : LOGICAL_ERROR ) ;
}
2017-04-01 07:20:54 +00:00
2018-01-01 12:00:05 +00:00
bool miss_column = true ;
2017-04-01 07:20:54 +00:00
for ( const auto & column : columns )
{
if ( column . name = = version_column )
{
2018-03-06 14:49:27 +00:00
if ( ! column . type - > canBeUsedAsVersion ( ) )
throw Exception ( " The column " + version_column +
" cannot be used as a version column for storage " + storage +
" because it is of type " + column . type - > getName ( ) +
2018-03-06 19:01:45 +00:00
" (must be of an integer type or of type Date or DateTime) " , ErrorCodes : : BAD_TYPE_OF_FIELD ) ;
2018-01-01 12:00:05 +00:00
miss_column = false ;
2017-04-01 07:20:54 +00:00
break ;
}
}
2018-01-09 18:00:19 +00:00
if ( miss_column )
2018-11-22 21:19:58 +00:00
throw Exception ( " Version column " + version_column + " does not exist in table declaration. " , ErrorCodes : : NO_SUCH_COLUMN_IN_TABLE ) ;
2018-01-29 17:42:19 +00:00
} ;
if ( mode = = MergingParams : : Collapsing )
check_sign_column ( false , " CollapsingMergeTree " ) ;
if ( mode = = MergingParams : : Summing )
{
/// If columns_to_sum are set, then check that such columns exist.
for ( const auto & column_to_sum : columns_to_sum )
2018-02-02 12:14:30 +00:00
{
auto check_column_to_sum_exists = [ & column_to_sum ] ( const NameAndTypePair & name_and_type )
{
return column_to_sum = = Nested : : extractTableName ( name_and_type . name ) ;
} ;
if ( columns . end ( ) = = std : : find_if ( columns . begin ( ) , columns . end ( ) , check_column_to_sum_exists ) )
throw Exception (
2018-11-22 21:19:58 +00:00
" Column " + column_to_sum + " listed in columns to sum does not exist in table declaration. " , ErrorCodes : : NO_SUCH_COLUMN_IN_TABLE ) ;
2018-02-02 12:14:30 +00:00
}
2020-07-06 14:33:31 +00:00
/// Check that summing columns are not in partition key.
if ( metadata . isPartitionKeyDefined ( ) )
{
auto partition_key_columns = metadata . getPartitionKey ( ) . column_names ;
Names names_intersection ;
std : : set_intersection ( columns_to_sum . begin ( ) , columns_to_sum . end ( ) ,
partition_key_columns . begin ( ) , partition_key_columns . end ( ) ,
std : : back_inserter ( names_intersection ) ) ;
if ( ! names_intersection . empty ( ) )
2020-09-17 14:38:06 +00:00
throw Exception ( " Columns: " + boost : : algorithm : : join ( names_intersection , " , " ) +
2020-08-08 00:47:03 +00:00
" listed both in columns to sum and in partition key. That is not allowed. " , ErrorCodes : : BAD_ARGUMENTS ) ;
2020-07-06 14:33:31 +00:00
}
2018-01-29 17:42:19 +00:00
}
if ( mode = = MergingParams : : Replacing )
check_version_column ( true , " ReplacingMergeTree " ) ;
2018-02-02 09:46:54 +00:00
if ( mode = = MergingParams : : VersionedCollapsing )
2018-01-29 17:42:19 +00:00
{
2018-02-02 09:46:54 +00:00
check_sign_column ( false , " VersionedCollapsingMergeTree " ) ;
check_version_column ( false , " VersionedCollapsingMergeTree " ) ;
2017-04-01 07:20:54 +00:00
}
/// TODO Checks for Graphite mode.
2016-04-24 09:44:47 +00:00
}
/// Returns the textual name of the merging mode stored in `mode`.
String MergeTreeData::MergingParams::getModeName() const
{
    const char * mode_name = nullptr;

    /// One branch per enumerator and no default branch.
    switch (mode)
    {
        case Ordinary:
            mode_name = " ";
            break;
        case Collapsing:
            mode_name = " Collapsing ";
            break;
        case Summing:
            mode_name = " Summing ";
            break;
        case Aggregating:
            mode_name = " Aggregating ";
            break;
        case Replacing:
            mode_name = " Replacing ";
            break;
        case Graphite:
            mode_name = " Graphite ";
            break;
        case VersionedCollapsing:
            mode_name = " VersionedCollapsing ";
            break;
    }

    /// Reaching this point without a name means `mode` holds a value outside the enumeration.
    if (!mode_name)
        __builtin_unreachable();

    return mode_name;
}
2019-05-04 03:45:58 +00:00
/// Returns the largest value among `info.max_block` and `info.mutation` over all parts
/// in data_parts_by_info, or 0 if there are no parts (or all values are not positive).
Int64 MergeTreeData::getMaxBlockNumber() const
{
    /// The parts container is read under the parts lock.
    auto parts_lock = lockParts();

    Int64 highest = 0;
    for (const DataPartPtr & part : data_parts_by_info)
    {
        const auto & info = part->info;
        highest = std::max(highest, std::max(info.max_block, info.mutation));
    }

    return highest;
}
2014-08-13 08:07:52 +00:00
/// Scans relative_data_path on every disk of the storage policy, loads each part directory found there
/// (using a thread pool), collects parts restored from write-ahead log files, and fills data_parts_indexes.
/// Parts that fail to load are either removed or renamed to detached; parts covered by another part
/// are switched to the Outdated state at the end.
/// If skip_sanity_checks is true, the check for parts lying on disks outside the storage policy and
/// the limit on the number of suspicious broken parts are not enforced.
void MergeTreeData : : loadDataParts ( bool skip_sanity_checks )
2014-03-09 17:36:01 +00:00
{
2017-04-01 07:20:54 +00:00
LOG_DEBUG ( log , " Loading data parts " ) ;
2020-06-26 11:30:23 +00:00
auto metadata_snapshot = getInMemoryMetadataPtr ( ) ;
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2019-11-27 09:39:44 +00:00
std : : vector < std : : pair < String , DiskPtr > > part_names_with_disks ;
2020-05-05 15:06:16 +00:00
MutableDataPartsVector parts_from_wal ;
2017-04-01 07:20:54 +00:00
Strings part_file_names ;
2019-04-28 14:49:41 +00:00
2020-01-09 14:50:34 +00:00
auto disks = getStoragePolicy ( ) - > getDisks ( ) ;
2019-04-28 14:49:41 +00:00
2019-12-27 18:58:59 +00:00
/// Only check if user did touch storage configuration for this table.
2019-12-28 07:30:12 +00:00
if ( ! getStoragePolicy ( ) - > isDefaultPolicy ( ) & & ! skip_sanity_checks )
2019-12-10 09:31:26 +00:00
{
/// Check extra parts at different disks, in order to not allow to miss data parts at undefined disks.
2019-12-10 14:15:42 +00:00
std : : unordered_set < String > defined_disk_names ;
for ( const auto & disk_ptr : disks )
defined_disk_names . insert ( disk_ptr - > getName ( ) ) ;
2020-05-21 14:11:56 +00:00
for ( const auto & [ disk_name , disk ] : global_context . getDisksMap ( ) )
2019-12-10 09:31:26 +00:00
{
2020-02-27 16:47:40 +00:00
if ( defined_disk_names . count ( disk_name ) = = 0 & & disk - > exists ( relative_data_path ) )
2019-12-10 09:31:26 +00:00
{
2020-02-27 16:47:40 +00:00
for ( const auto it = disk - > iterateDirectory ( relative_data_path ) ; it - > isValid ( ) ; it - > next ( ) )
2019-12-10 09:31:26 +00:00
{
2019-12-25 22:12:17 +00:00
MergeTreePartInfo part_info ;
2020-02-27 16:47:40 +00:00
if ( MergeTreePartInfo : : tryParsePartName ( it - > name ( ) , & part_info , format_version ) )
throw Exception ( " Part " + backQuote ( it - > name ( ) ) + " was found on disk " + backQuote ( disk_name ) + " which is not defined in the storage policy " , ErrorCodes : : UNKNOWN_DISK ) ;
2019-12-10 09:31:26 +00:00
}
}
}
}
2019-07-16 13:06:23 +00:00
/// Reversed order to load part from low priority disks firstly.
/// Used for keep part on low priority disk if duplication found
for ( auto disk_it = disks . rbegin ( ) ; disk_it ! = disks . rend ( ) ; + + disk_it )
2017-04-01 07:20:54 +00:00
{
2019-07-16 13:06:23 +00:00
auto disk_ptr = * disk_it ;
2020-02-27 16:47:40 +00:00
for ( auto it = disk_ptr - > iterateDirectory ( relative_data_path ) ; it - > isValid ( ) ; it - > next ( ) )
2019-04-01 18:41:19 +00:00
{
/// Skip temporary directories.
2020-02-27 16:47:40 +00:00
if ( startsWith ( it - > name ( ) , " tmp " ) )
2019-04-01 18:41:19 +00:00
continue ;
2017-04-01 07:20:54 +00:00
2020-02-27 16:47:40 +00:00
part_names_with_disks . emplace_back ( it - > name ( ) , disk_ptr ) ;
2020-04-14 19:47:19 +00:00
2020-05-29 15:02:12 +00:00
/// Create and correctly initialize global WAL object, if it's needed
2020-06-22 18:56:53 +00:00
if ( it - > name ( ) = = MergeTreeWriteAheadLog : : DEFAULT_WAL_FILE_NAME & & settings - > in_memory_parts_enable_wal )
2020-05-29 15:02:12 +00:00
{
write_ahead_log = std : : make_shared < MergeTreeWriteAheadLog > ( * this , disk_ptr , it - > name ( ) ) ;
2020-06-26 11:30:23 +00:00
for ( auto & & part : write_ahead_log - > restore ( metadata_snapshot ) )
2020-05-29 15:02:12 +00:00
parts_from_wal . push_back ( std : : move ( part ) ) ;
}
else if ( startsWith ( it - > name ( ) , MergeTreeWriteAheadLog : : WAL_FILE_NAME ) )
2020-05-05 15:06:16 +00:00
{
MergeTreeWriteAheadLog wal ( * this , disk_ptr , it - > name ( ) ) ;
2020-06-26 11:30:23 +00:00
for ( auto & & part : wal . restore ( metadata_snapshot ) )
2020-05-05 15:06:16 +00:00
parts_from_wal . push_back ( std : : move ( part ) ) ;
}
2019-04-01 18:41:19 +00:00
}
2017-04-01 07:20:54 +00:00
}
2019-08-11 20:02:51 +00:00
/// The parts lock is taken only now, after the directory scan and WAL restore, and is held until the function returns.
auto part_lock = lockParts ( ) ;
data_parts_indexes . clear ( ) ;
2020-05-05 15:06:16 +00:00
if ( part_names_with_disks . empty ( ) & & parts_from_wal . empty ( ) )
2019-08-11 20:02:51 +00:00
{
LOG_DEBUG ( log , " There is no data parts " ) ;
return ;
}
/// Parallel loading of data parts.
2019-08-29 16:17:47 +00:00
/// NOTE(review): num_threads is 0 when part_names_with_disks is empty but parts_from_wal is not — confirm ThreadPool accepts that.
size_t num_threads = std : : min ( size_t ( settings - > max_part_loading_threads ) , part_names_with_disks . size ( ) ) ;
2019-08-11 20:02:51 +00:00
/// Guards the vectors, the counter and data_parts_indexes that the pool tasks below modify.
std : : mutex mutex ;
2017-04-01 07:20:54 +00:00
DataPartsVector broken_parts_to_remove ;
DataPartsVector broken_parts_to_detach ;
size_t suspicious_broken_parts = 0 ;
2019-08-11 20:02:51 +00:00
std : : atomic < bool > has_adaptive_parts = false ;
std : : atomic < bool > has_non_adaptive_parts = false ;
ThreadPool pool ( num_threads ) ;
2017-11-20 19:33:12 +00:00
2019-08-14 09:46:30 +00:00
for ( size_t i = 0 ; i < part_names_with_disks . size ( ) ; + + i )
2017-04-01 07:20:54 +00:00
{
2019-10-17 14:41:27 +00:00
pool . scheduleOrThrowOnError ( [ & , i ]
2017-04-01 07:20:54 +00:00
{
2019-08-14 11:59:45 +00:00
const auto & part_name = part_names_with_disks [ i ] . first ;
const auto part_disk_ptr = part_names_with_disks [ i ] . second ;
2019-10-31 14:44:17 +00:00
2019-08-11 20:02:51 +00:00
MergeTreePartInfo part_info ;
2019-08-14 09:46:30 +00:00
/// Directory entries whose names do not parse as part names are skipped without any message.
if ( ! MergeTreePartInfo : : tryParsePartName ( part_name , & part_info , format_version ) )
2019-08-11 20:02:51 +00:00
return ;
2017-04-01 07:20:54 +00:00
2020-10-20 15:10:24 +00:00
auto single_disk_volume = std : : make_shared < SingleDiskVolume > ( " volume_ " + part_name , part_disk_ptr , 0 ) ;
2020-05-09 21:24:15 +00:00
auto part = createPart ( part_name , part_info , single_disk_volume , part_name ) ;
2019-08-11 20:02:51 +00:00
bool broken = false ;
2020-02-27 16:47:40 +00:00
String part_path = relative_data_path + " / " + part_name ;
2020-08-26 15:29:46 +00:00
String marker_path = part_path + " / " + IMergeTreeDataPart : : DELETE_ON_DESTROY_MARKER_FILE_NAME ;
2020-02-27 16:47:40 +00:00
/// A part directory that still holds the delete-on-destroy marker is queued for detaching and counted as suspicious.
if ( part_disk_ptr - > exists ( marker_path ) )
2019-12-09 13:44:11 +00:00
{
2020-05-23 22:24:01 +00:00
LOG_WARNING ( log , " Detaching stale part {}{}, which should have been deleted after a move. That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part. " , getFullPathOnDisk ( part_disk_ptr ) , part_name ) ;
2019-12-09 13:44:11 +00:00
std : : lock_guard loading_lock ( mutex ) ;
broken_parts_to_detach . push_back ( part ) ;
+ + suspicious_broken_parts ;
2019-12-09 16:20:56 +00:00
return ;
2019-12-09 13:44:11 +00:00
}
2019-08-11 20:02:51 +00:00
try
2017-04-01 07:20:54 +00:00
{
2019-08-11 20:02:51 +00:00
part - > loadColumnsChecksumsIndexes ( require_part_metadata , true ) ;
2017-04-01 07:20:54 +00:00
}
2019-08-11 20:02:51 +00:00
catch ( const Exception & e )
2017-04-01 07:20:54 +00:00
{
2019-08-11 20:02:51 +00:00
/// Don't count the part as broken if there is not enough memory to load it.
/// In fact, there can be many similar situations.
/// But it is OK, because there is a safety guard against deleting too many parts.
2020-04-20 01:44:24 +00:00
if ( isNotEnoughMemoryErrorCode ( e . code ( ) ) )
2019-08-11 20:02:51 +00:00
throw ;
broken = true ;
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
}
catch ( . . . )
{
broken = true ;
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
}
2017-04-01 07:20:54 +00:00
2019-08-11 20:02:51 +00:00
/// Ignore and possibly delete broken parts that can appear as a result of hard server restart.
if ( broken )
{
if ( part - > info . level = = 0 )
2017-04-01 07:20:54 +00:00
{
2019-08-11 20:02:51 +00:00
/// It is impossible to restore level 0 parts.
2020-05-23 22:24:01 +00:00
LOG_ERROR ( log , " Considering to remove broken part {}{} because it's impossible to repair. " , getFullPathOnDisk ( part_disk_ptr ) , part_name ) ;
2019-08-11 20:02:51 +00:00
std : : lock_guard loading_lock ( mutex ) ;
broken_parts_to_remove . push_back ( part ) ;
}
else
{
/// Count the number of parts covered by the broken part. If it is at least two, assume that
/// the broken part was created as a result of merging them and we won't lose data if we
/// delete it.
size_t contained_parts = 0 ;
2017-04-18 20:38:07 +00:00
2020-05-23 22:24:01 +00:00
LOG_ERROR ( log , " Part {}{} is broken. Looking for parts to replace it. " , getFullPathOnDisk ( part_disk_ptr ) , part_name ) ;
2017-04-18 20:38:07 +00:00
2019-08-14 09:46:30 +00:00
for ( const auto & [ contained_name , contained_disk_ptr ] : part_names_with_disks )
2017-04-01 07:20:54 +00:00
{
2019-08-14 09:46:30 +00:00
if ( contained_name = = part_name )
2019-08-11 20:02:51 +00:00
continue ;
MergeTreePartInfo contained_part_info ;
if ( ! MergeTreePartInfo : : tryParsePartName ( contained_name , & contained_part_info , format_version ) )
continue ;
if ( part - > info . contains ( contained_part_info ) )
{
2020-05-23 22:24:01 +00:00
LOG_ERROR ( log , " Found part {}{} " , getFullPathOnDisk ( contained_disk_ptr ) , contained_name ) ;
2019-08-11 20:02:51 +00:00
+ + contained_parts ;
}
2017-04-01 07:20:54 +00:00
}
2019-08-11 20:02:51 +00:00
if ( contained_parts > = 2 )
{
2020-05-23 22:24:01 +00:00
LOG_ERROR ( log , " Considering to remove broken part {}{} because it covers at least 2 other parts " , getFullPathOnDisk ( part_disk_ptr ) , part_name ) ;
2019-08-11 20:02:51 +00:00
std : : lock_guard loading_lock ( mutex ) ;
broken_parts_to_remove . push_back ( part ) ;
}
else
{
2020-05-23 22:24:01 +00:00
LOG_ERROR ( log , " Detaching broken part {}{} because it covers less than 2 parts. You need to resolve this manually " , getFullPathOnDisk ( part_disk_ptr ) , part_name ) ;
2019-08-11 20:02:51 +00:00
std : : lock_guard loading_lock ( mutex ) ;
broken_parts_to_detach . push_back ( part ) ;
+ + suspicious_broken_parts ;
}
2017-04-01 07:20:54 +00:00
}
2019-08-11 20:02:51 +00:00
return ;
}
if ( ! part - > index_granularity_info . is_adaptive )
has_non_adaptive_parts . store ( true , std : : memory_order_relaxed ) ;
else
has_adaptive_parts . store ( true , std : : memory_order_relaxed ) ;
2017-04-01 07:20:54 +00:00
2020-03-05 14:02:15 +00:00
/// NOTE(review): this path is built without the separator used for part_path above — presumably relative_data_path already ends with one; confirm.
part - > modification_time = part_disk_ptr - > getLastModified ( relative_data_path + part_name ) . epochTime ( ) ;
2019-08-11 20:02:51 +00:00
/// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later
part - > state = DataPartState : : Committed ;
2017-04-01 07:20:54 +00:00
2019-08-11 20:02:51 +00:00
std : : lock_guard loading_lock ( mutex ) ;
if ( ! data_parts_indexes . insert ( part ) . second )
throw Exception ( " Part " + part - > name + " already exists " , ErrorCodes : : DUPLICATE_DATA_PART ) ;
} ) ;
2017-04-01 07:20:54 +00:00
}
2019-08-11 20:02:51 +00:00
pool . wait ( ) ;
2020-05-05 15:06:16 +00:00
/// Parts restored from WAL are added after the pool has finished; a WAL part is skipped
/// when getActiveContainingPart finds a Committed part for its info.
for ( auto & part : parts_from_wal )
{
2020-05-27 20:05:55 +00:00
if ( getActiveContainingPart ( part - > info , DataPartState : : Committed , part_lock ) )
continue ;
2020-05-05 15:06:16 +00:00
part - > modification_time = time ( nullptr ) ;
/// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later
part - > state = DataPartState : : Committed ;
if ( ! data_parts_indexes . insert ( part ) . second )
throw Exception ( " Part " + part - > name + " already exists " , ErrorCodes : : DUPLICATE_DATA_PART ) ;
}
2019-08-13 10:29:31 +00:00
if ( has_non_adaptive_parts & & has_adaptive_parts & & ! settings - > enable_mixed_granularity_parts )
2019-06-19 14:46:06 +00:00
throw Exception ( " Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled " , ErrorCodes : : LOGICAL_ERROR ) ;
has_non_adaptive_index_granularity_parts = has_non_adaptive_parts ;
2019-08-13 10:29:31 +00:00
if ( suspicious_broken_parts > settings - > max_suspicious_broken_parts & & ! skip_sanity_checks )
2017-04-01 07:20:54 +00:00
throw Exception ( " Suspiciously many ( " + toString ( suspicious_broken_parts ) + " ) broken parts to remove. " ,
ErrorCodes : : TOO_MANY_UNEXPECTED_DATA_PARTS ) ;
2017-05-16 15:40:32 +00:00
/// Broken parts are removed or detached only after the suspicious-parts check above did not throw.
for ( auto & part : broken_parts_to_remove )
2017-04-01 07:20:54 +00:00
part - > remove ( ) ;
2017-05-16 15:40:32 +00:00
for ( auto & part : broken_parts_to_detach )
2018-05-21 13:49:54 +00:00
part - > renameToDetached ( " " ) ;
2017-04-01 07:20:54 +00:00
2020-08-26 15:29:46 +00:00
2017-04-01 07:20:54 +00:00
/// Delete from the set of current parts those parts that are covered by another part (those parts that
2017-06-21 19:07:08 +00:00
/// were merged), but that for some reason are still not deleted from the filesystem.
2017-04-01 07:20:54 +00:00
/// Deletion of files will be performed later in the clearOldParts() method.
2017-11-20 19:33:12 +00:00
if ( data_parts_indexes . size ( ) > = 2 )
2017-04-01 07:20:54 +00:00
{
2018-02-19 15:31:43 +00:00
/// Now all parts are committed, so data_parts_by_state_and_info == committed_parts_range
auto prev_jt = data_parts_by_state_and_info . begin ( ) ;
2017-09-21 21:51:17 +00:00
auto curr_jt = std : : next ( prev_jt ) ;
2018-02-19 15:31:43 +00:00
/// Marks a part as Outdated and uses its modification time as the remove time.
auto deactivate_part = [ & ] ( DataPartIteratorByStateAndInfo it )
2017-11-20 19:33:12 +00:00
{
2018-03-03 17:44:53 +00:00
( * it ) - > remove_time . store ( ( * it ) - > modification_time , std : : memory_order_relaxed ) ;
2017-11-20 19:33:12 +00:00
modifyPartState ( it , DataPartState : : Outdated ) ;
} ;
( * prev_jt ) - > assertState ( { DataPartState : : Committed } ) ;
2018-02-19 15:31:43 +00:00
/// Compare neighbouring parts (prev, curr) of the same partition: whichever one is contained in the other is deactivated.
while ( curr_jt ! = data_parts_by_state_and_info . end ( ) & & ( * curr_jt ) - > state = = DataPartState : : Committed )
2017-04-01 07:20:54 +00:00
{
2017-08-14 18:16:11 +00:00
/// Don't consider data parts belonging to different partitions.
if ( ( * curr_jt ) - > info . partition_id ! = ( * prev_jt ) - > info . partition_id )
2017-04-01 07:20:54 +00:00
{
+ + prev_jt ;
+ + curr_jt ;
continue ;
}
if ( ( * curr_jt ) - > contains ( * * prev_jt ) )
{
2017-11-20 19:33:12 +00:00
deactivate_part ( prev_jt ) ;
2017-04-01 07:20:54 +00:00
prev_jt = curr_jt ;
+ + curr_jt ;
}
else if ( ( * prev_jt ) - > contains ( * * curr_jt ) )
{
2017-11-20 19:33:12 +00:00
/// The successor is taken before deactivation, then used as the new curr_jt.
auto next = std : : next ( curr_jt ) ;
deactivate_part ( curr_jt ) ;
curr_jt = next ;
2017-04-01 07:20:54 +00:00
}
else
{
+ + prev_jt ;
+ + curr_jt ;
}
}
}
2017-05-14 23:14:21 +00:00
calculateColumnSizesImpl ( ) ;
2017-04-01 07:20:54 +00:00
2020-08-26 15:29:46 +00:00
2020-05-23 22:24:01 +00:00
LOG_DEBUG ( log , " Loaded data parts ({} items) " , data_parts_indexes . size ( ) ) ;
2014-03-09 17:36:01 +00:00
}
2017-02-07 17:52:41 +00:00
/// Is the part directory old.
/// True if its modification time and the modification time of all files inside it is less then threshold.
/// (Only files on the first level of nesting are considered).
2020-03-05 14:02:15 +00:00
static bool isOldPartDirectory ( const DiskPtr & disk , const String & directory_path , time_t threshold )
2016-06-06 19:16:34 +00:00
{
2020-03-05 14:02:15 +00:00
if ( disk - > getLastModified ( directory_path ) . epochTime ( ) > = threshold )
2017-04-01 07:20:54 +00:00
return false ;
2016-06-06 19:16:34 +00:00
2020-03-05 14:02:15 +00:00
for ( auto it = disk - > iterateDirectory ( directory_path ) ; it - > isValid ( ) ; it - > next ( ) )
if ( disk - > getLastModified ( it - > path ( ) ) . epochTime ( ) > = threshold )
2017-04-01 07:20:54 +00:00
return false ;
2016-06-06 19:16:34 +00:00
2017-04-01 07:20:54 +00:00
return true ;
2016-06-06 19:16:34 +00:00
}
2017-05-31 15:01:25 +00:00
void MergeTreeData : : clearOldTemporaryDirectories ( ssize_t custom_directories_lifetime_seconds )
2014-03-09 17:36:01 +00:00
{
2017-04-01 07:20:54 +00:00
/// If the method is already called from another thread, then we don't need to do anything.
2019-01-02 06:44:36 +00:00
std : : unique_lock lock ( clear_old_temporary_directories_mutex , std : : defer_lock ) ;
2017-04-01 07:20:54 +00:00
if ( ! lock . try_lock ( ) )
return ;
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2017-08-04 14:00:26 +00:00
time_t current_time = time ( nullptr ) ;
2017-05-31 15:01:25 +00:00
ssize_t deadline = ( custom_directories_lifetime_seconds > = 0 )
? current_time - custom_directories_lifetime_seconds
2019-08-13 10:29:31 +00:00
: current_time - settings - > temporary_directories_lifetime . totalSeconds ( ) ;
2017-04-01 07:20:54 +00:00
/// Delete temporary directories older than a day.
2020-02-27 16:47:40 +00:00
for ( const auto & [ path , disk ] : getRelativeDataPathsWithDisks ( ) )
2017-04-01 07:20:54 +00:00
{
2020-02-27 16:47:40 +00:00
for ( auto it = disk - > iterateDirectory ( path ) ; it - > isValid ( ) ; it - > next ( ) )
2017-04-01 07:20:54 +00:00
{
2020-02-27 16:47:40 +00:00
if ( startsWith ( it - > name ( ) , " tmp_ " ) )
2017-04-01 07:20:54 +00:00
{
2019-04-05 17:37:27 +00:00
try
2017-04-01 07:20:54 +00:00
{
2020-03-05 14:02:15 +00:00
if ( disk - > isDirectory ( it - > path ( ) ) & & isOldPartDirectory ( disk , it - > path ( ) , deadline ) )
2019-04-05 17:37:27 +00:00
{
2020-05-23 22:24:01 +00:00
LOG_WARNING ( log , " Removing temporary directory {} " , fullPath ( disk , it - > path ( ) ) ) ;
2020-03-05 14:02:15 +00:00
disk - > removeRecursive ( it - > path ( ) ) ;
2019-04-01 18:41:19 +00:00
}
}
2019-04-05 17:37:27 +00:00
catch ( const Poco : : FileNotFoundException & )
{
2019-04-01 18:41:19 +00:00
/// If the file is already deleted, do nothing.
2017-04-01 07:20:54 +00:00
}
}
}
}
2016-02-14 11:02:47 +00:00
}
2020-01-10 09:46:24 +00:00
/// Picks Outdated parts that can be deleted, switches them to the Deleting state and returns them.
/// A part is picked only if nobody else holds a reference to it and at least one of these holds:
///  - its remove time is in the past and older than the old_parts_lifetime setting;
///  - `force` is true;
///  - isInMemoryPart(part) is true.
/// Returns an empty vector if another thread is already executing this method.
MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force)
{
    DataPartsVector res;

    /// If the method is already called from another thread, then we don't need to do anything.
    std::unique_lock lock(grab_old_parts_mutex, std::defer_lock);
    if (!lock.try_lock())
        return res;

    time_t now = time(nullptr);
    std::vector<DataPartIteratorByStateAndInfo> parts_to_delete;

    {
        auto parts_lock = lockParts();

        /// The setting does not depend on the part, so read it once instead of once per part.
        const auto old_parts_lifetime_seconds = getSettings()->old_parts_lifetime.totalSeconds();

        auto outdated_parts_range = getDataPartsStateRange(DataPartState::Outdated);
        for (auto it = outdated_parts_range.begin(); it != outdated_parts_range.end(); ++it)
        {
            const DataPartPtr & part = *it;

            /// Grab only parts that are not used by anyone (SELECTs for example).
            /// shared_ptr::unique() is deprecated since C++17, use_count() == 1 is its exact equivalent.
            if (part.use_count() != 1)
                continue;

            auto part_remove_time = part->remove_time.load(std::memory_order_relaxed);
            const bool lifetime_expired = part_remove_time < now && now - part_remove_time > old_parts_lifetime_seconds;

            /// Remove in-memory parts immediately to not store excessive data in RAM
            if (lifetime_expired || force || isInMemoryPart(part))
                parts_to_delete.emplace_back(it);
        }

        /// State changes are applied after the scan, so the range above is not modified while it is iterated.
        res.reserve(parts_to_delete.size());
        for (const auto & it_to_delete : parts_to_delete)
        {
            res.emplace_back(*it_to_delete);
            modifyPartState(it_to_delete, DataPartState::Deleting);
        }
    }

    if (!res.empty())
        LOG_TRACE(log, " Found {} old parts to remove. ", res.size());

    return res;
}
2016-02-14 11:02:47 +00:00
2017-09-11 22:40:51 +00:00
void MergeTreeData : : rollbackDeletingParts ( const MergeTreeData : : DataPartsVector & parts )
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2020-04-22 06:22:14 +00:00
for ( const auto & part : parts )
2017-09-11 22:40:51 +00:00
{
/// We should modify it under data_parts_mutex
2017-09-21 21:51:17 +00:00
part - > assertState ( { DataPartState : : Deleting } ) ;
2017-11-20 19:33:12 +00:00
modifyPartState ( part , DataPartState : : Outdated ) ;
2017-09-11 22:40:51 +00:00
}
}
void MergeTreeData : : removePartsFinally ( const MergeTreeData : : DataPartsVector & parts )
2014-07-25 11:15:11 +00:00
{
2018-01-23 22:56:46 +00:00
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-09-11 22:40:51 +00:00
2018-01-23 22:56:46 +00:00
/// TODO: use data_parts iterators instead of pointers
2020-04-22 06:22:14 +00:00
for ( const auto & part : parts )
2018-01-23 22:56:46 +00:00
{
2018-02-19 15:31:43 +00:00
auto it = data_parts_by_info . find ( part - > info ) ;
if ( it = = data_parts_by_info . end ( ) )
throw Exception ( " Deleting data part " + part - > name + " doesn't exist " , ErrorCodes : : LOGICAL_ERROR ) ;
2018-01-23 22:56:46 +00:00
( * it ) - > assertState ( { DataPartState : : Deleting } ) ;
data_parts_indexes . erase ( it ) ;
}
}
/// Data parts is still alive (since DataPartsVector holds shared_ptrs) and contain useful metainformation for logging
/// NOTE: There is no need to log parts deletion somewhere else, all deleting parts pass through this function and pass away
2019-12-03 16:25:32 +00:00
auto table_id = getStorageID ( ) ;
2019-12-12 12:30:31 +00:00
if ( auto part_log = global_context . getPartLog ( table_id . database_name ) )
2017-09-11 22:40:51 +00:00
{
2018-01-23 22:56:46 +00:00
PartLogElement part_log_elem ;
part_log_elem . event_type = PartLogElement : : REMOVE_PART ;
part_log_elem . event_time = time ( nullptr ) ;
part_log_elem . duration_ms = 0 ;
2017-09-11 22:40:51 +00:00
2019-12-03 16:25:32 +00:00
part_log_elem . database_name = table_id . database_name ;
part_log_elem . table_name = table_id . table_name ;
2017-11-20 19:33:12 +00:00
2020-04-22 06:22:14 +00:00
for ( const auto & part : parts )
2018-01-23 22:56:46 +00:00
{
2019-01-31 17:30:56 +00:00
part_log_elem . partition_id = part - > info . partition_id ;
2018-01-23 22:56:46 +00:00
part_log_elem . part_name = part - > name ;
2020-03-23 13:32:02 +00:00
part_log_elem . bytes_compressed_on_disk = part - > getBytesOnDisk ( ) ;
2018-01-23 22:56:46 +00:00
part_log_elem . rows = part - > rows_count ;
part_log - > add ( part_log_elem ) ;
}
2017-09-11 22:40:51 +00:00
}
2014-07-25 11:15:11 +00:00
}
2020-01-10 09:46:24 +00:00
void MergeTreeData : : clearOldPartsFromFilesystem ( bool force )
2014-07-25 11:15:11 +00:00
{
2020-01-10 09:46:24 +00:00
DataPartsVector parts_to_remove = grabOldParts ( force ) ;
2019-08-11 19:14:42 +00:00
clearPartsFromFilesystem ( parts_to_remove ) ;
removePartsFinally ( parts_to_remove ) ;
}
2014-07-25 11:15:11 +00:00
2019-08-11 19:14:42 +00:00
void MergeTreeData : : clearPartsFromFilesystem ( const DataPartsVector & parts_to_remove )
{
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2019-08-13 10:29:31 +00:00
if ( parts_to_remove . size ( ) > 1 & & settings - > max_part_removal_threads > 1 & & parts_to_remove . size ( ) > settings - > concurrent_part_removal_threshold )
2017-04-01 07:20:54 +00:00
{
2019-08-11 19:14:42 +00:00
/// Parallel parts removal.
2017-11-20 19:33:12 +00:00
2019-08-13 10:29:31 +00:00
size_t num_threads = std : : min ( size_t ( settings - > max_part_removal_threads ) , parts_to_remove . size ( ) ) ;
2019-08-11 19:14:42 +00:00
ThreadPool pool ( num_threads ) ;
/// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool.
2019-08-11 19:30:39 +00:00
for ( const DataPartPtr & part : parts_to_remove )
2019-08-11 19:14:42 +00:00
{
2019-10-17 14:41:27 +00:00
pool . scheduleOrThrowOnError ( [ & ]
2019-08-11 19:14:42 +00:00
{
2020-05-23 22:24:01 +00:00
LOG_DEBUG ( log , " Removing part from filesystem {} " , part - > name ) ;
2019-08-11 19:30:39 +00:00
part - > remove ( ) ;
2019-08-11 19:14:42 +00:00
} ) ;
}
pool . wait ( ) ;
}
else
{
for ( const DataPartPtr & part : parts_to_remove )
{
2020-05-23 22:24:01 +00:00
LOG_DEBUG ( log , " Removing part from filesystem {} " , part - > name ) ;
2019-08-11 19:14:42 +00:00
part - > remove ( ) ;
}
}
2014-07-25 11:15:11 +00:00
}
2020-05-27 20:05:55 +00:00
void MergeTreeData : : clearOldWriteAheadLogs ( )
{
DataPartsVector parts = getDataPartsVector ( ) ;
std : : vector < std : : pair < Int64 , Int64 > > all_block_numbers_on_disk ;
std : : vector < std : : pair < Int64 , Int64 > > block_numbers_on_disk ;
for ( const auto & part : parts )
if ( part - > isStoredOnDisk ( ) )
all_block_numbers_on_disk . emplace_back ( part - > info . min_block , part - > info . max_block ) ;
if ( all_block_numbers_on_disk . empty ( ) )
return ;
std : : sort ( all_block_numbers_on_disk . begin ( ) , all_block_numbers_on_disk . end ( ) ) ;
block_numbers_on_disk . push_back ( all_block_numbers_on_disk [ 0 ] ) ;
for ( size_t i = 1 ; i < all_block_numbers_on_disk . size ( ) ; + + i )
{
if ( all_block_numbers_on_disk [ i ] . first = = all_block_numbers_on_disk [ i - 1 ] . second + 1 )
block_numbers_on_disk . back ( ) . second = all_block_numbers_on_disk [ i ] . second ;
else
block_numbers_on_disk . push_back ( all_block_numbers_on_disk [ i ] ) ;
}
auto is_range_on_disk = [ & block_numbers_on_disk ] ( Int64 min_block , Int64 max_block )
{
2020-06-03 22:00:02 +00:00
auto lower = std : : lower_bound ( block_numbers_on_disk . begin ( ) , block_numbers_on_disk . end ( ) , std : : make_pair ( min_block , Int64 ( - 1L ) ) ) ;
2020-05-27 20:05:55 +00:00
if ( lower ! = block_numbers_on_disk . end ( ) & & min_block > = lower - > first & & max_block < = lower - > second )
return true ;
if ( lower ! = block_numbers_on_disk . begin ( ) )
{
- - lower ;
if ( min_block > = lower - > first & & max_block < = lower - > second )
return true ;
}
return false ;
} ;
auto disks = getStoragePolicy ( ) - > getDisks ( ) ;
for ( auto disk_it = disks . rbegin ( ) ; disk_it ! = disks . rend ( ) ; + + disk_it )
{
auto disk_ptr = * disk_it ;
for ( auto it = disk_ptr - > iterateDirectory ( relative_data_path ) ; it - > isValid ( ) ; it - > next ( ) )
{
auto min_max_block_number = MergeTreeWriteAheadLog : : tryParseMinMaxBlockNumber ( it - > name ( ) ) ;
if ( min_max_block_number & & is_range_on_disk ( min_max_block_number - > first , min_max_block_number - > second ) )
{
LOG_DEBUG ( log , " Removing from filesystem outdated WAL file " + it - > name ( ) ) ;
disk_ptr - > remove ( relative_data_path + it - > name ( ) ) ;
}
}
}
}
2020-11-11 16:18:21 +00:00
void MergeTreeData : : clearEmptyParts ( )
{
2020-11-15 02:24:47 +00:00
if ( ! getSettings ( ) - > remove_empty_parts )
return ;
2020-11-11 16:18:21 +00:00
auto parts = getDataPartsVector ( ) ;
for ( const auto & part : parts )
{
if ( part - > rows_count = = 0 )
{
ASTPtr literal = std : : make_shared < ASTLiteral > ( part - > name ) ;
2020-11-12 17:36:02 +00:00
/// If another replica has already started drop, it's ok, no need to throw.
dropPartition ( literal , /* detach = */ false , /*drop_part = */ true , global_context , /* throw_if_noop = */ false ) ;
2020-11-11 16:18:21 +00:00
}
}
}
2020-04-07 14:05:51 +00:00
void MergeTreeData : : rename ( const String & new_table_path , const StorageID & new_table_id )
2014-03-09 17:36:01 +00:00
{
2020-01-09 14:50:34 +00:00
auto disks = getStoragePolicy ( ) - > getDisks ( ) ;
2019-04-28 14:49:41 +00:00
for ( const auto & disk : disks )
2019-04-05 19:58:59 +00:00
{
2019-12-03 13:37:40 +00:00
if ( disk - > exists ( new_table_path ) )
throw Exception { " Target path already exists: " + fullPath ( disk , new_table_path ) , ErrorCodes : : DIRECTORY_ALREADY_EXISTS } ;
2019-04-05 19:45:59 +00:00
}
2019-04-28 14:49:41 +00:00
for ( const auto & disk : disks )
2019-04-06 15:21:29 +00:00
{
2020-03-19 16:37:55 +00:00
auto new_table_path_parent = parentPath ( new_table_path ) ;
2020-01-16 18:13:18 +00:00
disk - > createDirectories ( new_table_path_parent ) ;
2020-01-02 14:37:31 +00:00
disk - > moveDirectory ( relative_data_path , new_table_path ) ;
2019-04-06 15:21:29 +00:00
}
2017-12-03 02:15:35 +00:00
2020-08-15 06:56:47 +00:00
if ( ! getStorageID ( ) . hasUUID ( ) )
global_context . dropCaches ( ) ;
2019-04-21 18:38:44 +00:00
2019-10-28 20:12:14 +00:00
relative_data_path = new_table_path ;
2020-04-07 14:05:51 +00:00
renameInMemory ( new_table_id ) ;
2014-03-09 17:36:01 +00:00
}
2014-03-13 12:48:07 +00:00
void MergeTreeData : : dropAllData ( )
2014-03-09 17:36:01 +00:00
{
2017-04-01 07:20:54 +00:00
LOG_TRACE ( log , " dropAllData: waiting for locks. " ) ;
2015-09-17 21:31:26 +00:00
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2015-09-17 21:31:26 +00:00
2017-04-01 07:20:54 +00:00
LOG_TRACE ( log , " dropAllData: removing data from memory. " ) ;
2015-09-17 21:31:26 +00:00
2019-08-11 19:14:42 +00:00
DataPartsVector all_parts ( data_parts_by_info . begin ( ) , data_parts_by_info . end ( ) ) ;
2017-11-20 19:33:12 +00:00
data_parts_indexes . clear ( ) ;
2017-04-01 07:20:54 +00:00
column_sizes . clear ( ) ;
2014-03-09 17:36:01 +00:00
2020-08-15 06:56:47 +00:00
/// Tables in atomic databases have UUID and stored in persistent locations.
/// No need to drop caches (that are keyed by filesystem path) because collision is not possible.
if ( ! getStorageID ( ) . hasUUID ( ) )
global_context . dropCaches ( ) ;
2014-03-13 19:14:25 +00:00
2017-04-01 07:20:54 +00:00
LOG_TRACE ( log , " dropAllData: removing data from filesystem. " ) ;
2015-09-17 21:31:26 +00:00
2019-06-16 19:59:30 +00:00
/// Removing of each data part before recursive removal of directory is to speed-up removal, because there will be less number of syscalls.
2019-08-11 19:14:42 +00:00
clearPartsFromFilesystem ( all_parts ) ;
2019-06-16 19:59:30 +00:00
2020-02-27 16:47:40 +00:00
for ( const auto & [ path , disk ] : getRelativeDataPathsWithDisks ( ) )
2020-08-13 14:22:04 +00:00
{
try
{
disk - > removeRecursive ( path ) ;
}
catch ( const Poco : : FileNotFoundException & )
{
2020-08-14 01:54:51 +00:00
/// If the file is already deleted, log the error message and do nothing.
tryLogCurrentException ( __PRETTY_FUNCTION__ ) ;
2020-08-13 14:22:04 +00:00
}
}
2015-09-17 21:31:26 +00:00
2017-04-01 07:20:54 +00:00
LOG_TRACE ( log , " dropAllData: done. " ) ;
2014-03-09 17:36:01 +00:00
}
2020-06-11 03:24:52 +00:00
void MergeTreeData : : dropIfEmpty ( )
{
LOG_TRACE ( log , " dropIfEmpty " ) ;
auto lock = lockParts ( ) ;
if ( ! data_parts_by_info . empty ( ) )
return ;
for ( const auto & [ path , disk ] : getRelativeDataPathsWithDisks ( ) )
{
/// Non recursive, exception is thrown if there are more files.
disk - > remove ( path + " format_version.txt " ) ;
disk - > remove ( path + " detached " ) ;
disk - > remove ( path ) ;
}
}
2020-07-21 15:19:41 +00:00
namespace
{
2020-06-25 23:21:04 +00:00
/// Conversion that is allowed for partition key.
/// Partition key should be serialized in the same way after conversion.
/// NOTE: The list is not complete.
bool isSafeForPartitionKeyConversion ( const IDataType * from , const IDataType * to )
{
if ( from - > getName ( ) = = to - > getName ( ) )
return true ;
/// Enums are serialized in partition key as numbers - so conversion from Enum to number is Ok.
2020-06-26 20:40:20 +00:00
/// But only for types of identical width because they are serialized as binary in minmax index.
2020-06-25 23:21:04 +00:00
/// But not from number to Enum because Enum does not necessarily represents all numbers.
2020-06-26 20:40:20 +00:00
if ( const auto * from_enum8 = typeid_cast < const DataTypeEnum8 * > ( from ) )
{
if ( const auto * to_enum8 = typeid_cast < const DataTypeEnum8 * > ( to ) )
return to_enum8 - > contains ( * from_enum8 ) ;
if ( typeid_cast < const DataTypeInt8 * > ( to ) )
2020-06-27 16:19:53 +00:00
return true ; // NOLINT
2020-06-26 20:40:20 +00:00
return false ;
}
2020-06-25 23:21:04 +00:00
2020-06-26 20:40:20 +00:00
if ( const auto * from_enum16 = typeid_cast < const DataTypeEnum16 * > ( from ) )
2020-06-25 23:21:04 +00:00
{
2020-06-26 20:40:20 +00:00
if ( const auto * to_enum16 = typeid_cast < const DataTypeEnum16 * > ( to ) )
return to_enum16 - > contains ( * from_enum16 ) ;
if ( typeid_cast < const DataTypeInt16 * > ( to ) )
2020-06-27 16:19:53 +00:00
return true ; // NOLINT
2020-06-26 20:40:20 +00:00
return false ;
2020-06-25 23:21:04 +00:00
}
return false ;
}
2020-09-30 19:50:23 +00:00
/// Special check for alters of VersionedCollapsingMergeTree version column
void checkVersionColumnTypesConversion ( const IDataType * old_type , const IDataType * new_type , const String column_name )
{
/// Check new type can be used as version
if ( ! new_type - > canBeUsedAsVersion ( ) )
throw Exception ( " Cannot alter version column " + backQuoteIfNeed ( column_name ) +
" to type " + new_type - > getName ( ) +
" because version column must be of an integer type or of type Date or DateTime "
, ErrorCodes : : ALTER_OF_COLUMN_IS_FORBIDDEN ) ;
auto which_new_type = WhichDataType ( new_type ) ;
auto which_old_type = WhichDataType ( old_type ) ;
/// Check alter to different sign or float -> int and so on
if ( ( which_old_type . isInt ( ) & & ! which_new_type . isInt ( ) )
| | ( which_old_type . isUInt ( ) & & ! which_new_type . isUInt ( ) )
| | ( which_old_type . isDate ( ) & & ! which_new_type . isDate ( ) )
| | ( which_old_type . isDateTime ( ) & & ! which_new_type . isDateTime ( ) )
| | ( which_old_type . isFloat ( ) & & ! which_new_type . isFloat ( ) ) )
{
throw Exception ( " Cannot alter version column " + backQuoteIfNeed ( column_name ) +
" from type " + old_type - > getName ( ) +
" to type " + new_type - > getName ( ) + " because new type will change sort order of version column. " +
" The only possible conversion is expansion of the number of bytes of the current type. "
, ErrorCodes : : ALTER_OF_COLUMN_IS_FORBIDDEN ) ;
}
/// Check alter to smaller size: UInt64 -> UInt32 and so on
if ( new_type - > getSizeOfValueInMemory ( ) < old_type - > getSizeOfValueInMemory ( ) )
{
throw Exception ( " Cannot alter version column " + backQuoteIfNeed ( column_name ) +
" from type " + old_type - > getName ( ) +
" to type " + new_type - > getName ( ) + " because new type is smaller than current in the number of bytes. " +
" The only possible conversion is expansion of the number of bytes of the current type. "
, ErrorCodes : : ALTER_OF_COLUMN_IS_FORBIDDEN ) ;
}
}
2017-02-08 18:43:35 +00:00
}
2020-07-21 14:05:30 +00:00
void MergeTreeData : : checkAlterIsPossible ( const AlterCommands & commands , const Settings & settings ) const
2014-03-09 17:36:01 +00:00
{
2017-04-01 07:20:54 +00:00
/// Check that needed transformations can be applied to the list of columns without considering type conversions.
2020-06-09 21:22:01 +00:00
StorageInMemoryMetadata new_metadata = getInMemoryMetadata ( ) ;
2020-06-17 09:38:47 +00:00
StorageInMemoryMetadata old_metadata = getInMemoryMetadata ( ) ;
2020-07-21 14:05:30 +00:00
if ( ! settings . allow_non_metadata_alters )
{
auto mutation_commands = commands . getMutationCommands ( new_metadata , settings . materialize_ttl_after_modify , global_context ) ;
if ( ! mutation_commands . empty ( ) )
throw Exception ( ErrorCodes : : ALTER_OF_COLUMN_IS_FORBIDDEN , " The following alter commands: '{}' will modify data on disk, but setting `allow_non_metadata_alters` is disabled " , queryToString ( mutation_commands . ast ( ) ) ) ;
}
2020-06-09 21:22:01 +00:00
commands . apply ( new_metadata , global_context ) ;
2017-04-01 07:20:54 +00:00
/// Set of columns that shouldn't be altered.
2019-12-24 18:07:51 +00:00
NameSet columns_alter_type_forbidden ;
2017-04-01 07:20:54 +00:00
2017-09-12 19:20:56 +00:00
/// Primary key columns can be ALTERed only if they are used in the key as-is
2017-04-01 07:20:54 +00:00
/// (and not as a part of some expression) and if the ALTER only affects column metadata.
2019-12-24 18:07:51 +00:00
NameSet columns_alter_type_metadata_only ;
2017-04-01 07:20:54 +00:00
2020-06-25 23:21:04 +00:00
/// Columns to check that the type change is safe for partition key.
NameSet columns_alter_type_check_safe_for_partition ;
2020-06-17 10:34:23 +00:00
if ( old_metadata . hasPartitionKey ( ) )
2017-04-01 07:20:54 +00:00
{
2020-06-25 23:21:04 +00:00
/// Forbid altering columns inside partition key expressions because it can change partition ID format.
auto partition_key_expr = old_metadata . getPartitionKey ( ) . expression ;
2020-11-03 11:28:28 +00:00
for ( const auto & action : partition_key_expr - > getActions ( ) )
2020-06-25 23:21:04 +00:00
{
2020-11-03 11:28:28 +00:00
for ( const auto * child : action . node - > children )
columns_alter_type_forbidden . insert ( child - > result_name ) ;
2020-06-25 23:21:04 +00:00
}
/// But allow to alter columns without expressions under certain condition.
for ( const String & col : partition_key_expr - > getRequiredColumns ( ) )
columns_alter_type_check_safe_for_partition . insert ( col ) ;
2017-09-12 19:20:56 +00:00
}
2017-08-31 13:33:32 +00:00
2020-06-17 09:38:47 +00:00
for ( const auto & index : old_metadata . getSecondaryIndices ( ) )
2019-02-05 14:50:25 +00:00
{
2020-05-28 12:37:05 +00:00
for ( const String & col : index . expression - > getRequiredColumns ( ) )
2019-12-24 18:07:51 +00:00
columns_alter_type_forbidden . insert ( col ) ;
2019-02-05 14:50:25 +00:00
}
2020-06-17 11:05:11 +00:00
if ( old_metadata . hasSortingKey ( ) )
2017-09-12 19:20:56 +00:00
{
2020-06-17 11:05:11 +00:00
auto sorting_key_expr = old_metadata . getSortingKey ( ) . expression ;
2020-11-03 11:28:28 +00:00
for ( const auto & action : sorting_key_expr - > getActions ( ) )
2017-04-01 07:20:54 +00:00
{
2020-11-03 11:28:28 +00:00
for ( const auto * child : action . node - > children )
columns_alter_type_forbidden . insert ( child - > result_name ) ;
2017-04-01 07:20:54 +00:00
}
2018-10-12 19:00:43 +00:00
for ( const String & col : sorting_key_expr - > getRequiredColumns ( ) )
2019-12-24 18:07:51 +00:00
columns_alter_type_metadata_only . insert ( col ) ;
2017-08-31 13:33:32 +00:00
2018-11-09 19:01:39 +00:00
/// We don't process sample_by_ast separately because it must be among the primary key columns
2018-10-12 19:00:43 +00:00
/// and we don't process primary_key_expr separately because it is a prefix of sorting_key_expr.
2018-10-11 14:53:23 +00:00
}
2017-08-31 13:33:32 +00:00
if ( ! merging_params . sign_column . empty ( ) )
2019-12-24 18:07:51 +00:00
columns_alter_type_forbidden . insert ( merging_params . sign_column ) ;
2017-04-01 07:20:54 +00:00
2020-06-25 23:37:24 +00:00
/// All of the above.
NameSet columns_in_keys ;
columns_in_keys . insert ( columns_alter_type_forbidden . begin ( ) , columns_alter_type_forbidden . end ( ) ) ;
columns_in_keys . insert ( columns_alter_type_metadata_only . begin ( ) , columns_alter_type_metadata_only . end ( ) ) ;
columns_in_keys . insert ( columns_alter_type_check_safe_for_partition . begin ( ) , columns_alter_type_check_safe_for_partition . end ( ) ) ;
2020-07-13 17:27:52 +00:00
NameSet dropped_columns ;
2017-04-01 07:20:54 +00:00
std : : map < String , const IDataType * > old_types ;
2020-06-17 10:34:23 +00:00
for ( const auto & column : old_metadata . getColumns ( ) . getAllPhysical ( ) )
2017-04-01 07:20:54 +00:00
old_types . emplace ( column . name , column . type . get ( ) ) ;
for ( const AlterCommand & command : commands )
{
2020-07-13 17:27:52 +00:00
/// Just validate partition expression
if ( command . partition )
{
getPartitionIDFromQuery ( command . partition , global_context ) ;
}
2020-09-30 19:44:35 +00:00
/// Some type changes for version column is allowed despite it's a part of sorting key
if ( command . type = = AlterCommand : : MODIFY_COLUMN & & command . column_name = = merging_params . version_column )
{
2020-10-01 06:53:47 +00:00
const IDataType * new_type = command . data_type . get ( ) ;
const IDataType * old_type = old_types [ command . column_name ] ;
2020-09-30 19:44:35 +00:00
2020-10-01 06:53:47 +00:00
checkVersionColumnTypesConversion ( old_type , new_type , command . column_name ) ;
2020-09-30 19:44:35 +00:00
2020-09-30 19:50:23 +00:00
/// No other checks required
2020-09-30 19:44:35 +00:00
continue ;
}
2019-12-24 18:07:51 +00:00
if ( command . type = = AlterCommand : : MODIFY_ORDER_BY & & ! is_custom_partitioned )
2018-10-14 15:30:06 +00:00
{
2019-12-24 18:07:51 +00:00
throw Exception (
" ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax " ,
ErrorCodes : : BAD_ARGUMENTS ) ;
2018-10-14 15:30:06 +00:00
}
2020-08-27 13:10:10 +00:00
if ( command . type = = AlterCommand : : MODIFY_SAMPLE_BY )
{
2020-08-28 10:18:56 +00:00
if ( ! is_custom_partitioned )
throw Exception (
" ALTER MODIFY SAMPLE BY is not supported for default-partitioned tables created with the old syntax " ,
ErrorCodes : : BAD_ARGUMENTS ) ;
2020-08-27 13:10:10 +00:00
checkSampleExpression ( new_metadata , getSettings ( ) - > compatibility_allow_sampling_expression_not_in_primary_key ) ;
}
2020-01-30 07:13:09 +00:00
if ( command . type = = AlterCommand : : ADD_INDEX & & ! is_custom_partitioned )
2020-01-24 16:47:05 +00:00
{
throw Exception (
" ALTER ADD INDEX is not supported for tables with the old syntax " ,
ErrorCodes : : BAD_ARGUMENTS ) ;
}
2020-04-01 18:21:27 +00:00
if ( command . type = = AlterCommand : : RENAME_COLUMN )
{
2020-06-25 23:37:24 +00:00
if ( columns_in_keys . count ( command . column_name ) )
2020-04-01 18:21:27 +00:00
{
throw Exception (
" Trying to ALTER RENAME key " + backQuoteIfNeed ( command . column_name ) + " column which is a part of key expression " ,
2020-05-17 07:47:53 +00:00
ErrorCodes : : ALTER_OF_COLUMN_IS_FORBIDDEN ) ;
2020-04-01 18:21:27 +00:00
}
}
2020-06-25 23:37:24 +00:00
else if ( command . type = = AlterCommand : : DROP_COLUMN )
{
if ( columns_in_keys . count ( command . column_name ) )
{
throw Exception (
" Trying to ALTER DROP key " + backQuoteIfNeed ( command . column_name ) + " column which is a part of key expression " ,
ErrorCodes : : ALTER_OF_COLUMN_IS_FORBIDDEN ) ;
}
2020-07-13 17:27:52 +00:00
dropped_columns . emplace ( command . column_name ) ;
2020-06-25 23:37:24 +00:00
}
2020-07-21 14:05:30 +00:00
else if ( command . isRequireMutationStage ( getInMemoryMetadata ( ) ) )
2017-04-01 07:20:54 +00:00
{
2020-07-21 14:05:30 +00:00
/// This alter will override data on disk. Let's check that it doesn't
/// modify immutable column.
2019-12-24 18:07:51 +00:00
if ( columns_alter_type_forbidden . count ( command . column_name ) )
2020-06-25 23:37:24 +00:00
throw Exception ( " ALTER of key column " + backQuoteIfNeed ( command . column_name ) + " is forbidden " ,
ErrorCodes : : ALTER_OF_COLUMN_IS_FORBIDDEN ) ;
2020-06-25 23:21:04 +00:00
if ( columns_alter_type_check_safe_for_partition . count ( command . column_name ) )
{
if ( command . type = = AlterCommand : : MODIFY_COLUMN )
{
auto it = old_types . find ( command . column_name ) ;
2020-07-21 14:05:30 +00:00
assert ( it ! = old_types . end ( ) ) ;
if ( ! isSafeForPartitionKeyConversion ( it - > second , command . data_type . get ( ) ) )
2020-06-25 23:21:04 +00:00
throw Exception ( " ALTER of partition key column " + backQuoteIfNeed ( command . column_name ) + " from type "
+ it - > second - > getName ( ) + " to type " + command . data_type - > getName ( )
2020-06-26 20:40:20 +00:00
+ " is not safe because it can change the representation of partition key " ,
2020-06-25 23:21:04 +00:00
ErrorCodes : : ALTER_OF_COLUMN_IS_FORBIDDEN ) ;
}
}
2019-12-24 18:07:51 +00:00
if ( columns_alter_type_metadata_only . count ( command . column_name ) )
2017-04-01 07:20:54 +00:00
{
2019-12-24 18:07:51 +00:00
if ( command . type = = AlterCommand : : MODIFY_COLUMN )
{
auto it = old_types . find ( command . column_name ) ;
2020-07-21 14:05:30 +00:00
assert ( it ! = old_types . end ( ) ) ;
throw Exception ( " ALTER of key column " + backQuoteIfNeed ( command . column_name ) + " from type "
+ it - > second - > getName ( ) + " to type " + command . data_type - > getName ( ) + " must be metadata-only " ,
ErrorCodes : : ALTER_OF_COLUMN_IS_FORBIDDEN ) ;
2019-12-24 18:07:51 +00:00
}
2017-04-01 07:20:54 +00:00
}
2018-11-13 12:51:55 +00:00
}
2017-04-01 07:20:54 +00:00
}
2020-06-17 10:34:23 +00:00
checkProperties ( new_metadata , old_metadata ) ;
checkTTLExpressions ( new_metadata , old_metadata ) ;
2019-04-15 09:30:45 +00:00
2020-06-17 13:46:01 +00:00
if ( old_metadata . hasSettingsChanges ( ) )
2019-12-26 18:17:05 +00:00
{
2020-06-17 13:46:01 +00:00
const auto current_changes = old_metadata . getSettingsChanges ( ) - > as < const ASTSetQuery & > ( ) . changes ;
2020-06-09 21:22:01 +00:00
const auto & new_changes = new_metadata . settings_changes - > as < const ASTSetQuery & > ( ) . changes ;
2020-02-14 21:33:02 +00:00
for ( const auto & changed_setting : new_changes )
2019-12-27 14:36:59 +00:00
{
2020-07-22 12:02:47 +00:00
const auto & setting_name = changed_setting . name ;
const auto & new_value = changed_setting . value ;
2020-07-20 09:57:17 +00:00
MergeTreeSettings : : checkCanSet ( setting_name , new_value ) ;
2020-07-22 12:02:47 +00:00
const Field * current_value = current_changes . tryGet ( setting_name ) ;
2019-12-27 14:36:59 +00:00
2020-07-22 12:02:47 +00:00
if ( ( ! current_value | | * current_value ! = new_value )
& & MergeTreeSettings : : isReadonlySetting ( setting_name ) )
2019-12-27 14:36:59 +00:00
{
2020-07-22 12:02:47 +00:00
throw Exception { " Setting ' " + setting_name + " ' is readonly for storage ' " + getName ( ) + " ' " ,
2019-12-27 14:36:59 +00:00
ErrorCodes : : READONLY_SETTING } ;
}
2020-01-09 13:52:37 +00:00
2020-07-22 12:02:47 +00:00
if ( ! current_value & & MergeTreeSettings : : isPartFormatSetting ( setting_name ) )
2020-02-14 21:33:02 +00:00
{
MergeTreeSettings copy = * getSettings ( ) ;
copy . applyChange ( changed_setting ) ;
String reason ;
if ( ! canUsePolymorphicParts ( copy , & reason ) & & ! reason . empty ( ) )
throw Exception ( " Can't change settings. Reason: " + reason , ErrorCodes : : NOT_IMPLEMENTED ) ;
}
2020-07-22 12:02:47 +00:00
if ( setting_name = = " storage_policy " )
checkStoragePolicy ( global_context . getStoragePolicy ( new_value . safeGet < String > ( ) ) ) ;
2019-12-27 14:36:59 +00:00
}
2019-12-26 18:17:05 +00:00
}
2020-07-13 17:27:52 +00:00
for ( const auto & part : getDataPartsVector ( ) )
{
bool at_least_one_column_rest = false ;
for ( const auto & column : part - > getColumns ( ) )
{
if ( ! dropped_columns . count ( column . name ) )
{
at_least_one_column_rest = true ;
break ;
}
}
if ( ! at_least_one_column_rest )
{
2020-07-14 08:19:39 +00:00
std : : string postfix ;
2020-07-13 17:27:52 +00:00
if ( dropped_columns . size ( ) > 1 )
postfix = " s " ;
throw Exception ( ErrorCodes : : BAD_ARGUMENTS ,
" Cannot drop or clear column{} '{}', because all columns in part '{}' will be removed from disk. Empty parts are not allowed " , postfix , boost : : algorithm : : join ( dropped_columns , " , " ) , part - > name ) ;
}
}
2014-03-09 17:36:01 +00:00
}
2019-11-25 20:19:43 +00:00
/// Picks the part type (IN_MEMORY, COMPACT or WIDE) for a part of the given uncompressed size and row count.
/// Always WIDE when polymorphic parts cannot be used with the current settings.
MergeTreeDataPartType MergeTreeData::choosePartType(size_t bytes_uncompressed, size_t rows_count) const
{
    const auto current_settings = getSettings();

    const bool polymorphic_allowed = canUsePolymorphicParts(*current_settings);
    if (!polymorphic_allowed)
        return MergeTreeDataPartType::WIDE;

    /// Below either threshold of a compact part: keep the part in memory.
    const bool below_compact_threshold = bytes_uncompressed < current_settings->min_bytes_for_compact_part
        || rows_count < current_settings->min_rows_for_compact_part;
    if (below_compact_threshold)
        return MergeTreeDataPartType::IN_MEMORY;

    /// Below either threshold of a wide part: use the compact format.
    const bool below_wide_threshold = bytes_uncompressed < current_settings->min_bytes_for_wide_part
        || rows_count < current_settings->min_rows_for_wide_part;
    if (below_wide_threshold)
        return MergeTreeDataPartType::COMPACT;

    return MergeTreeDataPartType::WIDE;
}
/// Picks the on-disk part type (COMPACT or WIDE) for a part of the given uncompressed size and row count.
/// Always WIDE when polymorphic parts cannot be used with the current settings.
MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompressed, size_t rows_count) const
{
    const auto current_settings = getSettings();

    const bool polymorphic_allowed = canUsePolymorphicParts(*current_settings);
    if (!polymorphic_allowed)
        return MergeTreeDataPartType::WIDE;

    /// Below either threshold of a wide part: use the compact format.
    const bool below_wide_threshold = bytes_uncompressed < current_settings->min_bytes_for_wide_part
        || rows_count < current_settings->min_rows_for_wide_part;
    if (below_wide_threshold)
        return MergeTreeDataPartType::COMPACT;

    return MergeTreeDataPartType::WIDE;
}
2017-04-01 07:20:54 +00:00
2019-11-22 12:51:00 +00:00
/// Creates a part object of the explicitly requested `type`.
/// Throws UNKNOWN_PART_TYPE if `type` is none of WIDE, COMPACT or IN_MEMORY.
MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name,
    MergeTreeDataPartType type, const MergeTreePartInfo & part_info,
    const VolumePtr & volume, const String & relative_path) const
{
    if (type == MergeTreeDataPartType::WIDE)
        return std::make_shared<MergeTreeDataPartWide>(*this, name, part_info, volume, relative_path);

    if (type == MergeTreeDataPartType::COMPACT)
        return std::make_shared<MergeTreeDataPartCompact>(*this, name, part_info, volume, relative_path);

    if (type == MergeTreeDataPartType::IN_MEMORY)
        return std::make_shared<MergeTreeDataPartInMemory>(*this, name, part_info, volume, relative_path);

    throw Exception("Unknown type of part " + relative_path, ErrorCodes::UNKNOWN_PART_TYPE);
}
2019-03-28 08:52:09 +00:00
2019-11-21 16:10:22 +00:00
/// Maps the extension of a marks file to the type of the part that contains it.
/// Throws UNKNOWN_PART_TYPE for an extension that matches no known part type.
static MergeTreeDataPartType getPartTypeFromMarkExtension(const String & mrk_ext)
{
    /// Both the non-adaptive extension and the adaptive extension of wide parts mean a wide part.
    const bool belongs_to_wide_part = mrk_ext == getNonAdaptiveMrkExtension()
        || mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::WIDE);
    if (belongs_to_wide_part)
        return MergeTreeDataPartType::WIDE;

    if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::COMPACT))
        return MergeTreeDataPartType::COMPACT;

    throw Exception("Can't determine part type, because of unknown mark extension " + mrk_ext, ErrorCodes::UNKNOWN_PART_TYPE);
}
2017-04-01 07:20:54 +00:00
2019-11-22 12:51:00 +00:00
/// Creates a part object, deriving the part info from the part name and the table's format version.
MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(
    const String & name, const VolumePtr & volume, const String & relative_path) const
{
    const MergeTreePartInfo parsed_info = MergeTreePartInfo::fromPartName(name, format_version);
    return createPart(name, parsed_info, volume, relative_path);
}
2019-11-22 12:51:00 +00:00
/// Creates a part object whose type is detected from the marks file extension found in the part directory.
/// If no marks extension is found, the type is chosen as for an on-disk part of zero size.
MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(
    const String & name, const MergeTreePartInfo & part_info,
    const VolumePtr & volume, const String & relative_path) const
{
    const auto part_directory = relative_data_path + relative_path + "/";
    const auto marks_extension = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(volume->getDisk(), part_directory);

    /// Didn't find any mark file, suppose that part is empty.
    const MergeTreeDataPartType detected_type = marks_extension
        ? getPartTypeFromMarkExtension(*marks_extension)
        : choosePartTypeOnDisk(0, 0);

    return createPart(name, detected_type, part_info, volume, relative_path);
}
2014-03-09 17:36:01 +00:00
2019-08-27 09:34:53 +00:00
void MergeTreeData : : changeSettings (
2019-12-27 14:36:59 +00:00
const ASTPtr & new_settings ,
2020-06-18 16:10:47 +00:00
TableLockHolder & /* table_lock_holder */ )
2019-08-06 13:04:29 +00:00
{
2019-12-27 14:36:59 +00:00
if ( new_settings )
2019-08-27 09:34:53 +00:00
{
2020-06-23 16:40:58 +00:00
bool has_storage_policy_changed = false ;
2019-12-27 14:36:59 +00:00
const auto & new_changes = new_settings - > as < const ASTSetQuery & > ( ) . changes ;
2020-01-09 14:50:34 +00:00
for ( const auto & change : new_changes )
2020-07-30 21:42:55 +00:00
{
2020-01-09 14:50:34 +00:00
if ( change . name = = " storage_policy " )
{
StoragePolicyPtr new_storage_policy = global_context . getStoragePolicy ( change . value . safeGet < String > ( ) ) ;
StoragePolicyPtr old_storage_policy = getStoragePolicy ( ) ;
2020-06-23 16:40:58 +00:00
/// StoragePolicy of different version or name is guaranteed to have different pointer
if ( new_storage_policy ! = old_storage_policy )
{
checkStoragePolicy ( new_storage_policy ) ;
2020-01-09 14:50:34 +00:00
2020-06-23 16:40:58 +00:00
std : : unordered_set < String > all_diff_disk_names ;
for ( const auto & disk : new_storage_policy - > getDisks ( ) )
all_diff_disk_names . insert ( disk - > getName ( ) ) ;
for ( const auto & disk : old_storage_policy - > getDisks ( ) )
all_diff_disk_names . erase ( disk - > getName ( ) ) ;
2020-01-09 14:50:34 +00:00
2020-06-23 16:40:58 +00:00
for ( const String & disk_name : all_diff_disk_names )
{
auto disk = new_storage_policy - > getDiskByName ( disk_name ) ;
if ( disk - > exists ( relative_data_path ) )
throw Exception ( " New storage policy contain disks which already contain data of a table with the same name " , ErrorCodes : : LOGICAL_ERROR ) ;
}
2020-01-09 14:50:34 +00:00
2020-06-23 16:40:58 +00:00
for ( const String & disk_name : all_diff_disk_names )
{
auto disk = new_storage_policy - > getDiskByName ( disk_name ) ;
disk - > createDirectories ( relative_data_path ) ;
disk - > createDirectories ( relative_data_path + " detached " ) ;
}
/// FIXME how would that be done while reloading configuration???
has_storage_policy_changed = true ;
2020-01-09 14:50:34 +00:00
}
}
2020-07-30 21:42:55 +00:00
}
2020-01-09 14:50:34 +00:00
2019-08-27 09:34:53 +00:00
MergeTreeSettings copy = * getSettings ( ) ;
2019-08-27 13:14:19 +00:00
copy . applyChanges ( new_changes ) ;
2020-07-30 21:42:55 +00:00
copy . sanityCheck ( global_context . getSettingsRef ( ) ) ;
2019-08-27 09:34:53 +00:00
storage_settings . set ( std : : make_unique < const MergeTreeSettings > ( copy ) ) ;
2020-06-15 18:08:05 +00:00
StorageInMemoryMetadata new_metadata = getInMemoryMetadata ( ) ;
new_metadata . setSettingsChanges ( new_settings ) ;
setInMemoryMetadata ( new_metadata ) ;
2020-06-23 16:40:58 +00:00
if ( has_storage_policy_changed )
startBackgroundMovesIfNeeded ( ) ;
2019-08-27 09:34:53 +00:00
}
2019-08-06 13:04:29 +00:00
}
2020-07-28 15:10:36 +00:00
/// Freezes all parts of the table: delegates to freezePartitionsByMatcher with a matcher that accepts every part.
PartitionCommandsResultInfo MergeTreeData::freezeAll(const String & with_name, const StorageMetadataPtr & metadata_snapshot, const Context & context, TableLockHolder &)
{
    const auto accept_every_part = [](const DataPartPtr &) { return true; };
    return freezePartitionsByMatcher(accept_every_part, metadata_snapshot, with_name, context);
}
2019-07-30 17:24:40 +00:00
void MergeTreeData : : PartsTemporaryRename : : addPart ( const String & old_name , const String & new_name )
{
old_and_new_names . push_back ( { old_name , new_name } ) ;
2020-03-19 16:37:55 +00:00
for ( const auto & [ path , disk ] : storage . getRelativeDataPathsWithDisks ( ) )
2019-08-29 16:17:47 +00:00
{
2020-03-19 16:37:55 +00:00
for ( auto it = disk - > iterateDirectory ( path + source_dir ) ; it - > isValid ( ) ; it - > next ( ) )
2019-08-29 16:17:47 +00:00
{
2020-03-19 16:37:55 +00:00
if ( it - > name ( ) = = old_name )
2019-08-29 16:17:47 +00:00
{
2020-03-19 16:37:55 +00:00
old_part_name_to_path_and_disk [ old_name ] = { path , disk } ;
2019-08-29 16:17:47 +00:00
break ;
}
}
}
2019-07-30 17:24:40 +00:00
}
2019-07-31 14:44:55 +00:00
void MergeTreeData : : PartsTemporaryRename : : tryRenameAll ( )
{
renamed = true ;
for ( size_t i = 0 ; i < old_and_new_names . size ( ) ; + + i )
{
try
{
2020-03-19 16:37:55 +00:00
const auto & [ old_name , new_name ] = old_and_new_names [ i ] ;
if ( old_name . empty ( ) | | new_name . empty ( ) )
2019-07-31 14:44:55 +00:00
throw DB : : Exception ( " Empty part name. Most likely it's a bug. " , ErrorCodes : : INCORRECT_FILE_NAME ) ;
2020-03-19 16:37:55 +00:00
const auto & [ path , disk ] = old_part_name_to_path_and_disk [ old_name ] ;
const auto full_path = path + source_dir ; /// for old_name
disk - > moveFile ( full_path + old_name , full_path + new_name ) ;
2019-07-31 14:44:55 +00:00
}
catch ( . . . )
{
old_and_new_names . resize ( i ) ;
2020-05-23 22:24:01 +00:00
LOG_WARNING ( storage . log , " Cannot rename parts to perform operation on them: {} " , getCurrentExceptionMessage ( false ) ) ;
2019-07-31 14:44:55 +00:00
throw ;
}
}
}
2019-07-30 17:24:40 +00:00
/// Renames the parts back from their new names to the old ones, if tryRenameAll was called.
/// Failures are logged and do not stop the remaining renames.
MergeTreeData::PartsTemporaryRename::~PartsTemporaryRename()
{
    // TODO what if server had crashed before this destructor was called?
    if (renamed)
    {
        for (const auto & [original_name, temporary_name] : old_and_new_names)
        {
            /// Entries with an empty old name are skipped.
            if (!original_name.empty())
            {
                try
                {
                    const auto & [data_path, data_disk] = old_part_name_to_path_and_disk[original_name];
                    const auto directory = data_path + source_dir; /// for old_name
                    data_disk->moveFile(directory + temporary_name, directory + original_name);
                }
                catch (...)
                {
                    tryLogCurrentException(__PRETTY_FUNCTION__);
                }
            }
        }
    }
}
2014-03-09 17:36:01 +00:00
2018-02-19 15:31:43 +00:00
/// Returns the Committed parts that are contained in `new_part_info`.
/// If some Committed part contains `new_part_info` instead, that part is stored into `out_covering_part`
/// and an empty vector is returned.
/// Throws LOGICAL_ERROR if a neighbouring part intersects the new one without either containing the other,
/// or if a Committed part with exactly the same info is found.
MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace(
    const MergeTreePartInfo & new_part_info,
    const String & new_part_name,
    DataPartPtr & out_covering_part,
    DataPartsLock & /* data_parts_lock */) const
{
    /// Parts contained in the part are consecutive in data_parts, intersecting the insertion place for the part itself.
    auto insertion_place = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo{DataPartState::Committed, new_part_info});
    auto committed_range = getDataPartsStateRange(DataPartState::Committed);

    /// Go to the left.
    DataPartIteratorByStateAndInfo left = insertion_place;
    while (left != committed_range.begin())
    {
        auto candidate = std::prev(left);
        const auto & candidate_info = (*candidate)->info;

        /// The neighbour is covered by the new part: include it and look further.
        if (new_part_info.contains(candidate_info))
        {
            left = candidate;
            continue;
        }

        if (candidate_info.contains(new_part_info))
        {
            out_covering_part = *candidate;
            return {};
        }

        if (!new_part_info.isDisjoint(candidate_info))
            throw Exception("Part " + new_part_name + " intersects previous part " + (*candidate)->getNameWithState() +
                ". It is a bug.", ErrorCodes::LOGICAL_ERROR);

        break;
    }

    /// Go to the right.
    DataPartIteratorByStateAndInfo right = insertion_place;
    while (right != committed_range.end())
    {
        const auto & candidate_info = (*right)->info;

        if (candidate_info == new_part_info)
            throw Exception("Unexpected duplicate part " + (*right)->getNameWithState() + ". It is a bug.", ErrorCodes::LOGICAL_ERROR);

        /// The neighbour is covered by the new part: include it and look further.
        if (new_part_info.contains(candidate_info))
        {
            ++right;
            continue;
        }

        if (candidate_info.contains(new_part_info))
        {
            out_covering_part = *right;
            return {};
        }

        if (!new_part_info.isDisjoint(candidate_info))
            throw Exception("Part " + new_part_name + " intersects next part " + (*right)->getNameWithState() +
                ". It is a bug.", ErrorCodes::LOGICAL_ERROR);

        break;
    }

    return DataPartsVector{left, right};
}
2017-11-20 19:33:12 +00:00
2017-04-01 07:20:54 +00:00
2020-06-15 18:57:38 +00:00
bool MergeTreeData : : renameTempPartAndAdd ( MutableDataPartPtr & part , SimpleIncrement * increment , Transaction * out_transaction )
2018-02-19 15:31:43 +00:00
{
2020-06-15 18:57:38 +00:00
if ( out_transaction & & & out_transaction - > data ! = this )
throw Exception ( " MergeTreeData::Transaction for one table cannot be used with another. It is a bug. " ,
ErrorCodes : : LOGICAL_ERROR ) ;
DataPartsVector covered_parts ;
{
auto lock = lockParts ( ) ;
if ( ! renameTempPartAndReplace ( part , increment , out_transaction , lock , & covered_parts ) )
return false ;
}
if ( ! covered_parts . empty ( ) )
throw Exception ( " Added part " + part - > name + " covers " + toString ( covered_parts . size ( ) )
+ " existing part(s) (including " + covered_parts [ 0 ] - > name + " ) " , ErrorCodes : : LOGICAL_ERROR ) ;
return true ;
2018-02-19 15:31:43 +00:00
}
2017-09-11 22:40:51 +00:00
2020-06-15 18:57:38 +00:00
bool MergeTreeData : : renameTempPartAndReplace (
2018-09-20 14:30:52 +00:00
MutableDataPartPtr & part , SimpleIncrement * increment , Transaction * out_transaction ,
2018-05-21 13:49:54 +00:00
std : : unique_lock < std : : mutex > & lock , DataPartsVector * out_covered_parts )
2018-02-19 15:31:43 +00:00
{
2018-09-20 14:30:52 +00:00
if ( out_transaction & & & out_transaction - > data ! = this )
throw Exception ( " MergeTreeData::Transaction for one table cannot be used with another. It is a bug. " ,
ErrorCodes : : LOGICAL_ERROR ) ;
2018-02-19 15:31:43 +00:00
part - > assertState ( { DataPartState : : Temporary } ) ;
2017-09-11 22:40:51 +00:00
2018-02-19 15:31:43 +00:00
MergeTreePartInfo part_info = part - > info ;
String part_name ;
2017-09-11 22:40:51 +00:00
2018-02-19 15:31:43 +00:00
if ( DataPartPtr existing_part_in_partition = getAnyPartInPartition ( part - > info . partition_id , lock ) )
{
if ( part - > partition . value ! = existing_part_in_partition - > partition . value )
throw Exception (
" Partition value mismatch between two parts with the same partition ID. Existing part: "
+ existing_part_in_partition - > name + " , newly added part: " + part - > name ,
ErrorCodes : : CORRUPTED_DATA ) ;
}
2017-09-11 22:40:51 +00:00
2018-02-19 15:31:43 +00:00
/** It is important that obtaining new block number and adding that block to parts set is done atomically.
* Otherwise there is race condition - merge of blocks could happen in interval that doesn ' t yet contain new part .
*/
if ( increment )
2018-05-23 19:34:37 +00:00
{
2019-12-17 12:23:17 +00:00
part_info . min_block = part_info . max_block = increment - > get ( ) ;
part_info . mutation = 0 ; /// it's equal to min_block by default
2018-05-23 19:34:37 +00:00
part_name = part - > getNewName ( part_info ) ;
}
2020-09-17 12:01:03 +00:00
else /// Parts from ReplicatedMergeTree already have names
2018-05-23 19:34:37 +00:00
part_name = part - > name ;
2017-04-01 07:20:54 +00:00
2020-05-23 22:24:01 +00:00
LOG_TRACE ( log , " Renaming temporary part {} to {}. " , part - > relative_path , part_name ) ;
2017-11-20 19:33:12 +00:00
2018-02-19 15:31:43 +00:00
auto it_duplicate = data_parts_by_info . find ( part_info ) ;
if ( it_duplicate ! = data_parts_by_info . end ( ) )
{
String message = " Part " + ( * it_duplicate ) - > getNameWithState ( ) + " already exists " ;
2017-09-11 22:40:51 +00:00
2018-02-19 15:31:43 +00:00
if ( ( * it_duplicate ) - > checkState ( { DataPartState : : Outdated , DataPartState : : Deleting } ) )
throw Exception ( message + " , but it will be deleted soon " , ErrorCodes : : PART_IS_TEMPORARILY_LOCKED ) ;
2017-09-11 22:40:51 +00:00
2018-02-19 15:31:43 +00:00
throw Exception ( message , ErrorCodes : : DUPLICATE_DATA_PART ) ;
}
2017-11-20 19:33:12 +00:00
2018-02-19 15:31:43 +00:00
DataPartPtr covering_part ;
2018-02-19 16:12:16 +00:00
DataPartsVector covered_parts = getActivePartsToReplace ( part_info , part_name , covering_part , lock ) ;
2020-05-27 20:05:55 +00:00
DataPartsVector covered_parts_in_memory ;
2017-11-20 19:33:12 +00:00
2018-02-19 15:31:43 +00:00
if ( covering_part )
{
2020-05-23 22:24:01 +00:00
LOG_WARNING ( log , " Tried to add obsolete part {} covered by {} " , part_name , covering_part - > getNameWithState ( ) ) ;
2020-06-15 18:57:38 +00:00
return false ;
2018-02-19 15:31:43 +00:00
}
2017-11-20 19:33:12 +00:00
2018-02-19 15:31:43 +00:00
/// All checks are passed. Now we can rename the part on disk.
/// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts
///
/// If out_transaction is null, we commit the part to the active set immediately, else add it to the transaction.
part - > name = part_name ;
part - > info = part_info ;
part - > is_temp = false ;
part - > state = DataPartState : : PreCommitted ;
2020-06-03 22:00:02 +00:00
part - > renameTo ( part_name , true ) ;
2017-11-20 19:33:12 +00:00
2018-02-19 15:31:43 +00:00
auto part_it = data_parts_indexes . insert ( part ) . first ;
2017-04-01 07:20:54 +00:00
2018-02-19 15:31:43 +00:00
if ( out_transaction )
{
out_transaction - > precommitted_parts . insert ( part ) ;
}
else
{
auto current_time = time ( nullptr ) ;
for ( const DataPartPtr & covered_part : covered_parts )
2017-11-20 19:33:12 +00:00
{
2018-03-03 17:44:53 +00:00
covered_part - > remove_time . store ( current_time , std : : memory_order_relaxed ) ;
2018-02-19 15:31:43 +00:00
modifyPartState ( covered_part , DataPartState : : Outdated ) ;
removePartContributionToColumnSizes ( covered_part ) ;
2017-11-20 19:33:12 +00:00
}
2018-02-19 15:31:43 +00:00
modifyPartState ( part_it , DataPartState : : Committed ) ;
addPartContributionToColumnSizes ( part ) ;
2017-04-01 07:20:54 +00:00
}
2020-06-05 20:47:46 +00:00
auto part_in_memory = asInMemoryPart ( part ) ;
2020-04-14 19:47:19 +00:00
if ( part_in_memory & & getSettings ( ) - > in_memory_parts_enable_wal )
{
auto wal = getWriteAheadLog ( ) ;
2020-05-29 15:02:12 +00:00
wal - > addPart ( part_in_memory - > block , part_in_memory - > name ) ;
2020-04-14 19:47:19 +00:00
}
2018-05-21 13:49:54 +00:00
if ( out_covered_parts )
{
for ( DataPartPtr & covered_part : covered_parts )
out_covered_parts - > emplace_back ( std : : move ( covered_part ) ) ;
}
2020-06-15 18:57:38 +00:00
return true ;
2018-05-21 13:49:54 +00:00
}
/// Convenience overload: takes the parts lock itself, forwards to the lock-taking overload and
/// returns the parts it reported as covered. The boolean result of that overload is not inspected.
/// Throws LOGICAL_ERROR if `out_transaction` refers to a different MergeTreeData.
MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
    MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction)
{
    if (out_transaction)
    {
        if (&out_transaction->data != this)
            throw Exception(" MergeTreeData::Transaction for one table cannot be used with another. It is a bug. ",
                ErrorCodes::LOGICAL_ERROR);
    }

    DataPartsVector replaced_parts;
    {
        auto parts_lock = lockParts();
        renameTempPartAndReplace(part, increment, out_transaction, parts_lock, &replaced_parts);
    }
    return replaced_parts;
}
2018-05-21 13:49:54 +00:00
void MergeTreeData : : removePartsFromWorkingSet ( const MergeTreeData : : DataPartsVector & remove , bool clear_without_timeout , DataPartsLock & /*acquired_lock*/ )
2014-07-01 15:58:25 +00:00
{
2018-05-21 13:49:54 +00:00
auto remove_time = clear_without_timeout ? 0 : time ( nullptr ) ;
for ( const DataPartPtr & part : remove )
{
2020-01-17 12:24:27 +00:00
if ( part - > state = = IMergeTreeDataPart : : State : : Committed )
2018-05-21 13:49:54 +00:00
removePartContributionToColumnSizes ( part ) ;
2020-01-17 12:24:27 +00:00
if ( part - > state = = IMergeTreeDataPart : : State : : Committed | | clear_without_timeout )
2018-05-21 13:49:54 +00:00
part - > remove_time . store ( remove_time , std : : memory_order_relaxed ) ;
2020-01-17 12:24:27 +00:00
if ( part - > state ! = IMergeTreeDataPart : : State : : Outdated )
2020-06-08 16:34:42 +00:00
modifyPartState ( part , IMergeTreeDataPart : : State : : Outdated ) ;
2020-05-29 16:58:08 +00:00
2020-06-30 18:47:12 +00:00
if ( isInMemoryPart ( part ) & & getSettings ( ) - > in_memory_parts_enable_wal )
getWriteAheadLog ( ) - > dropPart ( part - > name ) ;
2018-05-21 13:49:54 +00:00
}
}
2020-09-17 15:33:50 +00:00
void MergeTreeData : : removePartsFromWorkingSetImmediatelyAndSetTemporaryState ( const DataPartsVector & remove )
{
auto lock = lockParts ( ) ;
for ( const auto & part : remove )
{
auto it_part = data_parts_by_info . find ( part - > info ) ;
if ( it_part = = data_parts_by_info . end ( ) )
throw Exception ( " Part " + part - > getNameWithState ( ) + " not found in data_parts " , ErrorCodes : : LOGICAL_ERROR ) ;
modifyPartState ( part , IMergeTreeDataPart : : State : : Temporary ) ;
/// Erase immediately
data_parts_indexes . erase ( it_part ) ;
}
}
2018-05-21 13:49:54 +00:00
void MergeTreeData : : removePartsFromWorkingSet ( const DataPartsVector & remove , bool clear_without_timeout , DataPartsLock * acquired_lock )
{
auto lock = ( acquired_lock ) ? DataPartsLock ( ) : lockParts ( ) ;
2014-07-01 15:58:25 +00:00
2020-04-22 06:22:14 +00:00
for ( const auto & part : remove )
2017-04-01 07:20:54 +00:00
{
2018-02-19 15:31:43 +00:00
if ( ! data_parts_by_info . count ( part - > info ) )
2017-09-11 22:40:51 +00:00
throw Exception ( " Part " + part - > getNameWithState ( ) + " not found in data_parts " , ErrorCodes : : LOGICAL_ERROR ) ;
2016-01-30 02:29:20 +00:00
2017-09-21 21:51:17 +00:00
part - > assertState ( { DataPartState : : PreCommitted , DataPartState : : Committed , DataPartState : : Outdated } ) ;
2017-04-01 07:20:54 +00:00
}
2015-09-16 04:18:16 +00:00
2018-05-21 13:49:54 +00:00
removePartsFromWorkingSet ( remove , clear_without_timeout , lock ) ;
}
/// Collects the parts of drop_range's partition whose block range lies inside drop_range, removes
/// them from the working set via removePartsFromWorkingSet() and returns them.
///
/// An empty vector is returned right away when drop_range.min_block > drop_range.max_block.
/// Parts in the Deleting state are not collected. A part that only partially overlaps the range is
/// either reported with LOGICAL_ERROR or, when `skip_intersecting_parts` is set, logged and skipped.
MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout,
                                                                               bool skip_intersecting_parts, DataPartsLock & lock)
{
    DataPartsVector parts_to_remove;

    if (drop_range.min_block > drop_range.max_block)
        return parts_to_remove;

    /// Throws for an intersecting part unless such parts are to be skipped, in which case it only warns.
    auto report_intersection = [&](const DataPartPtr & offender)
    {
        String error = " Unexpected merged part " + offender->name + " intersecting drop range " + drop_range.getPartName();
        if (!skip_intersecting_parts)
            throw Exception(error, ErrorCodes::LOGICAL_ERROR);

        LOG_WARNING(log, error);
    };

    auto partition_range = getDataPartsPartitionRange(drop_range.partition_id);

    for (const DataPartPtr & part : partition_range)
    {
        const auto & info = part->info;

        if (info.partition_id != drop_range.partition_id)
            throw Exception(" Unexpected partition_id of part " + part->name + " . This is a bug. ", ErrorCodes::LOGICAL_ERROR);

        if (info.min_block < drop_range.min_block)
        {
            /// Intersect left border
            if (drop_range.min_block <= info.max_block)
                report_intersection(part);

            continue;
        }

        /// Stop on new parts
        if (info.min_block > drop_range.max_block)
            break;

        if (info.min_block <= drop_range.max_block && drop_range.max_block < info.max_block)
        {
            /// Intersect right border
            report_intersection(part);
            continue;
        }

        if (part->state != DataPartState::Deleting)
            parts_to_remove.emplace_back(part);
    }

    removePartsFromWorkingSet(parts_to_remove, clear_without_timeout, lock);

    return parts_to_remove;
}
2017-09-05 19:03:51 +00:00
2018-05-21 13:49:54 +00:00
void MergeTreeData : : forgetPartAndMoveToDetached ( const MergeTreeData : : DataPartPtr & part_to_detach , const String & prefix , bool
restore_covered )
2014-04-02 07:59:43 +00:00
{
2020-05-23 22:24:01 +00:00
LOG_INFO ( log , " Renaming {} to {}{} and forgiving it. " , part_to_detach - > relative_path , prefix , part_to_detach - > name ) ;
2017-04-01 07:20:54 +00:00
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-04-01 07:20:54 +00:00
2018-02-19 15:31:43 +00:00
auto it_part = data_parts_by_info . find ( part_to_detach - > info ) ;
if ( it_part = = data_parts_by_info . end ( ) )
2017-09-11 22:40:51 +00:00
throw Exception ( " No such data part " + part_to_detach - > getNameWithState ( ) , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2017-04-01 07:20:54 +00:00
2018-05-21 13:49:54 +00:00
/// What if part_to_detach is a reference to *it_part? Make a new owner just in case.
2017-11-20 19:33:12 +00:00
DataPartPtr part = * it_part ;
2017-04-01 07:20:54 +00:00
2017-11-20 19:33:12 +00:00
if ( part - > state = = DataPartState : : Committed )
removePartContributionToColumnSizes ( part ) ;
modifyPartState ( it_part , DataPartState : : Deleting ) ;
2018-05-21 13:49:54 +00:00
part - > renameToDetached ( prefix ) ;
2017-11-20 19:33:12 +00:00
data_parts_indexes . erase ( it_part ) ;
2017-04-01 07:20:54 +00:00
2017-11-20 19:33:12 +00:00
if ( restore_covered & & part - > info . level = = 0 )
2017-04-01 07:20:54 +00:00
{
2020-05-23 22:24:01 +00:00
LOG_WARNING ( log , " Will not recover parts covered by zero-level part {} " , part - > name ) ;
2017-11-20 19:33:12 +00:00
return ;
}
2017-09-11 22:40:51 +00:00
2017-11-20 19:33:12 +00:00
if ( restore_covered )
{
2017-04-01 07:20:54 +00:00
Strings restored ;
bool error = false ;
2017-11-20 19:33:12 +00:00
String error_parts ;
2017-04-01 07:20:54 +00:00
2017-08-14 18:16:11 +00:00
Int64 pos = part - > info . min_block ;
2017-04-01 07:20:54 +00:00
2017-11-20 19:33:12 +00:00
auto is_appropriate_state = [ ] ( DataPartState state )
2017-04-01 07:20:54 +00:00
{
2017-11-20 19:33:12 +00:00
return state = = DataPartState : : Committed | | state = = DataPartState : : Outdated ;
} ;
2018-02-19 15:31:43 +00:00
auto update_error = [ & ] ( DataPartIteratorByInfo it )
2017-11-20 19:33:12 +00:00
{
error = true ;
error_parts + = ( * it ) - > getNameWithState ( ) + " " ;
} ;
2018-02-19 15:31:43 +00:00
auto it_middle = data_parts_by_info . lower_bound ( part - > info ) ;
2017-11-20 19:33:12 +00:00
/// Restore the leftmost part covered by the part
2018-02-19 15:31:43 +00:00
if ( it_middle ! = data_parts_by_info . begin ( ) )
2017-11-20 19:33:12 +00:00
{
auto it = std : : prev ( it_middle ) ;
if ( part - > contains ( * * it ) & & is_appropriate_state ( ( * it ) - > state ) )
2017-04-01 07:20:54 +00:00
{
2017-11-20 19:33:12 +00:00
/// Maybe, we must consider part level somehow
2017-08-14 18:16:11 +00:00
if ( ( * it ) - > info . min_block ! = part - > info . min_block )
2017-11-20 19:33:12 +00:00
update_error ( it ) ;
2017-09-11 22:40:51 +00:00
if ( ( * it ) - > state ! = DataPartState : : Committed )
{
addPartContributionToColumnSizes ( * it ) ;
2017-11-20 19:33:12 +00:00
modifyPartState ( it , DataPartState : : Committed ) ; // iterator is not invalidated here
2017-09-11 22:40:51 +00:00
}
2017-08-14 18:16:11 +00:00
pos = ( * it ) - > info . max_block + 1 ;
2017-04-01 07:20:54 +00:00
restored . push_back ( ( * it ) - > name ) ;
}
else
2017-11-20 19:33:12 +00:00
update_error ( it ) ;
2017-04-01 07:20:54 +00:00
}
else
error = true ;
2017-11-20 19:33:12 +00:00
/// Restore "right" parts
2018-02-19 15:31:43 +00:00
for ( auto it = it_middle ; it ! = data_parts_by_info . end ( ) & & part - > contains ( * * it ) ; + + it )
2017-04-01 07:20:54 +00:00
{
2017-08-14 18:16:11 +00:00
if ( ( * it ) - > info . min_block < pos )
2017-04-01 07:20:54 +00:00
continue ;
2017-11-20 19:33:12 +00:00
if ( ! is_appropriate_state ( ( * it ) - > state ) )
{
update_error ( it ) ;
continue ;
}
2017-08-14 18:16:11 +00:00
if ( ( * it ) - > info . min_block > pos )
2017-11-20 19:33:12 +00:00
update_error ( it ) ;
2017-09-11 22:40:51 +00:00
if ( ( * it ) - > state ! = DataPartState : : Committed )
{
addPartContributionToColumnSizes ( * it ) ;
2017-11-20 19:33:12 +00:00
modifyPartState ( it , DataPartState : : Committed ) ;
2017-09-11 22:40:51 +00:00
}
2017-08-14 18:16:11 +00:00
pos = ( * it ) - > info . max_block + 1 ;
2017-04-01 07:20:54 +00:00
restored . push_back ( ( * it ) - > name ) ;
}
2017-08-14 18:16:11 +00:00
if ( pos ! = part - > info . max_block + 1 )
2017-04-01 07:20:54 +00:00
error = true ;
for ( const String & name : restored )
{
2020-05-23 22:24:01 +00:00
LOG_INFO ( log , " Activated part {} " , name ) ;
2017-04-01 07:20:54 +00:00
}
if ( error )
2017-11-20 19:33:12 +00:00
{
2020-05-23 22:24:01 +00:00
LOG_ERROR ( log , " The set of parts restored in place of {} looks incomplete. There might or might not be a data loss.{} " , part - > name , ( error_parts . empty ( ) ? " " : " Suspicious parts: " + error_parts ) ) ;
2017-11-20 19:33:12 +00:00
}
2017-04-01 07:20:54 +00:00
}
2014-03-13 17:44:00 +00:00
}
2014-09-19 11:44:29 +00:00
2018-09-20 14:30:52 +00:00
void MergeTreeData : : tryRemovePartImmediately ( DataPartPtr & & part )
{
DataPartPtr part_to_delete ;
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2018-09-20 14:30:52 +00:00
2020-05-23 22:24:01 +00:00
LOG_TRACE ( log , " Trying to immediately remove part {} " , part - > getNameWithState ( ) ) ;
2018-09-20 14:30:52 +00:00
auto it = data_parts_by_info . find ( part - > info ) ;
if ( it = = data_parts_by_info . end ( ) | | ( * it ) . get ( ) ! = part . get ( ) )
throw Exception ( " Part " + part - > name + " doesn't exist " , ErrorCodes : : LOGICAL_ERROR ) ;
part . reset ( ) ;
if ( ! ( ( * it ) - > state = = DataPartState : : Outdated & & it - > unique ( ) ) )
return ;
modifyPartState ( it , DataPartState : : Deleting ) ;
part_to_delete = * it ;
}
try
{
part_to_delete - > remove ( ) ;
}
catch ( . . . )
{
rollbackDeletingParts ( { part_to_delete } ) ;
throw ;
}
removePartsFinally ( { part_to_delete } ) ;
2020-05-23 22:24:01 +00:00
LOG_TRACE ( log , " Removed part {} " , part_to_delete - > name ) ;
2018-09-20 14:30:52 +00:00
}
2015-11-18 21:37:28 +00:00
size_t MergeTreeData : : getTotalActiveSizeInBytes ( ) const
2015-04-17 05:35:53 +00:00
{
2017-04-01 07:20:54 +00:00
size_t res = 0 ;
2017-11-20 19:33:12 +00:00
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-11-20 19:33:12 +00:00
2020-04-22 06:22:14 +00:00
for ( const auto & part : getDataPartsStateRange ( DataPartState : : Committed ) )
2020-03-23 13:32:02 +00:00
res + = part - > getBytesOnDisk ( ) ;
2017-11-20 19:33:12 +00:00
}
2015-04-17 05:35:53 +00:00
2017-04-01 07:20:54 +00:00
return res ;
2015-04-17 05:35:53 +00:00
}
2014-04-09 16:32:32 +00:00
2019-10-28 17:27:43 +00:00
size_t MergeTreeData : : getTotalActiveSizeInRows ( ) const
{
size_t res = 0 ;
{
auto lock = lockParts ( ) ;
2020-04-22 06:22:14 +00:00
for ( const auto & part : getDataPartsStateRange ( DataPartState : : Committed ) )
2019-10-28 17:27:43 +00:00
res + = part - > rows_count ;
}
return res ;
}
2019-05-02 14:48:54 +00:00
size_t MergeTreeData : : getPartsCount ( ) const
{
auto lock = lockParts ( ) ;
size_t res = 0 ;
for ( const auto & part [[maybe_unused]] : getDataPartsStateRange ( DataPartState : : Committed ) )
+ + res ;
return res ;
}
2017-08-14 18:16:11 +00:00
size_t MergeTreeData : : getMaxPartsCountForPartition ( ) const
2014-04-11 16:56:49 +00:00
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-04-01 07:20:54 +00:00
size_t res = 0 ;
size_t cur_count = 0 ;
2017-08-14 18:16:11 +00:00
const String * cur_partition_id = nullptr ;
2017-04-01 07:20:54 +00:00
2017-11-20 19:33:12 +00:00
for ( const auto & part : getDataPartsStateRange ( DataPartState : : Committed ) )
2017-04-01 07:20:54 +00:00
{
2017-08-14 18:16:11 +00:00
if ( cur_partition_id & & part - > info . partition_id = = * cur_partition_id )
2017-04-01 07:20:54 +00:00
{
+ + cur_count ;
}
else
{
2017-08-14 18:16:11 +00:00
cur_partition_id = & part - > info . partition_id ;
2017-04-01 07:20:54 +00:00
cur_count = 1 ;
}
res = std : : max ( res , cur_count ) ;
}
return res ;
2014-04-11 16:56:49 +00:00
}
2016-01-30 00:57:35 +00:00
2018-07-31 12:34:34 +00:00
std : : optional < Int64 > MergeTreeData : : getMinPartDataVersion ( ) const
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2018-07-31 12:34:34 +00:00
std : : optional < Int64 > result ;
2019-05-02 14:48:54 +00:00
for ( const auto & part : getDataPartsStateRange ( DataPartState : : Committed ) )
2018-07-31 12:34:34 +00:00
{
if ( ! result | | * result > part - > info . getDataVersion ( ) )
result = part - > info . getDataVersion ( ) ;
}
return result ;
}
2019-08-07 15:21:45 +00:00
void MergeTreeData : : delayInsertOrThrowIfNeeded ( Poco : : Event * until ) const
2014-05-27 08:43:01 +00:00
{
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2019-05-02 14:48:54 +00:00
const size_t parts_count_in_total = getPartsCount ( ) ;
2019-08-13 10:29:31 +00:00
if ( parts_count_in_total > = settings - > max_parts_in_total )
2019-05-02 14:48:54 +00:00
{
ProfileEvents : : increment ( ProfileEvents : : RejectedInserts ) ;
throw Exception ( " Too many parts ( " + toString ( parts_count_in_total ) + " ) in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified with 'max_parts_in_total' setting in <merge_tree> element in config.xml or with per-table setting. " , ErrorCodes : : TOO_MANY_PARTS ) ;
}
const size_t parts_count_in_partition = getMaxPartsCountForPartition ( ) ;
2019-08-13 10:29:31 +00:00
if ( parts_count_in_partition < settings - > parts_to_delay_insert )
2017-04-01 07:20:54 +00:00
return ;
2016-10-27 22:50:02 +00:00
2019-08-13 10:29:31 +00:00
if ( parts_count_in_partition > = settings - > parts_to_throw_insert )
2017-04-01 07:20:54 +00:00
{
ProfileEvents : : increment ( ProfileEvents : : RejectedInserts ) ;
2019-05-02 14:48:54 +00:00
throw Exception ( " Too many parts ( " + toString ( parts_count_in_partition ) + " ). Merges are processing significantly slower than inserts. " , ErrorCodes : : TOO_MANY_PARTS ) ;
2017-04-01 07:20:54 +00:00
}
2014-09-12 20:29:29 +00:00
2019-08-13 10:29:31 +00:00
const size_t max_k = settings - > parts_to_throw_insert - settings - > parts_to_delay_insert ; /// always > 0
const size_t k = 1 + parts_count_in_partition - settings - > parts_to_delay_insert ; /// from 1 to max_k
const double delay_milliseconds = : : pow ( settings - > max_delay_to_insert * 1000 , static_cast < double > ( k ) / max_k ) ;
2014-09-13 18:34:08 +00:00
2017-04-01 07:20:54 +00:00
ProfileEvents : : increment ( ProfileEvents : : DelayedInserts ) ;
2017-06-22 16:17:01 +00:00
ProfileEvents : : increment ( ProfileEvents : : DelayedInsertsMilliseconds , delay_milliseconds ) ;
2014-06-20 18:45:19 +00:00
2017-04-01 07:20:54 +00:00
CurrentMetrics : : Increment metric_increment ( CurrentMetrics : : DelayedInserts ) ;
2016-10-27 22:50:02 +00:00
2020-05-23 22:24:01 +00:00
LOG_INFO ( log , " Delaying inserting block by {} ms. because there are {} parts " , delay_milliseconds , parts_count_in_partition ) ;
2014-09-03 02:32:23 +00:00
2017-04-01 07:20:54 +00:00
if ( until )
2017-06-22 16:17:01 +00:00
until - > tryWait ( delay_milliseconds ) ;
2017-04-01 07:20:54 +00:00
else
2017-06-22 16:29:15 +00:00
std : : this_thread : : sleep_for ( std : : chrono : : milliseconds ( static_cast < size_t > ( delay_milliseconds ) ) ) ;
2014-05-27 08:43:01 +00:00
}
2018-05-21 23:17:57 +00:00
void MergeTreeData : : throwInsertIfNeeded ( ) const
2014-04-03 11:48:28 +00:00
{
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2019-05-02 14:48:54 +00:00
const size_t parts_count_in_total = getPartsCount ( ) ;
2019-08-13 10:29:31 +00:00
if ( parts_count_in_total > = settings - > max_parts_in_total )
2019-05-02 14:48:54 +00:00
{
ProfileEvents : : increment ( ProfileEvents : : RejectedInserts ) ;
throw Exception ( " Too many parts ( " + toString ( parts_count_in_total ) + " ) in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified with 'max_parts_in_total' setting in <merge_tree> element in config.xml or with per-table setting. " , ErrorCodes : : TOO_MANY_PARTS ) ;
}
const size_t parts_count_in_partition = getMaxPartsCountForPartition ( ) ;
2017-04-01 07:20:54 +00:00
2019-08-13 10:29:31 +00:00
if ( parts_count_in_partition > = settings - > parts_to_throw_insert )
2018-05-21 23:17:57 +00:00
{
ProfileEvents : : increment ( ProfileEvents : : RejectedInserts ) ;
2019-05-02 14:48:54 +00:00
throw Exception ( " Too many parts ( " + toString ( parts_count_in_partition ) + " ). Merges are processing significantly slower than inserts. " , ErrorCodes : : TOO_MANY_PARTS ) ;
2018-05-21 23:17:57 +00:00
}
}
2017-04-01 07:20:54 +00:00
2018-05-21 13:49:54 +00:00
/// Finds a part in state `state` whose info equals or contains `part_info`.
/// Only two candidates are examined: the element at lower_bound({state, part_info}) and the one
/// right before it within the range of that state. Returns nullptr when neither matches.
/// The lock argument is not used by the body.
MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(
    const MergeTreePartInfo & part_info, MergeTreeData::DataPartState state, DataPartsLock & /*lock*/) const
{
    auto state_range = getDataPartsStateRange(state);

    /// The part can be covered only by the previous or the next one in data_parts.
    auto candidate = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo{state, part_info});

    if (candidate != state_range.end())
    {
        const auto & candidate_info = (*candidate)->info;
        if (candidate_info == part_info || candidate_info.contains(part_info))
            return *candidate;
    }

    if (candidate != state_range.begin())
    {
        --candidate;
        if ((*candidate)->info.contains(part_info))
            return *candidate;
    }

    return nullptr;
}
2019-08-19 14:40:12 +00:00
void MergeTreeData : : swapActivePart ( MergeTreeData : : DataPartPtr part_copy )
2019-06-07 19:16:42 +00:00
{
2019-08-21 12:32:48 +00:00
auto lock = lockParts ( ) ;
2020-03-12 09:09:44 +00:00
for ( auto original_active_part : getDataPartsStateRange ( DataPartState : : Committed ) ) // NOLINT (copy is intended)
2019-06-07 19:16:42 +00:00
{
2019-08-16 15:57:19 +00:00
if ( part_copy - > name = = original_active_part - > name )
2019-06-07 19:16:42 +00:00
{
2019-08-16 15:57:19 +00:00
auto active_part_it = data_parts_by_info . find ( original_active_part - > info ) ;
if ( active_part_it = = data_parts_by_info . end ( ) )
2019-09-04 18:26:18 +00:00
throw Exception ( " Cannot swap part ' " + part_copy - > name + " ', no such active part. " , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2019-06-07 19:16:42 +00:00
2019-09-04 18:26:18 +00:00
modifyPartState ( original_active_part , DataPartState : : DeleteOnDestroy ) ;
2019-08-16 15:57:19 +00:00
data_parts_indexes . erase ( active_part_it ) ;
2019-07-30 16:15:57 +00:00
2019-08-16 15:57:19 +00:00
auto part_it = data_parts_indexes . insert ( part_copy ) . first ;
2019-06-07 19:16:42 +00:00
modifyPartState ( part_it , DataPartState : : Committed ) ;
2019-11-17 21:41:40 +00:00
2020-05-09 21:24:15 +00:00
auto disk = original_active_part - > volume - > getDisk ( ) ;
2020-08-26 15:29:46 +00:00
String marker_path = original_active_part - > getFullRelativePath ( ) + IMergeTreeDataPart : : DELETE_ON_DESTROY_MARKER_FILE_NAME ;
2019-11-17 21:41:40 +00:00
try
{
2020-03-19 16:37:55 +00:00
disk - > createFile ( marker_path ) ;
2019-11-17 21:41:40 +00:00
}
2019-12-09 13:44:11 +00:00
catch ( Poco : : Exception & e )
2019-11-17 21:41:40 +00:00
{
2020-05-23 22:24:01 +00:00
LOG_ERROR ( log , " {} (while creating DeleteOnDestroy marker: {}) " , e . what ( ) , backQuote ( fullPath ( disk , marker_path ) ) ) ;
2019-11-17 21:41:40 +00:00
}
2019-08-15 09:43:31 +00:00
return ;
2019-06-07 19:16:42 +00:00
}
}
2019-09-04 18:26:18 +00:00
throw Exception ( " Cannot swap part ' " + part_copy - > name + " ', no such active part. " , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2019-08-16 15:57:19 +00:00
}
2019-10-28 17:27:43 +00:00
/// Takes the parts lock and looks for a Committed part that equals or contains `part_info`.
MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const MergeTreePartInfo & part_info) const
{
    auto parts_lock = lockParts();

    const auto wanted_state = DataPartState::Committed;
    return getActiveContainingPart(part_info, wanted_state, parts_lock);
}
2019-10-28 17:27:43 +00:00
/// Parses `part_name` with the table's format_version and forwards to the part-info overload.
MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name) const
{
    return getActiveContainingPart(MergeTreePartInfo::fromPartName(part_name, format_version));
}
2018-05-21 13:49:54 +00:00
/// Copies, under the parts lock, all parts with the given state and partition id into a vector.
MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartition(MergeTreeData::DataPartState state, const String & partition_id)
{
    DataPartStateAndPartitionID lookup_key{state, partition_id};

    auto parts_lock = lockParts();

    auto first = data_parts_by_state_and_info.lower_bound(lookup_key);
    auto last = data_parts_by_state_and_info.upper_bound(lookup_key);
    return DataPartsVector(first, last);
}
/// Returns the part with exactly this info if its state is one of `valid_states`, nullptr otherwise.
/// The lookup is done under the parts lock.
MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states)
{
    auto parts_lock = lockParts();

    auto found = data_parts_by_info.find(part_info);
    if (found != data_parts_by_info.end())
    {
        for (auto allowed_state : valid_states)
        {
            if ((*found)->state == allowed_state)
                return *found;
        }
    }

    return nullptr;
}
2018-05-21 13:49:54 +00:00
/// Parses `part_name` with the table's format_version and forwards to the part-info overload.
MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states)
{
    const auto parsed_info = MergeTreePartInfo::fromPartName(part_name, format_version);
    return getPartIfExists(parsed_info, valid_states);
}
2016-01-28 01:00:27 +00:00
2020-03-18 00:57:00 +00:00
static void loadPartAndFixMetadataImpl ( MergeTreeData : : MutableDataPartPtr part )
2019-07-25 10:46:07 +00:00
{
2020-05-09 21:24:15 +00:00
auto disk = part - > volume - > getDisk ( ) ;
2020-03-19 16:37:55 +00:00
String full_part_path = part - > getFullRelativePath ( ) ;
2017-04-01 07:20:54 +00:00
2017-08-16 19:24:50 +00:00
part - > loadColumnsChecksumsIndexes ( false , true ) ;
2020-03-19 16:37:55 +00:00
part - > modification_time = disk - > getLastModified ( full_part_path ) . epochTime ( ) ;
2014-08-08 08:28:13 +00:00
}
2017-05-14 23:14:21 +00:00
void MergeTreeData : : calculateColumnSizesImpl ( )
2014-09-19 11:44:29 +00:00
{
2017-04-01 07:20:54 +00:00
column_sizes . clear ( ) ;
2014-09-19 11:44:29 +00:00
2017-09-21 21:51:17 +00:00
/// Take into account only committed parts
2017-11-20 19:33:12 +00:00
auto committed_parts_range = getDataPartsStateRange ( DataPartState : : Committed ) ;
for ( const auto & part : committed_parts_range )
2017-04-01 07:20:54 +00:00
addPartContributionToColumnSizes ( part ) ;
2014-09-19 11:44:29 +00:00
}
void MergeTreeData : : addPartContributionToColumnSizes ( const DataPartPtr & part )
{
2020-01-16 16:15:01 +00:00
for ( const auto & column : part - > getColumns ( ) )
2017-04-01 07:20:54 +00:00
{
2019-07-16 17:13:12 +00:00
ColumnSize & total_column_size = column_sizes [ column . name ] ;
ColumnSize part_column_size = part - > getColumnSize ( column . name , * column . type ) ;
2018-03-26 14:18:04 +00:00
total_column_size . add ( part_column_size ) ;
2017-04-01 07:20:54 +00:00
}
2014-09-19 11:44:29 +00:00
}
void MergeTreeData : : removePartContributionToColumnSizes ( const DataPartPtr & part )
{
2020-01-16 16:15:01 +00:00
for ( const auto & column : part - > getColumns ( ) )
2017-04-01 07:20:54 +00:00
{
2019-07-16 17:13:12 +00:00
ColumnSize & total_column_size = column_sizes [ column . name ] ;
ColumnSize part_column_size = part - > getColumnSize ( column . name , * column . type ) ;
2017-04-01 07:20:54 +00:00
2018-03-26 14:18:04 +00:00
auto log_subtract = [ & ] ( size_t & from , size_t value , const char * field )
2017-04-01 07:20:54 +00:00
{
2018-03-26 14:18:04 +00:00
if ( value > from )
2020-05-23 22:24:01 +00:00
LOG_ERROR ( log , " Possibly incorrect column size subtraction: {} - {} = {}, column: {}, field: {} " ,
2020-05-23 20:13:34 +00:00
from , value , from - value , column . name , field ) ;
2018-03-26 14:18:04 +00:00
from - = value ;
} ;
2017-04-01 07:20:54 +00:00
2018-03-26 14:18:04 +00:00
log_subtract ( total_column_size . data_compressed , part_column_size . data_compressed , " .data_compressed " ) ;
log_subtract ( total_column_size . data_uncompressed , part_column_size . data_uncompressed , " .data_uncompressed " ) ;
log_subtract ( total_column_size . marks , part_column_size . marks , " .marks " ) ;
2017-04-01 07:20:54 +00:00
}
2014-09-19 11:44:29 +00:00
}
2014-10-03 17:57:01 +00:00
2020-07-28 15:10:36 +00:00
/// Freezes the parts selected by `partition_ast` by delegating to freezePartitionsByMatcher().
///
/// For tables in the old (month-partitioning) format a literal partition value is treated as a
/// prefix of the partition ID; in every other case the exact partition ID is resolved with
/// getPartitionIDFromQuery(). The last (unnamed) parameter is a table lock holder that is not
/// used inside this function.
///
/// Returns whatever freezePartitionsByMatcher() returns.
PartitionCommandsResultInfo MergeTreeData::freezePartition(const ASTPtr & partition_ast, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context, TableLockHolder &)
{
    std::optional<String> prefix;
    String partition_id;

    if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
    {
        const auto & partition = partition_ast->as<ASTPartition &>();

        /// `value` is empty when the partition is given by ID (getPartitionIDFromQuery handles that case),
        /// so it must not be dereferenced unconditionally.
        const auto * partition_lit = partition.value ? partition.value->as<ASTLiteral>() : nullptr;

        /// Month-partitioning specific - partition value can represent a prefix of the partition to freeze.
        if (partition_lit)
            prefix = partition_lit->value.getType() == Field::Types::UInt64
                ? toString(partition_lit->value.get<UInt64>())
                : partition_lit->value.safeGet<String>();
        else
            partition_id = getPartitionIDFromQuery(partition_ast, context);
    }
    else
        partition_id = getPartitionIDFromQuery(partition_ast, context);

    if (prefix)
        LOG_DEBUG(log, "Freezing parts with prefix {}", *prefix);
    else
        LOG_DEBUG(log, "Freezing parts with partition ID {}", partition_id);

    return freezePartitionsByMatcher(
        [&prefix, &partition_id](const DataPartPtr & part)
        {
            if (prefix)
                return startsWith(part->info.partition_id, *prefix);
            else
                return part->info.partition_id == partition_id;
        },
        metadata_snapshot,
        with_name,
        context);
}
2020-07-14 08:19:39 +00:00
void MergeTreeData : : checkAlterPartitionIsPossible ( const PartitionCommands & commands , const StorageMetadataPtr & /*metadata_snapshot*/ , const Settings & settings ) const
2020-07-13 17:27:52 +00:00
{
for ( const auto & command : commands )
{
2020-07-14 13:56:30 +00:00
if ( command . type = = PartitionCommand : : DROP_DETACHED_PARTITION
& & ! settings . allow_drop_detached )
throw DB : : Exception ( " Cannot execute query: DROP DETACHED PART is disabled "
" (see allow_drop_detached setting) " , ErrorCodes : : SUPPORT_IS_DISABLED ) ;
if ( command . partition & & command . type ! = PartitionCommand : : DROP_DETACHED_PARTITION )
2020-07-14 08:19:39 +00:00
{
if ( command . part )
{
auto part_name = command . partition - > as < ASTLiteral & > ( ) . value . safeGet < String > ( ) ;
/// We able to parse it
MergeTreePartInfo : : fromPartName ( part_name , format_version ) ;
}
else
{
/// We able to parse it
getPartitionIDFromQuery ( command . partition , global_context ) ;
}
}
2020-07-13 17:27:52 +00:00
}
}
2020-07-13 16:19:08 +00:00
void MergeTreeData : : checkPartitionCanBeDropped ( const ASTPtr & partition )
{
const String partition_id = getPartitionIDFromQuery ( partition , global_context ) ;
auto parts_to_remove = getDataPartsVectorInPartition ( MergeTreeDataPartState : : Committed , partition_id ) ;
UInt64 partition_size = 0 ;
for ( const auto & part : parts_to_remove )
partition_size + = part - > getBytesOnDisk ( ) ;
auto table_id = getStorageID ( ) ;
global_context . checkPartitionCanBeDropped ( table_id . database_name , table_id . table_name , partition_size ) ;
}
2014-11-11 04:11:07 +00:00
2020-11-02 17:30:53 +00:00
void MergeTreeData : : checkPartCanBeDropped ( const ASTPtr & part_ast )
{
String part_name = part_ast - > as < ASTLiteral & > ( ) . value . safeGet < String > ( ) ;
auto part = getPartIfExists ( part_name , { MergeTreeDataPartState : : Committed } ) ;
if ( ! part )
2020-11-12 23:27:18 +00:00
throw Exception ( ErrorCodes : : NO_SUCH_DATA_PART , " No part {} in committed state " , part_name ) ;
2020-11-03 15:41:46 +00:00
auto table_id = getStorageID ( ) ;
global_context . checkPartitionCanBeDropped ( table_id . database_name , table_id . table_name , part - > getBytesOnDisk ( ) ) ;
2020-11-02 17:30:53 +00:00
}
2019-08-20 09:59:19 +00:00
void MergeTreeData : : movePartitionToDisk ( const ASTPtr & partition , const String & name , bool moving_part , const Context & context )
2019-07-18 15:19:03 +00:00
{
2019-08-20 09:59:19 +00:00
String partition_id ;
2019-07-23 13:34:17 +00:00
2019-08-20 09:59:19 +00:00
if ( moving_part )
partition_id = partition - > as < ASTLiteral & > ( ) . value . safeGet < String > ( ) ;
else
partition_id = getPartitionIDFromQuery ( partition , context ) ;
DataPartsVector parts ;
if ( moving_part )
{
2019-12-09 21:38:41 +00:00
auto part_info = MergeTreePartInfo : : fromPartName ( partition_id , format_version ) ;
parts . push_back ( getActiveContainingPart ( part_info ) ) ;
if ( ! parts . back ( ) | | parts . back ( ) - > name ! = part_info . getPartName ( ) )
2019-08-20 19:04:58 +00:00
throw Exception ( " Part " + partition_id + " is not exists or not active " , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2019-08-20 09:59:19 +00:00
}
else
parts = getDataPartsVectorInPartition ( MergeTreeDataPartState : : Committed , partition_id ) ;
2019-07-23 13:34:17 +00:00
2020-01-09 14:50:34 +00:00
auto disk = getStoragePolicy ( ) - > getDiskByName ( name ) ;
2019-07-23 13:34:17 +00:00
if ( ! disk )
2020-01-09 14:50:34 +00:00
throw Exception ( " Disk " + name + " does not exists on policy " + getStoragePolicy ( ) - > getName ( ) , ErrorCodes : : UNKNOWN_DISK ) ;
2019-07-23 13:34:17 +00:00
2019-10-24 08:52:33 +00:00
parts . erase ( std : : remove_if ( parts . begin ( ) , parts . end ( ) , [ & ] ( auto part_ptr )
{
2020-05-09 21:24:15 +00:00
return part_ptr - > volume - > getDisk ( ) - > getName ( ) = = disk - > getName ( ) ;
2019-10-24 08:52:33 +00:00
} ) , parts . end ( ) ) ;
2019-10-22 14:45:01 +00:00
2019-10-24 08:52:33 +00:00
if ( parts . empty ( ) )
2019-08-20 09:59:19 +00:00
{
2019-10-24 10:56:32 +00:00
String no_parts_to_move_message ;
if ( moving_part )
no_parts_to_move_message = " Part ' " + partition_id + " ' is already on disk ' " + disk - > getName ( ) + " ' " ;
else
no_parts_to_move_message = " All parts of partition ' " + partition_id + " ' are already on disk ' " + disk - > getName ( ) + " ' " ;
throw Exception ( no_parts_to_move_message , ErrorCodes : : UNKNOWN_DISK ) ;
2019-08-20 09:59:19 +00:00
}
2019-07-23 13:34:17 +00:00
2019-12-03 13:37:40 +00:00
if ( ! movePartsToSpace ( parts , std : : static_pointer_cast < Space > ( disk ) ) )
2019-12-09 21:38:41 +00:00
throw Exception ( " Cannot move parts because moves are manually disabled " , ErrorCodes : : ABORTED ) ;
2019-07-23 13:34:17 +00:00
}
2019-08-20 09:59:19 +00:00
void MergeTreeData : : movePartitionToVolume ( const ASTPtr & partition , const String & name , bool moving_part , const Context & context )
2019-07-23 13:34:17 +00:00
{
2019-08-20 09:59:19 +00:00
String partition_id ;
if ( moving_part )
partition_id = partition - > as < ASTLiteral & > ( ) . value . safeGet < String > ( ) ;
else
partition_id = getPartitionIDFromQuery ( partition , context ) ;
DataPartsVector parts ;
if ( moving_part )
{
2019-12-09 21:38:41 +00:00
auto part_info = MergeTreePartInfo : : fromPartName ( partition_id , format_version ) ;
parts . emplace_back ( getActiveContainingPart ( part_info ) ) ;
if ( ! parts . back ( ) | | parts . back ( ) - > name ! = part_info . getPartName ( ) )
2019-08-20 19:04:58 +00:00
throw Exception ( " Part " + partition_id + " is not exists or not active " , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2019-08-20 09:59:19 +00:00
}
else
parts = getDataPartsVectorInPartition ( MergeTreeDataPartState : : Committed , partition_id ) ;
2019-07-23 13:34:17 +00:00
2020-01-09 14:50:34 +00:00
auto volume = getStoragePolicy ( ) - > getVolumeByName ( name ) ;
2019-07-23 13:34:17 +00:00
if ( ! volume )
2020-01-09 14:50:34 +00:00
throw Exception ( " Volume " + name + " does not exists on policy " + getStoragePolicy ( ) - > getName ( ) , ErrorCodes : : UNKNOWN_DISK ) ;
2019-07-23 13:34:17 +00:00
2019-12-09 21:38:41 +00:00
if ( parts . empty ( ) )
throw Exception ( " Nothing to move " , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2019-10-24 08:52:33 +00:00
parts . erase ( std : : remove_if ( parts . begin ( ) , parts . end ( ) , [ & ] ( auto part_ptr )
2019-10-22 14:45:01 +00:00
{
2020-05-09 21:24:15 +00:00
for ( const auto & disk : volume - > getDisks ( ) )
2019-10-22 14:45:01 +00:00
{
2020-05-09 21:24:15 +00:00
if ( part_ptr - > volume - > getDisk ( ) - > getName ( ) = = disk - > getName ( ) )
2019-10-24 08:52:33 +00:00
{
return true ;
}
2019-10-22 14:45:01 +00:00
}
2019-10-24 08:52:33 +00:00
return false ;
} ) , parts . end ( ) ) ;
2019-10-22 14:45:01 +00:00
2019-10-24 08:52:33 +00:00
if ( parts . empty ( ) )
2019-10-22 14:45:01 +00:00
{
2019-10-24 10:56:32 +00:00
String no_parts_to_move_message ;
if ( moving_part )
no_parts_to_move_message = " Part ' " + partition_id + " ' is already on volume ' " + volume - > getName ( ) + " ' " ;
else
no_parts_to_move_message = " All parts of partition ' " + partition_id + " ' are already on volume ' " + volume - > getName ( ) + " ' " ;
throw Exception ( no_parts_to_move_message , ErrorCodes : : UNKNOWN_DISK ) ;
2019-10-22 14:45:01 +00:00
}
2019-07-23 13:34:17 +00:00
2019-12-03 13:37:40 +00:00
if ( ! movePartsToSpace ( parts , std : : static_pointer_cast < Space > ( volume ) ) )
2019-12-09 21:38:41 +00:00
throw Exception ( " Cannot move parts because moves are manually disabled " , ErrorCodes : : ABORTED ) ;
2019-07-18 15:19:03 +00:00
}
2020-11-11 13:34:07 +00:00
void MergeTreeData : : fetchPartition ( const ASTPtr & /*partition*/ , const StorageMetadataPtr & /*metadata_snapshot*/ , const String & /*from*/ , const Context & /*query_context*/ )
{
throw Exception ( ErrorCodes : : NOT_IMPLEMENTED , " FETCH PARTITION is not supported by storage {} " , getName ( ) ) ;
}
/// Executes the given partition commands one by one, in order.
///
/// Commands that produce result rows (ATTACH, FREEZE, FREEZE ALL) have those rows stamped with
/// the command type and appended to the overall result. The result is converted to a Pipe only
/// when the `alter_partition_verbose_result` setting is enabled; otherwise an empty Pipe is returned.
Pipe MergeTreeData::alterPartition(
    const StorageMetadataPtr & metadata_snapshot,
    const PartitionCommands & commands,
    const Context & query_context)
{
    /// Both FREEZE variants take a share lock on the table for the duration of the command.
    const auto take_share_lock = [&]
    {
        return lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
    };

    PartitionCommandsResultInfo all_results;

    for (const PartitionCommand & command : commands)
    {
        PartitionCommandsResultInfo command_results;

        switch (command.type)
        {
            case PartitionCommand::DROP_PARTITION:
            {
                if (command.part)
                    checkPartCanBeDropped(command.partition);
                else
                    checkPartitionCanBeDropped(command.partition);

                dropPartition(command.partition, command.detach, command.part, query_context);
                break;
            }

            case PartitionCommand::DROP_DETACHED_PARTITION:
            {
                dropDetached(command.partition, command.part, query_context);
                break;
            }

            case PartitionCommand::ATTACH_PARTITION:
            {
                command_results = attachPartition(command.partition, metadata_snapshot, command.part, query_context);
                break;
            }

            case PartitionCommand::MOVE_PARTITION:
            {
                switch (*command.move_destination_type)
                {
                    case PartitionCommand::MoveDestinationType::DISK:
                    {
                        movePartitionToDisk(command.partition, command.move_destination_name, command.part, query_context);
                        break;
                    }

                    case PartitionCommand::MoveDestinationType::VOLUME:
                    {
                        movePartitionToVolume(command.partition, command.move_destination_name, command.part, query_context);
                        break;
                    }

                    case PartitionCommand::MoveDestinationType::TABLE:
                    {
                        /// The partition leaves this table, so the same size check as for DROP is applied.
                        checkPartitionCanBeDropped(command.partition);
                        String dest_database = query_context.resolveDatabase(command.to_database);
                        auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}, query_context);
                        movePartitionToTable(dest_storage, command.partition, query_context);
                        break;
                    }
                }
                break;
            }

            case PartitionCommand::REPLACE_PARTITION:
            {
                checkPartitionCanBeDropped(command.partition);
                String from_database = query_context.resolveDatabase(command.from_database);
                auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}, query_context);
                replacePartitionFrom(from_storage, command.partition, command.replace, query_context);
                break;
            }

            case PartitionCommand::FETCH_PARTITION:
            {
                fetchPartition(command.partition, metadata_snapshot, command.from_zookeeper_path, query_context);
                break;
            }

            case PartitionCommand::FREEZE_PARTITION:
            {
                auto share_lock = take_share_lock();
                command_results = freezePartition(command.partition, metadata_snapshot, command.with_name, query_context, share_lock);
                break;
            }

            case PartitionCommand::FREEZE_ALL_PARTITIONS:
            {
                auto share_lock = take_share_lock();
                command_results = freezeAll(command.with_name, metadata_snapshot, query_context, share_lock);
                break;
            }
        }

        for (auto & row : command_results)
            row.command_type = command.typeToString();

        all_results.insert(all_results.end(), command_results.begin(), command_results.end());
    }

    if (!query_context.getSettingsRef().alter_partition_verbose_result)
        return {};

    return convertCommandsResultToSource(all_results);
}
2019-07-18 15:19:03 +00:00
2020-07-13 17:27:52 +00:00
/// Resolves the partition ID that a partition AST node refers to.
///
///  1. If the node carries no value expression, its `id` field is returned as is.
///  2. For old-format (month-partitioned) tables a string literal is taken as the ID itself,
///     after checking that it consists of exactly 6 ASCII digits.
///  3. Otherwise the textual fields are re-parsed in the "Values" format against the partition key
///     sample block, and the ID is computed from the resulting row.
///
/// Throws INVALID_PARTITION_VALUE when the literal is malformed, when the number of fields differs
/// from the partition key, when the value cannot be parsed, or when the parsed value disagrees with
/// the partition value of an existing part that has the same partition ID.
String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context & context) const
{
    const auto & partition_ast = ast->as<ASTPartition &>();

    if (!partition_ast.value)
        return partition_ast.id;

    if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
    {
        /// Month-partitioning specific - partition ID can be passed in the partition value.
        const auto * literal = partition_ast.value->as<ASTLiteral>();
        if (literal && literal->value.getType() == Field::Types::String)
        {
            String month_id = literal->value.get<String>();

            const bool is_six_digits
                = month_id.size() == 6 && std::all_of(month_id.begin(), month_id.end(), isNumericASCII);
            if (!is_six_digits)
                throw Exception(
                    "Invalid partition format: " + month_id + ". Partition should consist of 6 digits: YYYYMM",
                    ErrorCodes::INVALID_PARTITION_VALUE);

            return month_id;
        }
    }

    /// Re-parse partition key fields using the information about expected field types.

    auto metadata_snapshot = getInMemoryMetadataPtr();
    const auto & key_sample_block = metadata_snapshot->getPartitionKey().sample_block;

    const size_t fields_count = key_sample_block.columns();
    if (partition_ast.fields_count != fields_count)
        throw Exception(
            "Wrong number of fields in the partition expression: " + toString(partition_ast.fields_count) +
            ", must be: " + toString(fields_count),
            ErrorCodes::INVALID_PARTITION_VALUE);

    const FormatSettings format_settings;
    Row partition_row(fields_count);

    if (fields_count)
    {
        /// The fields are wrapped into parentheses so that they form a single tuple for the "Values" parser.
        ReadBufferFromMemory opening_paren("(", 1);
        ReadBufferFromMemory fields_text(partition_ast.fields_str.data(), partition_ast.fields_str.size());
        ReadBufferFromMemory closing_paren(")", 1);
        ConcatReadBuffer values_buf({&opening_paren, &fields_text, &closing_paren});

        auto input_stream = FormatFactory::instance().getInput(
            "Values", values_buf, key_sample_block, context, context.getSettingsRef().max_block_size);

        auto parsed_block = input_stream->read();
        if (!parsed_block || !parsed_block.rows())
            throw Exception(
                "Could not parse partition value: `" + partition_ast.fields_str + "`",
                ErrorCodes::INVALID_PARTITION_VALUE);

        /// Only the first parsed row is used.
        for (size_t pos = 0; pos < fields_count; ++pos)
            parsed_block.getByPosition(pos).column->get(0, partition_row[pos]);
    }

    MergeTreePartition partition(std::move(partition_row));
    String partition_id = partition.getID(*this);

    {
        auto data_parts_lock = lockParts();
        DataPartPtr existing_part = getAnyPartInPartition(partition_id, data_parts_lock);

        /// Same ID but a different value means that the user-supplied value is wrong.
        if (existing_part && existing_part->partition.value != partition.value)
        {
            WriteBufferFromOwnString message;
            writeCString("Parsed partition value: ", message);
            partition.serializeText(*this, message, format_settings);
            writeCString(" doesn't match partition value for an existing part with the same partition ID: ", message);
            writeString(existing_part->name, message);
            throw Exception(message.str(), ErrorCodes::INVALID_PARTITION_VALUE);
        }
    }

    return partition_id;
}
2017-11-20 19:33:12 +00:00
/// Returns the parts whose state is one of `affordable_states`, as one vector ordered by LessDataPart.
/// The per-state ranges are merged one after another while the parts lock is held.
/// If `out_states` is not null, it is resized to match the result and filled with the state
/// of each returned part (read under the same lock).
MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector(const DataPartStates & affordable_states, DataPartStateVector * out_states) const
{
    DataPartsVector merged;
    DataPartsVector previous;

    {
        auto lock = lockParts();

        for (auto state : affordable_states)
        {
            /// Whatever was merged so far becomes the second input of the next merge.
            std::swap(previous, merged);
            merged.clear();

            auto state_range = getDataPartsStateRange(state);
            std::merge(state_range.begin(), state_range.end(), previous.begin(), previous.end(), std::back_inserter(merged), LessDataPart());
        }

        if (out_states)
        {
            out_states->resize(merged.size());
            std::transform(merged.begin(), merged.end(), out_states->begin(),
                [](const auto & part) { return part->state; });
        }
    }

    return merged;
}
2017-11-20 19:33:12 +00:00
/// Returns every part from `data_parts_by_info`, regardless of its state, copied under the parts lock.
/// If `out_states` is not null, it is resized to match the result and filled with the state
/// of each returned part (read under the same lock).
MergeTreeData::DataPartsVector MergeTreeData::getAllDataPartsVector(MergeTreeData::DataPartStateVector * out_states) const
{
    DataPartsVector all_parts;

    {
        auto lock = lockParts();
        all_parts.assign(data_parts_by_info.begin(), data_parts_by_info.end());

        if (out_states)
        {
            out_states->resize(all_parts.size());
            std::transform(all_parts.begin(), all_parts.end(), out_states->begin(),
                [](const auto & part) { return part->state; });
        }
    }

    return all_parts;
}
2019-05-20 16:24:36 +00:00
std : : vector < DetachedPartInfo >
MergeTreeData : : getDetachedParts ( ) const
{
std : : vector < DetachedPartInfo > res ;
2020-03-19 16:37:55 +00:00
for ( const auto & [ path , disk ] : getRelativeDataPathsWithDisks ( ) )
2019-05-20 16:24:36 +00:00
{
2020-03-19 16:37:55 +00:00
for ( auto it = disk - > iterateDirectory ( path + " detached " ) ; it - > isValid ( ) ; it - > next ( ) )
2019-06-09 12:38:54 +00:00
{
res . emplace_back ( ) ;
2019-08-29 16:17:47 +00:00
auto & part = res . back ( ) ;
2019-05-20 16:24:36 +00:00
2020-03-19 16:37:55 +00:00
DetachedPartInfo : : tryParseDetachedPartName ( it - > name ( ) , part , format_version ) ;
2019-11-19 06:44:10 +00:00
part . disk = disk - > getName ( ) ;
2019-08-29 16:17:47 +00:00
}
2019-05-20 16:24:36 +00:00
}
return res ;
}
2019-07-26 20:04:45 +00:00
void MergeTreeData : : validateDetachedPartName ( const String & name ) const
{
if ( name . find ( ' / ' ) ! = std : : string : : npos | | name = = " . " | | name = = " .. " )
2019-09-11 17:17:10 +00:00
throw DB : : Exception ( " Invalid part name ' " + name + " ' " , ErrorCodes : : INCORRECT_FILE_NAME ) ;
2019-05-20 16:24:36 +00:00
2020-03-19 16:37:55 +00:00
auto full_path = getFullRelativePathForPart ( name , " detached/ " ) ;
2019-08-29 16:17:47 +00:00
2020-03-19 16:37:55 +00:00
if ( ! full_path )
2019-07-26 20:04:45 +00:00
throw DB : : Exception ( " Detached part \" " + name + " \" not found " , ErrorCodes : : BAD_DATA_PART_NAME ) ;
2019-05-20 16:24:36 +00:00
2019-07-31 14:44:55 +00:00
if ( startsWith ( name , " attaching_ " ) | | startsWith ( name , " deleting_ " ) )
throw DB : : Exception ( " Cannot drop part " + name + " : "
" most likely it is used by another DROP or ATTACH query. " ,
ErrorCodes : : BAD_DATA_PART_NAME ) ;
2019-07-26 20:04:45 +00:00
}
2019-07-31 14:44:55 +00:00
void MergeTreeData : : dropDetached ( const ASTPtr & partition , bool part , const Context & context )
2019-07-30 17:24:40 +00:00
{
2019-08-29 16:17:47 +00:00
PartsTemporaryRename renamed_parts ( * this , " detached/ " ) ;
2019-07-31 14:44:55 +00:00
if ( part )
{
String part_name = partition - > as < ASTLiteral & > ( ) . value . safeGet < String > ( ) ;
validateDetachedPartName ( part_name ) ;
renamed_parts . addPart ( part_name , " deleting_ " + part_name ) ;
2019-05-20 16:24:36 +00:00
}
2019-07-31 14:44:55 +00:00
else
{
String partition_id = getPartitionIDFromQuery ( partition , context ) ;
DetachedPartsInfo detached_parts = getDetachedParts ( ) ;
for ( const auto & part_info : detached_parts )
if ( part_info . valid_name & & part_info . partition_id = = partition_id
& & part_info . prefix ! = " attaching " & & part_info . prefix ! = " deleting " )
renamed_parts . addPart ( part_info . dir_name , " deleting_ " + part_info . dir_name ) ;
}
2019-05-20 16:24:36 +00:00
2020-05-23 22:24:01 +00:00
LOG_DEBUG ( log , " Will drop {} detached parts. " , renamed_parts . old_and_new_names . size ( ) ) ;
2019-07-30 17:24:40 +00:00
2019-07-31 14:44:55 +00:00
renamed_parts . tryRenameAll ( ) ;
2019-09-06 15:09:20 +00:00
for ( auto & [ old_name , new_name ] : renamed_parts . old_and_new_names )
2019-07-31 14:44:55 +00:00
{
2020-03-19 16:37:55 +00:00
const auto & [ path , disk ] = renamed_parts . old_part_name_to_path_and_disk [ old_name ] ;
disk - > removeRecursive ( path + " detached/ " + new_name + " / " ) ;
2020-05-23 22:24:01 +00:00
LOG_DEBUG ( log , " Dropped detached part {} " , old_name ) ;
2019-09-06 15:09:20 +00:00
old_name . clear ( ) ;
2019-07-31 14:44:55 +00:00
}
2019-07-30 17:24:40 +00:00
}
2019-07-30 19:11:15 +00:00
/// Collects the detached parts that an ATTACH query refers to, renames them to "attaching_<name>"
/// through `renamed_parts`, and loads each of them.
///
/// When `attach_part` is true, `partition` is a literal with the name of one detached part; otherwise
/// it is a partition expression and the "detached/" directory of every disk of the storage policy is
/// scanned for parts of that partition. Parts that are covered by another found part are renamed
/// to "inactive_<name>" and are not attached.
///
/// Returns the loaded parts, in the order of `renamed_parts.old_and_new_names`.
MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part,
        const Context & context, PartsTemporaryRename & renamed_parts)
{
    const String source_dir = "detached/";

    /// Remembers on which disk each candidate part was found.
    std::map<String, DiskPtr> name_to_disk;

    /// Let's compose a list of parts that should be added.
    if (attach_part)
    {
        const String part_id = partition->as<ASTLiteral &>().value.safeGet<String>();
        validateDetachedPartName(part_id);
        renamed_parts.addPart(part_id, "attaching_" + part_id);

        if (MergeTreePartInfo::tryParsePartName(part_id, nullptr, format_version))
            name_to_disk[part_id] = getDiskForPart(part_id, source_dir);
    }
    else
    {
        const String partition_id = getPartitionIDFromQuery(partition, context);
        LOG_DEBUG(log, "Looking for parts for partition {} in {}", partition_id, source_dir);

        ActiveDataPartSet active_parts(format_version);
        const String detached_path = relative_data_path + source_dir;

        const auto disks = getStoragePolicy()->getDisks();
        for (const auto & disk : disks)
        {
            for (auto dir_it = disk->iterateDirectory(detached_path); dir_it->isValid(); dir_it->next())
            {
                const String & name = dir_it->name();
                MergeTreePartInfo part_info;

                // TODO what if name contains "_tryN" suffix?
                /// Parts with prefix in name (e.g. attaching_1_3_3_0, deleting_1_3_3_0) will be ignored
                const bool parsed = MergeTreePartInfo::tryParsePartName(name, &part_info, format_version);
                if (!parsed || part_info.partition_id != partition_id)
                    continue;

                LOG_DEBUG(log, "Found part {}", name);
                active_parts.add(name);
                name_to_disk[name] = disk;
            }
        }

        LOG_DEBUG(log, "{} of them are active", active_parts.size());

        /// Inactive parts rename so they can not be attached in case of repeated ATTACH.
        for (const auto & [name, disk] : name_to_disk)
        {
            const String containing_part = active_parts.getContainingPart(name);
            const bool is_covered_by_other = !containing_part.empty() && containing_part != name;

            if (is_covered_by_other)
            {
                // TODO maybe use PartsTemporaryRename here?
                disk->moveDirectory(detached_path + name, detached_path + "inactive_" + name);
            }
            else
            {
                renamed_parts.addPart(name, "attaching_" + name);
            }
        }
    }

    /// Try to rename all parts before attaching to prevent race with DROP DETACHED and another ATTACH.
    renamed_parts.tryRenameAll();

    /// Synchronously check that added parts exist and are not broken. We will write checksums.txt if it does not exist.
    LOG_DEBUG(log, "Checking parts");

    MutableDataPartsVector loaded_parts;
    loaded_parts.reserve(renamed_parts.old_and_new_names.size());

    for (const auto & [old_name, new_name] : renamed_parts.old_and_new_names)
    {
        LOG_DEBUG(log, "Checking part {}", new_name);

        /// The part keeps its original name but is loaded from the temporarily renamed directory.
        auto single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + old_name, name_to_disk[old_name], 0);
        MutableDataPartPtr loaded = createPart(old_name, single_disk_volume, source_dir + new_name);
        loadPartAndFixMetadataImpl(loaded);
        loaded_parts.push_back(loaded);
    }

    return loaded_parts;
}
2019-10-31 10:40:11 +00:00
namespace
2019-04-01 18:41:19 +00:00
{
2019-04-05 17:37:27 +00:00
2019-12-12 11:10:18 +00:00
inline ReservationPtr checkAndReturnReservation ( UInt64 expected_size , ReservationPtr reservation )
2019-10-31 10:40:11 +00:00
{
2019-04-05 17:37:27 +00:00
if ( reservation )
2019-04-03 12:52:09 +00:00
return reservation ;
2020-05-30 21:35:52 +00:00
throw Exception ( fmt : : format ( " Cannot reserve {}, not enough space " , ReadableSize ( expected_size ) ) , ErrorCodes : : NOT_ENOUGH_SPACE ) ;
2019-04-01 18:41:19 +00:00
}
2019-10-31 10:40:11 +00:00
}
2019-12-12 11:10:18 +00:00
/// Reserves at least RESERVATION_MIN_ESTIMATION_SIZE bytes (or `expected_size`, if larger)
/// through the storage policy. Throws NOT_ENOUGH_SPACE if the policy returns no reservation.
ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) const
{
    expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
    return checkAndReturnReservation(expected_size, getStoragePolicy()->reserve(expected_size));
}
2020-03-18 00:57:00 +00:00
/// Reserves at least RESERVATION_MIN_ESTIMATION_SIZE bytes (or `expected_size`, if larger)
/// on the given `space`. Throws NOT_ENOUGH_SPACE if the reservation cannot be made.
ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, SpacePtr space)
{
    expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
    return checkAndReturnReservation(expected_size, tryReserveSpace(expected_size, space));
}
2020-03-18 00:57:00 +00:00
/// Non-throwing counterpart of reserveSpace(expected_size, space): returns whatever
/// `space->reserve()` returns for the size raised to at least RESERVATION_MIN_ESTIMATION_SIZE.
ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, SpacePtr space)
{
    const UInt64 size_to_reserve = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
    return space->reserve(size_to_reserve);
}
2020-10-05 16:41:46 +00:00
/// Throwing wrapper around tryReserveSpacePreferringTTLRules(): forwards all arguments
/// (with the size raised to at least RESERVATION_MIN_ESTIMATION_SIZE) and throws
/// NOT_ENOUGH_SPACE if no reservation was obtained.
ReservationPtr MergeTreeData::reserveSpacePreferringTTLRules(
    const StorageMetadataPtr & metadata_snapshot,
    UInt64 expected_size,
    const IMergeTreeDataPart::TTLInfos & ttl_infos,
    time_t time_of_move,
    size_t min_volume_index,
    bool is_insert) const
{
    expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);

    return checkAndReturnReservation(
        expected_size,
        tryReserveSpacePreferringTTLRules(metadata_snapshot, expected_size, ttl_infos, time_of_move, min_volume_index, is_insert));
}
2020-10-05 16:41:46 +00:00
/// Tries to reserve space for a part, preferring the destination of the applicable move TTL rule.
///
/// If a move TTL entry is selected for `ttl_infos` at `time_of_move` and its destination can be
/// resolved, the reservation is attempted there first. When the destination is not available or has
/// not enough space, a warning is logged and the reservation falls back to the storage policy,
/// starting from `min_volume_index`.
///
/// Returns the reservation, which may be empty if the fallback fails as well.
ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(
    const StorageMetadataPtr & metadata_snapshot,
    UInt64 expected_size,
    const IMergeTreeDataPart::TTLInfos & ttl_infos,
    time_t time_of_move,
    size_t min_volume_index,
    bool is_insert) const
{
    expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);

    auto move_ttl_entry = selectTTLDescriptionForTTLInfos(metadata_snapshot->getMoveTTLs(), ttl_infos.moves_ttl, time_of_move, true);

    if (move_ttl_entry)
    {
        const bool to_volume = move_ttl_entry->destination_type == DataDestinationType::VOLUME;
        const bool to_disk = move_ttl_entry->destination_type == DataDestinationType::DISK;

        SpacePtr destination_ptr = getDestinationForMoveTTL(*move_ttl_entry, is_insert);

        if (destination_ptr)
        {
            if (ReservationPtr ttl_reservation = destination_ptr->reserve(expected_size))
                return ttl_reservation;

            if (to_volume)
                LOG_WARNING(log, "Would like to reserve space on volume '{}' by TTL rule of table '{}' but there is not enough space",
                    move_ttl_entry->destination_name, log_name);
            else if (to_disk)
                LOG_WARNING(log, "Would like to reserve space on disk '{}' by TTL rule of table '{}' but there is not enough space",
                    move_ttl_entry->destination_name, log_name);
        }
        else
        {
            if (to_volume)
                LOG_WARNING(log, "Would like to reserve space on volume '{}' by TTL rule of table '{}' but volume was not found or rule is not applicable at the moment",
                    move_ttl_entry->destination_name, log_name);
            else if (to_disk)
                LOG_WARNING(log, "Would like to reserve space on disk '{}' by TTL rule of table '{}' but disk was not found or rule is not applicable at the moment",
                    move_ttl_entry->destination_name, log_name);
        }
    }

    /// No usable TTL destination: let the storage policy choose.
    return getStoragePolicy()->reserve(expected_size, min_volume_index);
}
2020-09-18 15:41:14 +00:00
/// Resolves the destination of a move TTL rule to a volume or a disk of the storage policy.
///
/// Returns an empty pointer when:
///  - the destination type is neither VOLUME nor DISK;
///  - the policy has no volume or disk with that name (or no volume for the disk's index);
///  - `is_insert` is set and the volume involved has `perform_ttl_move_on_insert` turned off.
SpacePtr MergeTreeData::getDestinationForMoveTTL(const TTLDescription & move_ttl, bool is_insert) const
{
    auto policy = getStoragePolicy();

    if (move_ttl.destination_type == DataDestinationType::VOLUME)
    {
        auto volume = policy->getVolumeByName(move_ttl.destination_name);

        const bool usable = volume && !(is_insert && !volume->perform_ttl_move_on_insert);
        if (!usable)
            return {};

        return volume;
    }

    if (move_ttl.destination_type == DataDestinationType::DISK)
    {
        auto disk = policy->getDiskByName(move_ttl.destination_name);
        if (!disk)
            return {};

        /// The on-insert restriction is configured per volume, so the disk's volume is looked up too.
        auto owning_volume = policy->getVolume(policy->getVolumeIndexByDisk(disk));

        const bool usable = owning_volume && !(is_insert && !owning_volume->perform_ttl_move_on_insert);
        if (!usable)
            return {};

        return disk;
    }

    return {};
}
2020-05-28 15:33:44 +00:00
bool MergeTreeData : : isPartInTTLDestination ( const TTLDescription & ttl , const IMergeTreeDataPart & part ) const
2019-11-29 07:00:43 +00:00
{
2020-05-25 17:07:14 +00:00
auto policy = getStoragePolicy ( ) ;
if ( ttl . destination_type = = DataDestinationType : : VOLUME )
2019-11-29 07:00:43 +00:00
{
2020-05-25 17:07:14 +00:00
for ( const auto & disk : policy - > getVolumeByName ( ttl . destination_name ) - > getDisks ( ) )
2020-05-09 21:24:15 +00:00
if ( disk - > getName ( ) = = part . volume - > getDisk ( ) - > getName ( ) )
2019-11-29 07:00:43 +00:00
return true ;
}
2020-05-25 17:07:14 +00:00
else if ( ttl . destination_type = = DataDestinationType : : DISK )
return policy - > getDiskByName ( ttl . destination_name ) - > getName ( ) = = part . volume - > getDisk ( ) - > getName ( ) ;
2019-11-29 07:00:43 +00:00
return false ;
}
2020-08-31 19:50:42 +00:00
/// Picks the compression codec for a part of `part_size_compressed` bytes.
///
/// If selectTTLDescriptionForTTLInfos() yields a recompression TTL rule for the part's
/// recompression TTL infos at `current_time`, the codec of that rule is returned.
/// Otherwise the decision is delegated to the global context, which receives the part size and
/// that size divided by getTotalActiveSizeInBytes().
CompressionCodecPtr MergeTreeData::getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const
{
    /// The snapshot is held in a local so the rule list taken from it stays valid below.
    auto metadata = getInMemoryMetadataPtr();
    const auto & recompression_rules = metadata->getRecompressionTTLs();

    if (auto chosen_rule = selectTTLDescriptionForTTLInfos(recompression_rules, ttl_infos.recompression_ttl, current_time, true))
        return CompressionCodecFactory::instance().get(chosen_rule->recompression_codec, {});

    const double size_ratio = static_cast<double>(part_size_compressed) / getTotalActiveSizeInBytes();
    return global_context.chooseCompressionCodec(part_size_compressed, size_ratio);
}
2017-10-03 19:04:56 +00:00
/// Collects, under the parts lock, every data part whose state is listed in `affordable_states`.
MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affordable_states) const
{
    DataParts collected;

    {
        /// The lock covers only the collection; it is released before the result is returned.
        auto parts_guard = lockParts();

        for (auto wanted_state : affordable_states)
        {
            auto parts_in_state = getDataPartsStateRange(wanted_state);
            collected.insert(parts_in_state.begin(), parts_in_state.end());
        }
    }

    return collected;
}
/// Convenience overload: returns the parts that are in the Committed state.
MergeTreeData::DataParts MergeTreeData::getDataParts() const
{
    return getDataParts(DataPartStates{DataPartState::Committed});
}
/// Convenience overload: returns, as a vector, the parts that are in the Committed state.
MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector() const
{
    return getDataPartsVector(DataPartStates{DataPartState::Committed});
}
/// Returns some Committed part belonging to `partition_id`, or nullptr if there is none.
/// The lock parameter is unused in the body; it only forces the caller to hold the parts lock.
MergeTreeData::DataPartPtr MergeTreeData::getAnyPartInPartition(
    const String & partition_id, DataPartsLock & /*data_parts_lock*/) const
{
    /// First element not ordered before (Committed, partition_id) in the state-and-info index.
    auto candidate = data_parts_by_state_and_info.lower_bound(DataPartStateAndPartitionID{DataPartState::Committed, partition_id});

    if (candidate == data_parts_by_state_and_info.end())
        return nullptr;

    /// lower_bound may land on a part of another state or partition; accept only an exact match.
    if ((*candidate)->state != DataPartState::Committed || (*candidate)->info.partition_id != partition_id)
        return nullptr;

    return *candidate;
}
2020-09-17 15:33:50 +00:00
void MergeTreeData : : Transaction : : rollbackPartsToTemporaryState ( )
{
if ( ! isEmpty ( ) )
{
2020-11-09 19:07:38 +00:00
WriteBufferFromOwnString buf ;
buf < < " Rollbacking parts state to temporary and removing from working set: " ;
2020-09-17 15:33:50 +00:00
for ( const auto & part : precommitted_parts )
2020-11-09 19:07:38 +00:00
buf < < " " < < part - > relative_path ;
buf < < " . " ;
LOG_DEBUG ( data . log , " Undoing transaction.{} " , buf . str ( ) ) ;
2020-09-17 15:33:50 +00:00
data . removePartsFromWorkingSetImmediatelyAndSetTemporaryState (
DataPartsVector ( precommitted_parts . begin ( ) , precommitted_parts . end ( ) ) ) ;
}
clear ( ) ;
}
2017-05-24 20:19:29 +00:00
void MergeTreeData : : Transaction : : rollback ( )
{
2017-11-20 19:33:12 +00:00
if ( ! isEmpty ( ) )
2017-05-24 20:19:29 +00:00
{
2020-11-09 19:07:38 +00:00
WriteBufferFromOwnString buf ;
buf < < " Removing parts: " ;
2018-02-19 15:31:43 +00:00
for ( const auto & part : precommitted_parts )
2020-11-09 19:07:38 +00:00
buf < < " " < < part - > relative_path ;
buf < < " . " ;
LOG_DEBUG ( data . log , " Undoing transaction.{} " , buf . str ( ) ) ;
2017-05-24 20:19:29 +00:00
2018-09-20 14:30:52 +00:00
data . removePartsFromWorkingSet (
2018-02-19 15:31:43 +00:00
DataPartsVector ( precommitted_parts . begin ( ) , precommitted_parts . end ( ) ) ,
/* clear_without_timeout = */ true ) ;
2017-05-24 20:19:29 +00:00
}
2017-11-20 19:33:12 +00:00
clear ( ) ;
2017-05-24 20:19:29 +00:00
}
2018-05-21 13:49:54 +00:00
/// Commits every precommitted part of the transaction and returns all parts that became covered
/// (and were therefore switched to Outdated) by the newly committed ones.
///
/// `acquired_parts_lock` - if non-null, the caller's parts lock is used; otherwise the parts lock is
/// taken here for the duration of the state changes.
///
/// For each precommitted part:
///  - if an active covering part is reported, the part is marked Outdated with remove_time = 0;
///  - otherwise the active parts it replaces are marked Outdated (remove_time = now) and the part
///    itself becomes Committed, with column size statistics adjusted accordingly.
/// The transaction is cleared in every case.
MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData::DataPartsLock * acquired_parts_lock)
{
    DataPartsVector replaced_parts;

    if (!isEmpty())
    {
        /// When the caller supplies a lock, `own_lock` is a default-constructed lock object and nothing is locked here.
        auto own_lock = acquired_parts_lock ? MergeTreeData::DataPartsLock() : data.lockParts();
        MergeTreeData::DataPartsLock & effective_lock = acquired_parts_lock ? *acquired_parts_lock : own_lock;

        const auto now = time(nullptr);

        for (const DataPartPtr & new_part : precommitted_parts)
        {
            DataPartPtr covering_part;
            DataPartsVector parts_to_replace = data.getActivePartsToReplace(new_part->info, new_part->name, covering_part, effective_lock);

            if (covering_part)
            {
                LOG_WARNING(data.log, " Tried to commit obsolete part {} covered by {} ", new_part->name, covering_part->getNameWithState());

                /// The part will be removed without waiting for old_parts_lifetime seconds.
                new_part->remove_time.store(0, std::memory_order_relaxed);
                data.modifyPartState(new_part, DataPartState::Outdated);
                continue;
            }

            replaced_parts.insert(replaced_parts.end(), parts_to_replace.begin(), parts_to_replace.end());

            for (const DataPartPtr & old_part : parts_to_replace)
            {
                old_part->remove_time.store(now, std::memory_order_relaxed);
                data.modifyPartState(old_part, DataPartState::Outdated);
                data.removePartContributionToColumnSizes(old_part);
            }

            data.modifyPartState(new_part, DataPartState::Committed);
            data.addPartContributionToColumnSizes(new_part);
        }
    }

    clear();
    return replaced_parts;
}
2017-05-24 20:19:29 +00:00
2020-06-17 12:39:20 +00:00
bool MergeTreeData : : isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions (
const ASTPtr & node , const StorageMetadataPtr & metadata_snapshot ) const
2018-01-21 07:30:07 +00:00
{
2018-06-30 21:35:01 +00:00
const String column_name = node - > getColumnName ( ) ;
2018-01-21 07:30:07 +00:00
2020-06-17 12:39:20 +00:00
for ( const auto & name : metadata_snapshot - > getPrimaryKeyColumns ( ) )
2018-06-30 21:35:01 +00:00
if ( column_name = = name )
2018-01-21 07:30:07 +00:00
return true ;
2018-06-30 21:35:01 +00:00
for ( const auto & name : minmax_idx_columns )
if ( column_name = = name )
2018-04-19 20:34:02 +00:00
return true ;
2018-04-04 20:37:28 +00:00
2019-03-11 13:22:51 +00:00
if ( const auto * func = node - > as < ASTFunction > ( ) )
2018-03-16 06:51:37 +00:00
if ( func - > arguments - > children . size ( ) = = 1 )
2020-06-17 12:39:20 +00:00
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions ( func - > arguments - > children . front ( ) , metadata_snapshot ) ;
2018-03-16 06:51:37 +00:00
2018-01-21 07:30:07 +00:00
return false ;
}
2020-06-17 09:38:47 +00:00
bool MergeTreeData : : mayBenefitFromIndexForIn (
const ASTPtr & left_in_operand , const Context & , const StorageMetadataPtr & metadata_snapshot ) const
2018-01-21 07:30:07 +00:00
{
2018-04-19 20:34:02 +00:00
/// Make sure that the left side of the IN operator contain part of the key.
/// If there is a tuple on the left side of the IN operator, at least one item of the tuple
/// must be part of the key (probably wrapped by a chain of some acceptable functions).
2019-03-11 13:22:51 +00:00
const auto * left_in_operand_tuple = left_in_operand - > as < ASTFunction > ( ) ;
2020-05-28 12:37:05 +00:00
const auto & index_wrapper_factory = MergeTreeIndexFactory : : instance ( ) ;
2018-01-21 07:30:07 +00:00
if ( left_in_operand_tuple & & left_in_operand_tuple - > name = = " tuple " )
{
for ( const auto & item : left_in_operand_tuple - > arguments - > children )
2019-02-25 08:43:19 +00:00
{
2020-06-17 12:39:20 +00:00
if ( isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions ( item , metadata_snapshot ) )
2018-04-03 18:37:35 +00:00
return true ;
2020-06-17 09:38:47 +00:00
for ( const auto & index : metadata_snapshot - > getSecondaryIndices ( ) )
2020-05-28 12:37:05 +00:00
if ( index_wrapper_factory . get ( index ) - > mayBenefitFromIndexForIn ( item ) )
2019-02-25 08:43:19 +00:00
return true ;
}
2018-04-03 18:37:35 +00:00
/// The tuple itself may be part of the primary key, so check that as a last resort.
2020-06-17 12:39:20 +00:00
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions ( left_in_operand , metadata_snapshot ) ;
2018-01-21 07:30:07 +00:00
}
else
{
2020-06-17 09:38:47 +00:00
for ( const auto & index : metadata_snapshot - > getSecondaryIndices ( ) )
2020-05-28 12:37:05 +00:00
if ( index_wrapper_factory . get ( index ) - > mayBenefitFromIndexForIn ( left_in_operand ) )
2019-02-25 08:43:19 +00:00
return true ;
2020-06-17 12:39:20 +00:00
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions ( left_in_operand , metadata_snapshot ) ;
2018-01-21 07:30:07 +00:00
}
}
2017-09-11 22:40:51 +00:00
2020-06-17 10:34:23 +00:00
/// Verifies that `source_table` is a MergeTree-family table compatible with this one and returns it
/// as MergeTreeData.
///
/// Throws:
///  - NOT_IMPLEMENTED      if the source table is not a MergeTreeData;
///  - INCOMPATIBLE_COLUMNS if the sets of physical columns differ;
///  - BAD_ARGUMENTS        if the sorting key, the partition key or format_version differ.
/// Keys are compared by their textual form; a missing key AST is compared as the fallback literal.
MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const
{
    auto * source_merge_tree = dynamic_cast<MergeTreeData *>(&source_table);
    if (source_merge_tree == nullptr)
    {
        throw Exception(" Table " + source_table.getStorageID().getNameForLogs() +
                        " supports attachPartitionFrom only for MergeTree family of table engines. "
                        " Got " + source_table.getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical()))
        throw Exception(" Tables have different structure ", ErrorCodes::INCOMPATIBLE_COLUMNS);

    const auto ast_as_text = [](const ASTPtr & ast)
    {
        return ast ? queryToString(ast) : " ";
    };

    const bool same_sorting_key = ast_as_text(my_snapshot->getSortingKeyAST()) == ast_as_text(src_snapshot->getSortingKeyAST());
    if (!same_sorting_key)
        throw Exception(" Tables have different ordering ", ErrorCodes::BAD_ARGUMENTS);

    const bool same_partition_key = ast_as_text(my_snapshot->getPartitionKeyAST()) == ast_as_text(src_snapshot->getPartitionKeyAST());
    if (!same_partition_key)
        throw Exception(" Tables have different partition key ", ErrorCodes::BAD_ARGUMENTS);

    if (format_version != source_merge_tree->format_version)
        throw Exception(" Tables have different format_version ", ErrorCodes::BAD_ARGUMENTS);

    return *source_merge_tree;
}
2020-06-17 10:34:23 +00:00
/// Pointer-taking overload: dereferences `source_table` and forwards to the reference-taking overload.
/// `source_table` is dereferenced without a null check.
MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(
    const StoragePtr & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const
{
    auto & source_storage = *source_table;
    return checkStructureAndGetMergeTreeData(source_storage, src_snapshot, my_snapshot);
}
2020-06-26 11:30:23 +00:00
/// Clones `src_part` into a temporary part of this table, on the disk where the source part is stored,
/// and loads the clone.
///
/// `tmp_part_prefix`   - prefix of the temporary directory name of the clone;
/// `dst_part_info`     - part info of the clone; its name is derived through src_part->getNewName();
/// `metadata_snapshot` - passed to flushToDisk() when the source part is an in-memory part.
///
/// Throws BAD_ARGUMENTS if the source disk is not in this table's storage policy and
/// DIRECTORY_ALREADY_EXISTS if the destination path already exists on the disk.
/// The returned part has `is_temp` set.
MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk(
    const MergeTreeData::DataPartPtr & src_part,
    const String & tmp_part_prefix,
    const MergeTreePartInfo & dst_part_info,
    const StorageMetadataPtr & metadata_snapshot)
{
    /// Check that the storage policy contains the disk where the src_part is located.
    bool source_disk_in_policy = false;
    for (const DiskPtr & policy_disk : getStoragePolicy()->getDisks())
    {
        if (policy_disk->getName() != src_part->volume->getDisk()->getName())
            continue;

        source_disk_in_policy = true;
        break;
    }

    if (!source_disk_in_policy)
    {
        throw Exception(
            " Could not clone and load part " + quoteString(src_part->getFullPath()) + " because disk does not belong to storage policy ",
            ErrorCodes::BAD_ARGUMENTS);
    }

    String dst_part_name = src_part->getNewName(dst_part_info);
    String tmp_dst_part_name = tmp_part_prefix + dst_part_name;

    auto reservation = reserveSpace(src_part->getBytesOnDisk(), src_part->volume->getDisk());
    auto disk = reservation->getDisk();

    String clone_source_path = src_part->getFullRelativePath();
    String clone_target_path = relative_data_path + tmp_dst_part_name;

    if (disk->exists(clone_target_path))
        throw Exception(" Part in " + fullPath(disk, clone_target_path) + " already exists ", ErrorCodes::DIRECTORY_ALREADY_EXISTS);

    /// If source part is in memory, flush it to disk and clone it already in on-disk format.
    if (auto in_memory_source = asInMemoryPart(src_part))
    {
        const auto & source_table_path = in_memory_source->storage.relative_data_path;
        auto flushed_dir = in_memory_source->getRelativePathForPrefix(tmp_part_prefix);
        in_memory_source->flushToDisk(source_table_path, flushed_dir, metadata_snapshot);
        clone_source_path = source_table_path + flushed_dir + " / ";
    }

    LOG_DEBUG(log, " Cloning part {} to {} ", fullPath(disk, clone_source_path), fullPath(disk, clone_target_path));
    localBackup(disk, clone_source_path, clone_target_path);

    /// The clone must not carry over the source's delete-on-destroy marker file.
    disk->removeIfExists(clone_target_path + " / " + IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME);

    auto clone_volume = std::make_shared<SingleDiskVolume>(disk->getName(), disk, 0);
    auto cloned_part = createPart(dst_part_name, dst_part_info, clone_volume, tmp_dst_part_name);

    cloned_part->is_temp = true;
    cloned_part->loadColumnsChecksumsIndexes(require_part_metadata, true);
    cloned_part->modification_time = disk->getLastModified(clone_target_path).epochTime();

    return cloned_part;
}
2019-11-27 09:39:44 +00:00
/// Returns the disk's path followed by this table's relative data path.
String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const
{
    String table_path_on_disk = disk->getPath();
    table_path_on_disk += relative_data_path;
    return table_path_on_disk;
}
2019-08-29 16:17:47 +00:00
2020-03-19 16:37:55 +00:00
/// Searches the disks of the storage policy for a directory entry named `part_name` inside
/// `relative_data_path + additional_path`. Returns the first disk that has it, or nullptr.
DiskPtr MergeTreeData::getDiskForPart(const String & part_name, const String & additional_path) const
{
    const auto policy_disks = getStoragePolicy()->getDisks();
    const String directory_to_scan = relative_data_path + additional_path;

    for (const DiskPtr & candidate_disk : policy_disks)
    {
        auto entry = candidate_disk->iterateDirectory(directory_to_scan);
        while (entry->isValid())
        {
            if (entry->name() == part_name)
                return candidate_disk;
            entry->next();
        }
    }

    return nullptr;
}
2020-03-19 16:37:55 +00:00
std : : optional < String > MergeTreeData : : getFullRelativePathForPart ( const String & part_name , const String & additional_path ) const
2019-08-29 16:17:47 +00:00
{
2020-03-19 16:37:55 +00:00
auto disk = getDiskForPart ( part_name , additional_path ) ;
2019-08-29 16:17:47 +00:00
if ( disk )
2020-03-19 16:37:55 +00:00
return relative_data_path + additional_path ;
return { } ;
2019-08-29 16:17:47 +00:00
}
2019-05-11 18:50:29 +00:00
/// Returns the full path of this table's data on every disk of the storage policy, in policy order.
Strings MergeTreeData::getDataPaths() const
{
    Strings paths;

    const auto policy_disks = getStoragePolicy()->getDisks();
    for (const auto & policy_disk : policy_disks)
        paths.emplace_back(getFullPathOnDisk(policy_disk));

    return paths;
}
2020-02-27 16:47:40 +00:00
/// Pairs this table's relative data path with every disk of the storage policy, in policy order.
MergeTreeData::PathsWithDisks MergeTreeData::getRelativeDataPathsWithDisks() const
{
    PathsWithDisks paths_with_disks;

    const auto policy_disks = getStoragePolicy()->getDisks();
    for (const auto & policy_disk : policy_disks)
        paths_with_disks.emplace_back(relative_data_path, policy_disk);

    return paths_with_disks;
}
2020-07-28 15:10:36 +00:00
/// Makes a backup copy ("freeze") of every active part accepted by `matcher`.
///
/// `with_name` - backup name; when empty, the value obtained from the increment file under the
///               server's shadow directory is used instead. The increment is fetched in both cases.
/// Each matching part is copied (or, for an in-memory part, flushed) into the shadow directory
/// of the disk the part lives on, and is flagged as frozen.
/// Returns one result entry per frozen part.
PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context)
{
    String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString();
    String default_shadow_path = clickhouse_path + " shadow/ ";
    Poco::File(default_shadow_path).createDirectories();
    auto increment = Increment(default_shadow_path + " increment.txt ").get(true);

    const String shadow_path = " shadow/ ";

    /// Acquire a snapshot of active data parts to prevent removing while doing backup.
    const auto data_parts = getDataParts();

    String backup_name;
    if (with_name.empty())
        backup_name = toString(increment);
    else
        backup_name = escapeForFileName(with_name);

    /// Same for every part, so it is built once.
    const String backup_path = shadow_path + backup_name + " / ";

    PartitionCommandsResultInfo result;
    size_t parts_processed = 0;

    for (const auto & part : data_parts)
    {
        if (!matcher(part))
            continue;

        part->volume->getDisk()->createDirectories(shadow_path);

        LOG_DEBUG(log, " Freezing part {} snapshot will be placed at {} ", part->name, backup_path);

        const String backup_part_path = backup_path + relative_data_path + part->relative_path;

        auto part_in_memory = asInMemoryPart(part);
        if (part_in_memory)
            part_in_memory->flushToDisk(backup_path + relative_data_path, part->relative_path, metadata_snapshot);
        else
            localBackup(part->volume->getDisk(), part->getFullRelativePath(), backup_part_path);

        /// The backup copy must not carry the delete-on-destroy marker file.
        part->volume->getDisk()->removeIfExists(backup_part_path + " / " + IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME);

        part->is_frozen.store(true, std::memory_order_relaxed);

        result.push_back(PartitionCommandResultInfo{
            .partition_id = part->info.partition_id,
            .part_name = part->name,
            .backup_path = part->volume->getDisk()->getPath() + backup_path,
            .part_backup_path = part->volume->getDisk()->getPath() + backup_part_path,
            .backup_name = backup_name,
        });

        ++parts_processed;
    }

    LOG_DEBUG(log, " Freezed {} parts ", parts_processed);
    return result;
}
2019-06-19 16:16:13 +00:00
bool MergeTreeData : : canReplacePartition ( const DataPartPtr & src_part ) const
{
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2019-08-13 08:35:49 +00:00
2019-08-13 10:29:31 +00:00
if ( ! settings - > enable_mixed_granularity_parts | | settings - > index_granularity_bytes = = 0 )
2019-06-19 16:16:13 +00:00
{
if ( ! canUseAdaptiveGranularity ( ) & & src_part - > index_granularity_info . is_adaptive )
return false ;
if ( canUseAdaptiveGranularity ( ) & & ! src_part - > index_granularity_info . is_adaptive )
return false ;
}
return true ;
}
2019-09-03 11:32:25 +00:00
void MergeTreeData : : writePartLog (
PartLogElement : : Type type ,
const ExecutionStatus & execution_status ,
UInt64 elapsed_ns ,
const String & new_part_name ,
const DataPartPtr & result_part ,
const DataPartsVector & source_parts ,
const MergeListEntry * merge_entry )
try
{
2019-12-03 16:25:32 +00:00
auto table_id = getStorageID ( ) ;
auto part_log = global_context . getPartLog ( table_id . database_name ) ;
2019-09-03 11:32:25 +00:00
if ( ! part_log )
return ;
PartLogElement part_log_elem ;
part_log_elem . event_type = type ;
part_log_elem . error = static_cast < UInt16 > ( execution_status . code ) ;
part_log_elem . exception = execution_status . message ;
part_log_elem . event_time = time ( nullptr ) ;
/// TODO: Stop stopwatch in outer code to exclude ZK timings and so on
2019-10-02 07:46:53 +00:00
part_log_elem . duration_ms = elapsed_ns / 1000000 ;
2019-09-03 11:32:25 +00:00
2019-12-03 16:25:32 +00:00
part_log_elem . database_name = table_id . database_name ;
part_log_elem . table_name = table_id . table_name ;
2019-09-03 11:32:25 +00:00
part_log_elem . partition_id = MergeTreePartInfo : : fromPartName ( new_part_name , format_version ) . partition_id ;
part_log_elem . part_name = new_part_name ;
if ( result_part )
{
part_log_elem . path_on_disk = result_part - > getFullPath ( ) ;
2020-03-23 13:32:02 +00:00
part_log_elem . bytes_compressed_on_disk = result_part - > getBytesOnDisk ( ) ;
2019-09-03 11:32:25 +00:00
part_log_elem . rows = result_part - > rows_count ;
}
part_log_elem . source_part_names . reserve ( source_parts . size ( ) ) ;
for ( const auto & source_part : source_parts )
part_log_elem . source_part_names . push_back ( source_part - > name ) ;
if ( merge_entry )
{
part_log_elem . rows_read = ( * merge_entry ) - > rows_read ;
part_log_elem . bytes_read_uncompressed = ( * merge_entry ) - > bytes_read_uncompressed ;
part_log_elem . rows = ( * merge_entry ) - > rows_written ;
part_log_elem . bytes_uncompressed = ( * merge_entry ) - > bytes_written_uncompressed ;
2020-03-19 11:31:21 +00:00
part_log_elem . peak_memory_usage = ( * merge_entry ) - > memory_tracker . getPeak ( ) ;
2019-09-03 11:32:25 +00:00
}
part_log - > add ( part_log_elem ) ;
}
catch ( . . . )
{
tryLogCurrentException ( log , __PRETTY_FUNCTION__ ) ;
}
2019-09-05 15:53:23 +00:00
/// Takes ownership of `moving_parts_` and registers each of its parts in `data_.currently_moving_parts`.
///
/// Throws LOGICAL_ERROR if some part is already registered. In that case every part registered by
/// this constructor call is unregistered again before throwing: the destructor is not invoked for
/// an object whose constructor throws, so without the rollback those parts would stay marked as
/// moving indefinitely.
///
/// No lock is taken here. The callers visible in this file (selectPartsForMove, checkPartsForMove)
/// construct the tagger while holding `moving_parts_mutex`.
MergeTreeData::CurrentlyMovingPartsTagger::CurrentlyMovingPartsTagger(MergeTreeMovingParts && moving_parts_, MergeTreeData & data_)
    : parts_to_move(std::move(moving_parts_)), data(data_)
{
    for (auto it = parts_to_move.begin(); it != parts_to_move.end(); ++it)
    {
        if (data.currently_moving_parts.emplace(it->part).second)
            continue;

        /// Undo only what was inserted by this call; the conflicting entry belongs to someone else
        /// (or to an earlier element of this very list, which is erased by the loop below).
        for (auto registered = parts_to_move.begin(); registered != it; ++registered)
            data.currently_moving_parts.erase(registered->part);

        throw Exception(" Cannot move part ' " + it->part->name + " '. It's already moving. ", ErrorCodes::LOGICAL_ERROR);
    }
}
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
/// Unregisters, under `moving_parts_mutex`, every part of this tagger from `currently_moving_parts`.
/// Terminates the process if one of the parts is not registered.
MergeTreeData::CurrentlyMovingPartsTagger::~CurrentlyMovingPartsTagger()
{
    std::lock_guard guard(data.moving_parts_mutex);

    for (const auto & tagged : parts_to_move)
    {
        /// erase() reports how many elements were removed; zero means the part was not registered.
        /// Something went completely wrong in that case.
        if (data.currently_moving_parts.erase(tagged.part) == 0)
            std::terminate();
    }
}
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
bool MergeTreeData : : selectPartsAndMove ( )
{
if ( parts_mover . moves_blocker . isCancelled ( ) )
return false ;
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
auto moving_tagger = selectPartsForMove ( ) ;
2020-10-14 07:22:48 +00:00
if ( moving_tagger - > parts_to_move . empty ( ) )
2019-09-05 15:53:23 +00:00
return false ;
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
return moveParts ( std : : move ( moving_tagger ) ) ;
2019-09-05 13:12:29 +00:00
}
2020-10-16 10:12:31 +00:00
std : : optional < JobAndPool > MergeTreeData : : getDataMovingJob ( )
2020-10-14 07:22:48 +00:00
{
if ( parts_mover . moves_blocker . isCancelled ( ) )
return { } ;
auto moving_tagger = selectPartsForMove ( ) ;
if ( moving_tagger - > parts_to_move . empty ( ) )
return { } ;
2020-10-23 08:54:00 +00:00
return JobAndPool { [ this , moving_tagger ] ( ) mutable
2020-10-14 07:22:48 +00:00
{
moveParts ( moving_tagger ) ;
2020-10-16 10:12:31 +00:00
} , PoolType : : MOVE } ;
2020-10-14 07:22:48 +00:00
}
2019-11-14 11:10:17 +00:00
bool MergeTreeData : : areBackgroundMovesNeeded ( ) const
{
2020-01-09 14:50:34 +00:00
auto policy = getStoragePolicy ( ) ;
2020-01-17 13:30:54 +00:00
if ( policy - > getVolumes ( ) . size ( ) > 1 )
return true ;
2020-06-23 16:40:58 +00:00
return policy - > getVolumes ( ) . size ( ) = = 1 & & policy - > getVolumes ( ) [ 0 ] - > getDisks ( ) . size ( ) > 1 ;
2019-11-14 11:10:17 +00:00
}
2019-11-27 09:39:44 +00:00
bool MergeTreeData : : movePartsToSpace ( const DataPartsVector & parts , SpacePtr space )
2019-09-05 13:12:29 +00:00
{
if ( parts_mover . moves_blocker . isCancelled ( ) )
return false ;
2019-09-05 15:53:23 +00:00
auto moving_tagger = checkPartsForMove ( parts , space ) ;
2020-10-14 07:22:48 +00:00
if ( moving_tagger - > parts_to_move . empty ( ) )
2019-09-05 15:53:23 +00:00
return false ;
2019-09-05 13:12:29 +00:00
2020-10-14 07:22:48 +00:00
return moveParts ( moving_tagger ) ;
2019-09-05 15:53:23 +00:00
}
2020-10-14 07:22:48 +00:00
/// Lets the parts mover select parts to move and tags them as currently moving.
/// A part is rejected if it is assigned to a background operation or is already moving.
/// The selection and the construction of the tagger happen under `moving_parts_mutex`.
MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::selectPartsForMove()
{
    MergeTreeMovingParts selected_parts;

    /// Predicate for the mover; on rejection the explanation is written to `*reason`.
    auto is_movable = [this](const DataPartPtr & candidate, String * reason) -> bool
    {
        if (partIsAssignedToBackgroundOperation(candidate))
        {
            *reason = " part already assigned to background operation. ";
            return false;
        }

        if (currently_moving_parts.count(candidate))
        {
            *reason = " part is already moving. ";
            return false;
        }

        return true;
    };

    std::lock_guard moving_lock(moving_parts_mutex);

    parts_mover.selectPartsForMove(selected_parts, is_movable, moving_lock);
    return std::make_shared<CurrentlyMovingPartsTagger>(std::move(selected_parts), *this);
}
2019-09-05 13:12:29 +00:00
2020-10-14 07:22:48 +00:00
/// Validates that every part in `parts` can be moved to `space`, reserves room for each of them
/// and tags them as currently moving. Runs entirely under `moving_parts_mutex`.
///
/// Throws:
///  - NOT_ENOUGH_SPACE            if space for a part cannot be reserved;
///  - DIRECTORY_ALREADY_EXISTS    if the part's directory already exists on the reserved disk;
///  - PART_IS_TEMPORARILY_LOCKED  if the part is already moving or assigned to a background operation.
MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::checkPartsForMove(const DataPartsVector & parts, SpacePtr space)
{
    std::lock_guard moving_lock(moving_parts_mutex);

    MergeTreeMovingParts checked_parts;

    for (const auto & part : parts)
    {
        auto reservation = space->reserve(part->getBytesOnDisk());
        if (!reservation)
            throw Exception(" Move is not possible. Not enough space on ' " + space->getName() + " ' ", ErrorCodes::NOT_ENOUGH_SPACE);

        auto target_disk = reservation->getDisk();

        const bool already_on_target = target_disk->exists(relative_data_path + part->name);
        if (already_on_target)
        {
            throw Exception(
                " Move is not possible: " + fullPath(target_disk, relative_data_path + part->name) + " already exists ",
                ErrorCodes::DIRECTORY_ALREADY_EXISTS);
        }

        const bool part_is_busy = currently_moving_parts.count(part) || partIsAssignedToBackgroundOperation(part);
        if (part_is_busy)
        {
            throw Exception(
                " Cannot move part ' " + part->name + " ' because it's participating in background process ",
                ErrorCodes::PART_IS_TEMPORARILY_LOCKED);
        }

        checked_parts.emplace_back(part, std::move(reservation));
    }

    return std::make_shared<CurrentlyMovingPartsTagger>(std::move(checked_parts), *this);
}
2019-09-05 13:12:29 +00:00
2020-10-14 07:22:48 +00:00
bool MergeTreeData : : moveParts ( const CurrentlyMovingPartsTaggerPtr & moving_tagger )
2019-09-05 15:53:23 +00:00
{
2020-10-14 07:22:48 +00:00
LOG_INFO ( log , " Got {} parts to move. " , moving_tagger - > parts_to_move . size ( ) ) ;
2019-09-05 13:12:29 +00:00
2020-10-14 07:22:48 +00:00
for ( const auto & moving_part : moving_tagger - > parts_to_move )
2019-09-05 13:12:29 +00:00
{
Stopwatch stopwatch ;
DataPartPtr cloned_part ;
auto write_part_log = [ & ] ( const ExecutionStatus & execution_status )
{
writePartLog (
PartLogElement : : Type : : MOVE_PART ,
execution_status ,
stopwatch . elapsed ( ) ,
moving_part . part - > name ,
cloned_part ,
{ moving_part . part } ,
nullptr ) ;
} ;
try
{
cloned_part = parts_mover . clonePart ( moving_part ) ;
parts_mover . swapClonedPart ( cloned_part ) ;
write_part_log ( { } ) ;
}
catch ( . . . )
{
write_part_log ( ExecutionStatus : : fromCurrentException ( ) ) ;
if ( cloned_part )
cloned_part - > remove ( ) ;
throw ;
}
}
return true ;
}
2019-09-03 11:32:25 +00:00
2020-04-22 06:22:14 +00:00
bool MergeTreeData : : canUsePolymorphicParts ( const MergeTreeSettings & settings , String * out_reason ) const
2020-01-13 16:28:29 +00:00
{
2020-02-14 21:33:02 +00:00
if ( ! canUseAdaptiveGranularity ( ) )
2020-01-13 16:28:29 +00:00
{
2020-06-15 17:41:44 +00:00
if ( out_reason & & ( settings . min_rows_for_wide_part ! = 0 | | settings . min_bytes_for_wide_part ! = 0
| | settings . min_rows_for_compact_part ! = 0 | | settings . min_bytes_for_compact_part ! = 0 ) )
2020-01-13 16:28:29 +00:00
{
2020-11-09 19:07:38 +00:00
* out_reason = fmt : : format (
" Table can't create parts with adaptive granularity, but settings "
" min_rows_for_wide_part = {} "
" , min_bytes_for_wide_part = {} "
" , min_rows_for_compact_part = {} "
" , min_bytes_for_compact_part = {} "
" . Parts with non-adaptive granularity can be stored only in Wide (default) format. " ,
settings . min_rows_for_wide_part , settings . min_bytes_for_wide_part ,
settings . min_rows_for_compact_part , settings . min_bytes_for_compact_part ) ;
2020-01-13 16:28:29 +00:00
}
2020-02-14 21:33:02 +00:00
return false ;
2020-01-13 16:28:29 +00:00
}
2020-02-14 21:33:02 +00:00
return true ;
2020-01-13 16:28:29 +00:00
}
2020-03-25 18:44:08 +00:00
/// Builds the AlterConversions for `part` from the commands returned by
/// getFirtsAlterMutationCommandsForPart(part).
///
/// Only RENAME_COLUMN commands contribute: for each one, `rename_map` gets an entry keyed by
/// the new column name (`rename_to`) whose value is the old one (`column_name`).
MergeTreeData::AlterConversions MergeTreeData::getAlterConversionsForPart(const MergeTreeDataPartPtr part) const
{
    const MutationCommands alter_commands = getFirtsAlterMutationCommandsForPart(part);

    AlterConversions conversions{};
    for (const auto & alter_command : alter_commands)
    {
        /// Currently explicit conversions are needed only for RENAME alters;
        /// all other conversions can be deduced from the diff between the part's columns
        /// and the columns in storage.
        if (alter_command.type != MutationCommand::Type::RENAME_COLUMN)
            continue;

        conversions.rename_map[alter_command.rename_to] = alter_command.column_name;
    }

    return conversions;
}
2020-04-27 13:55:30 +00:00
2020-06-30 18:47:12 +00:00
/// Returns the table's write-ahead log, creating it on the first call.
///
/// The whole function runs under `write_ahead_log_mutex`. On creation, space is reserved via
/// reserveSpace() using the `write_ahead_log_max_bytes` setting, and the log is constructed
/// on the disk of that reservation.
MergeTreeData::WriteAheadLogPtr MergeTreeData::getWriteAheadLog()
{
    std::lock_guard lock(write_ahead_log_mutex);

    if (write_ahead_log)
        return write_ahead_log;

    auto wal_reservation = reserveSpace(getSettings()->write_ahead_log_max_bytes);
    write_ahead_log = std::make_shared<MergeTreeWriteAheadLog>(*this, wal_reservation->getDisk());
    return write_ahead_log;
}
2020-04-28 10:38:57 +00:00
/// Returns the name/type pairs of the table's virtual columns:
/// two String columns, one UInt64 column and one Float64 column, in the order listed below.
/// A fresh data type object is created for every entry on each call.
NamesAndTypesList MergeTreeData::getVirtuals() const
{
    NamesAndTypesList virtual_columns{
        NameAndTypePair(" _part ", std::make_shared<DataTypeString>()),
        NameAndTypePair(" _part_index ", std::make_shared<DataTypeUInt64>()),
        NameAndTypePair(" _partition_id ", std::make_shared<DataTypeString>()),
        NameAndTypePair(" _sample_factor ", std::make_shared<DataTypeFloat64>()),
    };

    return virtual_columns;
}
2020-05-20 12:02:02 +00:00
2020-09-04 10:08:09 +00:00
size_t MergeTreeData : : getTotalMergesWithTTLInMergeList ( ) const
{
return global_context . getMergeList ( ) . getExecutingMergesWithTTLCount ( ) ;
}
2014-03-09 17:36:01 +00:00
}