2017-04-01 09:19:00 +00:00
# include <Storages/MergeTree/MergeTreeData.h>
2018-11-08 15:43:14 +00:00
# include <Interpreters/SyntaxAnalyzer.h>
2017-04-01 09:19:00 +00:00
# include <Interpreters/ExpressionAnalyzer.h>
2018-11-28 15:05:28 +00:00
# include <Storages/MergeTree/MergeTreeSequentialBlockInputStream.h>
2017-04-01 09:19:00 +00:00
# include <Storages/MergeTree/MergedBlockOutputStream.h>
2019-06-18 12:54:27 +00:00
# include <Storages/MergeTree/MergedColumnOnlyOutputStream.h>
2017-12-03 00:48:19 +00:00
# include <Storages/MergeTree/checkDataPart.h>
2018-05-21 13:49:54 +00:00
# include <Storages/StorageMergeTree.h>
# include <Storages/StorageReplicatedMergeTree.h>
2017-04-01 09:19:00 +00:00
# include <Storages/AlterCommands.h>
# include <Parsers/ASTNameTypePair.h>
2017-09-06 20:34:26 +00:00
# include <Parsers/ASTLiteral.h>
# include <Parsers/ASTFunction.h>
# include <Parsers/ASTPartition.h>
2019-08-06 13:04:29 +00:00
# include <Parsers/ASTSetQuery.h>
2017-08-19 18:11:20 +00:00
# include <Parsers/ExpressionListParsers.h>
2017-08-31 19:56:43 +00:00
# include <Parsers/parseQuery.h>
2018-05-21 13:49:54 +00:00
# include <Parsers/queryToString.h>
2017-04-01 09:19:00 +00:00
# include <DataStreams/ExpressionBlockInputStream.h>
2018-11-14 07:49:20 +00:00
# include <DataStreams/MarkInCompressedFile.h>
2019-08-02 17:16:58 +00:00
# include <Formats/FormatFactory.h>
2017-04-01 09:19:00 +00:00
# include <DataStreams/copyData.h>
# include <IO/WriteBufferFromFile.h>
2017-07-31 21:39:24 +00:00
# include <IO/WriteBufferFromString.h>
2018-12-28 18:15:26 +00:00
# include <Compression/CompressedReadBuffer.h>
2017-09-06 20:34:26 +00:00
# include <IO/ReadBufferFromMemory.h>
# include <IO/ConcatReadBuffer.h>
2017-04-01 09:19:00 +00:00
# include <IO/HexWriteBuffer.h>
2017-09-05 12:12:55 +00:00
# include <IO/Operators.h>
2017-04-01 09:19:00 +00:00
# include <DataTypes/DataTypeDate.h>
# include <DataTypes/DataTypeDateTime.h>
# include <DataTypes/DataTypeEnum.h>
2017-12-25 18:58:39 +00:00
# include <DataTypes/NestedUtils.h>
2017-04-01 09:19:00 +00:00
# include <DataTypes/DataTypeArray.h>
# include <DataTypes/DataTypeNullable.h>
# include <Functions/FunctionFactory.h>
# include <Functions/IFunction.h>
2017-05-10 08:08:32 +00:00
# include <Common/Increment.h>
2017-05-10 06:49:19 +00:00
# include <Common/SimpleIncrement.h>
2017-04-01 09:19:00 +00:00
# include <Common/escapeForFileName.h>
2019-11-28 10:13:53 +00:00
# include <Common/quoteString.h>
2018-01-15 19:07:47 +00:00
# include <Common/StringUtils/StringUtils.h>
2017-04-01 09:19:00 +00:00
# include <Common/Stopwatch.h>
2017-07-13 20:58:19 +00:00
# include <Common/typeid_cast.h>
2017-09-05 12:12:55 +00:00
# include <Common/localBackup.h>
2018-05-21 13:49:54 +00:00
# include <Interpreters/PartLog.h>
2017-09-05 12:12:55 +00:00
# include <Poco/DirectoryIterator.h>
2014-10-16 01:21:03 +00:00
2017-09-11 22:40:51 +00:00
# include <boost/range/adaptor/filtered.hpp>
2014-06-10 14:24:33 +00:00
# include <algorithm>
2014-10-16 01:21:03 +00:00
# include <iomanip>
2019-02-05 14:50:25 +00:00
# include <set>
2015-04-16 07:22:29 +00:00
# include <thread>
2017-02-09 17:29:36 +00:00
# include <typeinfo>
# include <typeindex>
2017-11-20 04:15:43 +00:00
# include <optional>
2014-03-09 17:36:01 +00:00
2016-10-24 02:02:37 +00:00
/// Counters defined in ProfileEvents.cpp; referenced here to account for
/// rejected/delayed INSERTs when parts pile up faster than merges.
namespace ProfileEvents
{
    extern const Event RejectedInserts;
    extern const Event DelayedInserts;
    extern const Event DelayedInsertsMilliseconds;
}

/// Gauge defined in CurrentMetrics.cpp; tracks INSERTs currently being throttled.
namespace CurrentMetrics
{
    extern const Metric DelayedInserts;
}
2014-03-09 17:36:01 +00:00
2019-10-31 10:40:11 +00:00
namespace
{
    /// Lower bound used when estimating how much disk space to reserve for a new part.
    constexpr UInt64 RESERVATION_MIN_ESTIMATION_SIZE = 1u * 1024u * 1024u; /// 1MB
}
2014-03-09 17:36:01 +00:00
namespace DB
{
2016-11-20 12:43:20 +00:00
/// Error codes used by this translation unit; values are defined in ErrorCodes.cpp.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int MEMORY_LIMIT_EXCEEDED;
    extern const int SYNTAX_ERROR;
    extern const int INVALID_PARTITION_VALUE;
    extern const int METADATA_MISMATCH;
    extern const int PART_IS_TEMPORARILY_LOCKED;
    extern const int TOO_MANY_PARTS;
    extern const int INCOMPATIBLE_COLUMNS;
    extern const int CANNOT_UPDATE_COLUMN;
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int CANNOT_MUNMAP;
    extern const int CANNOT_MREMAP;
    extern const int BAD_TTL_EXPRESSION;
    extern const int INCORRECT_FILE_NAME;
    extern const int BAD_DATA_PART_NAME;
    extern const int UNKNOWN_SETTING;
    extern const int READONLY_SETTING;
    extern const int ABORTED;
}
2019-11-18 08:42:46 +00:00
namespace
{
const char * DELETE_ON_DESTROY_MARKER_PATH = " delete-on-destroy.txt " ;
}
2014-03-09 17:36:01 +00:00
MergeTreeData : : MergeTreeData (
2019-08-29 16:17:47 +00:00
const String & database_ ,
const String & table_ ,
2019-04-04 17:19:11 +00:00
const ColumnsDescription & columns_ ,
2019-02-05 14:50:25 +00:00
const IndicesDescription & indices_ ,
2019-05-18 08:05:52 +00:00
const ConstraintsDescription & constraints_ ,
2017-04-01 07:20:54 +00:00
Context & context_ ,
2017-09-08 18:11:09 +00:00
const String & date_column_name ,
2018-11-06 18:25:36 +00:00
const ASTPtr & partition_by_ast_ ,
const ASTPtr & order_by_ast_ ,
const ASTPtr & primary_key_ast_ ,
2018-11-09 19:01:39 +00:00
const ASTPtr & sample_by_ast_ ,
2019-04-15 09:30:45 +00:00
const ASTPtr & ttl_table_ast_ ,
2017-04-01 07:20:54 +00:00
const MergingParams & merging_params_ ,
2019-08-26 14:24:29 +00:00
std : : unique_ptr < MergeTreeSettings > storage_settings_ ,
2017-04-01 07:20:54 +00:00
bool require_part_metadata_ ,
bool attach ,
2017-12-01 21:40:58 +00:00
BrokenPartCallback broken_part_callback_ )
2019-08-29 16:17:47 +00:00
: global_context ( context_ )
, merging_params ( merging_params_ )
, partition_by_ast ( partition_by_ast_ )
, sample_by_ast ( sample_by_ast_ )
, require_part_metadata ( require_part_metadata_ )
, database_name ( database_ )
, table_name ( table_ )
, broken_part_callback ( broken_part_callback_ )
, log_name ( database_name + " . " + table_name )
, log ( & Logger : : get ( log_name ) )
, storage_settings ( std : : move ( storage_settings_ ) )
2019-09-20 20:35:50 +00:00
, storage_policy ( context_ . getStoragePolicy ( getSettings ( ) - > storage_policy ) )
2019-08-29 16:17:47 +00:00
, data_parts_by_info ( data_parts_indexes . get < TagByInfo > ( ) )
, data_parts_by_state_and_info ( data_parts_indexes . get < TagByStateAndInfo > ( ) )
2019-09-05 13:12:29 +00:00
, parts_mover ( this )
2014-03-09 17:36:01 +00:00
{
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2019-07-13 10:42:52 +00:00
setProperties ( order_by_ast_ , primary_key_ast_ , columns_ , indices_ , constraints_ ) ;
2018-11-13 12:51:55 +00:00
2018-03-06 14:49:27 +00:00
/// NOTE: using the same columns list as is read when performing actual merges.
2018-03-13 15:00:28 +00:00
merging_params . check ( getColumns ( ) . getAllPhysical ( ) ) ;
2017-04-01 07:20:54 +00:00
2018-11-09 19:01:39 +00:00
if ( sample_by_ast )
2018-11-08 16:39:43 +00:00
{
2018-11-09 19:01:39 +00:00
sampling_expr_column_name = sample_by_ast - > getColumnName ( ) ;
2017-09-01 20:33:17 +00:00
2018-11-09 19:01:39 +00:00
if ( ! primary_key_sample . has ( sampling_expr_column_name )
2019-08-13 08:35:49 +00:00
& & ! attach & & ! settings - > compatibility_allow_sampling_expression_not_in_primary_key ) /// This is for backward compatibility.
2018-11-08 16:39:43 +00:00
throw Exception ( " Sampling expression must be present in the primary key " , ErrorCodes : : BAD_ARGUMENTS ) ;
2017-09-18 19:24:27 +00:00
2019-01-09 16:16:59 +00:00
auto syntax = SyntaxAnalyzer ( global_context ) . analyze ( sample_by_ast , getColumns ( ) . getAllPhysical ( ) ) ;
2019-08-09 14:50:04 +00:00
columns_required_for_sampling = syntax - > requiredSourceColumns ( ) ;
2018-11-08 16:39:43 +00:00
}
2017-09-18 19:24:27 +00:00
2017-09-08 18:11:09 +00:00
MergeTreeDataFormatVersion min_format_version ( 0 ) ;
if ( ! date_column_name . empty ( ) )
2017-09-01 20:33:17 +00:00
{
2017-09-08 18:11:09 +00:00
try
{
2018-11-06 18:25:36 +00:00
partition_by_ast = makeASTFunction ( " toYYYYMM " , std : : make_shared < ASTIdentifier > ( date_column_name ) ) ;
2017-09-08 18:11:09 +00:00
initPartitionKey ( ) ;
2017-09-08 13:17:38 +00:00
2017-09-08 18:11:09 +00:00
if ( minmax_idx_date_column_pos = = - 1 )
throw Exception ( " Could not find Date column " , ErrorCodes : : BAD_TYPE_OF_FIELD ) ;
}
catch ( Exception & e )
{
/// Better error message.
2019-06-15 12:06:22 +00:00
e . addMessage ( " (while initializing MergeTree partition key from date column " + backQuote ( date_column_name ) + " ) " ) ;
2017-09-08 18:11:09 +00:00
throw ;
}
2017-09-08 13:17:38 +00:00
}
2017-09-08 18:11:09 +00:00
else
2017-09-08 13:17:38 +00:00
{
2018-11-13 12:51:55 +00:00
is_custom_partitioned = true ;
2017-09-08 18:11:09 +00:00
initPartitionKey ( ) ;
min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING ;
2017-09-01 20:33:17 +00:00
}
2017-04-01 07:20:54 +00:00
2019-04-15 09:30:45 +00:00
setTTLExpressions ( columns_ . getColumnTTLs ( ) , ttl_table_ast_ ) ;
2019-08-01 10:29:14 +00:00
// format_file always contained on any data path
2019-04-21 18:38:44 +00:00
String version_file_path ;
2019-04-03 12:52:09 +00:00
2017-04-01 07:20:54 +00:00
/// Creating directories, if not exist.
2019-05-11 18:50:29 +00:00
auto paths = getDataPaths ( ) ;
2019-04-28 14:49:41 +00:00
for ( const String & path : paths )
2019-04-05 17:37:27 +00:00
{
2019-04-01 18:41:19 +00:00
Poco : : File ( path ) . createDirectories ( ) ;
Poco : : File ( path + " detached " ) . createDirectory ( ) ;
2019-04-21 20:23:02 +00:00
if ( Poco : : File { path + " format_version.txt " } . exists ( ) )
{
2019-04-21 18:38:44 +00:00
if ( ! version_file_path . empty ( ) )
{
LOG_ERROR ( log , " Duplication of version file " < < version_file_path < < " and " < < path < < " format_file.txt " ) ;
throw Exception ( " Multiple format_version.txt file " , ErrorCodes : : CORRUPTED_DATA ) ;
}
version_file_path = path + " format_version.txt " ;
}
2019-04-01 18:41:19 +00:00
}
2019-04-21 18:38:44 +00:00
/// If not choose any
2019-04-21 20:23:02 +00:00
if ( version_file_path . empty ( ) )
2019-05-24 19:03:07 +00:00
version_file_path = getFullPathOnDisk ( storage_policy - > getAnyDisk ( ) ) + " format_version.txt " ;
2018-04-16 10:04:59 +00:00
2019-05-21 17:57:17 +00:00
bool version_file_exists = Poco : : File ( version_file_path ) . exists ( ) ;
2017-08-25 20:41:45 +00:00
2018-07-04 15:23:25 +00:00
// When data path or file not exists, ignore the format_version check
2019-04-21 18:38:44 +00:00
if ( ! attach | | ! version_file_exists )
2017-08-25 20:41:45 +00:00
{
2017-09-08 18:11:09 +00:00
format_version = min_format_version ;
2017-08-25 20:41:45 +00:00
WriteBufferFromFile buf ( version_file_path ) ;
writeIntText ( format_version . toUnderType ( ) , buf ) ;
}
2019-05-22 19:20:10 +00:00
else
2017-08-25 20:41:45 +00:00
{
ReadBufferFromFile buf ( version_file_path ) ;
2018-12-26 01:12:13 +00:00
UInt32 read_format_version ;
readIntText ( read_format_version , buf ) ;
format_version = read_format_version ;
2017-08-25 20:41:45 +00:00
if ( ! buf . eof ( ) )
throw Exception ( " Bad version file: " + version_file_path , ErrorCodes : : CORRUPTED_DATA ) ;
}
2017-09-08 18:11:09 +00:00
if ( format_version < min_format_version )
2018-11-12 16:14:37 +00:00
{
if ( min_format_version = = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING . toUnderType ( ) )
throw Exception (
" MergeTree data format version on disk doesn't support custom partitioning " ,
ErrorCodes : : METADATA_MISMATCH ) ;
}
2019-03-25 13:55:24 +00:00
}
2018-02-21 17:05:21 +00:00
/// Validates an expression used as a table key (partition / sorting / primary / TTL-adjacent).
/// Rejects expressions that cannot serve as a stable, deterministic key:
///  - ARRAY JOIN (changes row cardinality),
///  - non-deterministic functions (key values would not be reproducible),
///  - constant columns in the sample block (a constant key is meaningless),
///  - nullable result types.
/// @param key_name  Human-readable key kind ("Partition", "Sorting", ...) used in error messages.
static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name)
{
    for (const ExpressionAction & action : expr.getActions())
    {
        if (action.type == ExpressionAction::ARRAY_JOIN)
            throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN);

        if (action.type == ExpressionAction::APPLY_FUNCTION)
        {
            IFunctionBase & func = *action.function_base;
            if (!func.isDeterministic())
                throw Exception(key_name + " key cannot contain non-deterministic functions, "
                    "but contains function " + func.getName(),
                    ErrorCodes::BAD_ARGUMENTS);
        }
    }

    for (const ColumnWithTypeAndName & element : sample_block)
    {
        const ColumnPtr & column = element.column;
        if (column && (isColumnConst(*column) || column->isDummy()))
            throw Exception{key_name + " key cannot contain constants", ErrorCodes::ILLEGAL_COLUMN};

        if (element.type->isNullable())
            throw Exception{key_name + " key cannot contain nullable columns", ErrorCodes::ILLEGAL_COLUMN};
    }
}
2019-07-13 10:42:52 +00:00
void MergeTreeData : : setProperties (
2019-03-23 01:27:08 +00:00
const ASTPtr & new_order_by_ast , const ASTPtr & new_primary_key_ast ,
2019-06-02 14:41:12 +00:00
const ColumnsDescription & new_columns , const IndicesDescription & indices_description ,
const ConstraintsDescription & constraints_description , bool only_check )
2016-05-16 23:04:03 +00:00
{
2018-11-06 18:25:36 +00:00
if ( ! new_order_by_ast )
throw Exception ( " ORDER BY cannot be empty " , ErrorCodes : : BAD_ARGUMENTS ) ;
2018-10-11 17:06:55 +00:00
2018-11-06 18:25:36 +00:00
ASTPtr new_sorting_key_expr_list = extractKeyExpressionList ( new_order_by_ast ) ;
ASTPtr new_primary_key_expr_list = new_primary_key_ast
? extractKeyExpressionList ( new_primary_key_ast ) : new_sorting_key_expr_list - > clone ( ) ;
2018-10-23 13:34:04 +00:00
2018-11-06 18:25:36 +00:00
if ( merging_params . mode = = MergeTreeData : : MergingParams : : VersionedCollapsing )
new_sorting_key_expr_list - > children . push_back ( std : : make_shared < ASTIdentifier > ( merging_params . version_column ) ) ;
2018-02-09 10:53:50 +00:00
2018-11-06 18:25:36 +00:00
size_t primary_key_size = new_primary_key_expr_list - > children . size ( ) ;
size_t sorting_key_size = new_sorting_key_expr_list - > children . size ( ) ;
2018-10-12 19:00:43 +00:00
if ( primary_key_size > sorting_key_size )
2018-10-11 17:06:55 +00:00
throw Exception ( " Primary key must be a prefix of the sorting key, but its length: "
2018-10-12 19:00:43 +00:00
+ toString ( primary_key_size ) + " is greater than the sorting key length: " + toString ( sorting_key_size ) ,
2018-10-11 17:06:55 +00:00
ErrorCodes : : BAD_ARGUMENTS ) ;
2017-04-01 07:20:54 +00:00
2018-10-11 17:06:55 +00:00
Names new_primary_key_columns ;
2018-10-12 19:00:43 +00:00
Names new_sorting_key_columns ;
2017-04-01 07:20:54 +00:00
2018-10-12 19:00:43 +00:00
for ( size_t i = 0 ; i < sorting_key_size ; + + i )
2017-04-01 07:20:54 +00:00
{
2018-11-06 18:25:36 +00:00
String sorting_key_column = new_sorting_key_expr_list - > children [ i ] - > getColumnName ( ) ;
2018-10-12 19:00:43 +00:00
new_sorting_key_columns . push_back ( sorting_key_column ) ;
2018-10-11 17:06:55 +00:00
if ( i < primary_key_size )
{
2018-11-06 18:25:36 +00:00
String pk_column = new_primary_key_expr_list - > children [ i ] - > getColumnName ( ) ;
2018-10-12 19:00:43 +00:00
if ( pk_column ! = sorting_key_column )
2018-10-11 17:06:55 +00:00
throw Exception ( " Primary key must be a prefix of the sorting key, but in position "
2018-10-12 19:00:43 +00:00
+ toString ( i ) + " its column is " + pk_column + " , not " + sorting_key_column ,
2018-10-11 17:06:55 +00:00
ErrorCodes : : BAD_ARGUMENTS ) ;
new_primary_key_columns . push_back ( pk_column ) ;
}
2018-02-09 10:53:50 +00:00
}
2018-11-13 12:51:55 +00:00
auto all_columns = new_columns . getAllPhysical ( ) ;
2017-04-01 07:20:54 +00:00
2018-11-13 12:51:55 +00:00
if ( order_by_ast & & only_check )
2018-02-09 10:53:50 +00:00
{
2018-11-13 12:51:55 +00:00
/// This is ALTER, not CREATE/ATTACH TABLE. Let us check that all new columns used in the sorting key
/// expression have just been added (so that the sorting order is guaranteed to be valid with the new key).
ASTPtr added_key_column_expr_list = std : : make_shared < ASTExpressionList > ( ) ;
for ( size_t new_i = 0 , old_i = 0 ; new_i < sorting_key_size ; + + new_i )
{
if ( old_i < sorting_key_columns . size ( ) )
{
if ( new_sorting_key_columns [ new_i ] ! = sorting_key_columns [ old_i ] )
added_key_column_expr_list - > children . push_back ( new_sorting_key_expr_list - > children [ new_i ] ) ;
else
+ + old_i ;
}
else
added_key_column_expr_list - > children . push_back ( new_sorting_key_expr_list - > children [ new_i ] ) ;
}
if ( ! added_key_column_expr_list - > children . empty ( ) )
{
2019-01-09 16:16:59 +00:00
auto syntax = SyntaxAnalyzer ( global_context ) . analyze ( added_key_column_expr_list , all_columns ) ;
2019-08-09 14:50:04 +00:00
Names used_columns = syntax - > requiredSourceColumns ( ) ;
2018-11-13 12:51:55 +00:00
NamesAndTypesList deleted_columns ;
NamesAndTypesList added_columns ;
getColumns ( ) . getAllPhysical ( ) . getDifference ( all_columns , deleted_columns , added_columns ) ;
2018-11-08 15:43:14 +00:00
2018-11-13 12:51:55 +00:00
for ( const String & col : used_columns )
{
if ( ! added_columns . contains ( col ) | | deleted_columns . contains ( col ) )
throw Exception ( " Existing column " + col + " is used in the expression that was "
" added to the sorting key. You can add expressions that use only the newly added columns " ,
ErrorCodes : : BAD_ARGUMENTS ) ;
2019-03-14 15:20:51 +00:00
if ( new_columns . getDefaults ( ) . count ( col ) )
2018-11-13 12:51:55 +00:00
throw Exception ( " Newly added column " + col + " has a default expression, so adding "
" expressions that use it to the sorting key is forbidden " ,
ErrorCodes : : BAD_ARGUMENTS ) ;
}
}
2018-02-09 10:53:50 +00:00
}
2017-04-01 07:20:54 +00:00
2019-01-09 16:16:59 +00:00
auto new_sorting_key_syntax = SyntaxAnalyzer ( global_context ) . analyze ( new_sorting_key_expr_list , all_columns ) ;
2019-01-04 12:10:00 +00:00
auto new_sorting_key_expr = ExpressionAnalyzer ( new_sorting_key_expr_list , new_sorting_key_syntax , global_context )
2018-10-11 17:06:55 +00:00
. getActions ( false ) ;
2018-10-12 19:00:43 +00:00
auto new_sorting_key_sample =
2019-01-04 12:10:00 +00:00
ExpressionAnalyzer ( new_sorting_key_expr_list , new_sorting_key_syntax , global_context )
2018-10-11 17:06:55 +00:00
. getActions ( true ) - > getSampleBlock ( ) ;
2018-02-09 10:53:50 +00:00
2018-10-12 19:00:43 +00:00
checkKeyExpression ( * new_sorting_key_expr , new_sorting_key_sample , " Sorting " ) ;
2017-04-01 07:20:54 +00:00
2019-01-09 16:16:59 +00:00
auto new_primary_key_syntax = SyntaxAnalyzer ( global_context ) . analyze ( new_primary_key_expr_list , all_columns ) ;
2019-01-04 12:10:00 +00:00
auto new_primary_key_expr = ExpressionAnalyzer ( new_primary_key_expr_list , new_primary_key_syntax , global_context )
2018-10-11 17:06:55 +00:00
. getActions ( false ) ;
2018-02-09 10:53:50 +00:00
2018-10-11 17:06:55 +00:00
Block new_primary_key_sample ;
DataTypes new_primary_key_data_types ;
2017-04-01 07:20:54 +00:00
for ( size_t i = 0 ; i < primary_key_size ; + + i )
2018-10-11 17:06:55 +00:00
{
2018-10-12 19:00:43 +00:00
const auto & elem = new_sorting_key_sample . getByPosition ( i ) ;
2018-10-11 17:06:55 +00:00
new_primary_key_sample . insert ( elem ) ;
new_primary_key_data_types . push_back ( elem . type ) ;
2018-02-09 10:53:50 +00:00
}
2019-02-05 14:50:25 +00:00
ASTPtr skip_indices_with_primary_key_expr_list = new_primary_key_expr_list - > clone ( ) ;
ASTPtr skip_indices_with_sorting_key_expr_list = new_sorting_key_expr_list - > clone ( ) ;
MergeTreeIndices new_indices ;
if ( ! indices_description . indices . empty ( ) )
{
std : : set < String > indices_names ;
for ( const auto & index_ast : indices_description . indices )
{
const auto & index_decl = std : : dynamic_pointer_cast < ASTIndexDeclaration > ( index_ast ) ;
new_indices . push_back (
2019-08-29 10:56:43 +00:00
MergeTreeIndexFactory : : instance ( ) . get (
all_columns ,
std : : dynamic_pointer_cast < ASTIndexDeclaration > ( index_decl - > clone ( ) ) ,
global_context ) ) ;
2019-02-05 14:50:25 +00:00
if ( indices_names . find ( new_indices . back ( ) - > name ) ! = indices_names . end ( ) )
throw Exception (
2019-06-15 12:06:22 +00:00
" Index with name " + backQuote ( new_indices . back ( ) - > name ) + " already exsists " ,
2019-02-05 14:50:25 +00:00
ErrorCodes : : LOGICAL_ERROR ) ;
ASTPtr expr_list = MergeTreeData : : extractKeyExpressionList ( index_decl - > expr - > clone ( ) ) ;
for ( const auto & expr : expr_list - > children )
{
skip_indices_with_primary_key_expr_list - > children . push_back ( expr - > clone ( ) ) ;
skip_indices_with_sorting_key_expr_list - > children . push_back ( expr - > clone ( ) ) ;
}
indices_names . insert ( new_indices . back ( ) - > name ) ;
}
}
auto syntax_primary = SyntaxAnalyzer ( global_context , { } ) . analyze (
skip_indices_with_primary_key_expr_list , all_columns ) ;
auto new_indices_with_primary_key_expr = ExpressionAnalyzer (
skip_indices_with_primary_key_expr_list , syntax_primary , global_context ) . getActions ( false ) ;
auto syntax_sorting = SyntaxAnalyzer ( global_context , { } ) . analyze (
skip_indices_with_sorting_key_expr_list , all_columns ) ;
auto new_indices_with_sorting_key_expr = ExpressionAnalyzer (
skip_indices_with_sorting_key_expr_list , syntax_sorting , global_context ) . getActions ( false ) ;
2018-11-13 12:51:55 +00:00
if ( ! only_check )
2018-02-09 10:53:50 +00:00
{
2019-05-02 22:51:39 +00:00
setColumns ( std : : move ( new_columns ) ) ;
2018-02-09 10:53:50 +00:00
2018-11-13 12:51:55 +00:00
order_by_ast = new_order_by_ast ;
sorting_key_columns = std : : move ( new_sorting_key_columns ) ;
2018-11-27 17:07:10 +00:00
sorting_key_expr_ast = std : : move ( new_sorting_key_expr_list ) ;
2018-11-13 12:51:55 +00:00
sorting_key_expr = std : : move ( new_sorting_key_expr ) ;
2018-02-09 10:53:50 +00:00
2018-11-13 12:51:55 +00:00
primary_key_ast = new_primary_key_ast ;
primary_key_columns = std : : move ( new_primary_key_columns ) ;
2018-11-27 17:07:10 +00:00
primary_key_expr_ast = std : : move ( new_primary_key_expr_list ) ;
2018-11-13 12:51:55 +00:00
primary_key_expr = std : : move ( new_primary_key_expr ) ;
primary_key_sample = std : : move ( new_primary_key_sample ) ;
primary_key_data_types = std : : move ( new_primary_key_data_types ) ;
2019-02-05 14:50:25 +00:00
2019-05-02 16:07:23 +00:00
setIndices ( indices_description ) ;
2019-02-05 14:50:25 +00:00
skip_indices = std : : move ( new_indices ) ;
2019-06-02 14:41:12 +00:00
setConstraints ( constraints_description ) ;
2019-02-05 14:50:25 +00:00
primary_key_and_skip_indices_expr = new_indices_with_primary_key_expr ;
sorting_key_and_skip_indices_expr = new_indices_with_sorting_key_expr ;
2018-11-13 12:51:55 +00:00
}
2014-03-13 12:48:07 +00:00
}
2014-03-09 17:36:01 +00:00
2018-11-13 12:51:55 +00:00
2018-11-06 18:25:36 +00:00
/// Normalizes a key declaration AST into a flat expression list.
/// A null node yields an empty list; `tuple(a, b, ...)` yields its arguments;
/// any other single expression is wrapped into a one-element list.
ASTPtr MergeTreeData::extractKeyExpressionList(const ASTPtr & node)
{
    /// An absent key is represented as an empty expression list.
    if (!node)
        return std::make_shared<ASTExpressionList>();

    /// Key written as a tuple — its arguments are the key expressions.
    if (const auto * tuple_func = node->as<ASTFunction>(); tuple_func && tuple_func->name == "tuple")
        return tuple_func->arguments->clone();

    /// Key consists of a single expression; wrap it into a list.
    auto list = std::make_shared<ASTExpressionList>();
    list->children.push_back(node);
    return list;
}
2014-03-09 17:36:01 +00:00
2016-04-15 17:42:51 +00:00
2017-09-08 18:11:09 +00:00
/// Builds the partition key expression from partition_by_ast and derives the
/// min-max index metadata from it: the set of columns the key reads, plus the
/// position of the (single) Date — or, failing that, DateTime — column, which is
/// used for TTL and old-style partitioning. No-op for an empty partition key.
void MergeTreeData::initPartitionKey()
{
    ASTPtr partition_key_expr_list = extractKeyExpressionList(partition_by_ast);

    if (partition_key_expr_list->children.empty())
        return;

    {
        auto syntax_result = SyntaxAnalyzer(global_context).analyze(partition_key_expr_list, getColumns().getAllPhysical());
        partition_key_expr = ExpressionAnalyzer(partition_key_expr_list, syntax_result, global_context).getActions(false);
    }

    /// Collect one sample column per key expression (by its rendered column name).
    for (const ASTPtr & ast : partition_key_expr_list->children)
    {
        String col_name = ast->getColumnName();
        partition_key_sample.insert(partition_key_expr->getSampleBlock().getByName(col_name));
    }

    checkKeyExpression(*partition_key_expr, partition_key_sample, "Partition");

    /// Add all columns used in the partition key to the min-max index.
    const NamesAndTypesList & minmax_idx_columns_with_types = partition_key_expr->getRequiredColumnsWithTypes();
    minmax_idx_expr = std::make_shared<ExpressionActions>(minmax_idx_columns_with_types, global_context);
    for (const NameAndTypePair & column : minmax_idx_columns_with_types)
    {
        minmax_idx_columns.emplace_back(column.name);
        minmax_idx_column_types.emplace_back(column.type);
    }

    /// Try to find the date column in columns used by the partition key (a common case).
    bool encountered_date_column = false;
    for (size_t i = 0; i < minmax_idx_column_types.size(); ++i)
    {
        if (typeid_cast<const DataTypeDate *>(minmax_idx_column_types[i].get()))
        {
            if (!encountered_date_column)
            {
                minmax_idx_date_column_pos = i;
                encountered_date_column = true;
            }
            else
            {
                /// There is more than one Date column in partition key and we don't know which one to choose.
                minmax_idx_date_column_pos = -1;
            }
        }
    }
    /// Fall back to a DateTime column only when no Date column was seen at all.
    if (!encountered_date_column)
    {
        for (size_t i = 0; i < minmax_idx_column_types.size(); ++i)
        {
            if (typeid_cast<const DataTypeDateTime *>(minmax_idx_column_types[i].get()))
            {
                if (!encountered_date_column)
                {
                    minmax_idx_time_column_pos = i;
                    encountered_date_column = true;
                }
                else
                {
                    /// There is more than one DateTime column in partition key and we don't know which one to choose.
                    minmax_idx_time_column_pos = -1;
                }
            }
        }
    }
}
2019-04-15 09:30:45 +00:00
namespace
{
    /// Validates a TTL expression: it must be deterministic (TTL is evaluated at merge
    /// time and must be reproducible) and its result column must be of type Date or
    /// DateTime, since it is compared against the current time.
    void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const String & result_column_name)
    {
        for (const auto & action : ttl_expression->getActions())
        {
            if (action.type == ExpressionAction::APPLY_FUNCTION)
            {
                IFunctionBase & func = *action.function_base;
                if (!func.isDeterministic())
                    throw Exception("TTL expression cannot contain non-deterministic functions, "
                        "but contains function " + func.getName(), ErrorCodes::BAD_ARGUMENTS);
            }
        }

        const auto & result_column = ttl_expression->getSampleBlock().getByName(result_column_name);
        if (!typeid_cast<const DataTypeDateTime *>(result_column.type.get())
            && !typeid_cast<const DataTypeDate *>(result_column.type.get()))
        {
            throw Exception("TTL expression result column should have DateTime or Date type, but has "
                + result_column.type->getName(), ErrorCodes::BAD_TTL_EXPRESSION);
        }
    }
}
/// Validates and (unless only_check) installs column-level and table-level TTLs.
/// Column TTLs are rejected for columns used by the partition or sorting key
/// (expiring a key column would break part invariants). For the table TTL list,
/// at most one DELETE entry is allowed; every other entry describes a move to a
/// disk/volume and is collected into move_ttl_entries.
/// @param only_check  When true, validates only; no member state is modified.
void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new_column_ttls,
        const ASTPtr & new_ttl_table_ast, bool only_check)
{
    /// Builds (and validates) a TTL entry from a TTL expression AST.
    /// The entry defaults to DELETE semantics; move entries are patched up by the caller.
    auto create_ttl_entry = [this](ASTPtr ttl_ast) -> TTLEntry
    {
        auto syntax_result = SyntaxAnalyzer(global_context).analyze(ttl_ast, getColumns().getAllPhysical());
        auto expr = ExpressionAnalyzer(ttl_ast, syntax_result, global_context).getActions(false);

        String result_column = ttl_ast->getColumnName();
        checkTTLExpression(expr, result_column);

        return {expr, result_column, PartDestinationType::DELETE, {}, {}};
    };

    if (!new_column_ttls.empty())
    {
        /// Columns feeding the partition or sorting key must never expire.
        NameSet columns_ttl_forbidden;

        if (partition_key_expr)
            for (const auto & col : partition_key_expr->getRequiredColumns())
                columns_ttl_forbidden.insert(col);

        if (sorting_key_expr)
            for (const auto & col : sorting_key_expr->getRequiredColumns())
                columns_ttl_forbidden.insert(col);

        for (const auto & [name, ast] : new_column_ttls)
        {
            if (columns_ttl_forbidden.count(name))
                throw Exception("Trying to set TTL for key column " + name, ErrorCodes::ILLEGAL_COLUMN);
            else
            {
                auto new_ttl_entry = create_ttl_entry(ast);
                if (!only_check)
                    column_ttl_entries_by_name.emplace(name, new_ttl_entry);
            }
        }
    }

    if (new_ttl_table_ast)
    {
        bool seen_delete_ttl = false;
        for (auto ttl_element_ptr : new_ttl_table_ast->children)
        {
            ASTTTLElement & ttl_element = static_cast<ASTTTLElement &>(*ttl_element_ptr);
            if (ttl_element.destination_type == PartDestinationType::DELETE)
            {
                if (seen_delete_ttl)
                {
                    throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION);
                }

                auto new_ttl_table_entry = create_ttl_entry(ttl_element.children[0]);
                if (!only_check)
                {
                    ttl_table_ast = ttl_element.children[0];
                    ttl_table_entry = new_ttl_table_entry;
                }

                seen_delete_ttl = true;
            }
            else
            {
                /// Move TTL (TO DISK / TO VOLUME): override the DELETE defaults set by
                /// create_ttl_entry with the element's destination.
                auto new_ttl_entry = create_ttl_entry(ttl_element.children[0]);
                if (!only_check)
                {
                    new_ttl_entry.entry_ast = ttl_element_ptr;
                    new_ttl_entry.destination_type = ttl_element.destination_type;
                    new_ttl_entry.destination_name = ttl_element.destination_name;
                    move_ttl_entries.emplace_back(std::move(new_ttl_entry));
                }
            }
        }
    }
}
2017-08-19 18:11:20 +00:00
2017-12-25 21:57:29 +00:00
void MergeTreeData : : MergingParams : : check ( const NamesAndTypesList & columns ) const
2016-04-15 17:42:51 +00:00
{
2018-02-02 09:46:54 +00:00
if ( ! sign_column . empty ( ) & & mode ! = MergingParams : : Collapsing & & mode ! = MergingParams : : VersionedCollapsing )
throw Exception ( " Sign column for MergeTree cannot be specified in modes except Collapsing or VersionedCollapsing. " ,
2018-01-29 17:42:19 +00:00
ErrorCodes : : LOGICAL_ERROR ) ;
2018-02-02 09:46:54 +00:00
if ( ! version_column . empty ( ) & & mode ! = MergingParams : : Replacing & & mode ! = MergingParams : : VersionedCollapsing )
throw Exception ( " Version column for MergeTree cannot be specified in modes except Replacing or VersionedCollapsing. " ,
2018-01-29 17:42:19 +00:00
ErrorCodes : : LOGICAL_ERROR ) ;
if ( ! columns_to_sum . empty ( ) & & mode ! = MergingParams : : Summing )
throw Exception ( " List of columns to sum for MergeTree cannot be specified in all modes except Summing. " ,
ErrorCodes : : LOGICAL_ERROR ) ;
2017-04-01 07:20:54 +00:00
/// Check that if the sign column is needed, it exists and is of type Int8.
2018-01-29 17:42:19 +00:00
auto check_sign_column = [ this , & columns ] ( bool is_optional , const std : : string & storage )
2017-04-01 07:20:54 +00:00
{
if ( sign_column . empty ( ) )
2018-01-29 17:42:19 +00:00
{
if ( is_optional )
return ;
throw Exception ( " Logical error: Sign column for storage " + storage + " is empty " , ErrorCodes : : LOGICAL_ERROR ) ;
}
2017-04-01 07:20:54 +00:00
2018-01-01 12:00:05 +00:00
bool miss_column = true ;
2017-04-01 07:20:54 +00:00
for ( const auto & column : columns )
{
if ( column . name = = sign_column )
{
if ( ! typeid_cast < const DataTypeInt8 * > ( column . type . get ( ) ) )
2018-01-29 17:42:19 +00:00
throw Exception ( " Sign column ( " + sign_column + " ) for storage " + storage + " must have type Int8. "
" Provided column of type " + column . type - > getName ( ) + " . " , ErrorCodes : : BAD_TYPE_OF_FIELD ) ;
2018-01-01 12:00:05 +00:00
miss_column = false ;
2017-04-01 07:20:54 +00:00
break ;
}
}
2018-01-09 18:00:19 +00:00
if ( miss_column )
2018-11-22 21:19:58 +00:00
throw Exception ( " Sign column " + sign_column + " does not exist in table declaration. " , ErrorCodes : : NO_SUCH_COLUMN_IN_TABLE ) ;
2018-01-29 17:42:19 +00:00
} ;
2017-04-01 07:20:54 +00:00
2018-01-29 17:42:19 +00:00
/// that if the version_column column is needed, it exists and is of unsigned integer type.
auto check_version_column = [ this , & columns ] ( bool is_optional , const std : : string & storage )
2017-04-01 07:20:54 +00:00
{
2018-01-29 17:42:19 +00:00
if ( version_column . empty ( ) )
{
if ( is_optional )
return ;
2017-04-01 07:20:54 +00:00
2018-01-29 17:42:19 +00:00
throw Exception ( " Logical error: Version column for storage " + storage + " is empty " , ErrorCodes : : LOGICAL_ERROR ) ;
}
2017-04-01 07:20:54 +00:00
2018-01-01 12:00:05 +00:00
bool miss_column = true ;
2017-04-01 07:20:54 +00:00
for ( const auto & column : columns )
{
if ( column . name = = version_column )
{
2018-03-06 14:49:27 +00:00
if ( ! column . type - > canBeUsedAsVersion ( ) )
throw Exception ( " The column " + version_column +
" cannot be used as a version column for storage " + storage +
" because it is of type " + column . type - > getName ( ) +
2018-03-06 19:01:45 +00:00
" (must be of an integer type or of type Date or DateTime) " , ErrorCodes : : BAD_TYPE_OF_FIELD ) ;
2018-01-01 12:00:05 +00:00
miss_column = false ;
2017-04-01 07:20:54 +00:00
break ;
}
}
2018-01-09 18:00:19 +00:00
if ( miss_column )
2018-11-22 21:19:58 +00:00
throw Exception ( " Version column " + version_column + " does not exist in table declaration. " , ErrorCodes : : NO_SUCH_COLUMN_IN_TABLE ) ;
2018-01-29 17:42:19 +00:00
} ;
if ( mode = = MergingParams : : Collapsing )
check_sign_column ( false , " CollapsingMergeTree " ) ;
if ( mode = = MergingParams : : Summing )
{
/// If columns_to_sum are set, then check that such columns exist.
for ( const auto & column_to_sum : columns_to_sum )
2018-02-02 12:14:30 +00:00
{
auto check_column_to_sum_exists = [ & column_to_sum ] ( const NameAndTypePair & name_and_type )
{
return column_to_sum = = Nested : : extractTableName ( name_and_type . name ) ;
} ;
if ( columns . end ( ) = = std : : find_if ( columns . begin ( ) , columns . end ( ) , check_column_to_sum_exists ) )
throw Exception (
2018-11-22 21:19:58 +00:00
" Column " + column_to_sum + " listed in columns to sum does not exist in table declaration. " , ErrorCodes : : NO_SUCH_COLUMN_IN_TABLE ) ;
2018-02-02 12:14:30 +00:00
}
2018-01-29 17:42:19 +00:00
}
if ( mode = = MergingParams : : Replacing )
check_version_column ( true , " ReplacingMergeTree " ) ;
2018-02-02 09:46:54 +00:00
if ( mode = = MergingParams : : VersionedCollapsing )
2018-01-29 17:42:19 +00:00
{
2018-02-02 09:46:54 +00:00
check_sign_column ( false , " VersionedCollapsingMergeTree " ) ;
check_version_column ( false , " VersionedCollapsingMergeTree " ) ;
2017-04-01 07:20:54 +00:00
}
/// TODO Checks for Graphite mode.
2016-04-24 09:44:47 +00:00
}
/// Returns the engine-name prefix corresponding to the merging mode
/// ("" for plain MergeTree, "Collapsing" for CollapsingMergeTree, etc.).
String MergeTreeData::MergingParams::getModeName() const
{
    switch (mode)
    {
        case Ordinary:
            return "";
        case Collapsing:
            return "Collapsing";
        case Summing:
            return "Summing";
        case Aggregating:
            return "Aggregating";
        case Replacing:
            return "Replacing";
        case Graphite:
            return "Graphite";
        case VersionedCollapsing:
            return "VersionedCollapsing";
    }

    /// All enum values are handled above; tell the compiler this point is unreachable.
    __builtin_unreachable();
}
2019-05-04 03:45:58 +00:00
/// Returns the maximum of all parts' max block numbers and mutation versions
/// (0 when there are no parts). Takes the parts lock for a consistent snapshot.
Int64 MergeTreeData::getMaxBlockNumber() const
{
    auto lock = lockParts();

    Int64 result = 0;
    for (const DataPartPtr & part : data_parts_by_info)
    {
        result = std::max(result, part->info.max_block);
        result = std::max(result, part->info.mutation);
    }

    return result;
}
2014-08-13 08:07:52 +00:00
/// Scans all disks of the storage policy for part directories and loads them into memory,
/// in parallel. Broken parts are either detached or scheduled for removal depending on
/// whether their data can be recovered from other parts; parts covered by a bigger part
/// are marked Outdated at the end.
/// @param skip_sanity_checks  if true, do not fail even when suspiciously many broken parts are found.
void MergeTreeData::loadDataParts(bool skip_sanity_checks)
{
    LOG_DEBUG(log, "Loading data parts");

    const auto settings = getSettings();
    std::vector<std::pair<String, DiskSpace::DiskPtr>> part_names_with_disks;
    Strings part_file_names;
    Poco::DirectoryIterator end;

    auto disks = storage_policy->getDisks();

    /// Reversed order to load part from low priority disks firstly.
    /// Used for keep part on low priority disk if duplication found
    for (auto disk_it = disks.rbegin(); disk_it != disks.rend(); ++disk_it)
    {
        auto disk_ptr = *disk_it;

        for (Poco::DirectoryIterator it(getFullPathOnDisk(disk_ptr)); it != end; ++it)
        {
            /// Skip temporary directories.
            if (startsWith(it.name(), "tmp"))
                continue;

            part_names_with_disks.emplace_back(it.name(), disk_ptr);
        }
    }

    auto part_lock = lockParts();
    data_parts_indexes.clear();

    if (part_names_with_disks.empty())
    {
        LOG_DEBUG(log, "There is no data parts");
        return;
    }

    /// Parallel loading of data parts.
    size_t num_threads = std::min(size_t(settings->max_part_loading_threads), part_names_with_disks.size());

    /// Protects the shared accumulators below from the loader tasks.
    std::mutex mutex;

    DataPartsVector broken_parts_to_remove;
    DataPartsVector broken_parts_to_detach;
    size_t suspicious_broken_parts = 0;

    std::atomic<bool> has_adaptive_parts = false;
    std::atomic<bool> has_non_adaptive_parts = false;

    ThreadPool pool(num_threads);

    for (size_t i = 0; i < part_names_with_disks.size(); ++i)
    {
        pool.scheduleOrThrowOnError([&, i]
        {
            const auto & part_name = part_names_with_disks[i].first;
            const auto part_disk_ptr = part_names_with_disks[i].second;

            MergeTreePartInfo part_info;
            /// Directories that do not parse as part names are silently ignored.
            if (!MergeTreePartInfo::tryParsePartName(part_name, &part_info, format_version))
                return;

            MutableDataPartPtr part = std::make_shared<DataPart>(*this, part_disk_ptr, part_name, part_info);
            part->relative_path = part_name;
            bool broken = false;

            /// A delete-on-destroy marker means this is a stale copy left behind by a part move.
            Poco::Path part_path(getFullPathOnDisk(part_disk_ptr), part_name);
            Poco::Path marker_path(part_path, DELETE_ON_DESTROY_MARKER_PATH);
            if (Poco::File(marker_path).exists())
            {
                LOG_WARNING(log, "Detaching stale part " << getFullPathOnDisk(part_disk_ptr) << part_name << ", which should have been deleted after a move. That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part.");
                std::lock_guard loading_lock(mutex);
                broken_parts_to_detach.push_back(part);
                ++suspicious_broken_parts;
                return;
            }

            try
            {
                part->loadColumnsChecksumsIndexes(require_part_metadata, true);
            }
            catch (const Exception & e)
            {
                /// Don't count the part as broken if there is not enough memory to load it.
                /// In fact, there can be many similar situations.
                /// But it is OK, because there is a safety guard against deleting too many parts.
                if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED
                    || e.code() == ErrorCodes::CANNOT_ALLOCATE_MEMORY
                    || e.code() == ErrorCodes::CANNOT_MUNMAP
                    || e.code() == ErrorCodes::CANNOT_MREMAP)
                    throw;

                broken = true;
                tryLogCurrentException(__PRETTY_FUNCTION__);
            }
            catch (...)
            {
                broken = true;
                tryLogCurrentException(__PRETTY_FUNCTION__);
            }

            /// Ignore and possibly delete broken parts that can appear as a result of hard server restart.
            if (broken)
            {
                if (part->info.level == 0)
                {
                    /// It is impossible to restore level 0 parts.
                    LOG_ERROR(log, "Considering to remove broken part " << getFullPathOnDisk(part_disk_ptr) << part_name << " because it's impossible to repair.");
                    std::lock_guard loading_lock(mutex);
                    broken_parts_to_remove.push_back(part);
                }
                else
                {
                    /// Count the number of parts covered by the broken part. If it is at least two, assume that
                    /// the broken part was created as a result of merging them and we won't lose data if we
                    /// delete it.
                    size_t contained_parts = 0;

                    LOG_ERROR(log, "Part " << getFullPathOnDisk(part_disk_ptr) << part_name << " is broken. Looking for parts to replace it.");

                    for (const auto & [contained_name, contained_disk_ptr] : part_names_with_disks)
                    {
                        if (contained_name == part_name)
                            continue;

                        MergeTreePartInfo contained_part_info;
                        if (!MergeTreePartInfo::tryParsePartName(contained_name, &contained_part_info, format_version))
                            continue;

                        if (part->info.contains(contained_part_info))
                        {
                            LOG_ERROR(log, "Found part " << getFullPathOnDisk(contained_disk_ptr) << contained_name);
                            ++contained_parts;
                        }
                    }

                    if (contained_parts >= 2)
                    {
                        LOG_ERROR(log, "Considering to remove broken part " << getFullPathOnDisk(part_disk_ptr) << part_name << " because it covers at least 2 other parts");
                        std::lock_guard loading_lock(mutex);
                        broken_parts_to_remove.push_back(part);
                    }
                    else
                    {
                        LOG_ERROR(log, "Detaching broken part " << getFullPathOnDisk(part_disk_ptr) << part_name
                            << " because it covers less than 2 parts. You need to resolve this manually");
                        std::lock_guard loading_lock(mutex);
                        broken_parts_to_detach.push_back(part);
                        ++suspicious_broken_parts;
                    }
                }

                return;
            }

            if (!part->index_granularity_info.is_adaptive)
                has_non_adaptive_parts.store(true, std::memory_order_relaxed);
            else
                has_adaptive_parts.store(true, std::memory_order_relaxed);

            part->modification_time = Poco::File(getFullPathOnDisk(part_disk_ptr) + part_name).getLastModified().epochTime();

            /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later
            part->state = DataPartState::Committed;

            std::lock_guard loading_lock(mutex);
            if (!data_parts_indexes.insert(part).second)
                throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART);
        });
    }

    pool.wait();

    if (has_non_adaptive_parts && has_adaptive_parts && !settings->enable_mixed_granularity_parts)
        throw Exception("Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", ErrorCodes::LOGICAL_ERROR);

    has_non_adaptive_index_granularity_parts = has_non_adaptive_parts;

    if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks)
        throw Exception("Suspiciously many (" + toString(suspicious_broken_parts) + ") broken parts to remove.",
            ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS);

    for (auto & part : broken_parts_to_remove)
        part->remove();
    for (auto & part : broken_parts_to_detach)
        part->renameToDetached("");

    /// Delete from the set of current parts those parts that are covered by another part (those parts that
    /// were merged), but that for some reason are still not deleted from the filesystem.
    /// Deletion of files will be performed later in the clearOldParts() method.

    if (data_parts_indexes.size() >= 2)
    {
        /// Now all parts are committed, so data_parts_by_state_and_info == committed_parts_range
        auto prev_jt = data_parts_by_state_and_info.begin();
        auto curr_jt = std::next(prev_jt);

        /// Marks a covered part Outdated; remove_time is set to the part's mtime so the
        /// regular old-parts cleanup will eventually delete it from disk.
        auto deactivate_part = [&](DataPartIteratorByStateAndInfo it)
        {
            (*it)->remove_time.store((*it)->modification_time, std::memory_order_relaxed);
            modifyPartState(it, DataPartState::Outdated);
        };

        (*prev_jt)->assertState({DataPartState::Committed});

        /// Single in-order sweep: parts are sorted by info, so a covering part is
        /// always adjacent to the parts it covers within the same partition.
        while (curr_jt != data_parts_by_state_and_info.end() && (*curr_jt)->state == DataPartState::Committed)
        {
            /// Don't consider data parts belonging to different partitions.
            if ((*curr_jt)->info.partition_id != (*prev_jt)->info.partition_id)
            {
                ++prev_jt;
                ++curr_jt;
                continue;
            }

            if ((*curr_jt)->contains(**prev_jt))
            {
                deactivate_part(prev_jt);
                prev_jt = curr_jt;
                ++curr_jt;
            }
            else if ((*prev_jt)->contains(**curr_jt))
            {
                /// Advance before deactivation: deactivate_part changes the part's state,
                /// which repositions it in data_parts_by_state_and_info and invalidates curr_jt.
                auto next = std::next(curr_jt);
                deactivate_part(curr_jt);
                curr_jt = next;
            }
            else
            {
                ++prev_jt;
                ++curr_jt;
            }
        }
    }

    calculateColumnSizesImpl();

    LOG_DEBUG(log, "Loaded data parts (" << data_parts_indexes.size() << " items)");
}
2017-02-07 17:52:41 +00:00
/// Is the part directory old.
/// True if its modification time and the modification time of all files inside it is less then threshold.
/// (Only files on the first level of nesting are considered).
2016-06-06 19:16:34 +00:00
static bool isOldPartDirectory ( Poco : : File & directory , time_t threshold )
{
2017-04-01 07:20:54 +00:00
if ( directory . getLastModified ( ) . epochTime ( ) > = threshold )
return false ;
2016-06-06 19:16:34 +00:00
2017-04-01 07:20:54 +00:00
Poco : : DirectoryIterator end ;
for ( Poco : : DirectoryIterator it ( directory ) ; it ! = end ; + + it )
if ( it - > getLastModified ( ) . epochTime ( ) > = threshold )
return false ;
2016-06-06 19:16:34 +00:00
2017-04-01 07:20:54 +00:00
return true ;
2016-06-06 19:16:34 +00:00
}
2017-05-31 15:01:25 +00:00
void MergeTreeData : : clearOldTemporaryDirectories ( ssize_t custom_directories_lifetime_seconds )
2014-03-09 17:36:01 +00:00
{
2017-04-01 07:20:54 +00:00
/// If the method is already called from another thread, then we don't need to do anything.
2019-01-02 06:44:36 +00:00
std : : unique_lock lock ( clear_old_temporary_directories_mutex , std : : defer_lock ) ;
2017-04-01 07:20:54 +00:00
if ( ! lock . try_lock ( ) )
return ;
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2017-08-04 14:00:26 +00:00
time_t current_time = time ( nullptr ) ;
2017-05-31 15:01:25 +00:00
ssize_t deadline = ( custom_directories_lifetime_seconds > = 0 )
? current_time - custom_directories_lifetime_seconds
2019-08-13 10:29:31 +00:00
: current_time - settings - > temporary_directories_lifetime . totalSeconds ( ) ;
2017-04-01 07:20:54 +00:00
2019-05-11 18:50:29 +00:00
const auto full_paths = getDataPaths ( ) ;
2019-04-03 12:52:09 +00:00
2017-04-01 07:20:54 +00:00
/// Delete temporary directories older than a day.
Poco : : DirectoryIterator end ;
2019-04-06 15:21:29 +00:00
for ( auto & & full_data_path : full_paths )
2017-04-01 07:20:54 +00:00
{
2019-04-06 15:21:29 +00:00
for ( Poco : : DirectoryIterator it { full_data_path } ; it ! = end ; + + it )
2017-04-01 07:20:54 +00:00
{
2019-04-01 18:41:19 +00:00
if ( startsWith ( it . name ( ) , " tmp_ " ) )
2017-04-01 07:20:54 +00:00
{
2019-04-06 15:21:29 +00:00
Poco : : File tmp_dir ( full_data_path + it . name ( ) ) ;
2019-04-01 18:41:19 +00:00
2019-04-05 17:37:27 +00:00
try
2017-04-01 07:20:54 +00:00
{
2019-04-05 17:37:27 +00:00
if ( tmp_dir . isDirectory ( ) & & isOldPartDirectory ( tmp_dir , deadline ) )
{
2019-04-06 15:21:29 +00:00
LOG_WARNING ( log , " Removing temporary directory " < < full_data_path < < it . name ( ) ) ;
2019-04-21 18:38:44 +00:00
Poco : : File ( full_data_path + it . name ( ) ) . remove ( true ) ;
2019-04-01 18:41:19 +00:00
}
}
2019-04-05 17:37:27 +00:00
catch ( const Poco : : FileNotFoundException & )
{
2019-04-01 18:41:19 +00:00
/// If the file is already deleted, do nothing.
2017-04-01 07:20:54 +00:00
}
}
}
}
2016-02-14 11:02:47 +00:00
}
/// Atomically selects Outdated parts that nobody references anymore and whose
/// removal deadline has passed, moving them to the Deleting state.
/// Returns the grabbed parts (empty if another thread is already doing this).
MergeTreeData::DataPartsVector MergeTreeData::grabOldParts()
{
    DataPartsVector res;

    /// If the method is already called from another thread, then we don't need to do anything.
    std::unique_lock lock(grab_old_parts_mutex, std::defer_lock);
    if (!lock.try_lock())
        return res;

    time_t now = time(nullptr);
    std::vector<DataPartIteratorByStateAndInfo> parts_to_delete;

    {
        auto parts_lock = lockParts();

        auto outdated_parts_range = getDataPartsStateRange(DataPartState::Outdated);
        for (auto it = outdated_parts_range.begin(); it != outdated_parts_range.end(); ++it)
        {
            const DataPartPtr & part = *it;

            auto part_remove_time = part->remove_time.load(std::memory_order_relaxed);

            /// Grab only parts that are not used by anyone (SELECTs for example) ...
            if (!part.unique())
                continue;
            /// ... and have been scheduled for removal long enough ago.
            if (part_remove_time >= now)
                continue;
            if (now - part_remove_time <= getSettings()->old_parts_lifetime.totalSeconds())
                continue;

            parts_to_delete.emplace_back(it);
        }

        res.reserve(parts_to_delete.size());
        for (const auto & it_to_delete : parts_to_delete)
        {
            res.emplace_back(*it_to_delete);
            modifyPartState(it_to_delete, DataPartState::Deleting);
        }
    }

    if (!res.empty())
        LOG_TRACE(log, "Found " << res.size() << " old parts to remove.");

    return res;
}
2016-02-14 11:02:47 +00:00
2017-09-11 22:40:51 +00:00
void MergeTreeData : : rollbackDeletingParts ( const MergeTreeData : : DataPartsVector & parts )
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-09-11 22:40:51 +00:00
for ( auto & part : parts )
{
/// We should modify it under data_parts_mutex
2017-09-21 21:51:17 +00:00
part - > assertState ( { DataPartState : : Deleting } ) ;
2017-11-20 19:33:12 +00:00
modifyPartState ( part , DataPartState : : Outdated ) ;
2017-09-11 22:40:51 +00:00
}
}
void MergeTreeData : : removePartsFinally ( const MergeTreeData : : DataPartsVector & parts )
2014-07-25 11:15:11 +00:00
{
2018-01-23 22:56:46 +00:00
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-09-11 22:40:51 +00:00
2018-01-23 22:56:46 +00:00
/// TODO: use data_parts iterators instead of pointers
for ( auto & part : parts )
{
2018-02-19 15:31:43 +00:00
auto it = data_parts_by_info . find ( part - > info ) ;
if ( it = = data_parts_by_info . end ( ) )
throw Exception ( " Deleting data part " + part - > name + " doesn't exist " , ErrorCodes : : LOGICAL_ERROR ) ;
2018-01-23 22:56:46 +00:00
( * it ) - > assertState ( { DataPartState : : Deleting } ) ;
data_parts_indexes . erase ( it ) ;
}
}
/// Data parts is still alive (since DataPartsVector holds shared_ptrs) and contain useful metainformation for logging
/// NOTE: There is no need to log parts deletion somewhere else, all deleting parts pass through this function and pass away
2019-01-04 12:10:00 +00:00
if ( auto part_log = global_context . getPartLog ( database_name ) )
2017-09-11 22:40:51 +00:00
{
2018-01-23 22:56:46 +00:00
PartLogElement part_log_elem ;
part_log_elem . event_type = PartLogElement : : REMOVE_PART ;
part_log_elem . event_time = time ( nullptr ) ;
part_log_elem . duration_ms = 0 ;
2017-09-11 22:40:51 +00:00
2018-01-23 22:56:46 +00:00
part_log_elem . database_name = database_name ;
part_log_elem . table_name = table_name ;
2017-11-20 19:33:12 +00:00
2018-01-23 22:56:46 +00:00
for ( auto & part : parts )
{
2019-01-31 17:30:56 +00:00
part_log_elem . partition_id = part - > info . partition_id ;
2018-01-23 22:56:46 +00:00
part_log_elem . part_name = part - > name ;
2018-03-26 14:18:04 +00:00
part_log_elem . bytes_compressed_on_disk = part - > bytes_on_disk ;
2018-01-23 22:56:46 +00:00
part_log_elem . rows = part - > rows_count ;
part_log - > add ( part_log_elem ) ;
}
2017-09-11 22:40:51 +00:00
}
2014-07-25 11:15:11 +00:00
}
2017-11-20 19:33:12 +00:00
void MergeTreeData : : clearOldPartsFromFilesystem ( )
2014-07-25 11:15:11 +00:00
{
2019-08-11 19:14:42 +00:00
DataPartsVector parts_to_remove = grabOldParts ( ) ;
clearPartsFromFilesystem ( parts_to_remove ) ;
removePartsFinally ( parts_to_remove ) ;
}
2014-07-25 11:15:11 +00:00
2019-08-11 19:14:42 +00:00
void MergeTreeData : : clearPartsFromFilesystem ( const DataPartsVector & parts_to_remove )
{
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2019-08-13 10:29:31 +00:00
if ( parts_to_remove . size ( ) > 1 & & settings - > max_part_removal_threads > 1 & & parts_to_remove . size ( ) > settings - > concurrent_part_removal_threshold )
2017-04-01 07:20:54 +00:00
{
2019-08-11 19:14:42 +00:00
/// Parallel parts removal.
2017-11-20 19:33:12 +00:00
2019-08-13 10:29:31 +00:00
size_t num_threads = std : : min ( size_t ( settings - > max_part_removal_threads ) , parts_to_remove . size ( ) ) ;
2019-08-11 19:14:42 +00:00
ThreadPool pool ( num_threads ) ;
/// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool.
2019-08-11 19:30:39 +00:00
for ( const DataPartPtr & part : parts_to_remove )
2019-08-11 19:14:42 +00:00
{
2019-10-17 14:41:27 +00:00
pool . scheduleOrThrowOnError ( [ & ]
2019-08-11 19:14:42 +00:00
{
2019-08-11 19:30:39 +00:00
LOG_DEBUG ( log , " Removing part from filesystem " < < part - > name ) ;
part - > remove ( ) ;
2019-08-11 19:14:42 +00:00
} ) ;
}
pool . wait ( ) ;
}
else
{
for ( const DataPartPtr & part : parts_to_remove )
{
LOG_DEBUG ( log , " Removing part from filesystem " < < part - > name ) ;
part - > remove ( ) ;
}
}
2014-07-25 11:15:11 +00:00
}
2019-08-29 16:17:47 +00:00
void MergeTreeData : : rename (
const String & /*new_path_to_db*/ , const String & new_database_name ,
const String & new_table_name , TableStructureWriteLockHolder & )
2014-03-09 17:36:01 +00:00
{
2019-04-21 18:38:44 +00:00
auto old_file_db_name = escapeForFileName ( database_name ) ;
auto new_file_db_name = escapeForFileName ( new_database_name ) ;
2019-04-05 19:45:59 +00:00
auto old_file_table_name = escapeForFileName ( table_name ) ;
auto new_file_table_name = escapeForFileName ( new_table_name ) ;
2017-04-01 07:20:54 +00:00
2019-05-24 19:03:07 +00:00
auto disks = storage_policy - > getDisks ( ) ;
2019-04-28 14:49:41 +00:00
for ( const auto & disk : disks )
2019-04-05 19:58:59 +00:00
{
2019-09-11 10:57:32 +00:00
auto new_full_path = disk - > getClickHouseDataPath ( ) + new_file_db_name + ' / ' + new_file_table_name + ' / ' ;
2019-09-10 12:23:18 +00:00
2019-04-05 19:45:59 +00:00
if ( Poco : : File { new_full_path } . exists ( ) )
throw Exception { " Target path already exists: " + new_full_path , ErrorCodes : : DIRECTORY_ALREADY_EXISTS } ;
}
2019-04-28 14:49:41 +00:00
for ( const auto & disk : disks )
2019-04-06 15:21:29 +00:00
{
2019-09-11 10:57:32 +00:00
auto full_path = disk - > getClickHouseDataPath ( ) + old_file_db_name + ' / ' + old_file_table_name + ' / ' ;
auto new_db_path = disk - > getClickHouseDataPath ( ) + new_file_db_name + ' / ' ;
2019-09-10 12:23:18 +00:00
Poco : : File db_file { new_db_path } ;
if ( ! db_file . exists ( ) )
db_file . createDirectory ( ) ;
auto new_full_path = new_db_path + new_file_table_name + ' / ' ;
2019-04-21 18:38:44 +00:00
Poco : : File { full_path } . renameTo ( new_full_path ) ;
2019-04-06 15:21:29 +00:00
}
2017-12-03 02:15:35 +00:00
2019-01-04 12:10:00 +00:00
global_context . dropCaches ( ) ;
2019-04-21 18:38:44 +00:00
database_name = new_database_name ;
2019-04-05 19:45:59 +00:00
table_name = new_table_name ;
2014-03-09 17:36:01 +00:00
}
2014-03-13 12:48:07 +00:00
void MergeTreeData : : dropAllData ( )
2014-03-09 17:36:01 +00:00
{
2017-04-01 07:20:54 +00:00
LOG_TRACE ( log , " dropAllData: waiting for locks. " ) ;
2015-09-17 21:31:26 +00:00
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2015-09-17 21:31:26 +00:00
2017-04-01 07:20:54 +00:00
LOG_TRACE ( log , " dropAllData: removing data from memory. " ) ;
2015-09-17 21:31:26 +00:00
2019-08-11 19:14:42 +00:00
DataPartsVector all_parts ( data_parts_by_info . begin ( ) , data_parts_by_info . end ( ) ) ;
2017-11-20 19:33:12 +00:00
data_parts_indexes . clear ( ) ;
2017-04-01 07:20:54 +00:00
column_sizes . clear ( ) ;
2014-03-09 17:36:01 +00:00
2019-01-04 12:10:00 +00:00
global_context . dropCaches ( ) ;
2014-03-13 19:14:25 +00:00
2017-04-01 07:20:54 +00:00
LOG_TRACE ( log , " dropAllData: removing data from filesystem. " ) ;
2015-09-17 21:31:26 +00:00
2019-06-16 19:59:30 +00:00
/// Removing of each data part before recursive removal of directory is to speed-up removal, because there will be less number of syscalls.
2019-08-11 19:14:42 +00:00
clearPartsFromFilesystem ( all_parts ) ;
2019-06-16 19:59:30 +00:00
2019-05-11 18:50:29 +00:00
auto full_paths = getDataPaths ( ) ;
2019-04-28 14:49:41 +00:00
for ( auto & & full_data_path : full_paths )
2019-04-06 15:21:29 +00:00
Poco : : File ( full_data_path ) . remove ( true ) ;
2015-09-17 21:31:26 +00:00
2017-04-01 07:20:54 +00:00
LOG_TRACE ( log , " dropAllData: done. " ) ;
2014-03-09 17:36:01 +00:00
}
2017-02-08 18:43:35 +00:00
namespace
{
/// If true, then in order to ALTER the type of the column from the type from to the type to
/// we don't need to rewrite the data, we only need to update metadata and columns.txt in part directories.
2017-02-09 17:29:36 +00:00
/// The function works for Arrays and Nullables of the same structure.
2017-02-08 18:43:35 +00:00
bool isMetadataOnlyConversion ( const IDataType * from , const IDataType * to )
{
2017-04-01 07:20:54 +00:00
if ( from - > getName ( ) = = to - > getName ( ) )
return true ;
static const std : : unordered_multimap < std : : type_index , const std : : type_info & > ALLOWED_CONVERSIONS =
{
{ typeid ( DataTypeEnum8 ) , typeid ( DataTypeEnum8 ) } ,
{ typeid ( DataTypeEnum8 ) , typeid ( DataTypeInt8 ) } ,
{ typeid ( DataTypeEnum16 ) , typeid ( DataTypeEnum16 ) } ,
{ typeid ( DataTypeEnum16 ) , typeid ( DataTypeInt16 ) } ,
{ typeid ( DataTypeDateTime ) , typeid ( DataTypeUInt32 ) } ,
{ typeid ( DataTypeUInt32 ) , typeid ( DataTypeDateTime ) } ,
{ typeid ( DataTypeDate ) , typeid ( DataTypeUInt16 ) } ,
{ typeid ( DataTypeUInt16 ) , typeid ( DataTypeDate ) } ,
} ;
while ( true )
{
auto it_range = ALLOWED_CONVERSIONS . equal_range ( typeid ( * from ) ) ;
for ( auto it = it_range . first ; it ! = it_range . second ; + + it )
{
if ( it - > second = = typeid ( * to ) )
return true ;
}
const auto * arr_from = typeid_cast < const DataTypeArray * > ( from ) ;
const auto * arr_to = typeid_cast < const DataTypeArray * > ( to ) ;
if ( arr_from & & arr_to )
{
from = arr_from - > getNestedType ( ) . get ( ) ;
to = arr_to - > getNestedType ( ) . get ( ) ;
continue ;
}
const auto * nullable_from = typeid_cast < const DataTypeNullable * > ( from ) ;
const auto * nullable_to = typeid_cast < const DataTypeNullable * > ( to ) ;
if ( nullable_from & & nullable_to )
{
from = nullable_from - > getNestedType ( ) . get ( ) ;
to = nullable_to - > getNestedType ( ) . get ( ) ;
continue ;
}
return false ;
}
2017-02-08 18:43:35 +00:00
}
}
2019-02-12 17:54:52 +00:00
/// Validate an ALTER against this table without executing it.
/// Simulates the metadata changes (columns, indices, constraints, key ASTs, TTL,
/// settings), forbids altering columns that participate in keys, and finally
/// dry-runs the conversion expression to check that type changes are possible.
/// Throws on any violation; returns normally if the ALTER is acceptable.
void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & context)
{
    /// Check that needed transformations can be applied to the list of columns without considering type conversions.
    auto new_columns = getColumns();
    auto new_indices = getIndices();
    auto new_constraints = getConstraints();
    ASTPtr new_order_by_ast = order_by_ast;
    ASTPtr new_primary_key_ast = primary_key_ast;
    ASTPtr new_ttl_table_ast = ttl_table_ast;
    SettingsChanges new_changes;
    commands.apply(new_columns, new_indices, new_constraints, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast, new_changes);

    /// Adding the first data skipping index is gated behind an experimental-feature setting.
    if (getIndices().empty() && !new_indices.empty() &&
            !context.getSettingsRef().allow_experimental_data_skipping_indices)
        throw Exception("You must set the setting `allow_experimental_data_skipping_indices` to 1 " \
                        "before using data skipping indices.", ErrorCodes::BAD_ARGUMENTS);

    /// Set of columns that shouldn't be altered.
    NameSet columns_alter_forbidden;

    /// Primary key columns can be ALTERed only if they are used in the key as-is
    /// (and not as a part of some expression) and if the ALTER only affects column metadata.
    NameSet columns_alter_metadata_only;

    if (partition_key_expr)
    {
        /// Forbid altering partition key columns because it can change partition ID format.
        /// TODO: in some cases (e.g. adding an Enum value) a partition key column can still be ALTERed.
        /// We should allow it.
        for (const String & col : partition_key_expr->getRequiredColumns())
            columns_alter_forbidden.insert(col);
    }

    /// Columns used by skip indices cannot be altered either.
    for (const auto & index : skip_indices)
    {
        for (const String & col : index->expr->getRequiredColumns())
            columns_alter_forbidden.insert(col);
    }

    if (sorting_key_expr)
    {
        /// Columns that feed into sorting key sub-expressions are fully forbidden...
        for (const ExpressionAction & action : sorting_key_expr->getActions())
        {
            auto action_columns = action.getNeededColumns();
            columns_alter_forbidden.insert(action_columns.begin(), action_columns.end());
        }
        /// ...while columns used as-is by the sorting key allow metadata-only changes.
        for (const String & col : sorting_key_expr->getRequiredColumns())
            columns_alter_metadata_only.insert(col);

        /// We don't process sample_by_ast separately because it must be among the primary key columns
        /// and we don't process primary_key_expr separately because it is a prefix of sorting_key_expr.
    }

    if (!merging_params.sign_column.empty())
        columns_alter_forbidden.insert(merging_params.sign_column);

    /// Snapshot current physical column types for metadata-only conversion checks below.
    std::map<String, const IDataType *> old_types;
    for (const auto & column : getColumns().getAllPhysical())
        old_types.emplace(column.name, column.type.get());

    for (const AlterCommand & command : commands)
    {
        /// Non-mutating commands (pure metadata) need no per-column restrictions.
        if (!command.isMutable())
        {
            continue;
        }

        if (columns_alter_forbidden.count(command.column_name))
            throw Exception("Trying to ALTER key column " + command.column_name, ErrorCodes::ILLEGAL_COLUMN);

        if (columns_alter_metadata_only.count(command.column_name))
        {
            /// MODIFY COLUMN on a key column is allowed only when the on-disk format stays identical.
            if (command.type == AlterCommand::MODIFY_COLUMN)
            {
                auto it = old_types.find(command.column_name);
                if (it != old_types.end() && isMetadataOnlyConversion(it->second, command.data_type.get()))
                    continue;
            }
            throw Exception(
                    "ALTER of key column " + command.column_name + " must be metadata-only",
                    ErrorCodes::ILLEGAL_COLUMN);
        }

        if (command.type == AlterCommand::MODIFY_ORDER_BY)
        {
            if (!is_custom_partitioned)
                throw Exception(
                    "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax",
                    ErrorCodes::BAD_ARGUMENTS);
        }
    }

    /// Dry-run the property/TTL updates (only_check = true means: validate, don't apply).
    setProperties(new_order_by_ast, new_primary_key_ast,
            new_columns, new_indices, new_constraints, /* only_check = */ true);

    setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast, /* only_check = */ true);

    for (const auto & setting : new_changes)
        checkSettingCanBeChanged(setting.name);

    /// Check that type conversions are possible.
    ExpressionActionsPtr unused_expression;
    NameToNameMap unused_map;
    bool unused_bool;
    createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(),
            getIndices().indices, new_indices.indices, unused_expression, unused_map, unused_bool);
}
2017-12-25 21:57:29 +00:00
/// Compute what must happen on disk to convert a part from `old_columns`/`old_indices`
/// to `new_columns`/`new_indices`:
///   - out_expression: CAST expression to re-materialize columns whose type changes
///     (nullptr if no conversion is needed);
///   - out_rename_map: file renames to apply after conversion; an empty target means
///     "delete this file";
///   - out_force_update_metadata: true when at least one change is a metadata-only
///     type conversion (no file rewriting required).
/// `part` may be nullptr (dry-run from checkAlter) — then mark extension is derived
/// from the storage settings instead of the part.
void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns,
    const IndicesASTs & old_indices, const IndicesASTs & new_indices, ExpressionActionsPtr & out_expression,
    NameToNameMap & out_rename_map, bool & out_force_update_metadata) const
{
    const auto settings = getSettings();
    out_expression = nullptr;
    out_rename_map = {};
    out_force_update_metadata = false;

    /// Marks file extension depends on whether the part uses adaptive index granularity.
    String part_mrk_file_extension;
    if (part)
        part_mrk_file_extension = part->index_granularity_info.marks_file_extension;
    else
        part_mrk_file_extension = settings->index_granularity_bytes == 0 ? getNonAdaptiveMrkExtension() : getAdaptiveMrkExtension();

    using NameToType = std::map<String, const IDataType *>;
    NameToType new_types;
    for (const NameAndTypePair & column : new_columns)
        new_types.emplace(column.name, column.type.get());

    /// For every column that need to be converted: source column name, column name of calculated expression for conversion.
    std::vector<std::pair<String, String>> conversions;

    /// Remove old indices
    std::set<String> new_indices_set;
    for (const auto & index_decl : new_indices)
        new_indices_set.emplace(index_decl->as<ASTIndexDeclaration &>().name);
    for (const auto & index_decl : old_indices)
    {
        const auto & index = index_decl->as<ASTIndexDeclaration &>();
        if (!new_indices_set.count(index.name))
        {
            /// Empty target in the rename map means "delete the file".
            out_rename_map["skp_idx_" + index.name + ".idx"] = "";
            out_rename_map["skp_idx_" + index.name + part_mrk_file_extension] = "";
        }
    }

    /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes.
    std::map<String, size_t> stream_counts;
    for (const NameAndTypePair & column : old_columns)
    {
        column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path)
        {
            ++stream_counts[IDataType::getFileNameForStream(column.name, substream_path)];
        }, {});
    }

    for (const NameAndTypePair & column : old_columns)
    {
        if (!new_types.count(column.name))
        {
            /// The column was deleted.
            if (!part || part->hasColumnFiles(column.name, *column.type))
            {
                column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path)
                {
                    String file_name = IDataType::getFileNameForStream(column.name, substream_path);

                    /// Delete files if they are no longer shared with another column.
                    if (--stream_counts[file_name] == 0)
                    {
                        out_rename_map[file_name + ".bin"] = "";
                        out_rename_map[file_name + part_mrk_file_extension] = "";
                    }
                }, {});
            }
        }
        else
        {
            /// The column was converted. Collect conversions.
            const auto * new_type = new_types[column.name];
            const String new_type_name = new_type->getName();
            const auto * old_type = column.type.get();

            if (!new_type->equals(*old_type) && (!part || part->hasColumnFiles(column.name, *column.type)))
            {
                if (isMetadataOnlyConversion(old_type, new_type))
                {
                    /// No file rewriting needed, only columns.txt must be updated.
                    out_force_update_metadata = true;
                    continue;
                }

                /// Need to modify column type.
                if (!out_expression)
                    out_expression = std::make_shared<ExpressionActions>(NamesAndTypesList(), global_context);

                out_expression->addInput(ColumnWithTypeAndName(nullptr, column.type, column.name));

                Names out_names;

                /// This is temporary name for expression. TODO Invent the name more safely.
                const String new_type_name_column = '#' + new_type_name + "_column";
                out_expression->add(ExpressionAction::addColumn(
                    {DataTypeString().createColumnConst(1, new_type_name), std::make_shared<DataTypeString>(), new_type_name_column}));

                /// Build CAST(column, 'NewType') and drop the helper columns afterwards.
                const auto & function = FunctionFactory::instance().get("CAST", global_context);
                out_expression->add(ExpressionAction::applyFunction(
                    function, Names{column.name, new_type_name_column}), out_names);

                out_expression->add(ExpressionAction::removeColumn(new_type_name_column));
                out_expression->add(ExpressionAction::removeColumn(column.name));

                conversions.emplace_back(column.name, out_names.at(0));
            }
        }
    }

    if (!conversions.empty())
    {
        /// Give proper names for temporary columns with conversion results.
        NamesWithAliases projection;
        projection.reserve(conversions.size());

        for (const auto & source_and_expression : conversions)
        {
            /// Column name for temporary filenames before renaming. NOTE The is unnecessarily tricky.
            String original_column_name = source_and_expression.first;
            String temporary_column_name = original_column_name + " converting";

            projection.emplace_back(source_and_expression.second, temporary_column_name);

            /// After conversion, we need to rename temporary files into original.
            new_types[source_and_expression.first]->enumerateStreams(
                [&](const IDataType::SubstreamPath & substream_path)
                {
                    /// Skip array sizes, because they cannot be modified in ALTER.
                    if (!substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes)
                        return;

                    String original_file_name = IDataType::getFileNameForStream(original_column_name, substream_path);
                    String temporary_file_name = IDataType::getFileNameForStream(temporary_column_name, substream_path);

                    out_rename_map[temporary_file_name + ".bin"] = original_file_name + ".bin";
                    out_rename_map[temporary_file_name + part_mrk_file_extension] = original_file_name + part_mrk_file_extension;
                }, {});
        }

        out_expression->add(ExpressionAction::project(projection));
    }

    /// Log the planned renames/removals for a concrete part (debug aid).
    if (part && !out_rename_map.empty())
    {
        WriteBufferFromOwnString out;
        out << "Will ";
        bool first = true;
        for (const auto & from_to : out_rename_map)
        {
            if (!first)
                out << ", ";
            first = false;
            if (from_to.second.empty())
                out << "remove " << from_to.first;
            else
                out << "rename " << from_to.first << " to " << from_to.second;
        }
        out << " in part " << part->name;
        LOG_DEBUG(log, out.str());
    }
}
2014-03-09 17:36:01 +00:00
2019-04-30 23:46:19 +00:00
/// Apply an ALTER to a single data part. The work (conversion expression, file
/// renames, new checksums/columns files) is staged into `transaction`; nothing
/// is applied to the live part until transaction->commit().
/// Unless `skip_sanity_checks`, aborts if suspiciously many files would be
/// modified or removed (guard against destructive ALTERs).
void MergeTreeData::alterDataPart(
    const NamesAndTypesList & new_columns,
    const IndicesASTs & new_indices,
    bool skip_sanity_checks,
    AlterDataPartTransactionPtr & transaction)
{
    const auto settings = getSettings();
    ExpressionActionsPtr expression;
    const auto & part = transaction->getDataPart();
    bool force_update_metadata;
    createConvertExpression(part, part->columns, new_columns,
        getIndices().indices, new_indices,
        expression, transaction->rename_map, force_update_metadata);

    size_t num_files_to_modify = transaction->rename_map.size();
    size_t num_files_to_remove = 0;

    /// An empty rename target means the file will be removed.
    for (const auto & from_to : transaction->rename_map)
        if (from_to.second.empty())
            ++num_files_to_remove;

    if (!skip_sanity_checks
        && (num_files_to_modify > settings->max_files_to_modify_in_alter_columns
            || num_files_to_remove > settings->max_files_to_remove_in_alter_columns))
    {
        transaction->clear();

        const bool forbidden_because_of_modify = num_files_to_modify > settings->max_files_to_modify_in_alter_columns;

        /// Build a detailed message listing the offending files and how to override the limit.
        std::stringstream exception_message;
        exception_message
            << "Suspiciously many ("
            << (forbidden_because_of_modify ? num_files_to_modify : num_files_to_remove)
            << ") files (";

        bool first = true;
        for (const auto & from_to : transaction->rename_map)
        {
            if (!first)
                exception_message << ", ";
            if (forbidden_because_of_modify)
            {
                exception_message << "from " << backQuote(from_to.first) << " to " << backQuote(from_to.second);
                first = false;
            }
            else if (from_to.second.empty())
            {
                exception_message << backQuote(from_to.first);
                first = false;
            }
        }

        exception_message
            << ") need to be "
            << (forbidden_because_of_modify ? "modified" : "removed")
            << " in part " << part->name << " of table at " << part->getFullPath() << ". Aborting just in case."
            << " If it is not an error, you could increase merge_tree/"
            << (forbidden_because_of_modify ? "max_files_to_modify_in_alter_columns" : "max_files_to_remove_in_alter_columns")
            << " parameter in configuration file (current value: "
            << (forbidden_because_of_modify ? settings->max_files_to_modify_in_alter_columns : settings->max_files_to_remove_in_alter_columns)
            << ")";

        throw Exception(exception_message.str(), ErrorCodes::TABLE_DIFFERS_TOO_MUCH);
    }

    DataPart::Checksums add_checksums;

    /// Nothing to do for this part: invalidate the transaction so commit() is a no-op.
    if (transaction->rename_map.empty() && !force_update_metadata)
    {
        transaction->clear();
        return;
    }

    /// Apply the expression and write the result to temporary files.
    if (expression)
    {
        BlockInputStreamPtr part_in = std::make_shared<MergeTreeSequentialBlockInputStream>(
            *this, part, expression->getRequiredColumns(), false, /* take_column_types_from_storage = */ false);

        auto compression_codec = global_context.chooseCompressionCodec(
            part->bytes_on_disk,
            static_cast<double>(part->bytes_on_disk) / this->getTotalActiveSizeInBytes());

        ExpressionBlockInputStream in(part_in, expression);

        /** Don't write offsets for arrays, because ALTER never change them
         *  (MODIFY COLUMN could only change types of elements but never modify array sizes).
         *  Also note that they does not participate in 'rename_map'.
         *  Also note, that for columns, that are parts of Nested,
         *  temporary column name ('converting_column_name') created in 'createConvertExpression' method
         *  will have old name of shared offsets for arrays.
         */
        IMergedBlockOutputStream::WrittenOffsetColumns unused_written_offsets;

        MergedColumnOnlyOutputStream out(
            *this,
            in.getHeader(),
            part->getFullPath(),
            true /* sync */,
            compression_codec,
            true /* skip_offsets */,
            /// Don't recalc indices because indices alter is restricted
            std::vector<MergeTreeIndexPtr>{},
            unused_written_offsets,
            part->index_granularity,
            &part->index_granularity_info);

        in.readPrefix();
        out.writePrefix();

        while (Block b = in.read())
            out.write(b);

        in.readSuffix();
        add_checksums = out.writeSuffixAndGetChecksums();
    }

    /// Update the checksums.
    DataPart::Checksums new_checksums = part->checksums;
    for (auto it : transaction->rename_map)
    {
        if (it.second.empty())
            new_checksums.files.erase(it.first);
        else
            new_checksums.files[it.second] = add_checksums.files[it.first];
    }

    /// Write the checksums to the temporary file.
    if (!part->checksums.empty())
    {
        transaction->new_checksums = new_checksums;
        WriteBufferFromFile checksums_file(part->getFullPath() + "checksums.txt.tmp", 4096);
        new_checksums.write(checksums_file);
        transaction->rename_map["checksums.txt.tmp"] = "checksums.txt";
    }

    /// Write the new column list to the temporary file.
    {
        transaction->new_columns = new_columns.filter(part->columns.getNames());
        WriteBufferFromFile columns_file(part->getFullPath() + "columns.txt.tmp", 4096);
        transaction->new_columns.writeText(columns_file);
        transaction->rename_map["columns.txt.tmp"] = "columns.txt";
    }

    return;
}
2014-03-20 13:00:42 +00:00
2019-08-27 09:34:53 +00:00
void MergeTreeData : : changeSettings (
2019-08-06 13:04:29 +00:00
const SettingsChanges & new_changes ,
2019-08-27 09:34:53 +00:00
TableStructureWriteLockHolder & /* table_lock_holder */ )
2019-08-06 13:04:29 +00:00
{
2019-08-27 09:34:53 +00:00
if ( ! new_changes . empty ( ) )
{
MergeTreeSettings copy = * getSettings ( ) ;
2019-08-27 13:14:19 +00:00
copy . applyChanges ( new_changes ) ;
2019-08-27 09:34:53 +00:00
storage_settings . set ( std : : make_unique < const MergeTreeSettings > ( copy ) ) ;
}
2019-08-06 13:04:29 +00:00
}
2019-08-31 21:15:40 +00:00
void MergeTreeData : : checkSettingCanBeChanged ( const String & setting_name ) const
2019-08-06 13:04:29 +00:00
{
2019-08-30 16:50:59 +00:00
if ( MergeTreeSettings : : findIndex ( setting_name ) = = MergeTreeSettings : : npos )
throw Exception { " Storage ' " + getName ( ) + " ' doesn't have setting ' " + setting_name + " ' " , ErrorCodes : : UNKNOWN_SETTING } ;
2019-08-30 20:12:26 +00:00
if ( MergeTreeSettings : : isReadonlySetting ( setting_name ) )
throw Exception { " Setting ' " + setting_name + " ' is readonly for storage ' " + getName ( ) + " ' " , ErrorCodes : : READONLY_SETTING } ;
2019-08-30 16:50:59 +00:00
2019-08-06 13:04:29 +00:00
}
2019-04-15 09:30:45 +00:00
void MergeTreeData : : removeEmptyColumnsFromPart ( MergeTreeData : : MutableDataPartPtr & data_part )
{
auto & empty_columns = data_part - > empty_columns ;
if ( empty_columns . empty ( ) )
return ;
NamesAndTypesList new_columns ;
for ( const auto & [ name , type ] : data_part - > columns )
if ( ! empty_columns . count ( name ) )
new_columns . emplace_back ( name , type ) ;
std : : stringstream log_message ;
for ( auto it = empty_columns . begin ( ) ; it ! = empty_columns . end ( ) ; + + it )
{
if ( it ! = empty_columns . begin ( ) )
log_message < < " , " ;
log_message < < * it ;
}
LOG_INFO ( log , " Removing empty columns: " < < log_message . str ( ) < < " from part " < < data_part - > name ) ;
2019-04-30 23:46:19 +00:00
AlterDataPartTransactionPtr transaction ( new AlterDataPartTransaction ( data_part ) ) ;
2019-05-07 20:46:08 +00:00
alterDataPart ( new_columns , getIndices ( ) . indices , false , transaction ) ;
2019-04-30 23:46:19 +00:00
if ( transaction - > isValid ( ) )
2019-04-15 09:30:45 +00:00
transaction - > commit ( ) ;
2019-04-30 23:46:19 +00:00
2019-04-15 09:30:45 +00:00
empty_columns . clear ( ) ;
}
2019-08-27 20:43:08 +00:00
void MergeTreeData : : freezeAll ( const String & with_name , const Context & context , TableStructureReadLockHolder & )
2018-11-01 10:35:50 +00:00
{
2018-11-20 17:05:22 +00:00
freezePartitionsByMatcher ( [ ] ( const DataPartPtr & ) { return true ; } , with_name , context ) ;
2018-11-01 10:35:50 +00:00
}
2019-05-01 15:46:17 +00:00
2019-04-30 23:46:19 +00:00
bool MergeTreeData : : AlterDataPartTransaction : : isValid ( ) const
{
2019-05-02 13:42:09 +00:00
return valid & & data_part ;
2019-04-30 23:46:19 +00:00
}
/// Mark the transaction as consumed: after this, commit() and the destructor
/// become no-ops (see isValid()).
void MergeTreeData::AlterDataPartTransaction::clear()
{
    valid = false;
}
2014-07-11 12:47:45 +00:00
/// Atomically (via a rename protocol) apply the staged ALTER to the part:
///   1) move old files aside to "<name>.tmp2";
///   2) move the new temporary files into place and update in-memory metadata;
///   3) delete the ".tmp2" backups.
/// On success the transaction is cleared; on failure it is also cleared so the
/// destructor does not delete the (possibly already renamed) temporary files.
void MergeTreeData::AlterDataPartTransaction::commit()
{
    if (!isValid())
        return;
    /// Redundant with isValid(), kept as a belt-and-braces guard.
    if (!data_part)
        return;

    try
    {
        /// Exclusive lock: no reader may see the part while its files are shuffled.
        std::unique_lock<std::shared_mutex> lock(data_part->columns_lock);

        String path = data_part->getFullPath();

        /// NOTE: checking that a file exists before renaming or deleting it
        /// is justified by the fact that, when converting an ordinary column
        /// to a nullable column, new files are created which did not exist
        /// before, i.e. they do not have older versions.

        /// 1) Rename the old files.
        for (const auto & from_to : rename_map)
        {
            /// Empty target means "remove": the file to stash is the source itself.
            String name = from_to.second.empty() ? from_to.first : from_to.second;
            Poco::File file{path + name};
            if (file.exists())
                file.renameTo(path + name + ".tmp2");
        }

        /// 2) Move new files in the place of old and update the metadata in memory.
        for (const auto & from_to : rename_map)
        {
            if (!from_to.second.empty())
                Poco::File{path + from_to.first}.renameTo(path + from_to.second);
        }

        auto & mutable_part = const_cast<DataPart &>(*data_part);
        mutable_part.checksums = new_checksums;
        mutable_part.columns = new_columns;

        /// 3) Delete the old files.
        for (const auto & from_to : rename_map)
        {
            String name = from_to.second.empty() ? from_to.first : from_to.second;
            Poco::File file{path + name + ".tmp2"};
            if (file.exists())
                file.remove();
        }

        mutable_part.bytes_on_disk = new_checksums.getTotalSizeOnDisk();

        /// TODO: we can skip resetting caches when the column is added.
        data_part->storage.global_context.dropCaches();

        clear();
    }
    catch (...)
    {
        /// Don't delete temporary files in the destructor in case something went wrong.
        clear();
        throw;
    }
}
2014-03-20 13:00:42 +00:00
2014-07-11 12:47:45 +00:00
/// If the transaction was never committed (still valid), roll back by deleting
/// the temporary files that the ALTER produced. Best-effort: individual removal
/// failures are only logged; a destructor must never throw.
MergeTreeData::AlterDataPartTransaction::~AlterDataPartTransaction()
{
    if (!isValid())
        return;
    if (!data_part)
        return;

    try
    {
        LOG_WARNING(data_part->storage.log, "Aborting ALTER of part " << data_part->relative_path);

        String path = data_part->getFullPath();
        for (const auto & from_to : rename_map)
        {
            /// Entries with a non-empty target are freshly written temporary files;
            /// entries with an empty target refer to existing files and must stay.
            if (!from_to.second.empty())
            {
                try
                {
                    Poco::File file(path + from_to.first);
                    if (file.exists())
                        file.remove();
                }
                catch (Poco::Exception & e)
                {
                    LOG_WARNING(data_part->storage.log, "Can't remove " << path + from_to.first << ": " << e.displayText());
                }
            }
        }
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
2019-07-30 17:24:40 +00:00
void MergeTreeData : : PartsTemporaryRename : : addPart ( const String & old_name , const String & new_name )
{
old_and_new_names . push_back ( { old_name , new_name } ) ;
2019-09-06 15:09:20 +00:00
const auto paths = storage . getDataPaths ( ) ;
for ( const auto & full_path : paths )
2019-08-29 16:17:47 +00:00
{
for ( Poco : : DirectoryIterator it = Poco : : DirectoryIterator ( full_path + source_dir ) ; it ! = Poco : : DirectoryIterator ( ) ; + + it )
{
String name = it . name ( ) ;
if ( name = = old_name )
{
2019-09-06 15:09:20 +00:00
old_part_name_to_full_path [ old_name ] = full_path ;
2019-08-29 16:17:47 +00:00
break ;
}
}
}
2019-07-30 17:24:40 +00:00
}
2019-07-31 14:44:55 +00:00
/// Rename every registered part from its old to its new name. On any failure,
/// truncate old_and_new_names to the prefix that succeeded (so the destructor
/// only undoes renames that actually happened) and rethrow.
void MergeTreeData::PartsTemporaryRename::tryRenameAll()
{
    renamed = true;
    for (size_t i = 0; i < old_and_new_names.size(); ++i)
    {
        try
        {
            const auto & names = old_and_new_names[i];
            if (names.first.empty() || names.second.empty())
                throw DB::Exception("Empty part name. Most likely it's a bug.", ErrorCodes::INCORRECT_FILE_NAME);
            /// Path was recorded by addPart(), keyed by the old name.
            const auto full_path = old_part_name_to_full_path[names.first] + source_dir; /// old_name
            Poco::File(full_path + names.first).renameTo(full_path + names.second);
        }
        catch (...)
        {
            /// Keep only the entries already renamed; the rest must not be rolled back.
            old_and_new_names.resize(i);
            LOG_WARNING(storage.log, "Cannot rename parts to perform operation on them: " << getCurrentExceptionMessage(false));
            throw;
        }
    }
}
2019-07-30 17:24:40 +00:00
/// Best-effort rollback: rename every part back from new_name to old_name.
/// Only entries that tryRenameAll() actually renamed remain in the list
/// (it truncates on failure). Errors are logged, never thrown.
MergeTreeData::PartsTemporaryRename::~PartsTemporaryRename()
{
    // TODO what if server had crashed before this destructor was called?
    if (!renamed)
        return;
    for (const auto & names : old_and_new_names)
    {
        if (names.first.empty())
            continue;

        try
        {
            const auto full_path = old_part_name_to_full_path[names.first] + source_dir; /// old_name
            Poco::File(full_path + names.second).renameTo(full_path + names.first);
        }
        catch (...)
        {
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }
    }
}
2014-03-09 17:36:01 +00:00
2018-02-19 15:31:43 +00:00
/// Find the Committed parts that `new_part_info` covers (and would replace).
/// Returns the covered parts as a contiguous range of the ordered part set.
/// If instead an existing part covers the new one, it is returned through
/// `out_covering_part` and the result is empty. Throws LOGICAL_ERROR on any
/// partial intersection or on an exact duplicate — those indicate a bug.
/// Caller must hold the data parts lock (witnessed by `data_parts_lock`).
MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace(
    const MergeTreePartInfo & new_part_info,
    const String & new_part_name,
    DataPartPtr & out_covering_part,
    DataPartsLock & /* data_parts_lock */) const
{
    /// Parts contained in the part are consecutive in data_parts, intersecting the insertion place for the part itself.
    auto it_middle = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo{DataPartState::Committed, new_part_info});
    auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed);

    /// Go to the left.
    DataPartIteratorByStateAndInfo begin = it_middle;
    while (begin != committed_parts_range.begin())
    {
        auto prev = std::prev(begin);

        if (!new_part_info.contains((*prev)->info))
        {
            /// The neighbor is not covered: it either covers us, intersects (bug), or is disjoint (stop).
            if ((*prev)->info.contains(new_part_info))
            {
                out_covering_part = *prev;
                return {};
            }

            if (!new_part_info.isDisjoint((*prev)->info))
                throw Exception("Part " + new_part_name + " intersects previous part " + (*prev)->getNameWithState() +
                    ". It is a bug.", ErrorCodes::LOGICAL_ERROR);

            break;
        }

        begin = prev;
    }

    /// Go to the right.
    DataPartIteratorByStateAndInfo end = it_middle;
    while (end != committed_parts_range.end())
    {
        if ((*end)->info == new_part_info)
            throw Exception("Unexpected duplicate part " + (*end)->getNameWithState() + ". It is a bug.", ErrorCodes::LOGICAL_ERROR);

        if (!new_part_info.contains((*end)->info))
        {
            if ((*end)->info.contains(new_part_info))
            {
                out_covering_part = *end;
                return {};
            }

            if (!new_part_info.isDisjoint((*end)->info))
                throw Exception("Part " + new_part_name + " intersects next part " + (*end)->getNameWithState() +
                    ". It is a bug.", ErrorCodes::LOGICAL_ERROR);

            break;
        }

        ++end;
    }

    /// [begin, end) is exactly the set of covered parts.
    return DataPartsVector{begin, end};
}
2017-11-20 19:33:12 +00:00
2017-04-01 07:20:54 +00:00
2018-02-19 15:31:43 +00:00
void MergeTreeData : : renameTempPartAndAdd ( MutableDataPartPtr & part , SimpleIncrement * increment , Transaction * out_transaction )
{
auto removed = renameTempPartAndReplace ( part , increment , out_transaction ) ;
if ( ! removed . empty ( ) )
throw Exception ( " Added part " + part - > name + " covers " + toString ( removed . size ( ) )
+ " existing part(s) (including " + removed [ 0 ] - > name + " ) " , ErrorCodes : : LOGICAL_ERROR ) ;
}
2017-09-11 22:40:51 +00:00
2018-05-21 13:49:54 +00:00
/// Renames a freshly written temporary part to its final name and inserts it into
/// the in-memory parts set, replacing (outdating) any active parts it covers.
/// Must be called under the parts lock (passed as `lock`).
/// If `increment` is given, a new block number is allocated for the part.
/// If `out_transaction` is given, the part is only precommitted (the transaction
/// commits or rolls it back later); otherwise it is committed immediately.
/// Covered parts are appended to `out_covered_parts` if provided.
void MergeTreeData::renameTempPartAndReplace(
    MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction,
    std::unique_lock<std::mutex> & lock, DataPartsVector * out_covered_parts)
{
    if (out_transaction && &out_transaction->data != this)
        throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.",
            ErrorCodes::LOGICAL_ERROR);

    part->assertState({DataPartState::Temporary});

    MergeTreePartInfo part_info = part->info;
    String part_name;

    /// Sanity check: all parts sharing a partition ID must have equal partition values.
    if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock))
    {
        if (part->partition.value != existing_part_in_partition->partition.value)
            throw Exception(
                "Partition value mismatch between two parts with the same partition ID. Existing part: "
                + existing_part_in_partition->name + ", newly added part: " + part->name,
                ErrorCodes::CORRUPTED_DATA);
    }

    /** It is important that obtaining new block number and adding that block to parts set is done atomically.
      * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part.
      */
    if (increment)
    {
        part_info.min_block = part_info.max_block = increment->get();
        part_name = part->getNewName(part_info);
    }
    else
        part_name = part->name;

    LOG_TRACE(log, "Renaming temporary part " << part->relative_path << " to " << part_name << ".");

    /// Reject a part whose info already exists in the set. An Outdated/Deleting
    /// duplicate gets a distinct (retriable) error code.
    auto it_duplicate = data_parts_by_info.find(part_info);
    if (it_duplicate != data_parts_by_info.end())
    {
        String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists";

        if ((*it_duplicate)->checkState({DataPartState::Outdated, DataPartState::Deleting}))
            throw Exception(message + ", but it will be deleted soon", ErrorCodes::PART_IS_TEMPORARILY_LOCKED);

        throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART);
    }

    DataPartPtr covering_part;
    DataPartsVector covered_parts = getActivePartsToReplace(part_info, part_name, covering_part, lock);

    /// If an active part already covers the new one, the new part is obsolete: drop it silently.
    if (covering_part)
    {
        LOG_WARNING(log, "Tried to add obsolete part " << part_name << " covered by " << covering_part->getNameWithState());
        return;
    }

    /// All checks are passed. Now we can rename the part on disk.
    /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts
    ///
    /// If out_transaction is null, we commit the part to the active set immediately, else add it to the transaction.
    part->name = part_name;
    part->info = part_info;
    part->is_temp = false;
    part->state = DataPartState::PreCommitted;
    part->renameTo(part_name);

    auto part_it = data_parts_indexes.insert(part).first;

    if (out_transaction)
    {
        out_transaction->precommitted_parts.insert(part);
    }
    else
    {
        /// Immediate commit: outdate covered parts and account column sizes.
        auto current_time = time(nullptr);
        for (const DataPartPtr & covered_part : covered_parts)
        {
            covered_part->remove_time.store(current_time, std::memory_order_relaxed);
            modifyPartState(covered_part, DataPartState::Outdated);
            removePartContributionToColumnSizes(covered_part);
        }

        modifyPartState(part_it, DataPartState::Committed);
        addPartContributionToColumnSizes(part);
    }

    if (out_covered_parts)
    {
        for (DataPartPtr & covered_part : covered_parts)
            out_covered_parts->emplace_back(std::move(covered_part));
    }
}
/// Convenience overload: takes the parts lock itself and returns the parts
/// that were replaced (covered) by the newly added part.
MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
    MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction)
{
    if (out_transaction && &out_transaction->data != this)
        throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.",
            ErrorCodes::LOGICAL_ERROR);

    DataPartsVector replaced_parts;
    {
        auto parts_lock = lockParts();
        renameTempPartAndReplace(part, increment, out_transaction, parts_lock, &replaced_parts);
    }
    return replaced_parts;
}
2018-05-21 13:49:54 +00:00
void MergeTreeData : : removePartsFromWorkingSet ( const MergeTreeData : : DataPartsVector & remove , bool clear_without_timeout , DataPartsLock & /*acquired_lock*/ )
2014-07-01 15:58:25 +00:00
{
2018-05-21 13:49:54 +00:00
auto remove_time = clear_without_timeout ? 0 : time ( nullptr ) ;
for ( const DataPartPtr & part : remove )
{
if ( part - > state = = MergeTreeDataPart : : State : : Committed )
removePartContributionToColumnSizes ( part ) ;
if ( part - > state = = MergeTreeDataPart : : State : : Committed | | clear_without_timeout )
part - > remove_time . store ( remove_time , std : : memory_order_relaxed ) ;
if ( part - > state ! = MergeTreeDataPart : : State : : Outdated )
modifyPartState ( part , MergeTreeDataPart : : State : : Outdated ) ;
}
}
void MergeTreeData : : removePartsFromWorkingSet ( const DataPartsVector & remove , bool clear_without_timeout , DataPartsLock * acquired_lock )
{
auto lock = ( acquired_lock ) ? DataPartsLock ( ) : lockParts ( ) ;
2014-07-01 15:58:25 +00:00
2017-09-05 19:03:51 +00:00
for ( auto & part : remove )
2017-04-01 07:20:54 +00:00
{
2018-02-19 15:31:43 +00:00
if ( ! data_parts_by_info . count ( part - > info ) )
2017-09-11 22:40:51 +00:00
throw Exception ( " Part " + part - > getNameWithState ( ) + " not found in data_parts " , ErrorCodes : : LOGICAL_ERROR ) ;
2016-01-30 02:29:20 +00:00
2017-09-21 21:51:17 +00:00
part - > assertState ( { DataPartState : : PreCommitted , DataPartState : : Committed , DataPartState : : Outdated } ) ;
2017-04-01 07:20:54 +00:00
}
2015-09-16 04:18:16 +00:00
2018-05-21 13:49:54 +00:00
removePartsFromWorkingSet ( remove , clear_without_timeout , lock ) ;
}
/// Removes from the working set all parts whose block range lies inside `drop_range`
/// (within the same partition). Parts that only partially intersect the range are
/// a logic error unless `skip_intersecting_parts` is set, in which case they are
/// skipped with a warning. Returns the parts that were actually removed.
/// The caller must hold the parts lock.
MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout,
    bool skip_intersecting_parts, DataPartsLock & lock)
{
    DataPartsVector parts_to_remove;

    /// Empty range: nothing to do.
    if (drop_range.min_block > drop_range.max_block)
        return parts_to_remove;

    auto partition_range = getDataPartsPartitionRange(drop_range.partition_id);

    for (const DataPartPtr & part : partition_range)
    {
        if (part->info.partition_id != drop_range.partition_id)
            throw Exception("Unexpected partition_id of part " + part->name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR);

        if (part->info.min_block < drop_range.min_block)
        {
            if (drop_range.min_block <= part->info.max_block)
            {
                /// Intersect left border
                String error = "Unexpected merged part " + part->name + " intersecting drop range " + drop_range.getPartName();
                if (!skip_intersecting_parts)
                    throw Exception(error, ErrorCodes::LOGICAL_ERROR);

                LOG_WARNING(log, error);
            }

            /// Part lies entirely to the left of the range.
            continue;
        }

        /// Stop on new parts
        if (part->info.min_block > drop_range.max_block)
            break;

        if (part->info.min_block <= drop_range.max_block && drop_range.max_block < part->info.max_block)
        {
            /// Intersect right border
            String error = "Unexpected merged part " + part->name + " intersecting drop range " + drop_range.getPartName();
            if (!skip_intersecting_parts)
                throw Exception(error, ErrorCodes::LOGICAL_ERROR);

            LOG_WARNING(log, error);
            continue;
        }

        /// Parts already being deleted are left alone.
        if (part->state != DataPartState::Deleting)
            parts_to_remove.emplace_back(part);
    }

    removePartsFromWorkingSet(parts_to_remove, clear_without_timeout, lock);

    return parts_to_remove;
}
2017-09-05 19:03:51 +00:00
2018-05-21 13:49:54 +00:00
void MergeTreeData : : forgetPartAndMoveToDetached ( const MergeTreeData : : DataPartPtr & part_to_detach , const String & prefix , bool
restore_covered )
2014-04-02 07:59:43 +00:00
{
2018-05-21 13:49:54 +00:00
LOG_INFO ( log , " Renaming " < < part_to_detach - > relative_path < < " to " < < prefix < < part_to_detach - > name < < " and forgiving it. " ) ;
2017-04-01 07:20:54 +00:00
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-04-01 07:20:54 +00:00
2018-02-19 15:31:43 +00:00
auto it_part = data_parts_by_info . find ( part_to_detach - > info ) ;
if ( it_part = = data_parts_by_info . end ( ) )
2017-09-11 22:40:51 +00:00
throw Exception ( " No such data part " + part_to_detach - > getNameWithState ( ) , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2017-04-01 07:20:54 +00:00
2018-05-21 13:49:54 +00:00
/// What if part_to_detach is a reference to *it_part? Make a new owner just in case.
2017-11-20 19:33:12 +00:00
DataPartPtr part = * it_part ;
2017-04-01 07:20:54 +00:00
2017-11-20 19:33:12 +00:00
if ( part - > state = = DataPartState : : Committed )
removePartContributionToColumnSizes ( part ) ;
modifyPartState ( it_part , DataPartState : : Deleting ) ;
2018-05-21 13:49:54 +00:00
part - > renameToDetached ( prefix ) ;
2017-11-20 19:33:12 +00:00
data_parts_indexes . erase ( it_part ) ;
2017-04-01 07:20:54 +00:00
2017-11-20 19:33:12 +00:00
if ( restore_covered & & part - > info . level = = 0 )
2017-04-01 07:20:54 +00:00
{
2017-11-20 19:33:12 +00:00
LOG_WARNING ( log , " Will not recover parts covered by zero-level part " < < part - > name ) ;
return ;
}
2017-09-11 22:40:51 +00:00
2017-11-20 19:33:12 +00:00
if ( restore_covered )
{
2017-04-01 07:20:54 +00:00
Strings restored ;
bool error = false ;
2017-11-20 19:33:12 +00:00
String error_parts ;
2017-04-01 07:20:54 +00:00
2017-08-14 18:16:11 +00:00
Int64 pos = part - > info . min_block ;
2017-04-01 07:20:54 +00:00
2017-11-20 19:33:12 +00:00
auto is_appropriate_state = [ ] ( DataPartState state )
2017-04-01 07:20:54 +00:00
{
2017-11-20 19:33:12 +00:00
return state = = DataPartState : : Committed | | state = = DataPartState : : Outdated ;
} ;
2018-02-19 15:31:43 +00:00
auto update_error = [ & ] ( DataPartIteratorByInfo it )
2017-11-20 19:33:12 +00:00
{
error = true ;
error_parts + = ( * it ) - > getNameWithState ( ) + " " ;
} ;
2018-02-19 15:31:43 +00:00
auto it_middle = data_parts_by_info . lower_bound ( part - > info ) ;
2017-11-20 19:33:12 +00:00
/// Restore the leftmost part covered by the part
2018-02-19 15:31:43 +00:00
if ( it_middle ! = data_parts_by_info . begin ( ) )
2017-11-20 19:33:12 +00:00
{
auto it = std : : prev ( it_middle ) ;
if ( part - > contains ( * * it ) & & is_appropriate_state ( ( * it ) - > state ) )
2017-04-01 07:20:54 +00:00
{
2017-11-20 19:33:12 +00:00
/// Maybe, we must consider part level somehow
2017-08-14 18:16:11 +00:00
if ( ( * it ) - > info . min_block ! = part - > info . min_block )
2017-11-20 19:33:12 +00:00
update_error ( it ) ;
2017-09-11 22:40:51 +00:00
if ( ( * it ) - > state ! = DataPartState : : Committed )
{
addPartContributionToColumnSizes ( * it ) ;
2017-11-20 19:33:12 +00:00
modifyPartState ( it , DataPartState : : Committed ) ; // iterator is not invalidated here
2017-09-11 22:40:51 +00:00
}
2017-08-14 18:16:11 +00:00
pos = ( * it ) - > info . max_block + 1 ;
2017-04-01 07:20:54 +00:00
restored . push_back ( ( * it ) - > name ) ;
}
else
2017-11-20 19:33:12 +00:00
update_error ( it ) ;
2017-04-01 07:20:54 +00:00
}
else
error = true ;
2017-11-20 19:33:12 +00:00
/// Restore "right" parts
2018-02-19 15:31:43 +00:00
for ( auto it = it_middle ; it ! = data_parts_by_info . end ( ) & & part - > contains ( * * it ) ; + + it )
2017-04-01 07:20:54 +00:00
{
2017-08-14 18:16:11 +00:00
if ( ( * it ) - > info . min_block < pos )
2017-04-01 07:20:54 +00:00
continue ;
2017-11-20 19:33:12 +00:00
if ( ! is_appropriate_state ( ( * it ) - > state ) )
{
update_error ( it ) ;
continue ;
}
2017-08-14 18:16:11 +00:00
if ( ( * it ) - > info . min_block > pos )
2017-11-20 19:33:12 +00:00
update_error ( it ) ;
2017-09-11 22:40:51 +00:00
if ( ( * it ) - > state ! = DataPartState : : Committed )
{
addPartContributionToColumnSizes ( * it ) ;
2017-11-20 19:33:12 +00:00
modifyPartState ( it , DataPartState : : Committed ) ;
2017-09-11 22:40:51 +00:00
}
2017-08-14 18:16:11 +00:00
pos = ( * it ) - > info . max_block + 1 ;
2017-04-01 07:20:54 +00:00
restored . push_back ( ( * it ) - > name ) ;
}
2017-08-14 18:16:11 +00:00
if ( pos ! = part - > info . max_block + 1 )
2017-04-01 07:20:54 +00:00
error = true ;
for ( const String & name : restored )
{
LOG_INFO ( log , " Activated part " < < name ) ;
}
if ( error )
2017-11-20 19:33:12 +00:00
{
LOG_ERROR ( log , " The set of parts restored in place of " < < part - > name < < " looks incomplete. "
< < " There might or might not be a data loss. "
< < ( error_parts . empty ( ) ? " " : " Suspicious parts: " + error_parts ) ) ;
}
2017-04-01 07:20:54 +00:00
}
2014-03-13 17:44:00 +00:00
}
2014-09-19 11:44:29 +00:00
2018-09-20 14:30:52 +00:00
/// Tries to delete an Outdated part from disk right away, but only if nobody
/// else holds a reference to it. Takes ownership of the caller's pointer.
void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part)
{
    DataPartPtr part_to_delete;
    {
        auto lock = lockParts();

        LOG_TRACE(log, "Trying to immediately remove part " << part->getNameWithState());

        auto it = data_parts_by_info.find(part->info);
        if (it == data_parts_by_info.end() || (*it).get() != part.get())
            throw Exception("Part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR);

        /// Drop the caller's reference so that the unique() check below can succeed.
        part.reset();

        /// Only an Outdated part referenced solely by the parts set may be removed now.
        if (!((*it)->state == DataPartState::Outdated && it->unique()))
            return;

        modifyPartState(it, DataPartState::Deleting);

        part_to_delete = *it;
    }

    /// The filesystem removal is done outside the parts lock.
    try
    {
        part_to_delete->remove();
    }
    catch (...)
    {
        /// Revert the Deleting state so the regular cleanup can retry later.
        rollbackDeletingParts({part_to_delete});
        throw;
    }

    removePartsFinally({part_to_delete});
    LOG_TRACE(log, "Removed part " << part_to_delete->name);
}
2015-11-18 21:37:28 +00:00
/// Sums bytes_on_disk over all committed (active) parts.
size_t MergeTreeData::getTotalActiveSizeInBytes() const
{
    size_t total_bytes = 0;
    {
        auto parts_lock = lockParts();

        for (const auto & part : getDataPartsStateRange(DataPartState::Committed))
            total_bytes += part->bytes_on_disk;
    }

    return total_bytes;
}
2014-04-09 16:32:32 +00:00
2019-10-28 17:27:43 +00:00
/// Sums rows_count over all committed (active) parts.
size_t MergeTreeData::getTotalActiveSizeInRows() const
{
    size_t total_rows = 0;
    {
        auto parts_lock = lockParts();

        for (const auto & part : getDataPartsStateRange(DataPartState::Committed))
            total_rows += part->rows_count;
    }

    return total_rows;
}
2019-05-02 14:48:54 +00:00
/// Counts committed (active) parts across all partitions.
size_t MergeTreeData::getPartsCount() const
{
    auto parts_lock = lockParts();

    size_t count = 0;
    for (const auto & part [[maybe_unused]] : getDataPartsStateRange(DataPartState::Committed))
        ++count;

    return count;
}
2017-08-14 18:16:11 +00:00
size_t MergeTreeData : : getMaxPartsCountForPartition ( ) const
2014-04-11 16:56:49 +00:00
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-04-01 07:20:54 +00:00
size_t res = 0 ;
size_t cur_count = 0 ;
2017-08-14 18:16:11 +00:00
const String * cur_partition_id = nullptr ;
2017-04-01 07:20:54 +00:00
2017-11-20 19:33:12 +00:00
for ( const auto & part : getDataPartsStateRange ( DataPartState : : Committed ) )
2017-04-01 07:20:54 +00:00
{
2017-08-14 18:16:11 +00:00
if ( cur_partition_id & & part - > info . partition_id = = * cur_partition_id )
2017-04-01 07:20:54 +00:00
{
+ + cur_count ;
}
else
{
2017-08-14 18:16:11 +00:00
cur_partition_id = & part - > info . partition_id ;
2017-04-01 07:20:54 +00:00
cur_count = 1 ;
}
res = std : : max ( res , cur_count ) ;
}
return res ;
2014-04-11 16:56:49 +00:00
}
2016-01-30 00:57:35 +00:00
2018-07-31 12:34:34 +00:00
std : : optional < Int64 > MergeTreeData : : getMinPartDataVersion ( ) const
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2018-07-31 12:34:34 +00:00
std : : optional < Int64 > result ;
2019-05-02 14:48:54 +00:00
for ( const auto & part : getDataPartsStateRange ( DataPartState : : Committed ) )
2018-07-31 12:34:34 +00:00
{
if ( ! result | | * result > part - > info . getDataVersion ( ) )
result = part - > info . getDataVersion ( ) ;
}
return result ;
}
2019-08-07 15:21:45 +00:00
/// Backpressure for inserts: throws if there are too many parts (in total, or in
/// one partition), and otherwise delays the insert by an amount that grows with
/// the part count. If `until` is given, the delay waits on that event (so it can
/// be cut short); otherwise the calling thread simply sleeps.
void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until) const
{
    const auto settings = getSettings();
    const size_t parts_count_in_total = getPartsCount();
    if (parts_count_in_total >= settings->max_parts_in_total)
    {
        ProfileEvents::increment(ProfileEvents::RejectedInserts);
        throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified with 'max_parts_in_total' setting in <merge_tree> element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS);
    }

    const size_t parts_count_in_partition = getMaxPartsCountForPartition();

    /// Below the delay threshold: no backpressure at all.
    if (parts_count_in_partition < settings->parts_to_delay_insert)
        return;

    if (parts_count_in_partition >= settings->parts_to_throw_insert)
    {
        ProfileEvents::increment(ProfileEvents::RejectedInserts);
        throw Exception("Too many parts (" + toString(parts_count_in_partition) + "). Merges are processing significantly slower than inserts.", ErrorCodes::TOO_MANY_PARTS);
    }

    /// Delay grows from ~1 ms up to max_delay_to_insert seconds as the part count
    /// moves from parts_to_delay_insert towards parts_to_throw_insert (power curve).
    const size_t max_k = settings->parts_to_throw_insert - settings->parts_to_delay_insert; /// always > 0
    const size_t k = 1 + parts_count_in_partition - settings->parts_to_delay_insert; /// from 1 to max_k
    const double delay_milliseconds = ::pow(settings->max_delay_to_insert * 1000, static_cast<double>(k) / max_k);

    ProfileEvents::increment(ProfileEvents::DelayedInserts);
    ProfileEvents::increment(ProfileEvents::DelayedInsertsMilliseconds, delay_milliseconds);

    CurrentMetrics::Increment metric_increment(CurrentMetrics::DelayedInserts);

    LOG_INFO(log, "Delaying inserting block by "
        << std::fixed << std::setprecision(4) << delay_milliseconds << " ms. because there are " << parts_count_in_partition << " parts");

    if (until)
        until->tryWait(delay_milliseconds);
    else
        std::this_thread::sleep_for(std::chrono::milliseconds(static_cast<size_t>(delay_milliseconds)));
}
2018-05-21 23:17:57 +00:00
void MergeTreeData : : throwInsertIfNeeded ( ) const
2014-04-03 11:48:28 +00:00
{
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2019-05-02 14:48:54 +00:00
const size_t parts_count_in_total = getPartsCount ( ) ;
2019-08-13 10:29:31 +00:00
if ( parts_count_in_total > = settings - > max_parts_in_total )
2019-05-02 14:48:54 +00:00
{
ProfileEvents : : increment ( ProfileEvents : : RejectedInserts ) ;
throw Exception ( " Too many parts ( " + toString ( parts_count_in_total ) + " ) in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified with 'max_parts_in_total' setting in <merge_tree> element in config.xml or with per-table setting. " , ErrorCodes : : TOO_MANY_PARTS ) ;
}
const size_t parts_count_in_partition = getMaxPartsCountForPartition ( ) ;
2017-04-01 07:20:54 +00:00
2019-08-13 10:29:31 +00:00
if ( parts_count_in_partition > = settings - > parts_to_throw_insert )
2018-05-21 23:17:57 +00:00
{
ProfileEvents : : increment ( ProfileEvents : : RejectedInserts ) ;
2019-05-02 14:48:54 +00:00
throw Exception ( " Too many parts ( " + toString ( parts_count_in_partition ) + " ). Merges are processing significantly slower than inserts. " , ErrorCodes : : TOO_MANY_PARTS ) ;
2018-05-21 23:17:57 +00:00
}
}
2017-04-01 07:20:54 +00:00
2018-05-21 13:49:54 +00:00
/// Finds a part in the given state that contains (or equals) `part_info`,
/// or nullptr if none exists. Caller must hold the parts lock.
MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(
    const MergeTreePartInfo & part_info, MergeTreeData::DataPartState state, DataPartsLock & /*lock*/) const
{
    auto current_state_parts_range = getDataPartsStateRange(state);

    /// The part can be covered only by the previous or the next one in data_parts.
    auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo{state, part_info});

    /// Check the part at/after part_info in the ordering.
    if (it != current_state_parts_range.end())
    {
        if ((*it)->info == part_info)
            return *it;
        if ((*it)->info.contains(part_info))
            return *it;
    }

    /// Check the part just before part_info in the ordering.
    if (it != current_state_parts_range.begin())
    {
        --it;
        if ((*it)->info.contains(part_info))
            return *it;
    }

    return nullptr;
}
2019-08-19 14:40:12 +00:00
/// Replaces the committed part with the same name as `part_copy` by `part_copy`
/// in the in-memory parts set. The old part is marked DeleteOnDestroy and a
/// marker file is created in its directory so it is cleaned up later.
/// Throws NO_SUCH_DATA_PART if no active part with that name exists.
void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy)
{
    auto lock = lockParts();

    for (auto original_active_part : getDataPartsStateRange(DataPartState::Committed))
    {
        if (part_copy->name == original_active_part->name)
        {
            auto active_part_it = data_parts_by_info.find(original_active_part->info);
            if (active_part_it == data_parts_by_info.end())
                throw Exception("Cannot swap part '" + part_copy->name + "', no such active part.", ErrorCodes::NO_SUCH_DATA_PART);

            /// Remove the old part from the indexes, then insert the replacement as Committed.
            modifyPartState(original_active_part, DataPartState::DeleteOnDestroy);
            data_parts_indexes.erase(active_part_it);

            auto part_it = data_parts_indexes.insert(part_copy).first;
            modifyPartState(part_it, DataPartState::Committed);

            /// Drop a marker file so the old part's directory is removed on part destruction.
            Poco::Path marker_path(Poco::Path(original_active_part->getFullPath()), DELETE_ON_DESTROY_MARKER_PATH);
            try
            {
                Poco::File(marker_path).createFile();
            }
            catch (Poco::Exception & e)
            {
                /// Failing to create the marker is not fatal for the swap itself; just log it.
                LOG_ERROR(log, e.what() << " (while creating DeleteOnDestroy marker: " + backQuote(marker_path.toString()) + ")");
            }

            return;
        }
    }

    throw Exception("Cannot swap part '" + part_copy->name + "', no such active part.", ErrorCodes::NO_SUCH_DATA_PART);
}
2019-10-28 17:27:43 +00:00
/// Convenience overload: locks the parts set and searches among Committed parts.
MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const MergeTreePartInfo & part_info) const
{
    auto parts_lock = lockParts();
    return getActiveContainingPart(part_info, DataPartState::Committed, parts_lock);
}
2019-10-28 17:27:43 +00:00
/// Convenience overload: parses the part name first, then looks up by part info.
MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name) const
{
    const auto parsed_info = MergeTreePartInfo::fromPartName(part_name, format_version);
    return getActiveContainingPart(parsed_info);
}
2018-05-21 13:49:54 +00:00
/// Returns all parts in the given state that belong to the given partition.
MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartition(MergeTreeData::DataPartState state, const String & partition_id)
{
    DataPartStateAndPartitionID state_with_partition{state, partition_id};

    auto parts_lock = lockParts();

    /// Parts with equal (state, partition) form a contiguous range in the index.
    auto range_begin = data_parts_by_state_and_info.lower_bound(state_with_partition);
    auto range_end = data_parts_by_state_and_info.upper_bound(state_with_partition);
    return DataPartsVector(range_begin, range_end);
}
/// Looks up a part by its exact info; returns it only if its current state
/// is one of `valid_states`, otherwise nullptr.
MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states)
{
    auto parts_lock = lockParts();

    auto it = data_parts_by_info.find(part_info);
    if (it == data_parts_by_info.end())
        return nullptr;

    const auto & found_part = *it;
    for (auto state : valid_states)
        if (found_part->state == state)
            return found_part;

    return nullptr;
}
2018-05-21 13:49:54 +00:00
/// Convenience overload: parses the part name, then delegates to the info-based lookup.
MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states)
{
    const auto parsed_info = MergeTreePartInfo::fromPartName(part_name, format_version);
    return getPartIfExists(parsed_info, valid_states);
}
2016-01-28 01:00:27 +00:00
2019-07-23 13:34:17 +00:00
/// Creates a DataPart object for the directory at `relative_path` on `disk`,
/// loads its metadata (fixing it if needed) and returns the part.
MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const DiskSpace::DiskPtr & disk, const String & relative_path)
{
    const String part_dir_name = Poco::Path(relative_path).getFileName();

    MutableDataPartPtr loaded_part = std::make_shared<DataPart>(*this, disk, part_dir_name);
    loaded_part->relative_path = relative_path;

    loadPartAndFixMetadata(loaded_part);
    return loaded_part;
}
/// Loads the metadata of an already constructed part object, re-creating
/// files that may be missing or known to be written incorrectly:
/// columns.txt is always rebuilt, and checksums.txt is computed (with a data
/// check) if absent, written via a .tmp file and then renamed into place.
void MergeTreeData::loadPartAndFixMetadata(MutableDataPartPtr part)
{
    String full_part_path = part->getFullPath();

    /// Earlier the list of columns was written incorrectly. Delete it and re-create.
    if (Poco::File(full_part_path + "columns.txt").exists())
        Poco::File(full_part_path + "columns.txt").remove();

    part->loadColumnsChecksumsIndexes(false, true);
    part->modification_time = Poco::File(full_part_path).getLastModified().epochTime();

    /// If the checksums file is not present, calculate the checksums and write them to disk.
    /// Check the data while we are at it.
    if (part->checksums.empty())
    {
        part->checksums = checkDataPart(part, false, primary_key_data_types, skip_indices);

        {
            /// Write to a temporary file first so a crash cannot leave a truncated checksums.txt.
            WriteBufferFromFile out(full_part_path + "checksums.txt.tmp", 4096);
            part->checksums.write(out);
        }

        Poco::File(full_part_path + "checksums.txt.tmp").renameTo(full_part_path + "checksums.txt");
    }
}
2014-03-27 11:29:40 +00:00
2017-05-14 23:14:21 +00:00
void MergeTreeData : : calculateColumnSizesImpl ( )
2014-09-19 11:44:29 +00:00
{
2017-04-01 07:20:54 +00:00
column_sizes . clear ( ) ;
2014-09-19 11:44:29 +00:00
2017-09-21 21:51:17 +00:00
/// Take into account only committed parts
2017-11-20 19:33:12 +00:00
auto committed_parts_range = getDataPartsStateRange ( DataPartState : : Committed ) ;
for ( const auto & part : committed_parts_range )
2017-04-01 07:20:54 +00:00
addPartContributionToColumnSizes ( part ) ;
2014-09-19 11:44:29 +00:00
}
void MergeTreeData : : addPartContributionToColumnSizes ( const DataPartPtr & part )
{
2017-11-01 19:56:07 +00:00
std : : shared_lock < std : : shared_mutex > lock ( part - > columns_lock ) ;
2017-04-01 07:20:54 +00:00
2018-03-26 14:18:04 +00:00
for ( const auto & column : part - > columns )
2017-04-01 07:20:54 +00:00
{
2019-07-16 17:13:12 +00:00
ColumnSize & total_column_size = column_sizes [ column . name ] ;
ColumnSize part_column_size = part - > getColumnSize ( column . name , * column . type ) ;
2018-03-26 14:18:04 +00:00
total_column_size . add ( part_column_size ) ;
2017-04-01 07:20:54 +00:00
}
2014-09-19 11:44:29 +00:00
}
void MergeTreeData : : removePartContributionToColumnSizes ( const DataPartPtr & part )
{
2018-03-26 14:18:04 +00:00
std : : shared_lock < std : : shared_mutex > lock ( part - > columns_lock ) ;
2017-04-01 07:20:54 +00:00
2018-03-26 14:18:04 +00:00
for ( const auto & column : part - > columns )
2017-04-01 07:20:54 +00:00
{
2019-07-16 17:13:12 +00:00
ColumnSize & total_column_size = column_sizes [ column . name ] ;
ColumnSize part_column_size = part - > getColumnSize ( column . name , * column . type ) ;
2017-04-01 07:20:54 +00:00
2018-03-26 14:18:04 +00:00
auto log_subtract = [ & ] ( size_t & from , size_t value , const char * field )
2017-04-01 07:20:54 +00:00
{
2018-03-26 14:18:04 +00:00
if ( value > from )
LOG_ERROR ( log , " Possibly incorrect column size subtraction: "
< < from < < " - " < < value < < " = " < < from - value
< < " , column: " < < column . name < < " , field: " < < field ) ;
from - = value ;
} ;
2017-04-01 07:20:54 +00:00
2018-03-26 14:18:04 +00:00
log_subtract ( total_column_size . data_compressed , part_column_size . data_compressed , " .data_compressed " ) ;
log_subtract ( total_column_size . data_uncompressed , part_column_size . data_uncompressed , " .data_uncompressed " ) ;
log_subtract ( total_column_size . marks , part_column_size . marks , " .marks " ) ;
2017-04-01 07:20:54 +00:00
}
2014-09-19 11:44:29 +00:00
}
2014-10-03 17:57:01 +00:00
2019-08-27 20:43:08 +00:00
void MergeTreeData : : freezePartition ( const ASTPtr & partition_ast , const String & with_name , const Context & context , TableStructureReadLockHolder & )
2014-11-11 04:11:07 +00:00
{
2017-11-20 04:15:43 +00:00
std : : optional < String > prefix ;
2017-09-18 20:49:21 +00:00
String partition_id ;
2017-10-12 18:21:17 +00:00
2017-09-07 16:21:06 +00:00
if ( format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING )
2017-09-06 20:34:26 +00:00
{
2017-09-18 20:49:21 +00:00
/// Month-partitioning specific - partition value can represent a prefix of the partition to freeze.
2019-03-15 16:14:13 +00:00
if ( const auto * partition_lit = partition_ast - > as < ASTPartition & > ( ) . value - > as < ASTLiteral > ( ) )
2017-09-06 20:34:26 +00:00
prefix = partition_lit - > value . getType ( ) = = Field : : Types : : UInt64
? toString ( partition_lit - > value . get < UInt64 > ( ) )
: partition_lit - > value . safeGet < String > ( ) ;
2017-09-18 20:49:21 +00:00
else
partition_id = getPartitionIDFromQuery ( partition_ast , context ) ;
2017-09-06 20:34:26 +00:00
}
else
2017-09-18 20:49:21 +00:00
partition_id = getPartitionIDFromQuery ( partition_ast , context ) ;
2017-09-06 20:34:26 +00:00
2017-09-18 20:49:21 +00:00
if ( prefix )
2017-10-04 00:22:00 +00:00
LOG_DEBUG ( log , " Freezing parts with prefix " + * prefix ) ;
2017-09-18 20:49:21 +00:00
else
LOG_DEBUG ( log , " Freezing parts with partition ID " + partition_id ) ;
2017-04-01 07:20:54 +00:00
2018-11-01 17:13:01 +00:00
freezePartitionsByMatcher (
2018-11-20 17:05:22 +00:00
[ & prefix , & partition_id ] ( const DataPartPtr & part )
2018-11-01 17:47:19 +00:00
{
2018-11-01 17:13:01 +00:00
if ( prefix )
return startsWith ( part - > info . partition_id , * prefix ) ;
else
return part - > info . partition_id = = partition_id ;
} ,
with_name ,
context ) ;
2014-11-11 04:11:07 +00:00
}
2019-08-20 09:59:19 +00:00
void MergeTreeData : : movePartitionToDisk ( const ASTPtr & partition , const String & name , bool moving_part , const Context & context )
2019-07-18 15:19:03 +00:00
{
2019-08-20 09:59:19 +00:00
String partition_id ;
2019-07-23 13:34:17 +00:00
2019-08-20 09:59:19 +00:00
if ( moving_part )
partition_id = partition - > as < ASTLiteral & > ( ) . value . safeGet < String > ( ) ;
else
partition_id = getPartitionIDFromQuery ( partition , context ) ;
DataPartsVector parts ;
if ( moving_part )
{
2019-12-09 21:38:41 +00:00
auto part_info = MergeTreePartInfo : : fromPartName ( partition_id , format_version ) ;
parts . push_back ( getActiveContainingPart ( part_info ) ) ;
if ( ! parts . back ( ) | | parts . back ( ) - > name ! = part_info . getPartName ( ) )
2019-08-20 19:04:58 +00:00
throw Exception ( " Part " + partition_id + " is not exists or not active " , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2019-08-20 09:59:19 +00:00
}
else
parts = getDataPartsVectorInPartition ( MergeTreeDataPartState : : Committed , partition_id ) ;
2019-07-23 13:34:17 +00:00
auto disk = storage_policy - > getDiskByName ( name ) ;
if ( ! disk )
throw Exception ( " Disk " + name + " does not exists on policy " + storage_policy - > getName ( ) , ErrorCodes : : UNKNOWN_DISK ) ;
2019-10-24 08:52:33 +00:00
parts . erase ( std : : remove_if ( parts . begin ( ) , parts . end ( ) , [ & ] ( auto part_ptr )
{
return part_ptr - > disk - > getName ( ) = = disk - > getName ( ) ;
} ) , parts . end ( ) ) ;
2019-10-22 14:45:01 +00:00
2019-12-09 21:38:41 +00:00
if ( parts . empty ( ) )
throw Exception ( " Nothing to move " , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2019-10-24 08:52:33 +00:00
if ( parts . empty ( ) )
2019-08-20 09:59:19 +00:00
{
2019-10-24 10:56:32 +00:00
String no_parts_to_move_message ;
if ( moving_part )
no_parts_to_move_message = " Part ' " + partition_id + " ' is already on disk ' " + disk - > getName ( ) + " ' " ;
else
no_parts_to_move_message = " All parts of partition ' " + partition_id + " ' are already on disk ' " + disk - > getName ( ) + " ' " ;
throw Exception ( no_parts_to_move_message , ErrorCodes : : UNKNOWN_DISK ) ;
2019-08-20 09:59:19 +00:00
}
2019-07-23 13:34:17 +00:00
2019-09-05 15:53:23 +00:00
if ( ! movePartsToSpace ( parts , std : : static_pointer_cast < const DiskSpace : : Space > ( disk ) ) )
2019-12-09 21:38:41 +00:00
throw Exception ( " Cannot move parts because moves are manually disabled " , ErrorCodes : : ABORTED ) ;
2019-07-23 13:34:17 +00:00
}
2019-08-20 09:59:19 +00:00
void MergeTreeData : : movePartitionToVolume ( const ASTPtr & partition , const String & name , bool moving_part , const Context & context )
2019-07-23 13:34:17 +00:00
{
2019-08-20 09:59:19 +00:00
String partition_id ;
if ( moving_part )
partition_id = partition - > as < ASTLiteral & > ( ) . value . safeGet < String > ( ) ;
else
partition_id = getPartitionIDFromQuery ( partition , context ) ;
DataPartsVector parts ;
if ( moving_part )
{
2019-12-09 21:38:41 +00:00
auto part_info = MergeTreePartInfo : : fromPartName ( partition_id , format_version ) ;
parts . emplace_back ( getActiveContainingPart ( part_info ) ) ;
if ( ! parts . back ( ) | | parts . back ( ) - > name ! = part_info . getPartName ( ) )
2019-08-20 19:04:58 +00:00
throw Exception ( " Part " + partition_id + " is not exists or not active " , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2019-08-20 09:59:19 +00:00
}
else
parts = getDataPartsVectorInPartition ( MergeTreeDataPartState : : Committed , partition_id ) ;
2019-07-23 13:34:17 +00:00
auto volume = storage_policy - > getVolumeByName ( name ) ;
if ( ! volume )
2019-07-30 16:15:57 +00:00
throw Exception ( " Volume " + name + " does not exists on policy " + storage_policy - > getName ( ) , ErrorCodes : : UNKNOWN_DISK ) ;
2019-07-23 13:34:17 +00:00
2019-12-09 21:38:41 +00:00
if ( parts . empty ( ) )
throw Exception ( " Nothing to move " , ErrorCodes : : NO_SUCH_DATA_PART ) ;
2019-10-24 08:52:33 +00:00
parts . erase ( std : : remove_if ( parts . begin ( ) , parts . end ( ) , [ & ] ( auto part_ptr )
2019-10-22 14:45:01 +00:00
{
2019-10-24 08:52:33 +00:00
for ( const auto & disk : volume - > disks )
2019-10-22 14:45:01 +00:00
{
2019-10-24 08:52:33 +00:00
if ( part_ptr - > disk - > getName ( ) = = disk - > getName ( ) )
{
return true ;
}
2019-10-22 14:45:01 +00:00
}
2019-10-24 08:52:33 +00:00
return false ;
} ) , parts . end ( ) ) ;
2019-10-22 14:45:01 +00:00
2019-10-24 08:52:33 +00:00
if ( parts . empty ( ) )
2019-10-22 14:45:01 +00:00
{
2019-10-24 10:56:32 +00:00
String no_parts_to_move_message ;
if ( moving_part )
no_parts_to_move_message = " Part ' " + partition_id + " ' is already on volume ' " + volume - > getName ( ) + " ' " ;
else
no_parts_to_move_message = " All parts of partition ' " + partition_id + " ' are already on volume ' " + volume - > getName ( ) + " ' " ;
throw Exception ( no_parts_to_move_message , ErrorCodes : : UNKNOWN_DISK ) ;
2019-10-22 14:45:01 +00:00
}
2019-07-23 13:34:17 +00:00
2019-09-05 15:53:23 +00:00
if ( ! movePartsToSpace ( parts , std : : static_pointer_cast < const DiskSpace : : Space > ( volume ) ) )
2019-12-09 21:38:41 +00:00
throw Exception ( " Cannot move parts because moves are manually disabled " , ErrorCodes : : ABORTED ) ;
2019-07-18 15:19:03 +00:00
}
/// Computes the partition ID from an ALTER-style PARTITION clause.
/// The clause may carry an explicit ID (PARTITION ID '...') or a value/tuple
/// that is re-parsed against the partition key's expected field types.
/// Throws INVALID_PARTITION_VALUE on malformed input or on a mismatch with an
/// existing part that has the same partition ID.
String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context & context)
{
    const auto & partition_ast = ast->as<ASTPartition &>();

    /// PARTITION ID '...' form: the ID is given verbatim, nothing to parse.
    if (!partition_ast.value)
        return partition_ast.id;

    if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
    {
        /// Month-partitioning specific - partition ID can be passed in the partition value.
        const auto * partition_lit = partition_ast.value->as<ASTLiteral>();
        if (partition_lit && partition_lit->value.getType() == Field::Types::String)
        {
            String partition_id = partition_lit->value.get<String>();
            /// Old-style partition IDs are exactly YYYYMM.
            if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
                throw Exception(
                    "Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM",
                    ErrorCodes::INVALID_PARTITION_VALUE);
            return partition_id;
        }
    }

    /// Re-parse partition key fields using the information about expected field types.

    size_t fields_count = partition_key_sample.columns();
    if (partition_ast.fields_count != fields_count)
        throw Exception(
            "Wrong number of fields in the partition expression: " + toString(partition_ast.fields_count) +
            ", must be: " + toString(fields_count),
            ErrorCodes::INVALID_PARTITION_VALUE);

    const FormatSettings format_settings;
    Row partition_row(fields_count);

    if (fields_count)
    {
        /// Wrap the raw field list in parentheses and feed it through the
        /// "Values" input format, which knows the partition key column types.
        ReadBufferFromMemory left_paren_buf("(", 1);
        ReadBufferFromMemory fields_buf(partition_ast.fields_str.data(), partition_ast.fields_str.size());
        ReadBufferFromMemory right_paren_buf(")", 1);
        ConcatReadBuffer buf({&left_paren_buf, &fields_buf, &right_paren_buf});

        auto input_stream = FormatFactory::instance().getInput("Values", buf, partition_key_sample, context, context.getSettingsRef().max_block_size);

        auto block = input_stream->read();
        if (!block || !block.rows())
            throw Exception(
                "Could not parse partition value: `" + partition_ast.fields_str + "`",
                ErrorCodes::INVALID_PARTITION_VALUE);

        for (size_t i = 0; i < fields_count; ++i)
            block.getByPosition(i).column->get(0, partition_row[i]);
    }

    MergeTreePartition partition(std::move(partition_row));
    String partition_id = partition.getID(*this);

    {
        /// Sanity check: if a part with this partition ID already exists, its
        /// partition value must match, otherwise the user specified an
        /// inconsistent value (e.g. a hash collision or wrong tuple).
        auto data_parts_lock = lockParts();
        DataPartPtr existing_part_in_partition = getAnyPartInPartition(partition_id, data_parts_lock);
        if (existing_part_in_partition && existing_part_in_partition->partition.value != partition.value)
        {
            WriteBufferFromOwnString buf;
            writeCString("Parsed partition value: ", buf);
            partition.serializeText(*this, buf, format_settings);
            writeCString(" doesn't match partition value for an existing part with the same partition ID: ", buf);
            writeString(existing_part_in_partition->name, buf);
            throw Exception(buf.str(), ErrorCodes::INVALID_PARTITION_VALUE);
        }
    }

    return partition_id;
}
2017-11-20 19:33:12 +00:00
MergeTreeData : : DataPartsVector MergeTreeData : : getDataPartsVector ( const DataPartStates & affordable_states , DataPartStateVector * out_states ) const
2017-09-11 22:40:51 +00:00
{
DataPartsVector res ;
2017-11-20 19:33:12 +00:00
DataPartsVector buf ;
2017-09-11 22:40:51 +00:00
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-11-20 19:33:12 +00:00
for ( auto state : affordable_states )
{
2019-01-09 15:44:20 +00:00
std : : swap ( buf , res ) ;
2017-11-20 19:33:12 +00:00
res . clear ( ) ;
auto range = getDataPartsStateRange ( state ) ;
std : : merge ( range . begin ( ) , range . end ( ) , buf . begin ( ) , buf . end ( ) , std : : back_inserter ( res ) , LessDataPart ( ) ) ;
}
if ( out_states ! = nullptr )
{
out_states - > resize ( res . size ( ) ) ;
for ( size_t i = 0 ; i < res . size ( ) ; + + i )
( * out_states ) [ i ] = res [ i ] - > state ;
}
2017-09-11 22:40:51 +00:00
}
2017-11-20 19:33:12 +00:00
2017-09-11 22:40:51 +00:00
return res ;
}
2017-11-20 19:33:12 +00:00
MergeTreeData : : DataPartsVector MergeTreeData : : getAllDataPartsVector ( MergeTreeData : : DataPartStateVector * out_states ) const
2017-10-06 16:48:41 +00:00
{
DataPartsVector res ;
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2018-02-19 15:31:43 +00:00
res . assign ( data_parts_by_info . begin ( ) , data_parts_by_info . end ( ) ) ;
2017-10-06 16:48:41 +00:00
2017-11-20 19:33:12 +00:00
if ( out_states ! = nullptr )
{
out_states - > resize ( res . size ( ) ) ;
for ( size_t i = 0 ; i < res . size ( ) ; + + i )
( * out_states ) [ i ] = res [ i ] - > state ;
}
2017-10-06 16:48:41 +00:00
}
2017-11-20 19:33:12 +00:00
2017-10-06 16:48:41 +00:00
return res ;
}
2019-05-20 16:24:36 +00:00
std : : vector < DetachedPartInfo >
MergeTreeData : : getDetachedParts ( ) const
{
std : : vector < DetachedPartInfo > res ;
2019-11-19 06:44:10 +00:00
for ( const auto & [ path , disk ] : getDataPathsWithDisks ( ) )
2019-05-20 16:24:36 +00:00
{
2019-06-09 12:38:54 +00:00
for ( Poco : : DirectoryIterator it ( path + " detached " ) ;
it ! = Poco : : DirectoryIterator ( ) ; + + it )
{
auto dir_name = it . name ( ) ;
2019-05-20 16:24:36 +00:00
2019-06-09 12:38:54 +00:00
res . emplace_back ( ) ;
2019-08-29 16:17:47 +00:00
auto & part = res . back ( ) ;
2019-05-20 16:24:36 +00:00
2019-08-29 16:17:47 +00:00
DetachedPartInfo : : tryParseDetachedPartName ( dir_name , part , format_version ) ;
2019-11-19 06:44:10 +00:00
part . disk = disk - > getName ( ) ;
2019-08-29 16:17:47 +00:00
}
2019-05-20 16:24:36 +00:00
}
return res ;
}
2019-07-26 20:04:45 +00:00
void MergeTreeData : : validateDetachedPartName ( const String & name ) const
{
if ( name . find ( ' / ' ) ! = std : : string : : npos | | name = = " . " | | name = = " .. " )
2019-09-11 17:17:10 +00:00
throw DB : : Exception ( " Invalid part name ' " + name + " ' " , ErrorCodes : : INCORRECT_FILE_NAME ) ;
2019-05-20 16:24:36 +00:00
2019-08-29 16:17:47 +00:00
String full_path = getFullPathForPart ( name , " detached/ " ) ;
if ( full_path . empty ( ) | | ! Poco : : File ( full_path + name ) . exists ( ) )
2019-07-26 20:04:45 +00:00
throw DB : : Exception ( " Detached part \" " + name + " \" not found " , ErrorCodes : : BAD_DATA_PART_NAME ) ;
2019-05-20 16:24:36 +00:00
2019-07-31 14:44:55 +00:00
if ( startsWith ( name , " attaching_ " ) | | startsWith ( name , " deleting_ " ) )
throw DB : : Exception ( " Cannot drop part " + name + " : "
" most likely it is used by another DROP or ATTACH query. " ,
ErrorCodes : : BAD_DATA_PART_NAME ) ;
2019-07-26 20:04:45 +00:00
}
/// Drops detached parts: either one part by exact directory name (part == true)
/// or all detached parts of the given partition. Parts are first renamed to
/// "deleting_<name>" to guard against a concurrent ATTACH/DROP of the same
/// directories, then removed from disk.
void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, const Context & context)
{
    PartsTemporaryRename renamed_parts(*this, "detached/");

    if (part)
    {
        String part_name = partition->as<ASTLiteral &>().value.safeGet<String>();
        validateDetachedPartName(part_name);
        renamed_parts.addPart(part_name, "deleting_" + part_name);
    }
    else
    {
        String partition_id = getPartitionIDFromQuery(partition, context);
        DetachedPartsInfo detached_parts = getDetachedParts();
        for (const auto & part_info : detached_parts)
            /// Skip unparsable names and parts already claimed by another ATTACH/DROP.
            if (part_info.valid_name && part_info.partition_id == partition_id
                && part_info.prefix != "attaching" && part_info.prefix != "deleting")
                renamed_parts.addPart(part_info.dir_name, "deleting_" + part_info.dir_name);
    }

    LOG_DEBUG(log, "Will drop " << renamed_parts.old_and_new_names.size() << " detached parts.");

    renamed_parts.tryRenameAll();

    for (auto & [old_name, new_name] : renamed_parts.old_and_new_names)
    {
        /// Remove the renamed ("deleting_*") directory recursively.
        Poco::File(renamed_parts.old_part_name_to_full_path[old_name] + "detached/" + new_name).remove(true);
        LOG_DEBUG(log, "Dropped detached part " << old_name);
        /// Clearing the name tells PartsTemporaryRename's destructor not to rename it back.
        old_name.clear();
    }
}
/// Collects detached parts to be attached (one exact part, or all active parts
/// of a partition), renames them to "attaching_<name>" to guard against
/// concurrent DROP DETACHED / ATTACH, then loads and checks each one.
/// Returns the loaded (not yet committed) parts.
MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part,
        const Context & context, PartsTemporaryRename & renamed_parts)
{
    String source_dir = "detached/";

    /// Remembers which disk each candidate part directory was found on.
    std::map<String, DiskSpace::DiskPtr> name_to_disk;

    /// Let's compose a list of parts that should be added.
    if (attach_part)
    {
        String part_id = partition->as<ASTLiteral &>().value.safeGet<String>();
        validateDetachedPartName(part_id);
        renamed_parts.addPart(part_id, "attaching_" + part_id);
        if (MergeTreePartInfo::tryParsePartName(part_id, nullptr, format_version))
            name_to_disk[part_id] = getDiskForPart(part_id, source_dir);
    }
    else
    {
        String partition_id = getPartitionIDFromQuery(partition, context);
        LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir);
        ActiveDataPartSet active_parts(format_version);

        /// Scan the detached directory on every disk of the storage policy.
        const auto disks = storage_policy->getDisks();
        for (const DiskSpace::DiskPtr & disk : disks)
        {
            const auto full_path = getFullPathOnDisk(disk);
            for (Poco::DirectoryIterator it = Poco::DirectoryIterator(full_path + source_dir); it != Poco::DirectoryIterator(); ++it)
            {
                const String & name = it.name();
                MergeTreePartInfo part_info;
                // TODO what if name contains "_tryN" suffix?
                /// Parts with prefix in name (e.g. attaching_1_3_3_0, deleting_1_3_3_0) will be ignored
                if (!MergeTreePartInfo::tryParsePartName(name, &part_info, format_version)
                    || part_info.partition_id != partition_id)
                {
                    continue;
                }
                LOG_DEBUG(log, "Found part " << name);
                active_parts.add(name);
                name_to_disk[name] = disk;
            }
        }
        LOG_DEBUG(log, active_parts.size() << " of them are active");

        /// Inactive parts rename so they can not be attached in case of repeated ATTACH.
        for (const auto & [name, disk] : name_to_disk)
        {
            String containing_part = active_parts.getContainingPart(name);
            if (!containing_part.empty() && containing_part != name)
            {
                auto full_path = getFullPathOnDisk(disk);
                // TODO maybe use PartsTemporaryRename here?
                Poco::File(full_path + source_dir + name)
                    .renameTo(full_path + source_dir + "inactive_" + name);
            }
            else
                renamed_parts.addPart(name, "attaching_" + name);
        }
    }

    /// Try to rename all parts before attaching to prevent race with DROP DETACHED and another ATTACH.
    renamed_parts.tryRenameAll();

    /// Synchronously check that added parts exist and are not broken. We will write checksums.txt if it does not exist.
    LOG_DEBUG(log, "Checking parts");
    MutableDataPartsVector loaded_parts;
    loaded_parts.reserve(renamed_parts.old_and_new_names.size());
    for (const auto & part_names : renamed_parts.old_and_new_names)
    {
        LOG_DEBUG(log, "Checking part " << part_names.second);
        MutableDataPartPtr part = std::make_shared<DataPart>(*this, name_to_disk[part_names.first], part_names.first);
        part->relative_path = source_dir + part_names.second;
        loadPartAndFixMetadata(part);
        loaded_parts.push_back(part);
    }

    return loaded_parts;
}
2019-10-31 10:40:11 +00:00
namespace
2019-04-01 18:41:19 +00:00
{
2019-11-11 10:01:04 +00:00
inline DiskSpace : : ReservationPtr returnReservationOrThrowError ( UInt64 expected_size , DiskSpace : : ReservationPtr reservation )
2019-10-31 10:40:11 +00:00
{
2019-11-11 10:01:04 +00:00
if ( reservation )
return reservation ;
2019-10-31 10:40:11 +00:00
throw Exception ( " Cannot reserve " + formatReadableSizeWithBinarySuffix ( expected_size ) + " , not enough space " ,
ErrorCodes : : NOT_ENOUGH_SPACE ) ;
}
}
/// Reserves expected_size bytes (clamped up to the minimum estimation size)
/// somewhere on the storage policy, or throws NOT_ENOUGH_SPACE.
DiskSpace::ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) const
{
    expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
    return returnReservationOrThrowError(expected_size, storage_policy->reserve(expected_size));
}
/// Like tryReserveSpacePreferringMoveDestination, but throws NOT_ENOUGH_SPACE
/// instead of returning a null reservation.
DiskSpace::ReservationPtr MergeTreeData::reserveSpacePreferringMoveDestination(UInt64 expected_size,
        const MergeTreeDataPart::TTLInfos & ttl_infos,
        time_t time_of_move) const
{
    expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
    return returnReservationOrThrowError(
        expected_size, tryReserveSpacePreferringMoveDestination(expected_size, ttl_infos, time_of_move));
}
/// Tries to reserve expected_size bytes, preferring the destination dictated by
/// the applicable "TTL ... TO DISK/VOLUME" move rule (if any). Falls back to a
/// regular storage-policy reservation. Returns nullptr when nothing could be
/// reserved anywhere.
DiskSpace::ReservationPtr MergeTreeData::tryReserveSpacePreferringMoveDestination(UInt64 expected_size,
        const MergeTreeDataPart::TTLInfos & ttl_infos,
        time_t time_of_move) const
{
    expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);

    DiskSpace::ReservationPtr reservation;

    auto ttl_entry = selectMoveDestination(ttl_infos, time_of_move);
    if (ttl_entry != nullptr)
    {
        DiskSpace::SpacePtr destination_ptr = ttl_entry->getDestination(storage_policy);
        if (!destination_ptr)
        {
            /// The TTL rule references a disk/volume missing from the current policy:
            /// warn and fall through to the generic reservation below.
            if (ttl_entry->destination_type == PartDestinationType::VOLUME)
                LOG_WARNING(log, "Would like to reserve space on volume '"
                    << ttl_entry->destination_name << "' by TTL rule of table '"
                    << log_name << "' but volume was not found");
            else if (ttl_entry->destination_type == PartDestinationType::DISK)
                LOG_WARNING(log, "Would like to reserve space on disk '"
                    << ttl_entry->destination_name << "' by TTL rule of table '"
                    << log_name << "' but disk was not found");
        }
        else
        {
            reservation = destination_ptr->reserve(expected_size);
            if (reservation)
                return reservation;
            /// Destination is full: fall through to the generic reservation below.
        }
    }

    reservation = storage_policy->reserve(expected_size);

    return reservation;
}
2019-11-29 07:00:43 +00:00
DiskSpace : : ReservationPtr MergeTreeData : : reserveSpaceInSpecificSpace ( UInt64 expected_size , DiskSpace : : SpacePtr space ) const
2019-10-31 10:40:11 +00:00
{
expected_size = std : : max ( RESERVATION_MIN_ESTIMATION_SIZE , expected_size ) ;
2019-11-30 19:22:01 +00:00
auto reservation = tryReserveSpaceInSpecificSpace ( expected_size , space ) ;
2019-10-31 10:40:11 +00:00
2019-11-11 10:01:04 +00:00
return returnReservationOrThrowError ( expected_size , std : : move ( reservation ) ) ;
2019-04-01 18:41:19 +00:00
}
2019-11-30 19:22:01 +00:00
DiskSpace : : ReservationPtr MergeTreeData : : tryReserveSpaceInSpecificSpace ( UInt64 expected_size , DiskSpace : : SpacePtr space ) const
{
expected_size = std : : max ( RESERVATION_MIN_ESTIMATION_SIZE , expected_size ) ;
return space - > reserve ( expected_size ) ;
}
2019-11-29 07:00:43 +00:00
DiskSpace : : SpacePtr MergeTreeData : : TTLEntry : : getDestination ( const DiskSpace : : StoragePolicyPtr & storage_policy ) const
{
if ( destination_type = = PartDestinationType : : VOLUME )
return storage_policy - > getVolumeByName ( destination_name ) ;
else if ( destination_type = = PartDestinationType : : DISK )
return storage_policy - > getDiskByName ( destination_name ) ;
else
return { } ;
}
bool MergeTreeData : : TTLEntry : : isPartInDestination ( const DiskSpace : : StoragePolicyPtr & storage_policy , const MergeTreeDataPart & part ) const
{
if ( destination_type = = PartDestinationType : : VOLUME )
{
for ( const auto & disk : storage_policy - > getVolumeByName ( destination_name ) - > disks )
if ( disk - > getName ( ) = = part . disk - > getName ( ) )
return true ;
}
else if ( destination_type = = PartDestinationType : : DISK )
return storage_policy - > getDiskByName ( destination_name ) - > getName ( ) = = part . disk - > getName ( ) ;
return false ;
}
/// Picks the move-TTL rule that should drive this part's relocation:
/// among rules whose TTL has already expired by time_of_move, the one whose
/// max TTL is the latest wins. Returns nullptr when no rule applies.
const MergeTreeData::TTLEntry * MergeTreeData::selectMoveDestination(
    const MergeTreeDataPart::TTLInfos & ttl_infos,
    time_t time_of_move) const
{
    const TTLEntry * best_entry = nullptr;
    /// Prefer TTL rule which went into action last.
    time_t best_max_ttl = 0;

    for (const auto & candidate : move_ttl_entries)
    {
        auto info_it = ttl_infos.moves_ttl.find(candidate.result_column);
        if (info_it == ttl_infos.moves_ttl.end())
            continue;

        const time_t candidate_max = info_it->second.max;
        if (candidate_max <= time_of_move && best_max_ttl <= candidate_max)
        {
            best_entry = &candidate;
            best_max_ttl = candidate_max;
        }
    }

    return best_entry;
}
2017-10-03 19:04:56 +00:00
MergeTreeData : : DataParts MergeTreeData : : getDataParts ( const DataPartStates & affordable_states ) const
2017-09-11 22:40:51 +00:00
{
DataParts res ;
{
2019-03-28 19:58:41 +00:00
auto lock = lockParts ( ) ;
2017-11-20 19:33:12 +00:00
for ( auto state : affordable_states )
{
auto range = getDataPartsStateRange ( state ) ;
res . insert ( range . begin ( ) , range . end ( ) ) ;
}
2017-09-11 22:40:51 +00:00
}
return res ;
}
/// Convenience overload: returns only the parts currently in the working set
/// (state Committed).
MergeTreeData::DataParts MergeTreeData::getDataParts() const
{
    return getDataParts({DataPartState::Committed});
}
/// Convenience overload: returns a sorted vector of the parts currently in the
/// working set (state Committed).
MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector() const
{
    return getDataPartsVector({DataPartState::Committed});
}
/// Returns some Committed part of the given partition, or nullptr if there is
/// none. Caller must hold the parts lock (witnessed by the unused parameter).
MergeTreeData::DataPartPtr MergeTreeData::getAnyPartInPartition(
    const String & partition_id, DataPartsLock & /*data_parts_lock*/)
{
    auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndPartitionID{DataPartState::Committed, partition_id});
    if (it == data_parts_by_state_and_info.end())
        return nullptr;

    /// lower_bound may land on a different state or partition; verify both.
    const auto & candidate = *it;
    if (candidate->state == DataPartState::Committed && candidate->info.partition_id == partition_id)
        return candidate;

    return nullptr;
}
2017-05-24 20:19:29 +00:00
void MergeTreeData : : Transaction : : rollback ( )
{
2017-11-20 19:33:12 +00:00
if ( ! isEmpty ( ) )
2017-05-24 20:19:29 +00:00
{
std : : stringstream ss ;
2018-02-19 15:31:43 +00:00
ss < < " Removing parts: " ;
for ( const auto & part : precommitted_parts )
ss < < " " < < part - > relative_path ;
ss < < " . " ;
2018-09-20 14:30:52 +00:00
LOG_DEBUG ( data . log , " Undoing transaction. " < < ss . str ( ) ) ;
2017-05-24 20:19:29 +00:00
2018-09-20 14:30:52 +00:00
data . removePartsFromWorkingSet (
2018-02-19 15:31:43 +00:00
DataPartsVector ( precommitted_parts . begin ( ) , precommitted_parts . end ( ) ) ,
/* clear_without_timeout = */ true ) ;
2017-05-24 20:19:29 +00:00
}
2017-11-20 19:33:12 +00:00
clear ( ) ;
2017-05-24 20:19:29 +00:00
}
/// Atomically publishes all precommitted parts: each part becomes Committed and
/// the parts it covers become Outdated. A part that is itself already covered
/// by an active part is committed directly as Outdated. Returns the full list
/// of parts that were covered (made Outdated) by this commit.
MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData::DataPartsLock * acquired_parts_lock)
{
    DataPartsVector total_covered_parts;

    if (!isEmpty())
    {
        /// Take the parts lock ourselves only if the caller does not already hold it.
        auto parts_lock = acquired_parts_lock ? MergeTreeData::DataPartsLock() : data.lockParts();
        auto owing_parts_lock = acquired_parts_lock ? acquired_parts_lock : &parts_lock;

        auto current_time = time(nullptr);
        for (const DataPartPtr & part : precommitted_parts)
        {
            DataPartPtr covering_part;
            DataPartsVector covered_parts = data.getActivePartsToReplace(part->info, part->name, covering_part, *owing_parts_lock);
            if (covering_part)
            {
                /// Someone already committed a part that covers ours: ours is obsolete on arrival.
                LOG_WARNING(data.log, "Tried to commit obsolete part " << part->name
                    << " covered by " << covering_part->getNameWithState());

                part->remove_time.store(0, std::memory_order_relaxed); /// The part will be removed without waiting for old_parts_lifetime seconds.
                data.modifyPartState(part, DataPartState::Outdated);
            }
            else
            {
                total_covered_parts.insert(total_covered_parts.end(), covered_parts.begin(), covered_parts.end());
                for (const DataPartPtr & covered_part : covered_parts)
                {
                    covered_part->remove_time.store(current_time, std::memory_order_relaxed);
                    data.modifyPartState(covered_part, DataPartState::Outdated);
                    data.removePartContributionToColumnSizes(covered_part);
                }

                data.modifyPartState(part, DataPartState::Committed);
                data.addPartContributionToColumnSizes(part);
            }
        }
    }

    clear();

    return total_covered_parts;
}
2017-05-24 20:19:29 +00:00
2018-04-19 20:34:02 +00:00
bool MergeTreeData : : isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions ( const ASTPtr & node ) const
2018-01-21 07:30:07 +00:00
{
2018-06-30 21:35:01 +00:00
const String column_name = node - > getColumnName ( ) ;
2018-01-21 07:30:07 +00:00
2018-10-11 14:53:23 +00:00
for ( const auto & name : primary_key_columns )
2018-06-30 21:35:01 +00:00
if ( column_name = = name )
2018-01-21 07:30:07 +00:00
return true ;
2018-06-30 21:35:01 +00:00
for ( const auto & name : minmax_idx_columns )
if ( column_name = = name )
2018-04-19 20:34:02 +00:00
return true ;
2018-04-04 20:37:28 +00:00
2019-03-11 13:22:51 +00:00
if ( const auto * func = node - > as < ASTFunction > ( ) )
2018-03-16 06:51:37 +00:00
if ( func - > arguments - > children . size ( ) = = 1 )
2018-04-19 20:34:02 +00:00
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions ( func - > arguments - > children . front ( ) ) ;
2018-03-16 06:51:37 +00:00
2018-01-21 07:30:07 +00:00
return false ;
}
2019-05-03 02:00:57 +00:00
bool MergeTreeData : : mayBenefitFromIndexForIn ( const ASTPtr & left_in_operand , const Context & ) const
2018-01-21 07:30:07 +00:00
{
2018-04-19 20:34:02 +00:00
/// Make sure that the left side of the IN operator contain part of the key.
/// If there is a tuple on the left side of the IN operator, at least one item of the tuple
/// must be part of the key (probably wrapped by a chain of some acceptable functions).
2019-03-11 13:22:51 +00:00
const auto * left_in_operand_tuple = left_in_operand - > as < ASTFunction > ( ) ;
2018-01-21 07:30:07 +00:00
if ( left_in_operand_tuple & & left_in_operand_tuple - > name = = " tuple " )
{
for ( const auto & item : left_in_operand_tuple - > arguments - > children )
2019-02-25 08:43:19 +00:00
{
2018-04-19 20:34:02 +00:00
if ( isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions ( item ) )
2018-04-03 18:37:35 +00:00
return true ;
2019-02-25 08:43:19 +00:00
for ( const auto & index : skip_indices )
if ( index - > mayBenefitFromIndexForIn ( item ) )
return true ;
}
2018-04-03 18:37:35 +00:00
/// The tuple itself may be part of the primary key, so check that as a last resort.
2018-04-19 20:34:02 +00:00
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions ( left_in_operand ) ;
2018-01-21 07:30:07 +00:00
}
else
{
2019-02-25 08:43:19 +00:00
for ( const auto & index : skip_indices )
if ( index - > mayBenefitFromIndexForIn ( left_in_operand ) )
return true ;
2018-04-19 20:34:02 +00:00
return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions ( left_in_operand ) ;
2018-01-21 07:30:07 +00:00
}
}
2017-09-11 22:40:51 +00:00
2019-05-03 02:00:57 +00:00
MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const
{
    /// The source table must be a MergeTree-family table whose physical columns,
    /// ordering, partition key and part name format all match this table's.
    auto * src_data = dynamic_cast<MergeTreeData *>(source_table.get());
    if (!src_data)
        throw Exception("Table " + table_name + " supports attachPartitionFrom only for MergeTree family of table engines."
            " Got " + source_table->getName(), ErrorCodes::NOT_IMPLEMENTED);

    if (getColumns().getAllPhysical().sizeOfDifference(src_data->getColumns().getAllPhysical()))
        throw Exception("Tables have different structure", ErrorCodes::INCOMPATIBLE_COLUMNS);

    /// A missing AST and an empty AST compare equal.
    auto query_to_string = [](const ASTPtr & ast) -> String
    {
        return ast ? queryToString(ast) : "";
    };

    if (query_to_string(order_by_ast) != query_to_string(src_data->order_by_ast))
        throw Exception("Tables have different ordering", ErrorCodes::BAD_ARGUMENTS);

    if (query_to_string(partition_by_ast) != query_to_string(src_data->partition_by_ast))
        throw Exception("Tables have different partition key", ErrorCodes::BAD_ARGUMENTS);

    if (format_version != src_data->format_version)
        throw Exception("Tables have different format_version", ErrorCodes::BAD_ARGUMENTS);

    return *src_data;
}
2019-11-03 22:27:05 +00:00
/// Create a copy of `src_part` on the same disk (via localBackup) under a temporary
/// name `tmp_part_prefix + <new part name>`, then load it as an in-memory part.
/// The returned part is marked temporary; the caller is responsible for committing it.
/// Throws DIRECTORY_ALREADY_EXISTS if the destination directory already exists.
MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk(const MergeTreeData::DataPartPtr & src_part,
    const String & tmp_part_prefix,
    const MergeTreePartInfo & dst_part_info)
{
    String dst_part_name = src_part->getNewName(dst_part_info);
    String tmp_dst_part_name = tmp_part_prefix + dst_part_name;

    /// Reserve space on the very disk where the source part resides ("same disk" contract).
    auto reservation = reserveSpaceInSpecificSpace(src_part->bytes_on_disk, src_part->disk);
    String dst_part_path = getFullPathOnDisk(reservation->getDisk());
    Poco::Path dst_part_absolute_path = Poco::Path(dst_part_path + tmp_dst_part_name).absolute();
    Poco::Path src_part_absolute_path = Poco::Path(src_part->getFullPath()).absolute();

    if (Poco::File(dst_part_absolute_path).exists())
        throw Exception("Part in " + dst_part_absolute_path.toString() + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS);

    LOG_DEBUG(log, "Cloning part " << src_part_absolute_path.toString() << " to " << dst_part_absolute_path.toString());
    localBackup(src_part_absolute_path, dst_part_absolute_path);

    MergeTreeData::MutableDataPartPtr dst_data_part = std::make_shared<MergeTreeData::DataPart>(
        *this, reservation->getDisk(), dst_part_name, dst_part_info);

    dst_data_part->relative_path = tmp_dst_part_name;
    /// Temporary parts are cleaned up automatically if never committed.
    dst_data_part->is_temp = true;

    dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true);
    dst_data_part->modification_time = Poco::File(dst_part_absolute_path).getLastModified().epochTime();
    return dst_data_part;
}
2019-07-23 13:34:17 +00:00
String MergeTreeData::getFullPathOnDisk(const DiskSpace::DiskPtr & disk) const
{
    /// Layout: <disk data path>/<escaped database>/<escaped table>/
    String path = disk->getClickHouseDataPath();
    path += escapeForFileName(database_name);
    path += '/';
    path += escapeForFileName(table_name);
    path += '/';
    return path;
}
2019-08-29 16:17:47 +00:00
/// Find the disk of the storage policy that contains directory `part_name`
/// under this table's `relative_path`. Returns nullptr if no disk has it.
DiskSpace::DiskPtr MergeTreeData::getDiskForPart(const String & part_name, const String & relative_path) const
{
    const auto disks = storage_policy->getDisks();
    for (const DiskSpace::DiskPtr & disk : disks)
    {
        const String search_path = getFullPathOnDisk(disk) + relative_path;

        /// A disk of the policy may not contain this table's directory at all
        /// (e.g. no part was ever stored there). Iterating a missing directory
        /// with Poco::DirectoryIterator would throw, so skip such disks.
        if (!Poco::File(search_path).exists())
            continue;

        for (Poco::DirectoryIterator it(search_path); it != Poco::DirectoryIterator(); ++it)
            if (it.name() == part_name)
                return disk;
    }
    return nullptr;
}
String MergeTreeData::getFullPathForPart(const String & part_name, const String & relative_path) const
{
    /// Full filesystem path of the directory containing `part_name`,
    /// or an empty string if the part is not found on any disk.
    if (auto disk = getDiskForPart(part_name, relative_path))
        return getFullPathOnDisk(disk) + relative_path;

    return "";
}
2019-05-11 18:50:29 +00:00
Strings MergeTreeData::getDataPaths() const
{
    /// This table's data directory on every disk of the storage policy.
    const auto policy_disks = storage_policy->getDisks();

    Strings paths;
    paths.reserve(policy_disks.size());
    for (const auto & disk : policy_disks)
        paths.push_back(getFullPathOnDisk(disk));

    return paths;
}
2019-11-19 06:44:10 +00:00
MergeTreeData::PathsWithDisks MergeTreeData::getDataPathsWithDisks() const
{
    /// Pair every disk of the storage policy with this table's path on it.
    const auto policy_disks = storage_policy->getDisks();

    PathsWithDisks result;
    result.reserve(policy_disks.size());
    for (const auto & disk : policy_disks)
        result.emplace_back(getFullPathOnDisk(disk), disk);

    return result;
}
2018-11-01 17:13:01 +00:00
/// Back up ("freeze") every active part accepted by `matcher`.
/// Each part is copied (localBackup) into its own disk's shadow directory:
///   <disk path>/shadow/<with_name or increment>/data/<escaped db>/<escaped table>/<part dir>
void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & with_name, const Context & context)
{
    String clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString();
    String default_shadow_path = clickhouse_path + "shadow/";
    Poco::File(default_shadow_path).createDirectories();
    /// The increment file in the default shadow dir numbers unnamed backups;
    /// it is bumped once per call, not per part.
    auto increment = Increment(default_shadow_path + "increment.txt").get(true);

    /// Acquire a snapshot of active data parts to prevent removing while doing backup.
    const auto data_parts = getDataParts();

    size_t parts_processed = 0;
    for (const auto & part : data_parts)
    {
        if (!matcher(part))
            continue;

        /// Each part is frozen on the disk it lives on, into that disk's own shadow dir.
        String shadow_path = part->disk->getPath() + "shadow/";

        Poco::File(shadow_path).createDirectories();
        String backup_path = shadow_path
            + (!with_name.empty()
                ? escapeForFileName(with_name)
                : toString(increment))
            + "/";

        LOG_DEBUG(log, "Freezing part " << part->name << " snapshot will be placed at " + backup_path);

        String part_absolute_path = Poco::Path(part->getFullPath()).absolute().toString();
        String backup_part_absolute_path = backup_path
            + "data/"
            + escapeForFileName(getDatabaseName()) + "/"
            + escapeForFileName(getTableName()) + "/"
            + part->relative_path;
        localBackup(part_absolute_path, backup_part_absolute_path);
        part->is_frozen.store(true, std::memory_order_relaxed);
        ++parts_processed;
    }

    LOG_DEBUG(log, "Freezed " << parts_processed << " parts");
}
2019-06-19 16:16:13 +00:00
bool MergeTreeData : : canReplacePartition ( const DataPartPtr & src_part ) const
{
2019-08-26 14:24:29 +00:00
const auto settings = getSettings ( ) ;
2019-08-13 08:35:49 +00:00
2019-08-13 10:29:31 +00:00
if ( ! settings - > enable_mixed_granularity_parts | | settings - > index_granularity_bytes = = 0 )
2019-06-19 16:16:13 +00:00
{
if ( ! canUseAdaptiveGranularity ( ) & & src_part - > index_granularity_info . is_adaptive )
return false ;
if ( canUseAdaptiveGranularity ( ) & & ! src_part - > index_granularity_info . is_adaptive )
return false ;
}
return true ;
}
2019-09-03 11:32:25 +00:00
/// Append an entry about a part-related event to system.part_log.
/// Never propagates exceptions: a logging failure must not break the
/// operation being logged (hence the function-try-block).
void MergeTreeData::writePartLog(
    PartLogElement::Type type,
    const ExecutionStatus & execution_status,
    UInt64 elapsed_ns,
    const String & new_part_name,
    const DataPartPtr & result_part,
    const DataPartsVector & source_parts,
    const MergeListEntry * merge_entry)
try
{
    auto part_log = global_context.getPartLog(database_name);
    if (!part_log)
        return;  /// part_log is not configured — nothing to do.

    PartLogElement part_log_elem;

    part_log_elem.event_type = type;

    part_log_elem.error = static_cast<UInt16>(execution_status.code);
    part_log_elem.exception = execution_status.message;

    part_log_elem.event_time = time(nullptr);
    /// TODO: Stop stopwatch in outer code to exclude ZK timings and so on
    part_log_elem.duration_ms = elapsed_ns / 1000000;

    part_log_elem.database_name = database_name;
    part_log_elem.table_name = table_name;
    part_log_elem.partition_id = MergeTreePartInfo::fromPartName(new_part_name, format_version).partition_id;
    part_log_elem.part_name = new_part_name;

    if (result_part)
    {
        part_log_elem.path_on_disk = result_part->getFullPath();
        part_log_elem.bytes_compressed_on_disk = result_part->bytes_on_disk;
        part_log_elem.rows = result_part->rows_count;
    }

    part_log_elem.source_part_names.reserve(source_parts.size());
    for (const auto & source_part : source_parts)
        part_log_elem.source_part_names.push_back(source_part->name);

    if (merge_entry)
    {
        /// For merges, counters from the merge list entry override the
        /// per-part `rows` value set above.
        part_log_elem.rows_read = (*merge_entry)->rows_read;
        part_log_elem.bytes_read_uncompressed = (*merge_entry)->bytes_read_uncompressed;

        part_log_elem.rows = (*merge_entry)->rows_written;
        part_log_elem.bytes_uncompressed = (*merge_entry)->bytes_written_uncompressed;
    }

    part_log->add(part_log_elem);
}
catch (...)
{
    tryLogCurrentException(log, __PRETTY_FUNCTION__);
}
2019-09-05 15:53:23 +00:00
/// RAII tag: registers all `moving_parts_` in data.currently_moving_parts so that
/// no concurrent operation picks them up; the destructor unregisters them.
/// NOTE(review): the set is modified here without taking data.moving_parts_mutex —
/// presumably every caller constructs the tagger while already holding that mutex
/// (see selectPartsForMove / checkPartsForMove); confirm before adding new call sites.
/// NOTE(review): if emplace throws mid-loop, parts inserted earlier stay registered
/// (the destructor does not run for a throwing constructor) — looks intentional
/// since LOGICAL_ERROR indicates an unrecoverable invariant violation; verify.
MergeTreeData::CurrentlyMovingPartsTagger::CurrentlyMovingPartsTagger(MergeTreeMovingParts && moving_parts_, MergeTreeData & data_)
    : parts_to_move(std::move(moving_parts_)), data(data_)
{
    for (const auto & moving_part : parts_to_move)
        if (!data.currently_moving_parts.emplace(moving_part.part).second)
            throw Exception("Cannot move part '" + moving_part.part->name + "'. It's already moving.", ErrorCodes::LOGICAL_ERROR);
}
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
/// Unregister the tagged parts so they can be moved/merged again.
MergeTreeData::CurrentlyMovingPartsTagger::~CurrentlyMovingPartsTagger()
{
    std::lock_guard lock(data.moving_parts_mutex);
    for (const auto & moving_part : parts_to_move)
    {
        /// Something went completely wrong
        if (!data.currently_moving_parts.count(moving_part.part))
            std::terminate();
        data.currently_moving_parts.erase(moving_part.part);
    }
}
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
bool MergeTreeData : : selectPartsAndMove ( )
{
if ( parts_mover . moves_blocker . isCancelled ( ) )
return false ;
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
auto moving_tagger = selectPartsForMove ( ) ;
if ( moving_tagger . parts_to_move . empty ( ) )
return false ;
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
return moveParts ( std : : move ( moving_tagger ) ) ;
2019-09-05 13:12:29 +00:00
}
2019-11-14 11:10:17 +00:00
bool MergeTreeData : : areBackgroundMovesNeeded ( ) const
{
return storage_policy - > getVolumes ( ) . size ( ) > 1 ;
}
2019-09-05 15:53:23 +00:00
bool MergeTreeData : : movePartsToSpace ( const DataPartsVector & parts , DiskSpace : : SpacePtr space )
2019-09-05 13:12:29 +00:00
{
if ( parts_mover . moves_blocker . isCancelled ( ) )
return false ;
2019-09-05 15:53:23 +00:00
auto moving_tagger = checkPartsForMove ( parts , space ) ;
if ( moving_tagger . parts_to_move . empty ( ) )
return false ;
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
return moveParts ( std : : move ( moving_tagger ) ) ;
}
/// Ask the parts mover to select parts for a background move and tag them as
/// currently moving. The returned tagger holds an empty list when nothing was picked.
MergeTreeData::CurrentlyMovingPartsTagger MergeTreeData::selectPartsForMove()
{
    MergeTreeMovingParts parts_to_move;

    /// A part is movable only if it is not owned by another background operation
    /// and is not already being moved; otherwise `reason` explains the rejection.
    auto can_move = [this](const DataPartPtr & part, String * reason) -> bool
    {
        if (partIsAssignedToBackgroundOperation(part))
        {
            *reason = "part already assigned to background operation.";
            return false;
        }
        if (currently_moving_parts.count(part))
        {
            *reason = "part is already moving.";
            return false;
        }

        return true;
    };

    /// Held through selection AND tagger construction so that checking
    /// currently_moving_parts and registering in it is one atomic step.
    std::lock_guard moving_lock(moving_parts_mutex);

    parts_mover.selectPartsForMove(parts_to_move, can_move, moving_lock);
    return CurrentlyMovingPartsTagger(std::move(parts_to_move), *this);
}
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
/// Validate that each of `parts` can be moved to `space`: reserve destination
/// space, ensure the target directory does not exist and the part is not busy,
/// then tag all of them as currently moving. Throws on the first failing part.
MergeTreeData::CurrentlyMovingPartsTagger MergeTreeData::checkPartsForMove(const DataPartsVector & parts, DiskSpace::SpacePtr space)
{
    /// Held through validation AND tagger construction (see selectPartsForMove).
    std::lock_guard moving_lock(moving_parts_mutex);

    MergeTreeMovingParts parts_to_move;
    for (const auto & part : parts)
    {
        auto reservation = space->reserve(part->bytes_on_disk);
        if (!reservation)
            throw Exception("Move is not possible. Not enough space on '" + space->getName() + "'", ErrorCodes::NOT_ENOUGH_SPACE);

        auto & reserved_disk = reservation->getDisk();
        String path_to_clone = getFullPathOnDisk(reserved_disk);

        if (Poco::File(path_to_clone + part->name).exists())
            throw Exception(
                "Move is not possible: " + path_to_clone + part->name + " already exists",
                ErrorCodes::DIRECTORY_ALREADY_EXISTS);

        if (currently_moving_parts.count(part) || partIsAssignedToBackgroundOperation(part))
            throw Exception(
                "Cannot move part '" + part->name + "' because it's participating in background process",
                ErrorCodes::PART_IS_TEMPORARILY_LOCKED);

        parts_to_move.emplace_back(part, std::move(reservation));
    }
    return CurrentlyMovingPartsTagger(std::move(parts_to_move), *this);
}
2019-09-05 13:12:29 +00:00
2019-09-05 15:53:23 +00:00
/// Execute the moves prepared by a tagger: clone each part to its reserved
/// destination, swap the clone into place, and record each attempt (success or
/// failure) in system.part_log. Rethrows the first failure after cleanup.
bool MergeTreeData::moveParts(CurrentlyMovingPartsTagger && moving_tagger)
{
    LOG_INFO(log, "Got " << moving_tagger.parts_to_move.size() << " parts to move.");

    for (const auto & moving_part : moving_tagger.parts_to_move)
    {
        Stopwatch stopwatch;
        DataPartPtr cloned_part;

        /// Shared by the success and failure paths; `cloned_part` may still be
        /// null on failure if cloning itself threw.
        auto write_part_log = [&](const ExecutionStatus & execution_status)
        {
            writePartLog(
                PartLogElement::Type::MOVE_PART,
                execution_status,
                stopwatch.elapsed(),
                moving_part.part->name,
                cloned_part,
                {moving_part.part},
                nullptr);
        };

        try
        {
            cloned_part = parts_mover.clonePart(moving_part);
            parts_mover.swapClonedPart(cloned_part);
            write_part_log({});
        }
        catch (...)
        {
            write_part_log(ExecutionStatus::fromCurrentException());

            /// Don't leave a partially written clone on disk.
            if (cloned_part)
                cloned_part->remove();

            throw;
        }
    }
    return true;
}
2019-09-03 11:32:25 +00:00
2014-03-09 17:36:01 +00:00
}