2016-02-14 04:58:47 +00:00
# pragma once
2017-08-19 18:11:20 +00:00
#include <Core/Row.h>
#include <Core/Block.h>
#include <Core/NamesAndTypes.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Storages/MergeTree/MergeTreePartition.h>
#include <Columns/IColumn.h>
#include <algorithm>
#include <shared_mutex>
#include <vector>
2016-02-14 04:58:47 +00:00
class SipHash ;
namespace DB
{
2017-04-16 15:00:33 +00:00
/// Checksum of one file.
2016-02-14 04:58:47 +00:00
struct MergeTreeDataPartChecksum
{
2017-06-21 08:35:38 +00:00
using uint128 = CityHash_v1_0_2 : : uint128 ;
2017-06-21 01:24:05 +00:00
2017-04-01 07:20:54 +00:00
size_t file_size { } ;
uint128 file_hash { } ;
2016-02-14 04:58:47 +00:00
2017-04-01 07:20:54 +00:00
bool is_compressed = false ;
size_t uncompressed_size { } ;
uint128 uncompressed_hash { } ;
2016-02-14 04:58:47 +00:00
2017-04-01 07:20:54 +00:00
MergeTreeDataPartChecksum ( ) { }
MergeTreeDataPartChecksum ( size_t file_size_ , uint128 file_hash_ ) : file_size ( file_size_ ) , file_hash ( file_hash_ ) { }
MergeTreeDataPartChecksum ( size_t file_size_ , uint128 file_hash_ , size_t uncompressed_size_ , uint128 uncompressed_hash_ )
: file_size ( file_size_ ) , file_hash ( file_hash_ ) , is_compressed ( true ) ,
uncompressed_size ( uncompressed_size_ ) , uncompressed_hash ( uncompressed_hash_ ) { }
2016-02-14 04:58:47 +00:00
2017-04-01 07:20:54 +00:00
void checkEqual ( const MergeTreeDataPartChecksum & rhs , bool have_uncompressed , const String & name ) const ;
void checkSize ( const String & path ) const ;
2016-02-14 04:58:47 +00:00
} ;
2017-04-16 15:00:33 +00:00
/** Checksums of all non-temporary files.
* For compressed files , the check sum and the size of the decompressed data are stored to not depend on the compression method .
2016-02-14 04:58:47 +00:00
*/
struct MergeTreeDataPartChecksums
{
2017-04-01 07:20:54 +00:00
using Checksum = MergeTreeDataPartChecksum ;
2016-02-14 04:58:47 +00:00
2017-08-09 21:07:01 +00:00
/// The order is important.
2017-04-01 07:20:54 +00:00
using FileChecksums = std : : map < String , Checksum > ;
FileChecksums files ;
2016-02-14 04:58:47 +00:00
2017-06-21 01:24:05 +00:00
void addFile ( const String & file_name , size_t file_size , Checksum : : uint128 file_hash ) ;
2016-02-14 04:58:47 +00:00
2017-04-01 07:20:54 +00:00
void add ( MergeTreeDataPartChecksums & & rhs_checksums ) ;
2016-11-03 12:00:44 +00:00
2017-04-16 15:00:33 +00:00
/// Checks that the set of columns and their checksums are the same. If not, throws an exception.
/// If have_uncompressed, for compressed files it compares the checksums of the decompressed data. Otherwise, it compares only the checksums of the files.
2017-04-01 07:20:54 +00:00
void checkEqual ( const MergeTreeDataPartChecksums & rhs , bool have_uncompressed ) const ;
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/// Checks that the directory contains all the needed files of the correct size. Does not check the checksum.
2017-04-01 07:20:54 +00:00
void checkSizes ( const String & path ) const ;
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/// Serializes and deserializes in human readable form.
bool read ( ReadBuffer & in ) ; /// Returns false if the checksum is too old.
2017-04-01 07:20:54 +00:00
bool read_v2 ( ReadBuffer & in ) ;
bool read_v3 ( ReadBuffer & in ) ;
bool read_v4 ( ReadBuffer & in ) ;
void write ( WriteBuffer & out ) const ;
2016-02-14 04:58:47 +00:00
2017-04-01 07:20:54 +00:00
bool empty ( ) const
{
return files . empty ( ) ;
}
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/// Checksum from the set of checksums of .bin files.
2017-04-01 07:20:54 +00:00
void summaryDataChecksum ( SipHash & hash ) const ;
2016-02-14 04:58:47 +00:00
2017-04-01 07:20:54 +00:00
String toString ( ) const ;
static MergeTreeDataPartChecksums parse ( const String & s ) ;
2016-02-14 04:58:47 +00:00
} ;
class MergeTreeData ;
2017-04-16 15:00:33 +00:00
/// Description of the data part.
2017-08-14 18:16:11 +00:00
struct MergeTreeDataPart
2016-02-14 04:58:47 +00:00
{
2017-04-01 07:20:54 +00:00
using Checksums = MergeTreeDataPartChecksums ;
2017-03-24 13:52:50 +00:00
using Checksum = MergeTreeDataPartChecksums : : Checksum ;
2016-02-14 04:58:47 +00:00
2017-08-16 19:24:50 +00:00
MergeTreeDataPart ( MergeTreeData & storage_ , const String & name_ , const MergeTreePartInfo & info_ )
: storage ( storage_ ) , name ( name_ ) , info ( info_ )
{
}
2016-02-14 04:58:47 +00:00
2017-08-25 20:41:45 +00:00
MergeTreeDataPart ( MergeTreeData & storage_ , const String & name_ ) ;
2017-03-24 13:52:50 +00:00
/// Returns checksum of column's binary file.
const Checksum * tryGetBinChecksum ( const String & name ) const ;
2017-04-01 07:20:54 +00:00
/// Returns the size of .bin file for column `name` if found, zero otherwise
size_t getColumnCompressedSize ( const String & name ) const ;
2017-03-24 13:52:50 +00:00
size_t getColumnUncompressedSize ( const String & name ) const ;
2016-02-14 04:58:47 +00:00
2017-03-24 13:52:50 +00:00
/// Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
/// If no checksums are present returns the name of the first physically existing column.
2017-04-01 07:20:54 +00:00
String getColumnNameWithMinumumCompressedSize ( ) const ;
2016-02-14 04:58:47 +00:00
2017-03-24 13:52:50 +00:00
/// Returns full path to part dir
String getFullPath ( ) const ;
2017-05-16 15:40:32 +00:00
/// Returns part->name with prefixes like 'tmp_<name>'
String getNameWithPrefix ( ) const ;
2017-08-14 18:16:11 +00:00
bool contains ( const MergeTreeDataPart & other ) const { return info . contains ( other . info ) ; }
2017-08-21 15:35:29 +00:00
/// If the partition key includes date column (a common case), these functions will return min and max values for this column.
DayNum_t getMinDate ( ) const ;
DayNum_t getMaxDate ( ) const ;
2017-05-16 15:40:32 +00:00
2017-04-01 07:20:54 +00:00
MergeTreeData & storage ;
2016-02-14 04:58:47 +00:00
2017-08-14 18:16:11 +00:00
String name ;
MergeTreePartInfo info ;
2017-05-16 15:40:32 +00:00
/// A directory path (realative to storage's path) where part data is actually stored
2017-05-26 00:47:06 +00:00
/// Examples: 'detached/tmp_fetch_<name>', 'tmp_<name>', '<name>'
2017-05-16 15:40:32 +00:00
mutable String relative_path ;
2017-10-24 14:11:53 +00:00
size_t rows_count = 0 ;
size_t marks_count = 0 ;
2017-05-16 15:40:32 +00:00
std : : atomic < size_t > size_in_bytes { 0 } ; /// size in bytes, 0 - if not counted;
/// is used from several threads without locks (it is changed with ALTER).
2017-04-01 07:20:54 +00:00
time_t modification_time = 0 ;
2017-04-16 15:00:33 +00:00
mutable time_t remove_time = std : : numeric_limits < time_t > : : max ( ) ; /// When the part is removed from the working set.
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/// If true, the destructor will delete the directory with the part.
2017-04-01 07:20:54 +00:00
bool is_temp = false ;
2016-02-14 04:58:47 +00:00
2017-10-06 11:30:57 +00:00
/// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem
bool is_duplicate = false ;
2017-09-05 19:03:51 +00:00
/**
* Part state is a stage of its lifetime . States are ordered and state of a part could be increased only .
* Part state should be modified under data_parts mutex .
*
* Possible state transitions :
* Temporary - > Precommitted : we are trying to commit a fetched , inserted or merged part to active set
* Precommitted - > Outdated : we could not to add a part to active set and doing a rollback ( for example it is duplicated part )
* Precommitted - > Commited : we successfully committed a part to active dataset
* Precommitted - > Outdated : a part was replaced by a covering part or DROP PARTITION
* Outdated - > Deleting : a cleaner selected this part for deletion
2017-10-06 11:30:57 +00:00
* Deleting - > Outdated : if an ZooKeeper error occurred during the deletion , we will retry deletion
2017-09-05 19:03:51 +00:00
*/
enum class State
{
Temporary , /// the part is generating now, it is not in data_parts list
2017-09-11 22:40:51 +00:00
PreCommitted , /// the part is in data_parts, but not used for SELECTs
2017-09-05 19:03:51 +00:00
Committed , /// active data part, used by current and upcoming SELECTs
Outdated , /// not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes
Deleting /// not active data part with identity refcounter, it is deleting right now by a cleaner
} ;
2017-09-21 21:51:17 +00:00
/// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex
2017-09-11 22:40:51 +00:00
mutable State state { State : : Temporary } ;
2017-09-05 19:03:51 +00:00
2017-09-11 22:40:51 +00:00
/// Returns name of state
static String stateToString ( State state ) ;
String stateString ( ) const ;
String getNameWithState ( ) const
{
return name + " (state " + stateString ( ) + " ) " ;
}
/// Returns true if state of part is one of affordable_states
2017-09-21 21:51:17 +00:00
bool checkState ( const std : : initializer_list < State > & affordable_states ) const
2017-09-11 22:40:51 +00:00
{
for ( auto affordable_state : affordable_states )
{
if ( state = = affordable_state )
return true ;
}
return false ;
}
/// Throws an exception if state of the part is not in affordable_states
2017-09-21 21:51:17 +00:00
void assertState ( const std : : initializer_list < State > & affordable_states ) const
2017-09-11 22:40:51 +00:00
{
2017-09-21 21:51:17 +00:00
if ( ! checkState ( affordable_states ) )
2017-09-11 22:40:51 +00:00
{
String states_str ;
for ( auto state : affordable_states )
states_str + = stateToString ( state ) + " " ;
throw Exception ( " Unexpected state of part " + getNameWithState ( ) + " . Expected: " + states_str ) ;
}
}
2017-09-21 21:51:17 +00:00
/// In comparison with lambdas, it is move assignable and could has several overloaded operator()
struct StatesFilter
{
std : : initializer_list < State > affordable_states ;
StatesFilter ( const std : : initializer_list < State > & affordable_states ) : affordable_states ( affordable_states ) { }
bool operator ( ) ( const std : : shared_ptr < const MergeTreeDataPart > & part ) const
{
return part - > checkState ( affordable_states ) ;
}
} ;
2017-09-11 22:40:51 +00:00
/// Returns a lambda that returns true only for part with states from specified list
2017-09-21 21:51:17 +00:00
static inline StatesFilter getStatesFilter ( const std : : initializer_list < State > & affordable_states )
2017-09-11 22:40:51 +00:00
{
2017-09-21 21:51:17 +00:00
return StatesFilter ( affordable_states ) ;
2017-09-11 22:40:51 +00:00
}
2017-09-05 19:03:51 +00:00
2017-04-01 07:20:54 +00:00
/// Primary key (correspond to primary.idx file).
/// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple.
/// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h.
using Index = Columns ;
Index index ;
2016-02-14 04:58:47 +00:00
2017-09-11 17:55:41 +00:00
MergeTreePartition partition ;
2017-08-31 15:40:34 +00:00
/// Index that for each part stores min and max values of a set of columns. This allows quickly excluding
/// parts based on conditions on these columns imposed by a query.
/// Currently this index is built using only columns required by partition expression, but in principle it
/// can be built using any set of columns.
struct MinMaxIndex
{
Row min_values ;
Row max_values ;
bool initialized = false ;
public :
MinMaxIndex ( ) = default ;
/// For month-based partitioning.
MinMaxIndex ( DayNum_t min_date , DayNum_t max_date )
: min_values ( 1 , static_cast < UInt64 > ( min_date ) )
, max_values ( 1 , static_cast < UInt64 > ( max_date ) )
, initialized ( true )
{
}
void load ( const MergeTreeData & storage , const String & part_path ) ;
void store ( const MergeTreeData & storage , const String & part_path , Checksums & checksums ) const ;
void update ( const Block & block , const Names & column_names ) ;
void merge ( const MinMaxIndex & other ) ;
} ;
2017-08-18 19:46:26 +00:00
MinMaxIndex minmax_idx ;
2017-04-01 07:20:54 +00:00
Checksums checksums ;
2016-02-14 04:58:47 +00:00
2017-03-24 13:52:50 +00:00
/// Columns description.
2017-04-01 07:20:54 +00:00
NamesAndTypesList columns ;
2016-02-14 04:58:47 +00:00
2017-04-01 07:20:54 +00:00
using ColumnToSize = std : : map < std : : string , size_t > ;
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/** It is blocked for writing when changing columns, checksums or any part files.
* Locked to read when reading columns , checksums or any part files .
2017-04-01 07:20:54 +00:00
*/
2017-07-28 17:34:02 +00:00
mutable std : : shared_mutex columns_lock ;
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/** It is taken for the whole time ALTER a part: from the beginning of the recording of the temporary files to their renaming to permanent.
* It is taken with unlocked ` columns_lock ` .
2017-04-01 07:20:54 +00:00
*
2017-04-16 15:00:33 +00:00
* NOTE : " You can " do without this mutex if you could turn ReadRWLock into WriteRWLock without removing the lock .
* This transformation is impossible , because it would create a deadlock , if you do it from two threads at once .
* Taking this mutex means that we want to lock columns_lock on read with intention then , not
* unblocking , block it for writing .
2017-04-01 07:20:54 +00:00
*/
mutable std : : mutex alter_mutex ;
2016-02-14 04:58:47 +00:00
2017-04-01 07:20:54 +00:00
~ MergeTreeDataPart ( ) ;
2016-02-14 04:58:47 +00:00
2017-04-16 15:00:33 +00:00
/// Calculate the total size of the entire directory with all the files
2017-04-01 07:20:54 +00:00
static size_t calcTotalSize ( const String & from ) ;
2016-02-14 04:58:47 +00:00
2017-04-01 07:20:54 +00:00
void remove ( ) const ;
2017-05-16 15:40:32 +00:00
/// Makes checks and move part to new directory
/// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly
void renameTo ( const String & new_relative_path , bool remove_new_dir_if_exists = true ) const ;
2016-02-14 04:58:47 +00:00
2017-06-25 00:17:08 +00:00
/// Renames a part by appending a prefix to the name. To_detached - also moved to the detached directory.
2017-04-01 07:20:54 +00:00
void renameAddPrefix ( bool to_detached , const String & prefix ) const ;
2016-02-14 04:58:47 +00:00
2017-03-24 13:52:50 +00:00
/// Populates columns_to_size map (compressed size).
2017-04-01 07:20:54 +00:00
void accumulateColumnSizes ( ColumnToSize & column_to_size ) const ;
2016-02-14 04:58:47 +00:00
2017-08-16 19:22:49 +00:00
/// Initialize columns (from columns.txt if exists, or create from column files if not).
/// Load checksums from checksums.txt if exists. Load index if required.
2017-08-16 19:24:50 +00:00
void loadColumnsChecksumsIndexes ( bool require_columns_checksums , bool check_consistency ) ;
2016-02-14 04:58:47 +00:00
2017-03-24 13:52:50 +00:00
/// Checks that .bin and .mrk files exist
2017-04-01 07:20:54 +00:00
bool hasColumnFiles ( const String & column ) const ;
2017-01-17 20:49:16 +00:00
2017-03-24 13:52:50 +00:00
/// For data in RAM ('index')
2017-04-01 07:20:54 +00:00
size_t getIndexSizeInBytes ( ) const ;
size_t getIndexSizeInAllocatedBytes ( ) const ;
2017-08-16 19:22:49 +00:00
private :
/// Reads columns names and types from columns.txt
void loadColumns ( bool require ) ;
/// If checksums.txt exists, reads files' checksums (and sizes) from it
void loadChecksums ( bool require ) ;
2017-10-24 14:11:53 +00:00
/// Loads index file. Also calculates this->marks_count if marks_count = 0
2017-08-16 19:22:49 +00:00
void loadIndex ( ) ;
2017-10-24 14:11:53 +00:00
/// Load rows count for this part from disk (for the newer storage format version).
/// For the older format version calculates rows count from the size of a column with a fixed size.
void loadRowsCount ( ) ;
2017-08-18 19:46:26 +00:00
void loadPartitionAndMinMaxIndex ( ) ;
2017-08-16 19:24:50 +00:00
2017-08-16 19:22:49 +00:00
void checkConsistency ( bool require_part_metadata ) ;
2016-02-14 04:58:47 +00:00
} ;
}