#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int NOT_IMPLEMENTED; } class Context; using StorageActionBlockType = size_t; class ASTCreateQuery; struct Settings; struct SettingChange; using SettingsChanges = std::vector; class AlterCommands; class MutationCommands; struct PartitionCommand; using PartitionCommands = std::vector; class IProcessor; using ProcessorPtr = std::shared_ptr; using Processors = std::vector; class Pipe; using Pipes = std::vector; class StoragePolicy; using StoragePolicyPtr = std::shared_ptr; struct ColumnSize { size_t marks = 0; size_t data_compressed = 0; size_t data_uncompressed = 0; void add(const ColumnSize & other) { marks += other.marks; data_compressed += other.data_compressed; data_uncompressed += other.data_uncompressed; } }; /** Storage. Describes the table. Responsible for * - storage of the table data; * - the definition in which files (or not in files) the data is stored; * - data lookups and appends; * - data storage structure (compression, etc.) * - concurrent access to data (locks, etc.) */ class IStorage : public std::enable_shared_from_this, public TypePromotion { public: IStorage() = delete; explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)) {} IStorage(StorageID id_, ColumnsDescription virtuals_); virtual ~IStorage() = default; IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; /// The main name of the table type (for example, StorageMergeTree). virtual std::string getName() const = 0; /// The name of the table. StorageID getStorageID() const; /// Returns true if the storage receives data from a remote server or servers. virtual bool isRemote() const { return false; } /// Returns true if the storage is a view of a table or another view. virtual bool isView() const { return false; } /// Returns true if the storage supports queries with the SAMPLE section. virtual bool supportsSampling() const { return false; } /// Returns true if the storage supports queries with the FINAL section. virtual bool supportsFinal() const { return false; } /// Returns true if the storage supports queries with the PREWHERE section. virtual bool supportsPrewhere() const { return false; } /// Returns true if the storage replicates SELECT, INSERT and ALTER commands among replicas. virtual bool supportsReplication() const { return false; } /// Returns true if the storage supports parallel insert. virtual bool supportsParallelInsert() const { return false; } /// Returns true if the storage supports deduplication of inserted data blocks. virtual bool supportsDeduplication() const { return false; } /// Returns true if the storage supports settings. virtual bool supportsSettings() const { return false; } /// Returns true if the blocks shouldn't be pushed to associated views on insert. virtual bool noPushingToViews() const { return false; } /// Read query returns streams which automatically distribute data between themselves. /// So, it's impossible for one stream run out of data when there is data in other streams. /// Example is StorageSystemNumbers. virtual bool hasEvenlyDistributedRead() const { return false; } /// Returns true if there is set table TTL, any column TTL or any move TTL. virtual bool hasAnyTTL() const { return false; } /// Returns true if there is set TTL for rows. virtual bool hasRowsTTL() const { return false; } /// Optional size information of each physical column. /// Currently it's only used by the MergeTree family for query optimizations. using ColumnSizeByName = std::unordered_map; virtual ColumnSizeByName getColumnSizes() const { return {}; } public: /// thread-unsafe part. lockStructure must be acquired virtual const ColumnsDescription & getColumns() const; /// returns combined set of columns virtual void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. const ColumnsDescription & getVirtuals() const; const IndicesDescription & getIndices() const; const ConstraintsDescription & getConstraints() const; void setConstraints(ConstraintsDescription constraints_); /// Returns storage metadata copy. Direct modification of /// result structure doesn't affect storage. virtual StorageInMemoryMetadata getInMemoryMetadata() const; /// NOTE: these methods should include virtual columns, /// but should NOT include ALIAS columns (they are treated separately). virtual NameAndTypePair getColumn(const String & column_name) const; virtual bool hasColumn(const String & column_name) const; Block getSampleBlock() const; /// ordinary + materialized. Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. Block getSampleBlockNonMaterialized() const; /// ordinary. Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. /// Verify that all the requested names are in the table and are set correctly: /// list of names is not empty and the names do not repeat. void check(const Names & column_names, bool include_virtuals = false) const; /// Check that all the requested names are in the table and have the correct types. void check(const NamesAndTypesList & columns) const; /// Check that all names from the intersection of `names` and `columns` are in the table and have the same types. void check(const NamesAndTypesList & columns, const Names & column_names) const; /// Check that the data block contains all the columns of the table with the correct types, /// contains only the columns of the table, and all the columns are different. /// If |need_all| is set, then checks that all the columns of the table are in the block. void check(const Block & block, bool need_all = false) const; protected: /// still thread-unsafe part. void setIndices(IndicesDescription indices_); /// Returns whether the column is virtual - by default all columns are real. /// Initially reserved virtual column name may be shadowed by real column. virtual bool isVirtualColumn(const String & column_name) const; private: StorageID storage_id; mutable std::mutex id_mutex; ColumnsDescription columns; /// combined real and virtual columns const ColumnsDescription virtuals = {}; IndicesDescription indices; ConstraintsDescription constraints; public: /// Acquire this lock if you need the table structure to remain constant during the execution of /// the query. If will_add_new_data is true, this means that the query will add new data to the table /// (INSERT or a parts merge). TableStructureReadLockHolder lockStructureForShare(bool will_add_new_data, const String & query_id); /// Acquire this lock at the start of ALTER to lock out other ALTERs and make sure that only you /// can modify the table structure. It can later be upgraded to the exclusive lock. TableStructureWriteLockHolder lockAlterIntention(const String & query_id); /// Upgrade alter intention lock and make sure that no new data is inserted into the table. /// This is used by the ALTER MODIFY of the MergeTree storage to consistently determine /// the set of parts that needs to be altered. void lockNewDataStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id); /// Upgrade alter intention lock to the full exclusive structure lock. This is done by ALTER queries /// to ensure that no other query uses the table structure and it can be safely changed. void lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id); /// Acquire the full exclusive lock immediately. No other queries can run concurrently. TableStructureWriteLockHolder lockExclusively(const String & query_id); /** Returns stage to which query is going to be processed in read() function. * (Normally, the function only reads the columns from the list, but in other cases, * for example, the request can be partially processed on a remote server.) */ virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &) const { return QueryProcessingStage::FetchColumns; } /** Watch live changes to the table. * Accepts a list of columns to read, as well as a description of the query, * from which information can be extracted about how to retrieve data * (indexes, locks, etc.) * Returns a stream with which you can read data sequentially * or multiple streams for parallel data reading. * The `processed_stage` info is also written to what stage the request was processed. * (Normally, the function only reads the columns from the list, but in other cases, * for example, the request can be partially processed on a remote server.) * * context contains settings for one query. * Usually Storage does not care about these settings, since they are used in the interpreter. * But, for example, for distributed query processing, the settings are passed to the remote server. * * num_streams - a recommendation, how many streams to return, * if the storage can return a different number of streams. * * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP). */ virtual BlockInputStreams watch( const Names & /*column_names*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/, unsigned /*num_streams*/) { throw Exception("Method watch is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Read a set of columns from the table. * Accepts a list of columns to read, as well as a description of the query, * from which information can be extracted about how to retrieve data * (indexes, locks, etc.) * Returns a stream with which you can read data sequentially * or multiple streams for parallel data reading. * The `processed_stage` must be the result of getQueryProcessingStage() function. * * context contains settings for one query. * Usually Storage does not care about these settings, since they are used in the interpreter. * But, for example, for distributed query processing, the settings are passed to the remote server. * * num_streams - a recommendation, how many streams to return, * if the storage can return a different number of streams. * * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP). */ virtual Pipes read( const Names & /*column_names*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, unsigned /*num_streams*/) { throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** The same as read, but returns BlockInputStreams. */ BlockInputStreams readStreams( const Names & /*column_names*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, unsigned /*num_streams*/); /** Writes the data to a table. * Receives a description of the query, which can contain information about the data write method. * Returns an object by which you can write data sequentially. * * It is guaranteed that the table structure will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP). */ virtual BlockOutputStreamPtr write( const ASTPtr & /*query*/, const Context & /*context*/) { throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Delete the table data. Called before deleting the directory with the data. * The method can be called only after detaching table from Context (when no queries are performed with table). * The table is not usable during and after call to this method. * If you do not need any action other than deleting the directory with data, you can leave this method blank. */ virtual void drop(TableStructureWriteLockHolder &) {} /** Clear the table data and leave it empty. * Must be called under lockForAlter. */ virtual void truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &) { throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Rename the table. * Renaming a name in a file with metadata, the name in the list of tables in the RAM, is done separately. * In this function, you need to rename the directory with the data, if any. * Called when the table structure is locked for write. */ virtual void rename(const String & /*new_path_to_table_data*/, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) { renameInMemory(new_database_name, new_table_name); } /** * Just updates names of database and table without moving any data on disk * Can be called directly only from DatabaseAtomic. */ virtual void renameInMemory(const String & new_database_name, const String & new_table_name); /** ALTER tables in the form of column changes that do not affect the change to Storage or its parameters. * This method must fully execute the ALTER query, taking care of the locks itself. * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata. */ virtual void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder); /** Checks that alter commands can be applied to storage. For example, columns can be modified, * or primary key can be changes, etc. */ virtual void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings); /** ALTER tables with regard to its partitions. * Should handle locks for each command on its own. */ virtual void alterPartition(const ASTPtr & /* query */, const PartitionCommands & /* commands */, const Context & /* context */) { throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** Perform any background work. For example, combining parts in a MergeTree type table. * Returns whether any work has been done. */ virtual bool optimize(const ASTPtr & /*query*/, const ASTPtr & /*partition*/, bool /*final*/, bool /*deduplicate*/, const Context & /*context*/) { throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /// Mutate the table contents virtual void mutate(const MutationCommands &, const Context &) { throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /// Cancel a mutation. virtual CancellationCode killMutation(const String & /*mutation_id*/) { throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } /** If the table have to do some complicated work on startup, * that must be postponed after creation of table object * (like launching some background threads), * do it in this method. * You should call this method after creation of object. * By default, does nothing. * Cannot be called simultaneously by multiple threads. */ virtual void startup() {} /** If the table have to do some complicated work when destroying an object - do it in advance. * For example, if the table contains any threads for background work - ask them to complete and wait for completion. * By default, does nothing. * Can be called simultaneously from different threads, even after a call to drop(). */ virtual void shutdown() {} /// Asks table to stop executing some action identified by action_type /// If table does not support such type of lock, and empty lock is returned virtual ActionLock getActionLock(StorageActionBlockType /* action_type */) { return {}; } std::atomic is_dropped{false}; /// Does table support index for IN sections virtual bool supportsIndexForIn() const { return false; } /// Provides a hint that the storage engine may evaluate the IN-condition by using an index. virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, const Context & /* query_context */) const { return false; } /// Checks validity of the data virtual CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) { throw Exception("Check query is not supported for " + getName() + " storage", ErrorCodes::NOT_IMPLEMENTED); } /// Checks that table could be dropped right now /// Otherwise - throws an exception with detailed information. /// We do not use mutex because it is not very important that the size could change during the operation. virtual void checkTableCanBeDropped() const {} /// Checks that Partition could be dropped right now /// Otherwise - throws an exception with detailed information. /// We do not use mutex because it is not very important that the size could change during the operation. virtual void checkPartitionCanBeDropped(const ASTPtr & /*partition*/) {} /// Returns data paths if storage supports it, empty vector otherwise. virtual Strings getDataPaths() const { return {}; } /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. virtual ASTPtr getPartitionKeyAST() const { return nullptr; } /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. virtual ASTPtr getSortingKeyAST() const { return nullptr; } /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. virtual ASTPtr getPrimaryKeyAST() const { return nullptr; } /// Returns sampling expression AST for storage or nullptr if there is none. virtual ASTPtr getSamplingKeyAST() const { return nullptr; } /// Returns additional columns that need to be read to calculate partition key. virtual Names getColumnsRequiredForPartitionKey() const { return {}; } /// Returns additional columns that need to be read to calculate sorting key. virtual Names getColumnsRequiredForSortingKey() const { return {}; } /// Returns additional columns that need to be read to calculate primary key. virtual Names getColumnsRequiredForPrimaryKey() const { return {}; } /// Returns additional columns that need to be read to calculate sampling key. virtual Names getColumnsRequiredForSampling() const { return {}; } /// Returns additional columns that need to be read for FINAL to work. virtual Names getColumnsRequiredForFinal() const { return {}; } /// Returns names of primary key + secondary sorting columns virtual Names getSortingKeyColumns() const { return {}; } /// Returns columns, which will be needed to calculate dependencies /// (skip indices, TTL expressions) if we update @updated_columns set of columns. virtual ColumnDependencies getColumnDependencies(const NameSet & /* updated_columns */) const { return {}; } /// Returns storage policy if storage supports it virtual StoragePolicyPtr getStoragePolicy() const { return {}; } /** If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. */ virtual std::optional totalRows() const { return {}; } private: /// You always need to take the next three locks in this order. /// If you hold this lock exclusively, you can be sure that no other structure modifying queries /// (e.g. ALTER, DROP) are concurrently executing. But queries that only read table structure /// (e.g. SELECT, INSERT) can continue to execute. mutable RWLock alter_intention_lock = RWLockImpl::create(); /// It is taken for share for the entire INSERT query and the entire merge of the parts (for MergeTree). /// ALTER COLUMN queries acquire an exclusive lock to ensure that no new parts with the old structure /// are added to the table and thus the set of parts to modify doesn't change. mutable RWLock new_data_structure_lock = RWLockImpl::create(); /// Lock for the table column structure (names, types, etc.) and data path. /// It is taken in exclusive mode by queries that modify them (e.g. RENAME, ALTER and DROP) /// and in share mode by other queries. mutable RWLock structure_lock = RWLockImpl::create(); }; }