ClickHouse/src/Storages/StorageMemory.h

#pragma once

#include <atomic>
#include <optional>
#include <mutex>

#include <base/shared_ptr_helper.h>

#include <Core/NamesAndTypes.h>
#include <Storages/IStorage.h>

#include <Common/MultiVersion.h>

namespace DB
{

/** Implements storage in RAM.
  * Suitable for temporary data.
  * It does not support keys.
  * Data is stored as a set of blocks and is not stored anywhere else.
  */
class StorageMemory final : public shared_ptr_helper<StorageMemory>, public IStorage
{
/// Grants MemorySink access to the private members of this class
/// (presumably the sink returned by write() - confirm in StorageMemory.cpp).
friend class MemorySink;
/// Lets shared_ptr_helper reach the protected constructor declared at the bottom of the class.
friend struct shared_ptr_helper<StorageMemory>;

public:
    /// Name of this storage: always "Memory".
    String getName() const override { return "Memory"; }

    /// Size of the block container obtained from `data.get()`, i.e. a number of blocks, not of rows.
    size_t getSize() const { return data.get()->size(); }

    /// Storage-specific part of a snapshot: a shared pointer to an immutable set of blocks.
    /// NOTE(review): presumably filled in getStorageSnapshot() and consumed in read() - confirm in the .cpp.
    struct SnapshotData : public StorageSnapshot::Data
    {
        std::shared_ptr<const Blocks> blocks;
    };

    StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const override;

    Pipe read(
        const Names & column_names,
        const StorageSnapshotPtr & storage_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

    bool supportsParallelInsert() const override { return true; }
    bool supportsSubcolumns() const override { return true; }

    /// Smaller blocks (e.g. 64K rows) are better for CPU cache.
    bool prefersLargeBlocks() const override { return false; }

    bool hasEvenlyDistributedRead() const override { return true; }

    SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override;

    void drop() override;

    void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override;
    void mutate(const MutationCommands & commands, ContextPtr context) override;

    void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override;

    std::optional<UInt64> totalRows(const Settings &) const override;
    std::optional<UInt64> totalBytes(const Settings &) const override;

    /** Delays initialization of StorageMemory::read() until the first read actually happens.
      * Usually, for code like this:
      *
      *     auto out = StorageMemory::write();
      *     auto in = StorageMemory::read();
      *     out->write(new_data);
      *
      * `new_data` won't appear in `in`.
      *  However, if delayReadForGlobalSubqueries is called, the first read from `in` will check for new_data and return it.
      *
      *
      * Why is delayReadForGlobalSubqueries needed?
      *
      * The fact is that when processing a query of the form
      *  SELECT ... FROM remote_test WHERE column GLOBAL IN (subquery),
      *  if the distributed remote_test table contains localhost as one of the servers,
      *  the query will be interpreted locally again (and not sent over TCP, as in the case of a remote server).
      *
      * The query execution pipeline will be:
      * CreatingSets
      *  subquery execution, filling the temporary table with _data1 (1)
      *  CreatingSets
      *   reading from the table _data1, creating the set (2)
      *   read from the table subordinate to remote_test.
      *
      * (The second part of the pipeline under CreatingSets is a reinterpretation of the query inside StorageDistributed,
      *  the query differs in that the database name and tables are replaced with subordinates, and the subquery is replaced with _data1.)
      *
      * But when creating the pipeline, when creating the source (2), it will be found that the _data1 table is empty
      *  (because the query has not started yet), and an empty source will be returned as the source.
      * And then, when the query is executed, an empty set will be created in step (2).
      *
      * Therefore, we make the initialization of step (2) delayed
      *  - so that it does not occur until step (1) is completed, at which point the table will be populated.
      *
      * Note: this only sets a flag; the flag is never reset by anything declared in this header.
      */
    void delayReadForGlobalSubqueries() { delay_read_for_global_subqueries = true; }

private:
    /// MultiVersion data storage, so that we can copy the vector of blocks to readers.

    MultiVersion<Blocks> data;

    /// Declared `mutable` so that it can be locked from const member functions.
    /// NOTE(review): what exactly it protects is not visible in this header - see StorageMemory.cpp.
    mutable std::mutex mutex;

    /// Set to true by delayReadForGlobalSubqueries(); false by default.
    bool delay_read_for_global_subqueries = false;

    /// Atomic size counters, both starting at zero.
    /// NOTE(review): presumably the values reported by totalBytes() / totalRows() - confirm in the .cpp.
    std::atomic<size_t> total_size_bytes = 0;
    std::atomic<size_t> total_size_rows = 0;

    /// Has no in-class initializer, so it must be initialized by the constructor.
    /// NOTE(review): presumably taken from the constructor's `compress_` argument and controls
    /// whether blocks are kept compressed in memory - confirm in the .cpp.
    bool compress;

protected:
    /// Not public: the class befriends shared_ptr_helper<StorageMemory>, which can therefore call it.
    /// `compress_` defaults to false.
    StorageMemory(
        const StorageID & table_id_,
        ColumnsDescription columns_description_,
        ConstraintsDescription constraints_,
        const String & comment,
        bool compress_ = false);
};

}
dbms: development. 2011-10-31 17:55:06 +00:00			`#pragma once`

Lock-less totalRows/totalBytes + more clear postponed init 2020-10-06 13:45:17 +00:00			`#include <atomic>`
			`#include <optional>`
Using std::shared_ptr for data types [#METR-21503]. 2016-05-28 10:15:36 +00:00			`#include <mutex>`
dbms: added locks to storages Memory and Log [#CONV-2944]. 2012-11-30 04:28:13 +00:00
Rename "common" to "base" 2021-10-02 07:13:14 +00:00			`#include <base/shared_ptr_helper.h>`
use std::enable_shared_from_this for IStorage 2016-08-26 21:25:05 +00:00
Moved headers and sources to same place [#CLICKHOUSE-3]. 2017-04-01 09:19:00 +00:00			`#include <Core/NamesAndTypes.h>`
			`#include <Storages/IStorage.h>`
dbms: development. 2011-10-31 17:55:06 +00:00
multiversion storage for StorageMemory 2020-10-04 16:28:36 +00:00			`#include <Common/MultiVersion.h>`
dbms: development. 2011-10-31 17:55:06 +00:00
			`namespace DB`
			`{`

translate comments 2017-04-16 15:00:33 +00:00			`/** Implements storage in the RAM.`
			`* Suitable for temporary data.`
			`* It does not support keys.`
			`* Data is stored as a set of blocks and is not stored anywhere else.`
dbms: development. 2011-10-31 17:55:06 +00:00			`*/`
Merge ext into common 2021-06-15 19:55:21 +00:00			`class StorageMemory final : public shared_ptr_helper<StorageMemory>, public IStorage`
dbms: development. 2011-10-31 17:55:06 +00:00			`{`
Update storages. 2021-07-23 19:33:59 +00:00			`friend class MemorySink;`
Merge ext into common 2021-06-15 19:55:21 +00:00			`friend struct shared_ptr_helper<StorageMemory>;`
dbms: development. 2011-10-31 17:55:06 +00:00
			`public:`
Implement getDatabaseName() and rename() for all storages 2019-07-09 15:40:21 +00:00			`String getName() const override { return "Memory"; }`
dbms: development. 2011-10-31 17:55:06 +00:00
multiversion storage for StorageMemory 2020-10-04 16:28:36 +00:00			`size_t getSize() const { return data.get()->size(); }`
dbms: fixed problem with sending empty tables, few more fixes [METR-10071] 2014-03-19 15:07:29 +00:00
support dynamic subcolumns for Memory engine 2022-02-09 00:18:53 +00:00			`struct SnapshotData : public StorageSnapshot::Data`
			`{`
			`std::shared_ptr<const Blocks> blocks;`
			`};`

			`StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const override;`

Refactor Pipe [part 2]. 2020-08-03 13:54:14 +00:00			`Pipe read(`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`const Names & column_names,`
dynamic subcolumns: add snapshot for storage 2021-07-09 03:15:41 +00:00			`const StorageSnapshotPtr & storage_snapshot,`
Pass non-const SelectQueryInfo (and drop mutable qualifiers) 2020-09-20 17:52:17 +00:00			`SelectQueryInfo & query_info,`
Replace all Context references with std::weak_ptr (#22297) * Replace all Context references with std::weak_ptr * Fix shared context captured by value * Fix build * Fix Context with named sessions * Fix copy context * Fix gcc build * Merge with master and fix build * Fix gcc-9 build 2021-04-10 23:33:54 +00:00			`ContextPtr context,`
added IStorage::getQueryProcessingStage 2018-04-19 14:47:09 +00:00			`QueryProcessingStage::Enum processed_stage,`
IStorage::read() function, "max_block_size" parameter: change type UInt64 -> size_t. 2019-02-18 23:38:44 +00:00			`size_t max_block_size,`
Small refinements [#CLICKHOUSE-2]. 2017-06-02 15:54:39 +00:00			`unsigned num_streams) override;`
dbms: development. 2011-10-31 17:55:06 +00:00
Support parallel INSERT for more table engines 2020-08-26 16:41:30 +00:00			`bool supportsParallelInsert() const override { return true; }`
fix subcolumns with some storages 2020-12-22 16:40:53 +00:00			`bool supportsSubcolumns() const override { return true; }`

Disable excessive squashing of blocks for StorageMemory #13052 2021-02-07 01:49:52 +00:00			`/// Smaller blocks (e.g. 64K rows) are better for CPU cache.`
			`bool prefersLargeBlocks() const override { return false; }`

Performance improvement by Nikolai Kochetov 2021-02-12 21:26:25 +00:00			`bool hasEvenlyDistributedRead() const override { return true; }`

Update storages. 2021-07-23 19:33:59 +00:00			`SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override;`
dbms: development. 2011-10-31 17:55:06 +00:00
basic async DROP 2020-01-22 11:30:11 +00:00			`void drop() override;`
ISSUES-2259 support truncate syntax 2018-04-21 00:35:20 +00:00
Restrict mutations for engines which doesn't support them 2021-02-25 10:07:48 +00:00			`void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override;`
Replace all Context references with std::weak_ptr (#22297) * Replace all Context references with std::weak_ptr * Fix shared context captured by value * Fix build * Fix Context with named sessions * Fix copy context * Fix gcc build * Merge with master and fix build * Fix gcc-9 build 2021-04-10 23:33:54 +00:00			`void mutate(const MutationCommands & commands, ContextPtr context) override;`
add mutation support for StorageMemory 2020-09-22 09:23:46 +00:00
Replace all Context references with std::weak_ptr (#22297) * Replace all Context references with std::weak_ptr * Fix shared context captured by value * Fix build * Fix Context with named sessions * Fix copy context * Fix gcc build * Merge with master and fix build * Fix gcc-9 build 2021-04-10 23:33:54 +00:00			`void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override;`
ISSUES-2259 support truncate syntax 2018-04-21 00:35:20 +00:00
replace Context& to Settings& 2020-11-25 13:47:32 +00:00			`std::optional<UInt64> totalRows(const Settings &) const override;`
			`std::optional<UInt64> totalBytes(const Settings &) const override;`
Implement totalRows() for StorageMemory 2020-03-29 08:02:35 +00:00
Fix tests. 2020-09-04 08:36:47 +00:00			`/** Delays initialization of StorageMemory::read() until the first read is actually happen.`
			`* Usually, fore code like this:`
			`*`
			`* auto out = StorageMemory::write();`
			`* auto in = StorageMemory::read();`
			`* out->write(new_data);`
			`*`
			* `new_data` won't appear into `in`.
			* However, if delayReadForGlobalSubqueries is called, first read from `in` will check for new_data and return it.
			`*`
			`*`
			`* Why is delayReadForGlobalSubqueries needed?`
			`*`
			`* The fact is that when processing a query of the form`
			`* SELECT ... FROM remote_test WHERE column GLOBAL IN (subquery),`
			`* if the distributed remote_test table contains localhost as one of the servers,`
			`* the query will be interpreted locally again (and not sent over TCP, as in the case of a remote server).`
			`*`
			`* The query execution pipeline will be:`
			`* CreatingSets`
			`* subquery execution, filling the temporary table with _data1 (1)`
			`* CreatingSets`
			`* reading from the table _data1, creating the set (2)`
			`* read from the table subordinate to remote_test.`
			`*`
			`* (The second part of the pipeline under CreateSets is a reinterpretation of the query inside StorageDistributed,`
			`* the query differs in that the database name and tables are replaced with subordinates, and the subquery is replaced with _data1.)`
			`*`
			`* But when creating the pipeline, when creating the source (2), it will be found that the _data1 table is empty`
			`* (because the query has not started yet), and empty source will be returned as the source.`
			`* And then, when the query is executed, an empty set will be created in step (2).`
			`*`
			`* Therefore, we make the initialization of step (2) delayed`
			`* - so that it does not occur until step (1) is completed, on which the table will be populated.`
			`*/`
			`void delayReadForGlobalSubqueries() { delay_read_for_global_subqueries = true; }`

dbms: development. 2011-10-31 17:55:06 +00:00			`private:`
MemoryStorage sync comments and code 2021-04-06 10:41:48 +00:00			`/// MultiVersion data storage, so that we can copy the vector of blocks to readers.`
In memory compression: a prototype 2021-02-07 01:41:31 +00:00
Better interface 2021-02-12 00:25:00 +00:00			`MultiVersion<Blocks> data;`
dbms: added locks to storages Memory and Log [#CONV-2944]. 2012-11-30 04:28:13 +00:00
Implement totalRows() for StorageMemory 2020-03-29 08:02:35 +00:00			`mutable std::mutex mutex;`
dbms: StorageBuffer: development [#METR-13297]. 2014-10-26 00:01:36 +00:00
Fix tests. 2020-09-04 08:36:47 +00:00			`bool delay_read_for_global_subqueries = false;`

Lock-less totalRows/totalBytes + more clear postponed init 2020-10-06 13:45:17 +00:00			`std::atomic<size_t> total_size_bytes = 0;`
			`std::atomic<size_t> total_size_rows = 0;`

In memory compression: a prototype 2021-02-07 01:41:31 +00:00			`bool compress;`

Attempt to change ext::shared_ptr_helper (incomplete) [#CLICKHOUSE-2]. 2017-11-04 03:20:18 +00:00			`protected:`
In memory compression: a prototype 2021-02-07 01:41:31 +00:00			`StorageMemory(`
			`const StorageID & table_id_,`
			`ColumnsDescription columns_description_,`
			`ConstraintsDescription constraints_,`
Implement table comments 2021-04-23 12:18:23 +00:00			`const String & comment,`
In memory compression: a prototype 2021-02-07 01:41:31 +00:00			`bool compress_ = false);`
dbms: development. 2011-10-31 17:55:06 +00:00			`};`

			`}`