2016-10-18 14:18:37 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/IStorage.h>
|
2016-10-18 14:18:37 +00:00
|
|
|
#include <common/logger_useful.h>
|
|
|
|
|
2017-04-08 01:32:05 +00:00
|
|
|
#include <atomic>
|
2017-07-28 17:34:02 +00:00
|
|
|
#include <shared_mutex>
|
2021-06-15 19:55:21 +00:00
|
|
|
#include <common/shared_ptr_helper.h>
|
2017-04-08 01:32:05 +00:00
|
|
|
|
|
|
|
|
2016-10-18 14:18:37 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Forward declaration only.
/// NOTE(review): nothing else in this header refers to this type (StorageFile befriends
/// StorageFileSource instead), so this may be a leftover - confirm before removing.
class StorageFileBlockInputStream;
|
|
|
|
/// Forward declaration; StorageFile names this class in a `friend class` declaration.
class StorageFileBlockOutputStream;
|
|
|
|
|
2021-06-15 19:55:21 +00:00
|
|
|
/// Storage whose getName() is "File".
///
/// It can be constructed in three ways (see the protected constructors): over an already
/// opened file descriptor, over a path to a user's file, or over the directory of a table
/// that lives in a database.
///
/// All constructors are non-public and shared_ptr_helper<StorageFile> is a friend, so
/// instances are presumably created through that helper rather than directly.
class StorageFile final : public shared_ptr_helper<StorageFile>, public IStorage
{
    friend struct shared_ptr_helper<StorageFile>;

public:
    /// Returns the fixed name "File".
    std::string getName() const override { return "File"; }

    /// Overrides of virtual functions inherited from the base class.
    /// The metadata snapshot parameter is unnamed in all of them.
    Pipe read(
        const Names & column_names,
        const StorageMetadataPtr & /*metadata_snapshot*/,
        SelectQueryInfo & query_info,
        ContextPtr context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

    BlockOutputStreamPtr write(
        const ASTPtr & query,
        const StorageMetadataPtr & /*metadata_snapshot*/,
        ContextPtr context) override;

    void truncate(
        const ASTPtr & /*query*/,
        const StorageMetadataPtr & /* metadata_snapshot */,
        ContextPtr /* context */,
        TableExclusiveLockHolder &) override;

    void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override;

    bool storesDataOnDisk() const override;
    Strings getDataPaths() const override;

    /// Arguments taken by every StorageFile constructor.
    ///
    /// Note: `columns`, `constraints` and `comment` are held by reference, not copied,
    /// so the referenced objects must stay alive for as long as this struct is used.
    struct CommonArguments : public WithContext
    {
        StorageID table_id;
        std::string format_name;
        std::optional<FormatSettings> format_settings;
        std::string compression_method;
        const ColumnsDescription & columns;
        const ConstraintsDescription & constraints;
        const String & comment;
    };

    NamesAndTypesList getVirtuals() const override;

    /// Returns a list of paths for the given table path.
    /// `total_bytes_to_read` is a non-const reference, i.e. it can be modified by the call
    /// (presumably it receives the summed size of the listed files - confirm in the .cpp).
    static Strings getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read);

    /// Check if the format is column-oriented.
    /// It is useful because column oriented formats could effectively skip unknown columns
    /// So we can create a header of only required columns in read method and ask
    /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV.
    bool isColumnOriented() const;

protected:
    friend class StorageFileSource;
    friend class StorageFileBlockOutputStream;

    /// From file descriptor
    StorageFile(int table_fd_, CommonArguments args);

    /// From user's file
    StorageFile(const std::string & table_path_, const std::string & user_files_path, CommonArguments args);

    /// From table in database
    StorageFile(const std::string & relative_table_dir_path, CommonArguments args);

private:
    /// Takes only the common arguments.
    /// NOTE(review): presumably the protected constructors delegate to this one - confirm in the .cpp.
    explicit StorageFile(CommonArguments args);

    std::string format_name;
    // We use format settings from global context + CREATE query for File table
    // function -- in this case, format_settings is set.
    // For `file` table function, we use format settings from current user context,
    // in this case, format_settings is not set.
    std::optional<FormatSettings> format_settings;

    int table_fd = -1;
    String compression_method;

    std::string base_path;
    std::vector<std::string> paths;

    bool is_db_table = true;                     /// Table is stored in real database, not user's file
    bool use_table_fd = false;                   /// Use table_fd instead of path
    std::atomic<bool> table_fd_was_used{false};  /// To detect repeating reads from stdin
    off_t table_fd_init_offset = -1;             /// Initial position of fd, used for repeating reads

    /// Declared `mutable`, which allows const member functions to lock it.
    mutable std::shared_timed_mutex rwlock;

    Poco::Logger * log = &Poco::Logger::get("StorageFile");

    /// Total number of bytes to read (sums for multiple files in case of globs). Needed for progress bar.
    size_t total_bytes_to_read = 0;
};
|
|
|
|
|
|
|
|
}
|