2021-06-09 02:03:36 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Storages/FileLog/Buffer_fwd.h>
|
2021-09-10 17:21:03 +00:00
|
|
|
#include <Storages/FileLog/FileLogDirectoryWatcher.h>
|
2021-09-04 17:04:35 +00:00
|
|
|
#include <Storages/FileLog/FileLogSettings.h>
|
2021-06-09 02:03:36 +00:00
|
|
|
|
|
|
|
#include <Core/BackgroundSchedulePool.h>
|
|
|
|
#include <Storages/IStorage.h>
|
|
|
|
#include <Common/SettingsChanges.h>
|
|
|
|
|
2021-10-03 11:03:20 +00:00
|
|
|
#include <base/shared_ptr_helper.h>
|
2021-06-09 02:03:36 +00:00
|
|
|
|
|
|
|
#include <atomic>
|
2021-10-04 12:33:05 +00:00
|
|
|
#include <filesystem>
|
2021-09-04 17:04:35 +00:00
|
|
|
#include <fstream>
|
2021-10-04 12:33:05 +00:00
|
|
|
#include <mutex>
|
2021-10-07 12:15:36 +00:00
|
|
|
#include <optional>
|
2021-06-09 02:03:36 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2021-09-30 16:02:17 +00:00
|
|
|
/// Error codes referenced by the inline code in this header
/// (the findInMap helper throws with LOGICAL_ERROR).
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}
|
|
|
|
|
2021-07-04 08:52:05 +00:00
|
|
|
/// Table storage whose engine name is "FileLog".
///
/// The object keeps per-file bookkeeping (see FileInfos): a context per file name
/// (status, inode, optional input stream) and a meta record per inode (file name
/// plus two stored positions). Meta files live under root_meta_path and data files
/// under root_data_path.
///
/// NOTE(review): the declarations threadFunc() / streamToViews() and the TaskContext
/// member suggest that data is pushed to dependent views from a background task;
/// confirm against the implementation file.
class StorageFileLog final : public shared_ptr_helper<StorageFileLog>, public IStorage, WithContext
{
    /// The constructor is protected; shared_ptr_helper needs access to it.
    friend struct shared_ptr_helper<StorageFileLog>;

public:
    using Files = std::vector<String>;

    /// Engine name reported for this storage.
    std::string getName() const override { return "FileLog"; }

    /// Always reports true for this engine.
    bool noPushingToViews() const override { return true; }

    void startup() override;
    void shutdown() override;

    /// Direct SELECT from the table. Note the remark on `running_streams` below:
    /// concurrent selects are deliberately restricted.
    Pipe read(
        const Names & column_names,
        const StorageMetadataPtr & /*metadata_snapshot*/,
        SelectQueryInfo & query_info,
        ContextPtr context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

    void drop() override;

    /// drop() has to be called immediately so that the meta data directory is
    /// removed right away. Otherwise, if another FileLog table with the same name
    /// is created before this table is finally dropped, the late drop of this
    /// table would delete the meta data directory of the new table.
    bool dropTableImmediately() override { return true; }

    const auto & getFormatName() const { return format_name; }

    /// State of a tracked file.
    /// NOTE(review): only OPEN is documented in the original; the meaning of the
    /// remaining values is inferred from their names - confirm in the .cpp.
    enum class FileStatus
    {
        OPEN, /// the file is opened for the first time after table start up
        NO_CHANGE,
        UPDATED,
        REMOVED,
    };

    /// Runtime state of one file, looked up by file name (see FileNameToContext).
    struct FileContext
    {
        FileStatus status = FileStatus::OPEN;
        UInt64 inode{};
        /// Empty until a stream is created for the file.
        std::optional<std::ifstream> reader = std::nullopt;
    };

    /// Persistent state of one file, looked up by inode (see InodeToFileMeta).
    struct FileMeta
    {
        String file_name;
        /// Spelling ("writen") is kept as is: the name is used outside this header.
        UInt64 last_writen_position{};
        UInt64 last_open_end{};
    };

    using InodeToFileMeta = std::unordered_map<UInt64, FileMeta>;
    using FileNameToContext = std::unordered_map<String, FileContext>;

    /// Everything the storage knows about the files it tracks.
    struct FileInfos
    {
        InodeToFileMeta meta_by_inode;
        FileNameToContext context_by_name;
        /// File names without path.
        Names file_names;
    };

    /// Mutable access to the bookkeeping structure.
    auto & getFileInfos() { return file_infos; }

    /// root_meta_path joined with `file_name`.
    String getFullMetaPath(const String & file_name) const
    {
        return std::filesystem::path(root_meta_path) / file_name;
    }

    /// root_data_path joined with `file_name`.
    String getFullDataPath(const String & file_name) const
    {
        return std::filesystem::path(root_data_path) / file_name;
    }

    NamesAndTypesList getVirtuals() const override;

    static Names getVirtualColumnNames();

    static UInt64 getInode(const String & file_name);

    void openFilesAndSetPos();

    /// Used in FileLogSource once it has finished generating all blocks.
    /// Each stream is responsible for closing its own files and storing their meta.
    void closeFilesAndStoreMeta(size_t start, size_t end);

    /// Used in FileLogSource after every generated block.
    void storeMetas(size_t start, size_t end);

    static void assertStreamGood(const std::ifstream & reader);

    /// Returns a reference to the value stored under `key` in `map`.
    /// Throws Exception with ErrorCodes::LOGICAL_ERROR when the key is absent.
    template <typename K, typename V>
    static V & findInMap(std::unordered_map<K, V> & map, const K & key)
    {
        const auto found = map.find(key);
        if (found == map.end())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "The key {} doesn't exist.", key);
        return found->second;
    }

    /// NOTE(review): presumably these adjust `running_streams` - confirm in the .cpp.
    void increaseStreams();
    void reduceStreams();

protected:
    StorageFileLog(
        const StorageID & table_id_,
        ContextPtr context_,
        const ColumnsDescription & columns_,
        const String & path_,
        const String & relative_data_path_,
        const String & format_name_,
        std::unique_ptr<FileLogSettings> settings,
        const String & comment,
        bool attach);

private:
    std::unique_ptr<FileLogSettings> filelog_settings;

    const String path;
    /// For the meta files.
    const String relative_data_path;
    bool path_is_directory = true;

    /// If the path argument of the table is a regular file, this equals user_files_path;
    /// otherwise it equals user_files_path/ + path_argument/ (e.g. path).
    String root_data_path;
    /// relative_data_path/ + table_name/
    String root_meta_path;

    FileInfos file_infos;

    const String format_name;
    Poco::Logger * log;

    std::mutex status_mutex;

    std::unique_ptr<FileLogDirectoryWatcher> directory_watch = nullptr;

    uint64_t milliseconds_to_wait;

    /// A scheduled task together with its cancellation flag.
    struct TaskContext
    {
        BackgroundSchedulePool::TaskHolder holder;
        std::atomic<bool> stream_cancelled {false};

        explicit TaskContext(BackgroundSchedulePool::TaskHolder && task_) : holder(std::move(task_)) {}
    };
    std::shared_ptr<TaskContext> task;

    /// To avoid a data race, a naive trick is used to forbid two selects from
    /// executing simultaneously (reading is not really useful in this engine anyway).
    /// This atomic records the number of streams that have not finished yet;
    /// while there are unfinished streams, a later select must not be executed.
    std::atomic<int> running_streams{0};

    void loadFiles();

    void loadMetaFiles(bool attach);

    void threadFunc();

    size_t getPollMaxBatchSize() const;
    size_t getMaxBlockSize() const;
    size_t getPollTimeoutMillisecond() const;

    bool streamToViews();
    bool checkDependencies(const StorageID & table_id);

    bool updateFileInfos();

    /// Used in shutdown().
    void serialize() const;
    /// Single-file overload.
    /// NOTE(review): the original comment said "Used in FileSource
    /// closeFileAndStoreMeta(file_name)", which matches no declaration in this
    /// header; the closest one is closeFilesAndStoreMeta(start, end) - confirm the caller.
    void serialize(UInt64 inode, const FileMeta & file_meta) const;

    void deserialize();

    static void checkOffsetIsValid(const String & full_name, UInt64 offset);
};
|
|
|
|
|
|
|
|
}
|