2022-08-31 12:53:26 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <boost/noncopyable.hpp>
|
|
|
|
|
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Disks/TemporaryFileOnDisk.h>
|
2022-08-31 17:17:31 +00:00
|
|
|
#include <Disks/IVolume.h>
|
2022-10-05 16:35:10 +00:00
|
|
|
#include <Common/CurrentMetrics.h>
|
|
|
|
|
|
|
|
|
|
|
|
namespace CurrentMetrics
|
|
|
|
{
|
|
|
|
extern const Metric TemporaryFilesUnknown;
|
|
|
|
}
|
2022-08-31 12:53:26 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
/// Forward declarations of the classes declared in this header.
class TemporaryDataOnDiskScope;
class TemporaryDataOnDisk;
class TemporaryFileStream;

/// Pointer aliases: scopes use shared ownership, data holders and file streams use unique ownership.
using TemporaryDataOnDiskScopePtr = std::shared_ptr<TemporaryDataOnDiskScope>;
using TemporaryDataOnDiskPtr = std::unique_ptr<TemporaryDataOnDisk>;
using TemporaryFileStreamPtr = std::unique_ptr<TemporaryFileStream>;
|
2022-08-31 12:53:26 +00:00
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
/*
|
2022-09-21 12:51:46 +00:00
|
|
|
* Used to account amount of temporary data written to disk.
|
2022-09-15 10:19:39 +00:00
|
|
|
* If limit is set, throws exception if limit is exceeded.
|
2022-09-16 11:00:28 +00:00
|
|
|
* Data can be nested, so parent scope accounts all data written by children.
|
2022-09-15 10:19:39 +00:00
|
|
|
* Scopes are: global -> per-user -> per-query -> per-purpose (sorting, aggregation, etc).
|
|
|
|
*/
|
|
|
|
class TemporaryDataOnDiskScope : boost::noncopyable
|
2022-08-31 12:53:26 +00:00
|
|
|
{
|
|
|
|
public:
|
2022-09-15 10:19:39 +00:00
|
|
|
struct StatAtomic
|
2022-08-31 17:17:31 +00:00
|
|
|
{
|
|
|
|
std::atomic<size_t> compressed_size;
|
|
|
|
std::atomic<size_t> uncompressed_size;
|
|
|
|
};
|
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
explicit TemporaryDataOnDiskScope(VolumePtr volume_, size_t limit_)
|
2022-09-01 12:22:49 +00:00
|
|
|
: volume(std::move(volume_)), limit(limit_)
|
2022-08-31 12:53:26 +00:00
|
|
|
{}
|
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
explicit TemporaryDataOnDiskScope(TemporaryDataOnDiskScopePtr parent_, size_t limit_)
|
2022-09-01 12:22:49 +00:00
|
|
|
: parent(std::move(parent_)), volume(parent->volume), limit(limit_)
|
2022-08-31 12:53:26 +00:00
|
|
|
{}
|
|
|
|
|
2022-08-31 17:17:31 +00:00
|
|
|
/// TODO: remove
|
|
|
|
/// Refactor all code that uses volume directly to use TemporaryDataOnDisk.
|
|
|
|
VolumePtr getVolume() const { return volume; }
|
2022-08-31 12:53:26 +00:00
|
|
|
|
2022-08-31 17:17:31 +00:00
|
|
|
protected:
|
2022-09-21 12:51:46 +00:00
|
|
|
void deltaAllocAndCheck(int compressed_delta, int uncompressed_delta);
|
2022-08-31 12:53:26 +00:00
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
TemporaryDataOnDiskScopePtr parent = nullptr;
|
2022-08-31 12:53:26 +00:00
|
|
|
VolumePtr volume;
|
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
StatAtomic stat;
|
|
|
|
size_t limit = 0;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Holds the set of temporary files.
|
|
|
|
* New file stream is created with `createStream`.
|
|
|
|
* Streams are owned by this object and will be deleted when it is deleted.
|
|
|
|
* It's a leaf node in temorarty data scope tree.
|
|
|
|
*/
|
|
|
|
class TemporaryDataOnDisk : private TemporaryDataOnDiskScope
|
|
|
|
{
|
2022-09-21 12:51:46 +00:00
|
|
|
friend class TemporaryFileStream; /// to allow it to call `deltaAllocAndCheck` to account data
|
2022-10-05 16:35:10 +00:00
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
public:
|
|
|
|
using TemporaryDataOnDiskScope::StatAtomic;
|
|
|
|
|
|
|
|
explicit TemporaryDataOnDisk(TemporaryDataOnDiskScopePtr parent_)
|
2022-10-05 16:35:10 +00:00
|
|
|
: TemporaryDataOnDiskScope(std::move(parent_), /* limit_ = */ 0)
|
|
|
|
{}
|
|
|
|
|
|
|
|
explicit TemporaryDataOnDisk(TemporaryDataOnDiskScopePtr parent_, CurrentMetrics::Value metric_scope)
|
|
|
|
: TemporaryDataOnDiskScope(std::move(parent_), /* limit_ = */ 0)
|
|
|
|
, current_metric_scope(metric_scope)
|
2022-09-15 10:19:39 +00:00
|
|
|
{}
|
|
|
|
|
2022-09-28 10:22:16 +00:00
|
|
|
/// If max_file_size > 0, then check that there's enough space on the disk and throw an exception in case of lack of free space
|
2022-10-05 16:35:10 +00:00
|
|
|
TemporaryFileStream & createStream(const Block & header, size_t max_file_size = 0);
|
2022-09-15 10:19:39 +00:00
|
|
|
|
2022-09-28 10:22:16 +00:00
|
|
|
std::vector<TemporaryFileStream *> getStreams() const;
|
2022-09-21 12:51:46 +00:00
|
|
|
bool empty() const;
|
2022-09-15 10:19:39 +00:00
|
|
|
|
|
|
|
const StatAtomic & getStat() const { return stat; }
|
|
|
|
|
|
|
|
private:
|
2022-09-21 12:51:46 +00:00
|
|
|
mutable std::mutex mutex;
|
|
|
|
std::vector<TemporaryFileStreamPtr> streams TSA_GUARDED_BY(mutex);
|
2022-10-05 16:35:10 +00:00
|
|
|
|
|
|
|
typename CurrentMetrics::Value current_metric_scope = CurrentMetrics::TemporaryFilesUnknown;
|
2022-08-31 12:53:26 +00:00
|
|
|
};
|
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
/*
|
|
|
|
* Data can be written into this stream and then read.
|
|
|
|
* After finish writing, call `finishWriting` and then `read` to read the data.
|
|
|
|
* Account amount of data written to disk in parent scope.
|
|
|
|
*/
|
2022-09-01 12:22:49 +00:00
|
|
|
class TemporaryFileStream : boost::noncopyable
|
2022-08-31 12:53:26 +00:00
|
|
|
{
|
|
|
|
public:
|
2022-09-15 10:19:39 +00:00
|
|
|
struct Stat
|
|
|
|
{
|
|
|
|
/// Statistics for file
|
|
|
|
/// Non-atomic because we don't allow to `read` or `write` into single file from multiple threads
|
|
|
|
size_t compressed_size = 0;
|
|
|
|
size_t uncompressed_size = 0;
|
2022-10-05 16:35:10 +00:00
|
|
|
size_t num_rows = 0;
|
2022-09-15 10:19:39 +00:00
|
|
|
};
|
2022-08-31 12:53:26 +00:00
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_);
|
2022-08-31 12:53:26 +00:00
|
|
|
|
|
|
|
void write(const Block & block);
|
2022-09-21 13:44:35 +00:00
|
|
|
Stat finishWriting();
|
2022-08-31 12:53:26 +00:00
|
|
|
bool isWriteFinished() const;
|
|
|
|
|
|
|
|
Block read();
|
|
|
|
|
2022-10-05 16:35:10 +00:00
|
|
|
const String path() const { return file->getPath(); }
|
2022-09-15 10:19:39 +00:00
|
|
|
Block getHeader() const { return header; }
|
2022-09-01 12:22:49 +00:00
|
|
|
|
2022-10-05 16:35:10 +00:00
|
|
|
/// Read finished and file released
|
|
|
|
bool isEof() const;
|
|
|
|
|
2022-08-31 12:53:26 +00:00
|
|
|
~TemporaryFileStream();
|
|
|
|
|
|
|
|
private:
|
2022-09-21 12:51:46 +00:00
|
|
|
void updateAllocAndCheck();
|
2022-09-01 12:22:49 +00:00
|
|
|
|
2022-10-05 16:35:10 +00:00
|
|
|
/// Release everything, close reader and writer, delete file
|
|
|
|
void release();
|
2022-09-22 17:39:30 +00:00
|
|
|
|
2022-09-01 12:22:49 +00:00
|
|
|
TemporaryDataOnDisk * parent;
|
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
Block header;
|
2022-09-01 12:22:49 +00:00
|
|
|
|
2022-08-31 12:53:26 +00:00
|
|
|
TemporaryFileOnDiskHolder file;
|
|
|
|
|
2022-09-15 10:19:39 +00:00
|
|
|
Stat stat;
|
|
|
|
|
2022-08-31 12:53:26 +00:00
|
|
|
struct OutputWriter;
|
|
|
|
std::unique_ptr<OutputWriter> out_writer;
|
|
|
|
|
|
|
|
struct InputReader;
|
|
|
|
std::unique_ptr<InputReader> in_reader;
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|