Add gzip read/write support to the File, S3, URL and HDFS storages

This commit is contained in:
Andrei Bodrov 2019-11-19 15:46:07 +03:00
parent 15cb620f42
commit 8cbc52e711
38 changed files with 311 additions and 118 deletions

View File

@ -407,11 +407,11 @@ void HTTPHandler::processQuery(
{ {
if (http_request_compression_method_str == "gzip") if (http_request_compression_method_str == "gzip")
{ {
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, CompressionMethod::Gzip); in_post = std::make_unique<ZlibInflatingReadBuffer>(std::move(in_post_raw), CompressionMethod::Gzip);
} }
else if (http_request_compression_method_str == "deflate") else if (http_request_compression_method_str == "deflate")
{ {
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, CompressionMethod::Zlib); in_post = std::make_unique<ZlibInflatingReadBuffer>(std::move(in_post_raw), CompressionMethod::Zlib);
} }
#if USE_BROTLI #if USE_BROTLI
else if (http_request_compression_method_str == "br") else if (http_request_compression_method_str == "br")

View File

@ -5,7 +5,9 @@
#include <DataStreams/IBlockStream_fwd.h> #include <DataStreams/IBlockStream_fwd.h>
#include <DataStreams/SizeLimits.h> #include <DataStreams/SizeLimits.h>
#include <DataStreams/ExecutionSpeedLimits.h> #include <DataStreams/ExecutionSpeedLimits.h>
#include <IO/CompressionMethod.h>
#include <IO/Progress.h> #include <IO/Progress.h>
#include <IO/ZlibInflatingReadBuffer.h>
#include <Storages/TableStructureLockHolder.h> #include <Storages/TableStructureLockHolder.h>
#include <Common/TypePromotion.h> #include <Common/TypePromotion.h>
@ -228,6 +230,17 @@ public:
/// Enable calculation of minimums and maximums by the result columns. /// Enable calculation of minimums and maximums by the result columns.
void enableExtremes() { enabled_extremes = true; } void enableExtremes() { enabled_extremes = true; }
template <class TReadBuffer, class... Types>
std::unique_ptr<ReadBuffer> getBuffer(const DB::CompressionMethod method, Types... args)
{
if (method == DB::CompressionMethod::Gzip)
{
auto read_buf = std::make_unique<TReadBuffer>(args...);
return std::make_unique<ZlibInflatingReadBuffer>(std::move(read_buf), method);
}
return std::make_unique<TReadBuffer>(args...);
}
protected: protected:
/// Order is important: `table_locks` must be destroyed after `children` so that tables from /// Order is important: `table_locks` must be destroyed after `children` so that tables from
/// which child streams read are protected by the locks during the lifetime of the child streams. /// which child streams read are protected by the locks during the lifetime of the child streams.

View File

@ -3,6 +3,8 @@
#include <Core/Block.h> #include <Core/Block.h>
#include <DataStreams/IBlockStream_fwd.h> #include <DataStreams/IBlockStream_fwd.h>
#include <Storages/TableStructureLockHolder.h> #include <Storages/TableStructureLockHolder.h>
#include <IO/CompressionMethod.h>
#include <IO/ZlibDeflatingWriteBuffer.h>
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
@ -63,6 +65,19 @@ public:
*/ */
void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); } void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); }
template <class TWriteBuffer, class... Types>
std::unique_ptr<WriteBuffer> getBuffer(const DB::CompressionMethod method, Types... args)
{
if (method == DB::CompressionMethod::Gzip)
{
auto write_buf = std::make_unique<TWriteBuffer>(args...);
return std::make_unique<ZlibDeflatingWriteBuffer>(std::move(write_buf), method, (int) 1 /* compression level */);
}
return std::make_unique<TWriteBuffer>(args...);
}
virtual void finalize() {}
private: private:
std::vector<TableStructureReadLockHolder> table_locks; std::vector<TableStructureReadLockHolder> table_locks;
}; };

View File

@ -12,6 +12,7 @@ enum class CompressionMethod
/// This option corresponds to HTTP Content-Encoding: deflate. /// This option corresponds to HTTP Content-Encoding: deflate.
Zlib, Zlib,
Brotli, Brotli,
None
}; };
} }

View File

@ -90,6 +90,9 @@ public:
++pos; ++pos;
} }
void virtual sync() {}
void virtual finalize() {}
private: private:
/** Write the data in the buffer (from the beginning of the buffer to the current position). /** Write the data in the buffer (from the beginning of the buffer to the current position).
* Throw an exception if something is wrong. * Throw an exception if something is wrong.

View File

@ -113,8 +113,8 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
response.set("Content-Encoding", "gzip"); response.set("Content-Encoding", "gzip");
response_body_ostr = &(response.send()); response_body_ostr = &(response.send());
#endif #endif
out_raw.emplace(*response_body_ostr); out_raw = std::make_unique<WriteBufferFromOStream>(*response_body_ostr);
deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin()); deflating_buf.emplace(std::move(out_raw), compression_method, compression_level, working_buffer.size(), working_buffer.begin());
out = &*deflating_buf; out = &*deflating_buf;
} }
else if (compression_method == CompressionMethod::Zlib) else if (compression_method == CompressionMethod::Zlib)
@ -125,8 +125,8 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
response.set("Content-Encoding", "deflate"); response.set("Content-Encoding", "deflate");
response_body_ostr = &(response.send()); response_body_ostr = &(response.send());
#endif #endif
out_raw.emplace(*response_body_ostr); out_raw = std::make_unique<WriteBufferFromOStream>(*response_body_ostr);
deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin()); deflating_buf.emplace(std::move(out_raw), compression_method, compression_level, working_buffer.size(), working_buffer.begin());
out = &*deflating_buf; out = &*deflating_buf;
} }
#if USE_BROTLI #if USE_BROTLI
@ -138,7 +138,7 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
response.set("Content-Encoding", "br"); response.set("Content-Encoding", "br");
response_body_ostr = &(response.send()); response_body_ostr = &(response.send());
#endif #endif
out_raw.emplace(*response_body_ostr); out_raw = std::make_unique<WriteBufferFromOStream>(*response_body_ostr);
brotli_buf.emplace(*out_raw, compression_level, working_buffer.size(), working_buffer.begin()); brotli_buf.emplace(*out_raw, compression_level, working_buffer.size(), working_buffer.begin());
out = &*brotli_buf; out = &*brotli_buf;
} }
@ -155,7 +155,7 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
response_body_ostr = &(response.send()); response_body_ostr = &(response.send());
#endif #endif
out_raw.emplace(*response_body_ostr, working_buffer.size(), working_buffer.begin()); out_raw = std::make_unique<WriteBufferFromOStream>(*response_body_ostr, working_buffer.size(), working_buffer.begin());
out = &*out_raw; out = &*out_raw;
} }
} }

View File

@ -60,7 +60,7 @@ private:
std::ostream * response_header_ostr = nullptr; std::ostream * response_header_ostr = nullptr;
#endif #endif
std::optional<WriteBufferFromOStream> out_raw; std::unique_ptr<WriteBufferFromOStream> out_raw;
std::optional<ZlibDeflatingWriteBuffer> deflating_buf; std::optional<ZlibDeflatingWriteBuffer> deflating_buf;
#if USE_BROTLI #if USE_BROTLI
std::optional<BrotliWriteBuffer> brotli_buf; std::optional<BrotliWriteBuffer> brotli_buf;

View File

@ -6,14 +6,14 @@ namespace DB
{ {
ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer( ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
WriteBuffer & out_, std::unique_ptr<WriteBuffer> out_,
CompressionMethod compression_method, CompressionMethod compression_method,
int compression_level, int compression_level,
size_t buf_size, size_t buf_size,
char * existing_memory, char * existing_memory,
size_t alignment) size_t alignment)
: BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment) : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
, out(out_) , out(std::move(out_))
{ {
zstr.zalloc = nullptr; zstr.zalloc = nullptr;
zstr.zfree = nullptr; zstr.zfree = nullptr;
@ -64,18 +64,18 @@ void ZlibDeflatingWriteBuffer::nextImpl()
do do
{ {
out.nextIfAtEnd(); out->nextIfAtEnd();
zstr.next_out = reinterpret_cast<unsigned char *>(out.position()); zstr.next_out = reinterpret_cast<unsigned char *>(out->position());
zstr.avail_out = out.buffer().end() - out.position(); zstr.avail_out = out->buffer().end() - out->position();
int rc = deflate(&zstr, Z_NO_FLUSH); int rc = deflate(&zstr, Z_NO_FLUSH);
out.position() = out.buffer().end() - zstr.avail_out; out->position() = out->buffer().end() - zstr.avail_out;
// Unpoison the result of deflate explicitly. It uses some custom SSE algo // Unpoison the result of deflate explicitly. It uses some custom SSE algo
// for computing CRC32, and it looks like msan is unable to comprehend // for computing CRC32, and it looks like msan is unable to comprehend
// it fully, so it complains about the resulting value depending on the // it fully, so it complains about the resulting value depending on the
// uninitialized padding of the input buffer. // uninitialized padding of the input buffer.
__msan_unpoison(out.position(), zstr.avail_out); __msan_unpoison(out->position(), zstr.avail_out);
if (rc != Z_OK) if (rc != Z_OK)
throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED); throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED);
@ -92,18 +92,18 @@ void ZlibDeflatingWriteBuffer::finish()
while (true) while (true)
{ {
out.nextIfAtEnd(); out->nextIfAtEnd();
zstr.next_out = reinterpret_cast<unsigned char *>(out.position()); zstr.next_out = reinterpret_cast<unsigned char *>(out->position());
zstr.avail_out = out.buffer().end() - out.position(); zstr.avail_out = out->buffer().end() - out->position();
int rc = deflate(&zstr, Z_FINISH); int rc = deflate(&zstr, Z_FINISH);
out.position() = out.buffer().end() - zstr.avail_out; out->position() = out->buffer().end() - zstr.avail_out;
// Unpoison the result of deflate explicitly. It uses some custom SSE algo // Unpoison the result of deflate explicitly. It uses some custom SSE algo
// for computing CRC32, and it looks like msan is unable to comprehend // for computing CRC32, and it looks like msan is unable to comprehend
// it fully, so it complains about the resulting value depending on the // it fully, so it complains about the resulting value depending on the
// uninitialized padding of the input buffer. // uninitialized padding of the input buffer.
__msan_unpoison(out.position(), zstr.avail_out); __msan_unpoison(out->position(), zstr.avail_out);
if (rc == Z_STREAM_END) if (rc == Z_STREAM_END)
{ {

View File

@ -20,7 +20,7 @@ class ZlibDeflatingWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
{ {
public: public:
ZlibDeflatingWriteBuffer( ZlibDeflatingWriteBuffer(
WriteBuffer & out_, std::unique_ptr<WriteBuffer> out_,
CompressionMethod compression_method, CompressionMethod compression_method,
int compression_level, int compression_level,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
@ -37,7 +37,7 @@ public:
private: private:
void nextImpl() override; void nextImpl() override;
WriteBuffer & out; std::unique_ptr<WriteBuffer> out;
z_stream zstr; z_stream zstr;
bool finished = false; bool finished = false;
}; };

View File

@ -5,13 +5,13 @@ namespace DB
{ {
ZlibInflatingReadBuffer::ZlibInflatingReadBuffer( ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
ReadBuffer & in_, std::unique_ptr<ReadBuffer> in_,
CompressionMethod compression_method, CompressionMethod compression_method,
size_t buf_size, size_t buf_size,
char * existing_memory, char * existing_memory,
size_t alignment) size_t alignment)
: BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment) : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
, in(in_) , in(std::move(in_))
, eof(false) , eof(false)
{ {
zstr.zalloc = nullptr; zstr.zalloc = nullptr;
@ -49,21 +49,21 @@ bool ZlibInflatingReadBuffer::nextImpl()
if (!zstr.avail_in) if (!zstr.avail_in)
{ {
in.nextIfAtEnd(); in->nextIfAtEnd();
zstr.next_in = reinterpret_cast<unsigned char *>(in.position()); zstr.next_in = reinterpret_cast<unsigned char *>(in->position());
zstr.avail_in = in.buffer().end() - in.position(); zstr.avail_in = in->buffer().end() - in->position();
} }
zstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin()); zstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin());
zstr.avail_out = internal_buffer.size(); zstr.avail_out = internal_buffer.size();
int rc = inflate(&zstr, Z_NO_FLUSH); int rc = inflate(&zstr, Z_NO_FLUSH);
in.position() = in.buffer().end() - zstr.avail_in; in->position() = in->buffer().end() - zstr.avail_in;
working_buffer.resize(internal_buffer.size() - zstr.avail_out); working_buffer.resize(internal_buffer.size() - zstr.avail_out);
if (rc == Z_STREAM_END) if (rc == Z_STREAM_END)
{ {
if (in.eof()) if (in->eof())
{ {
eof = true; eof = true;
return working_buffer.size() != 0; return working_buffer.size() != 0;

View File

@ -21,7 +21,7 @@ class ZlibInflatingReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{ {
public: public:
ZlibInflatingReadBuffer( ZlibInflatingReadBuffer(
ReadBuffer & in_, std::unique_ptr<ReadBuffer> in_,
CompressionMethod compression_method, CompressionMethod compression_method,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr, char * existing_memory = nullptr,
@ -32,7 +32,7 @@ public:
private: private:
bool nextImpl() override; bool nextImpl() override;
ReadBuffer & in; std::unique_ptr<ReadBuffer> in;
z_stream zstr; z_stream zstr;
bool eof; bool eof;
}; };

View File

@ -22,8 +22,8 @@ try
Stopwatch stopwatch; Stopwatch stopwatch;
{ {
DB::WriteBufferFromFile buf("test_zlib_buffers.gz", DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC); auto buf = std::make_unique<DB::WriteBufferFromFile>("test_zlib_buffers.gz", DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC);
DB::ZlibDeflatingWriteBuffer deflating_buf(buf, DB::CompressionMethod::Gzip, /* compression_level = */ 3); DB::ZlibDeflatingWriteBuffer deflating_buf(std::move(buf), DB::CompressionMethod::Gzip, /* compression_level = */ 3);
stopwatch.restart(); stopwatch.restart();
for (size_t i = 0; i < n; ++i) for (size_t i = 0; i < n; ++i)
@ -40,8 +40,8 @@ try
} }
{ {
DB::ReadBufferFromFile buf("test_zlib_buffers.gz"); auto buf = std::make_unique<DB::ReadBufferFromFile>("test_zlib_buffers.gz");
DB::ZlibInflatingReadBuffer inflating_buf(buf, DB::CompressionMethod::Gzip); DB::ZlibInflatingReadBuffer inflating_buf(std::move(buf), DB::CompressionMethod::Gzip);
stopwatch.restart(); stopwatch.restart();
for (size_t i = 0; i < n; ++i) for (size_t i = 0; i < n; ++i)

View File

@ -3,6 +3,7 @@
#include <Storages/AlterCommands.h> #include <Storages/AlterCommands.h>
#include <Parsers/ASTCreateQuery.h> #include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <Processors/Executors/TreeExecutorBlockInputStream.h> #include <Processors/Executors/TreeExecutorBlockInputStream.h>
@ -445,4 +446,21 @@ BlockInputStreams IStorage::read(
return res; return res;
} }
DB::CompressionMethod IStorage::chooseCompressionMethod(const String & uri, const String & compression_method)
{
if (compression_method == "auto" || compression_method == "")
{
if (endsWith(uri, ".gz"))
return DB::CompressionMethod::Gzip;
else
return DB::CompressionMethod::None;
}
else if (compression_method == "gzip")
return DB::CompressionMethod::Gzip;
else if (compression_method == "none")
return DB::CompressionMethod::None;
else
throw Exception("Only auto, none, gzip supported as compression method", ErrorCodes::NOT_IMPLEMENTED);
}
} }

View File

@ -5,6 +5,7 @@
#include <DataStreams/IBlockStream_fwd.h> #include <DataStreams/IBlockStream_fwd.h>
#include <Databases/IDatabase.h> #include <Databases/IDatabase.h>
#include <Interpreters/CancellationCode.h> #include <Interpreters/CancellationCode.h>
#include <IO/CompressionMethod.h>
#include <Storages/IStorage_fwd.h> #include <Storages/IStorage_fwd.h>
#include <Storages/SelectQueryInfo.h> #include <Storages/SelectQueryInfo.h>
#include <Storages/TableStructureLockHolder.h> #include <Storages/TableStructureLockHolder.h>
@ -434,6 +435,8 @@ public:
return {}; return {};
} }
static DB::CompressionMethod chooseCompressionMethod(const String & uri, const String & compression_method);
private: private:
/// You always need to take the next three locks in this order. /// You always need to take the next three locks in this order.

View File

@ -127,9 +127,10 @@ StorageFile::StorageFile(
const std::string & format_name_, const std::string & format_name_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
Context & context_) Context & context_,
const String & compression_method_ = "")
: :
table_name(table_name_), database_name(database_name_), format_name(format_name_), context_global(context_), table_fd(table_fd_) table_name(table_name_), database_name(database_name_), format_name(format_name_), context_global(context_), table_fd(table_fd_), compression_method(compression_method_)
{ {
setColumns(columns_); setColumns(columns_);
setConstraints(constraints_); setConstraints(constraints_);
@ -178,7 +179,10 @@ StorageFile::StorageFile(
class StorageFileBlockInputStream : public IBlockInputStream class StorageFileBlockInputStream : public IBlockInputStream
{ {
public: public:
StorageFileBlockInputStream(std::shared_ptr<StorageFile> storage_, const Context & context, UInt64 max_block_size, std::string file_path) StorageFileBlockInputStream(std::shared_ptr<StorageFile> storage_,
const Context & context, UInt64 max_block_size,
std::string file_path,
const CompressionMethod compression_method)
: storage(std::move(storage_)) : storage(std::move(storage_))
{ {
if (storage->use_table_fd) if (storage->use_table_fd)
@ -199,12 +203,12 @@ public:
} }
storage->table_fd_was_used = true; storage->table_fd_was_used = true;
read_buf = std::make_unique<ReadBufferFromFileDescriptor>(storage->table_fd); read_buf = getBuffer<ReadBufferFromFileDescriptor>(compression_method, storage->table_fd);
} }
else else
{ {
shared_lock = std::shared_lock(storage->rwlock); shared_lock = std::shared_lock(storage->rwlock);
read_buf = std::make_unique<ReadBufferFromFile>(file_path); read_buf = getBuffer<ReadBufferFromFile>(compression_method, file_path);
} }
reader = FormatFactory::instance().getInput(storage->format_name, *read_buf, storage->getSampleBlock(), context, max_block_size); reader = FormatFactory::instance().getInput(storage->format_name, *read_buf, storage->getSampleBlock(), context, max_block_size);
@ -235,7 +239,7 @@ public:
private: private:
std::shared_ptr<StorageFile> storage; std::shared_ptr<StorageFile> storage;
Block sample_block; Block sample_block;
std::unique_ptr<ReadBufferFromFileDescriptor> read_buf; std::unique_ptr<ReadBuffer> read_buf;
BlockInputStreamPtr reader; BlockInputStreamPtr reader;
std::shared_lock<std::shared_mutex> shared_lock; std::shared_lock<std::shared_mutex> shared_lock;
@ -260,7 +264,7 @@ BlockInputStreams StorageFile::read(
for (const auto & file_path : paths) for (const auto & file_path : paths)
{ {
BlockInputStreamPtr cur_block = std::make_shared<StorageFileBlockInputStream>( BlockInputStreamPtr cur_block = std::make_shared<StorageFileBlockInputStream>(
std::static_pointer_cast<StorageFile>(shared_from_this()), context, max_block_size, file_path); std::static_pointer_cast<StorageFile>(shared_from_this()), context, max_block_size, file_path, IStorage::chooseCompressionMethod(file_path, compression_method));
blocks_input.push_back(column_defaults.empty() ? cur_block : std::make_shared<AddingDefaultsBlockInputStream>(cur_block, column_defaults, context)); blocks_input.push_back(column_defaults.empty() ? cur_block : std::make_shared<AddingDefaultsBlockInputStream>(cur_block, column_defaults, context));
} }
return blocks_input; return blocks_input;
@ -270,7 +274,8 @@ BlockInputStreams StorageFile::read(
class StorageFileBlockOutputStream : public IBlockOutputStream class StorageFileBlockOutputStream : public IBlockOutputStream
{ {
public: public:
explicit StorageFileBlockOutputStream(StorageFile & storage_) explicit StorageFileBlockOutputStream(StorageFile & storage_,
const CompressionMethod compression_method)
: storage(storage_), lock(storage.rwlock) : storage(storage_), lock(storage.rwlock)
{ {
if (storage.use_table_fd) if (storage.use_table_fd)
@ -280,13 +285,13 @@ public:
* INSERT data; SELECT *; last SELECT returns only insert_data * INSERT data; SELECT *; last SELECT returns only insert_data
*/ */
storage.table_fd_was_used = true; storage.table_fd_was_used = true;
write_buf = std::make_unique<WriteBufferFromFileDescriptor>(storage.table_fd); write_buf = getBuffer<WriteBufferFromFileDescriptor>(compression_method, storage.table_fd);
} }
else else
{ {
if (storage.paths.size() != 1) if (storage.paths.size() != 1)
throw Exception("Table '" + storage.table_name + "' is in readonly mode because of globs in filepath", ErrorCodes::DATABASE_ACCESS_DENIED); throw Exception("Table '" + storage.table_name + "' is in readonly mode because of globs in filepath", ErrorCodes::DATABASE_ACCESS_DENIED);
write_buf = std::make_unique<WriteBufferFromFile>(storage.paths[0], DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT); write_buf = getBuffer<WriteBufferFromFile>(compression_method, storage.paths[0], DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT);
} }
writer = FormatFactory::instance().getOutput(storage.format_name, *write_buf, storage.getSampleBlock(), storage.context_global); writer = FormatFactory::instance().getOutput(storage.format_name, *write_buf, storage.getSampleBlock(), storage.context_global);
@ -317,7 +322,7 @@ public:
private: private:
StorageFile & storage; StorageFile & storage;
std::unique_lock<std::shared_mutex> lock; std::unique_lock<std::shared_mutex> lock;
std::unique_ptr<WriteBufferFromFileDescriptor> write_buf; std::unique_ptr<WriteBuffer> write_buf;
BlockOutputStreamPtr writer; BlockOutputStreamPtr writer;
}; };
@ -325,7 +330,8 @@ BlockOutputStreamPtr StorageFile::write(
const ASTPtr & /*query*/, const ASTPtr & /*query*/,
const Context & /*context*/) const Context & /*context*/)
{ {
return std::make_shared<StorageFileBlockOutputStream>(*this); return std::make_shared<StorageFileBlockOutputStream>(*this,
IStorage::chooseCompressionMethod(paths[0], compression_method));
} }
Strings StorageFile::getDataPaths() const Strings StorageFile::getDataPaths() const
@ -361,9 +367,9 @@ void registerStorageFile(StorageFactory & factory)
{ {
ASTs & engine_args = args.engine_args; ASTs & engine_args = args.engine_args;
if (!(engine_args.size() == 1 || engine_args.size() == 2)) if (!(engine_args.size() >= 1 && engine_args.size() <= 3))
throw Exception( throw Exception(
"Storage File requires 1 or 2 arguments: name of used format and source.", "Storage File requires from 1 to 3 arguments: name of used format, source and compression_method.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context); engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context);
@ -371,6 +377,7 @@ void registerStorageFile(StorageFactory & factory)
int source_fd = -1; int source_fd = -1;
String source_path; String source_path;
String compression_method;
if (engine_args.size() >= 2) if (engine_args.size() >= 2)
{ {
/// Will use FD if engine_args[1] is int literal or identifier with std* name /// Will use FD if engine_args[1] is int literal or identifier with std* name
@ -397,13 +404,19 @@ void registerStorageFile(StorageFactory & factory)
else if (type == Field::Types::String) else if (type == Field::Types::String)
source_path = literal->value.get<String>(); source_path = literal->value.get<String>();
} }
if (engine_args.size() == 3)
{
engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context);
compression_method = engine_args[2]->as<ASTLiteral &>().value.safeGet<String>();
} else compression_method = "auto";
} }
return StorageFile::create( return StorageFile::create(
source_path, source_fd, source_path, source_fd,
args.data_path, args.data_path,
args.database_name, args.table_name, format_name, args.columns, args.constraints, args.database_name, args.table_name, format_name, args.columns, args.constraints,
args.context); args.context,
compression_method);
}); });
} }

View File

@ -60,7 +60,8 @@ protected:
const std::string & format_name_, const std::string & format_name_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
Context & context_); Context & context_,
const String & compression_method_);
private: private:
std::string table_name; std::string table_name;
@ -69,6 +70,7 @@ private:
Context & context_global; Context & context_global;
int table_fd = -1; int table_fd = -1;
String compression_method;
std::vector<std::string> paths; std::vector<std::string> paths;

View File

@ -13,7 +13,6 @@
#include <Formats/FormatFactory.h> #include <Formats/FormatFactory.h>
#include <DataStreams/IBlockOutputStream.h> #include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/UnionBlockInputStream.h> #include <DataStreams/UnionBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <DataStreams/OwningBlockInputStream.h> #include <DataStreams/OwningBlockInputStream.h>
#include <Common/parseGlobs.h> #include <Common/parseGlobs.h>
#include <Poco/URI.h> #include <Poco/URI.h>
@ -36,12 +35,14 @@ StorageHDFS::StorageHDFS(const String & uri_,
const String & format_name_, const String & format_name_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
Context & context_) Context & context_,
const String & compression_method_ = "")
: uri(uri_) : uri(uri_)
, format_name(format_name_) , format_name(format_name_)
, table_name(table_name_) , table_name(table_name_)
, database_name(database_name_) , database_name(database_name_)
, context(context_) , context(context_)
, compression_method(compression_method_)
{ {
setColumns(columns_); setColumns(columns_);
setConstraints(constraints_); setConstraints(constraints_);
@ -57,9 +58,11 @@ public:
const String & format, const String & format,
const Block & sample_block, const Block & sample_block,
const Context & context, const Context & context,
UInt64 max_block_size) UInt64 max_block_size,
const CompressionMethod compression_method)
{ {
std::unique_ptr<ReadBuffer> read_buf = std::make_unique<ReadBufferFromHDFS>(uri); auto read_buf = getBuffer<ReadBufferFromHDFS>(compression_method, uri);
auto input_stream = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); auto input_stream = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
reader = std::make_shared<OwningBlockInputStream<ReadBuffer>>(input_stream, std::move(read_buf)); reader = std::make_shared<OwningBlockInputStream<ReadBuffer>>(input_stream, std::move(read_buf));
} }
@ -99,10 +102,11 @@ public:
HDFSBlockOutputStream(const String & uri, HDFSBlockOutputStream(const String & uri,
const String & format, const String & format,
const Block & sample_block_, const Block & sample_block_,
const Context & context) const Context & context,
const CompressionMethod compression_method)
: sample_block(sample_block_) : sample_block(sample_block_)
{ {
write_buf = std::make_unique<WriteBufferFromHDFS>(uri); write_buf = getBuffer<WriteBufferFromHDFS>(compression_method, uri);
writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
} }
@ -130,7 +134,7 @@ public:
private: private:
Block sample_block; Block sample_block;
std::unique_ptr<WriteBufferFromHDFS> write_buf; std::unique_ptr<WriteBuffer> write_buf;
BlockOutputStreamPtr writer; BlockOutputStreamPtr writer;
}; };
@ -203,7 +207,7 @@ BlockInputStreams StorageHDFS::read(
for (const auto & res_path : res_paths) for (const auto & res_path : res_paths)
{ {
result.push_back(std::make_shared<HDFSBlockInputStream>(uri_without_path + res_path, format_name, getSampleBlock(), context_, result.push_back(std::make_shared<HDFSBlockInputStream>(uri_without_path + res_path, format_name, getSampleBlock(), context_,
max_block_size)); max_block_size, IStorage::chooseCompressionMethod(res_path, compression_method)));
} }
return result; return result;
@ -217,7 +221,11 @@ void StorageHDFS::rename(const String & /*new_path_to_db*/, const String & new_d
BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const Context & /*context*/) BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const Context & /*context*/)
{ {
return std::make_shared<HDFSBlockOutputStream>(uri, format_name, getSampleBlock(), context); return std::make_shared<HDFSBlockOutputStream>(uri,
format_name,
getSampleBlock(),
context,
IStorage::chooseCompressionMethod(uri, compression_method));
} }
void registerStorageHDFS(StorageFactory & factory) void registerStorageHDFS(StorageFactory & factory)
@ -226,9 +234,9 @@ void registerStorageHDFS(StorageFactory & factory)
{ {
ASTs & engine_args = args.engine_args; ASTs & engine_args = args.engine_args;
if (engine_args.size() != 2) if (engine_args.size() != 2 && engine_args.size() != 3)
throw Exception( throw Exception(
"Storage HDFS requires exactly 2 arguments: url and name of used format.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); "Storage HDFS requires 2 or 3 arguments: url, name of used format and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context); engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context);
@ -238,7 +246,14 @@ void registerStorageHDFS(StorageFactory & factory)
String format_name = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>(); String format_name = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>();
return StorageHDFS::create(url, args.database_name, args.table_name, format_name, args.columns, args.constraints, args.context); String compression_method;
if (engine_args.size() == 3)
{
engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context);
compression_method = engine_args[2]->as<ASTLiteral &>().value.safeGet<String>();
} else compression_method = "auto";
return StorageHDFS::create(url, args.database_name, args.table_name, format_name, args.columns, args.constraints, args.context, compression_method);
}); });
} }

View File

@ -39,7 +39,8 @@ protected:
const String & format_name_, const String & format_name_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
Context & context_); Context & context_,
const String & compression_method_);
private: private:
String uri; String uri;
@ -47,6 +48,7 @@ private:
String table_name; String table_name;
String database_name; String database_name;
Context & context; Context & context;
String compression_method;
Logger * log = &Logger::get("StorageHDFS"); Logger * log = &Logger::get("StorageHDFS");
}; };

View File

@ -35,11 +35,11 @@ namespace
const Block & sample_block, const Block & sample_block,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
const ConnectionTimeouts & timeouts) const ConnectionTimeouts & timeouts,
const CompressionMethod compression_method)
: name(name_) : name(name_)
{ {
read_buf = std::make_unique<ReadBufferFromS3>(uri, timeouts); read_buf = getBuffer<ReadBufferFromS3>(compression_method, uri, timeouts);
reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
} }
@ -70,7 +70,7 @@ namespace
private: private:
String name; String name;
std::unique_ptr<ReadBufferFromS3> read_buf; std::unique_ptr<ReadBuffer> read_buf;
BlockInputStreamPtr reader; BlockInputStreamPtr reader;
}; };
@ -82,10 +82,11 @@ namespace
UInt64 min_upload_part_size, UInt64 min_upload_part_size,
const Block & sample_block_, const Block & sample_block_,
const Context & context, const Context & context,
const ConnectionTimeouts & timeouts) const ConnectionTimeouts & timeouts,
const CompressionMethod compression_method)
: sample_block(sample_block_) : sample_block(sample_block_)
{ {
write_buf = std::make_unique<WriteBufferFromS3>(uri, min_upload_part_size, timeouts); write_buf = getBuffer<WriteBufferFromS3>(compression_method, uri, min_upload_part_size, timeouts);
writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
} }
@ -113,7 +114,7 @@ namespace
private: private:
Block sample_block; Block sample_block;
std::unique_ptr<WriteBufferFromS3> write_buf; std::unique_ptr<WriteBuffer> write_buf;
BlockOutputStreamPtr writer; BlockOutputStreamPtr writer;
}; };
} }
@ -127,7 +128,8 @@ StorageS3::StorageS3(
UInt64 min_upload_part_size_, UInt64 min_upload_part_size_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
Context & context_) Context & context_,
const String & compression_method_ = "")
: IStorage(columns_) : IStorage(columns_)
, uri(uri_) , uri(uri_)
, context_global(context_) , context_global(context_)
@ -135,6 +137,7 @@ StorageS3::StorageS3(
, database_name(database_name_) , database_name(database_name_)
, table_name(table_name_) , table_name(table_name_)
, min_upload_part_size(min_upload_part_size_) , min_upload_part_size(min_upload_part_size_)
, compression_method(compression_method_)
{ {
setColumns(columns_); setColumns(columns_);
setConstraints(constraints_); setConstraints(constraints_);
@ -156,7 +159,8 @@ BlockInputStreams StorageS3::read(
getHeaderBlock(column_names), getHeaderBlock(column_names),
context, context,
max_block_size, max_block_size,
ConnectionTimeouts::getHTTPTimeouts(context)); ConnectionTimeouts::getHTTPTimeouts(context),
IStorage::chooseCompressionMethod(uri.toString(), compression_method));
auto column_defaults = getColumns().getDefaults(); auto column_defaults = getColumns().getDefaults();
if (column_defaults.empty()) if (column_defaults.empty())
@ -173,7 +177,9 @@ void StorageS3::rename(const String & /*new_path_to_db*/, const String & new_dat
BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const Context & /*context*/) BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const Context & /*context*/)
{ {
return std::make_shared<StorageS3BlockOutputStream>( return std::make_shared<StorageS3BlockOutputStream>(
uri, format_name, min_upload_part_size, getSampleBlock(), context_global, ConnectionTimeouts::getHTTPTimeouts(context_global)); uri, format_name, min_upload_part_size, getSampleBlock(), context_global,
ConnectionTimeouts::getHTTPTimeouts(context_global),
IStorage::chooseCompressionMethod(uri.toString(), compression_method));
} }
void registerStorageS3(StorageFactory & factory) void registerStorageS3(StorageFactory & factory)
@ -182,9 +188,9 @@ void registerStorageS3(StorageFactory & factory)
{ {
ASTs & engine_args = args.engine_args; ASTs & engine_args = args.engine_args;
if (engine_args.size() != 2) if (engine_args.size() != 2 && engine_args.size() != 3)
throw Exception( throw Exception(
"Storage S3 requires exactly 2 arguments: url and name of used format.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); "Storage S3 requires 2 or 3 arguments: url, name of used format and compression_method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context); engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context);
@ -197,6 +203,13 @@ void registerStorageS3(StorageFactory & factory)
UInt64 min_upload_part_size = args.local_context.getSettingsRef().s3_min_upload_part_size; UInt64 min_upload_part_size = args.local_context.getSettingsRef().s3_min_upload_part_size;
String compression_method;
if (engine_args.size() == 3)
{
engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context);
compression_method = engine_args[2]->as<ASTLiteral &>().value.safeGet<String>();
} else compression_method = "auto";
return StorageS3::create(uri, args.database_name, args.table_name, format_name, min_upload_part_size, args.columns, args.constraints, args.context); return StorageS3::create(uri, args.database_name, args.table_name, format_name, min_upload_part_size, args.columns, args.constraints, args.context);
}); });
} }

View File

@ -24,7 +24,8 @@ public:
UInt64 min_upload_part_size_, UInt64 min_upload_part_size_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
Context & context_); Context & context_,
const String & compression_method_);
String getName() const override String getName() const override
{ {
@ -61,6 +62,7 @@ private:
String database_name; String database_name;
String table_name; String table_name;
UInt64 min_upload_part_size; UInt64 min_upload_part_size;
String compression_method;
}; };
} }

View File

@ -31,8 +31,9 @@ IStorageURLBase::IStorageURLBase(
const std::string & table_name_, const std::string & table_name_,
const String & format_name_, const String & format_name_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_) const ConstraintsDescription & constraints_,
: uri(uri_), context_global(context_), format_name(format_name_), table_name(table_name_), database_name(database_name_) const String & compression_method_)
: uri(uri_), context_global(context_), compression_method(compression_method_), format_name(format_name_), table_name(table_name_), database_name(database_name_)
{ {
setColumns(columns_); setColumns(columns_);
setConstraints(constraints_); setConstraints(constraints_);
@ -51,10 +52,11 @@ namespace
const Block & sample_block, const Block & sample_block,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
const ConnectionTimeouts & timeouts) const ConnectionTimeouts & timeouts,
const CompressionMethod compression_method)
: name(name_) : name(name_)
{ {
read_buf = std::make_unique<ReadWriteBufferFromHTTP>(uri, method, callback, timeouts, context.getSettingsRef().max_http_get_redirects); read_buf = getBuffer<ReadWriteBufferFromHTTP>(compression_method, uri, method, callback, timeouts, context.getSettingsRef().max_http_get_redirects);
reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
} }
@ -85,7 +87,7 @@ namespace
private: private:
String name; String name;
std::unique_ptr<ReadWriteBufferFromHTTP> read_buf; std::unique_ptr<ReadBuffer> read_buf;
BlockInputStreamPtr reader; BlockInputStreamPtr reader;
}; };
@ -96,10 +98,11 @@ namespace
const String & format, const String & format,
const Block & sample_block_, const Block & sample_block_,
const Context & context, const Context & context,
const ConnectionTimeouts & timeouts) const ConnectionTimeouts & timeouts,
const CompressionMethod compression_method)
: sample_block(sample_block_) : sample_block(sample_block_)
{ {
write_buf = std::make_unique<WriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_POST, timeouts); write_buf = getBuffer<WriteBufferFromHTTP>(compression_method, uri, Poco::Net::HTTPRequest::HTTP_POST, timeouts);
writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
} }
@ -127,7 +130,7 @@ namespace
private: private:
Block sample_block; Block sample_block;
std::unique_ptr<WriteBufferFromHTTP> write_buf; std::unique_ptr<WriteBuffer> write_buf;
BlockOutputStreamPtr writer; BlockOutputStreamPtr writer;
}; };
} }
@ -177,8 +180,8 @@ BlockInputStreams IStorageURLBase::read(const Names & column_names,
getHeaderBlock(column_names), getHeaderBlock(column_names),
context, context,
max_block_size, max_block_size,
ConnectionTimeouts::getHTTPTimeouts(context)); ConnectionTimeouts::getHTTPTimeouts(context),
IStorage::chooseCompressionMethod(request_uri.toString(), compression_method));
auto column_defaults = getColumns().getDefaults(); auto column_defaults = getColumns().getDefaults();
if (column_defaults.empty()) if (column_defaults.empty())
@ -195,7 +198,9 @@ void IStorageURLBase::rename(const String & /*new_path_to_db*/, const String & n
BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const Context & /*context*/) BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const Context & /*context*/)
{ {
return std::make_shared<StorageURLBlockOutputStream>( return std::make_shared<StorageURLBlockOutputStream>(
uri, format_name, getSampleBlock(), context_global, ConnectionTimeouts::getHTTPTimeouts(context_global)); uri, format_name, getSampleBlock(), context_global,
ConnectionTimeouts::getHTTPTimeouts(context_global),
IStorage::chooseCompressionMethod(uri.toString(), compression_method));
} }
void registerStorageURL(StorageFactory & factory) void registerStorageURL(StorageFactory & factory)
@ -204,9 +209,9 @@ void registerStorageURL(StorageFactory & factory)
{ {
ASTs & engine_args = args.engine_args; ASTs & engine_args = args.engine_args;
if (engine_args.size() != 2) if (engine_args.size() != 2 && engine_args.size() != 3)
throw Exception( throw Exception(
"Storage URL requires exactly 2 arguments: url and name of used format.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); "Storage URL requires 2 or 3 arguments: url, name of used format and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context); engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context);
@ -217,7 +222,19 @@ void registerStorageURL(StorageFactory & factory)
String format_name = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>(); String format_name = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>();
return StorageURL::create(uri, args.database_name, args.table_name, format_name, args.columns, args.constraints, args.context); String compression_method;
if (engine_args.size() == 3)
{
engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context);
compression_method = engine_args[2]->as<ASTLiteral &>().value.safeGet<String>();
} else compression_method = "auto";
return StorageURL::create(
uri,
args.database_name, args.table_name,
format_name,
args.columns, args.constraints, args.context,
compression_method);
}); });
} }
} }

View File

@ -39,10 +39,12 @@ protected:
const std::string & table_name_, const std::string & table_name_,
const String & format_name_, const String & format_name_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_); const ConstraintsDescription & constraints_,
const String & compression_method_);
Poco::URI uri; Poco::URI uri;
const Context & context_global; const Context & context_global;
String compression_method;
private: private:
String format_name; String format_name;
@ -80,8 +82,9 @@ public:
const String & format_name_, const String & format_name_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
Context & context_) Context & context_,
: IStorageURLBase(uri_, context_, database_name_, table_name_, format_name_, columns_, constraints_) const String & compression_method_)
: IStorageURLBase(uri_, context_, database_name_, table_name_, format_name_, columns_, constraints_, compression_method_)
{ {
} }

View File

@ -7,6 +7,7 @@
#include <Poco/Util/AbstractConfiguration.h> #include <Poco/Util/AbstractConfiguration.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <IO/CompressionMethod.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/ReadWriteBufferFromHTTP.h> #include <IO/ReadWriteBufferFromHTTP.h>
#include <Poco/File.h> #include <Poco/File.h>
@ -31,7 +32,7 @@ StorageXDBC::StorageXDBC(
const Context & context_, const Context & context_,
const BridgeHelperPtr bridge_helper_) const BridgeHelperPtr bridge_helper_)
/// Please add support for constraints as soon as StorageODBC or JDBC will support insertion. /// Please add support for constraints as soon as StorageODBC or JDBC will support insertion.
: IStorageURLBase(Poco::URI(), context_, database_name_, table_name_, IXDBCBridgeHelper::DEFAULT_FORMAT, columns_, ConstraintsDescription{}) : IStorageURLBase(Poco::URI(), context_, database_name_, table_name_, IXDBCBridgeHelper::DEFAULT_FORMAT, columns_, ConstraintsDescription{}, "" /* CompressionMethod */)
, bridge_helper(bridge_helper_) , bridge_helper(bridge_helper_)
, remote_database_name(remote_database_name_) , remote_database_name(remote_database_name_)
, remote_table_name(remote_table_name_) , remote_table_name(remote_table_name_)

View File

@ -32,21 +32,27 @@ StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & ast_function, cons
ASTs & args = args_func.at(0)->children; ASTs & args = args_func.at(0)->children;
if (args.size() != 3) if (args.size() != 3 && args.size() != 4)
throw Exception("Table function '" + getName() + "' requires exactly 3 arguments: filename, format and structure.", throw Exception("Table function '" + getName() + "' requires 3 or 4 arguments: filename, format, structure and compression method (default none).",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (size_t i = 0; i < 3; ++i) for (size_t i = 0; i < args.size(); ++i)
args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context);
std::string filename = args[0]->as<ASTLiteral &>().value.safeGet<String>(); std::string filename = args[0]->as<ASTLiteral &>().value.safeGet<String>();
std::string format = args[1]->as<ASTLiteral &>().value.safeGet<String>(); std::string format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
std::string structure = args[2]->as<ASTLiteral &>().value.safeGet<String>(); std::string structure = args[2]->as<ASTLiteral &>().value.safeGet<String>();
std::string compression_method;
if (args.size() == 4)
{
compression_method = args[3]->as<ASTLiteral &>().value.safeGet<String>();
} else compression_method = "auto";
ColumnsDescription columns = parseColumnsListFromString(structure, context); ColumnsDescription columns = parseColumnsListFromString(structure, context);
/// Create table /// Create table
StoragePtr storage = getStorage(filename, format, columns, const_cast<Context &>(context), table_name); StoragePtr storage = getStorage(filename, format, columns, const_cast<Context &>(context), table_name, compression_method);
storage->startup(); storage->startup();

View File

@ -16,6 +16,6 @@ class ITableFunctionFileLike : public ITableFunction
private: private:
StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const override; StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const override;
virtual StoragePtr getStorage( virtual StoragePtr getStorage(
const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name) const = 0; const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const String & compression_method) const = 0;
}; };
} }

View File

@ -6,7 +6,7 @@
namespace DB namespace DB
{ {
StoragePtr TableFunctionFile::getStorage( StoragePtr TableFunctionFile::getStorage(
const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name) const const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const std::string & compression_method) const
{ {
return StorageFile::create(source, return StorageFile::create(source,
-1, -1,
@ -16,7 +16,8 @@ StoragePtr TableFunctionFile::getStorage(
format, format,
columns, columns,
ConstraintsDescription{}, ConstraintsDescription{},
global_context); global_context,
compression_method);
} }
void registerTableFunctionFile(TableFunctionFactory & factory) void registerTableFunctionFile(TableFunctionFactory & factory)

View File

@ -23,6 +23,6 @@ public:
private: private:
StoragePtr getStorage( StoragePtr getStorage(
const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name) const override; const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const std::string & compression_method) const override;
}; };
} }

View File

@ -9,7 +9,7 @@
namespace DB namespace DB
{ {
StoragePtr TableFunctionHDFS::getStorage( StoragePtr TableFunctionHDFS::getStorage(
const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name) const const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const String & compression_method) const
{ {
return StorageHDFS::create(source, return StorageHDFS::create(source,
getDatabaseName(), getDatabaseName(),
@ -17,7 +17,8 @@ StoragePtr TableFunctionHDFS::getStorage(
format, format,
columns, columns,
ConstraintsDescription{}, ConstraintsDescription{},
global_context); global_context,
compression_method);
} }
void registerTableFunctionHDFS(TableFunctionFactory & factory) void registerTableFunctionHDFS(TableFunctionFactory & factory)

View File

@ -24,7 +24,7 @@ public:
private: private:
StoragePtr getStorage( StoragePtr getStorage(
const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name) const override; const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const String & compression_method) const override;
}; };
} }

View File

@ -7,11 +7,11 @@ namespace DB
{ {
StoragePtr TableFunctionS3::getStorage( StoragePtr TableFunctionS3::getStorage(
const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name) const const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const String & compression_method) const
{ {
Poco::URI uri(source); Poco::URI uri(source);
UInt64 min_upload_part_size = global_context.getSettingsRef().s3_min_upload_part_size; UInt64 min_upload_part_size = global_context.getSettingsRef().s3_min_upload_part_size;
return StorageS3::create(uri, getDatabaseName(), table_name, format, min_upload_part_size, columns, ConstraintsDescription{}, global_context); return StorageS3::create(uri, getDatabaseName(), table_name, format, min_upload_part_size, columns, ConstraintsDescription{}, global_context, compression_method);
} }
void registerTableFunctionS3(TableFunctionFactory & factory) void registerTableFunctionS3(TableFunctionFactory & factory)

View File

@ -25,7 +25,8 @@ private:
const String & format, const String & format,
const ColumnsDescription & columns, const ColumnsDescription & columns,
Context & global_context, Context & global_context,
const std::string & table_name) const override; const std::string & table_name,
const String & compression_method) const override;
}; };
} }

View File

@ -8,10 +8,10 @@
namespace DB namespace DB
{ {
StoragePtr TableFunctionURL::getStorage( StoragePtr TableFunctionURL::getStorage(
const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name) const const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const String & compression_method) const
{ {
Poco::URI uri(source); Poco::URI uri(source);
return StorageURL::create(uri, getDatabaseName(), table_name, format, columns, ConstraintsDescription{}, global_context); return StorageURL::create(uri, getDatabaseName(), table_name, format, columns, ConstraintsDescription{}, global_context, compression_method);
} }
void registerTableFunctionURL(TableFunctionFactory & factory) void registerTableFunctionURL(TableFunctionFactory & factory)

View File

@ -19,6 +19,6 @@ public:
private: private:
StoragePtr getStorage( StoragePtr getStorage(
const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name) const override; const String & source, const String & format, const ColumnsDescription & columns, Context & global_context, const std::string & table_name, const String & compression_method) const override;
}; };
} }

View File

@ -1,4 +1,6 @@
#-*- coding: utf-8 -*- #-*- coding: utf-8 -*-
import StringIO
import gzip
import requests import requests
import subprocess import subprocess
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
@ -19,7 +21,7 @@ class HDFSApi(object):
if response_data.status_code != 200: if response_data.status_code != 200:
response_data.raise_for_status() response_data.raise_for_status()
return response_data.text return response_data.content
# Requests can't put file # Requests can't put file
def _curl_to_put(self, filename, path, params): def _curl_to_put(self, filename, path, params):
@ -44,3 +46,12 @@ class HDFSApi(object):
output = self._curl_to_put(fpath, path, additional_params) output = self._curl_to_put(fpath, path, additional_params)
if "201 Created" not in output: if "201 Created" not in output:
raise Exception("Can't create file on hdfs:\n {}".format(output)) raise Exception("Can't create file on hdfs:\n {}".format(output))
def write_gzip_data(self, path, content):
out = StringIO.StringIO()
with gzip.GzipFile(fileobj=out, mode="w") as f:
f.write(content)
self.write_data(path, out.getvalue())
def read_gzip_data(self, path):
return gzip.GzipFile(fileobj=StringIO.StringIO(self.read_data(path))).read()

View File

@ -133,4 +133,56 @@ def test_globs_in_read_table(started_cluster):
("?", 0)] ("?", 0)]
for pattern, value in test_requests: for pattern, value in test_requests:
assert node1.query("select * from hdfs('hdfs://hdfs1:9000" + globs_dir + pattern + "', 'TSV', 'id UInt64, text String, number Float64')") == value * some_data assert node1.query("select * from hdfs('hdfs://hdfs1:9000" + globs_dir + pattern + "', 'TSV', 'id UInt64, text String, number Float64')") == value * some_data
def test_read_write_gzip_table(started_cluster):
hdfs_api = HDFSApi("root")
data = "1\tHello Jessica\t555.222\n2\tI rolled a joint\t777.333\n"
hdfs_api.write_gzip_data("/simple_table_function.gz", data)
assert hdfs_api.read_gzip_data("/simple_table_function.gz") == data
assert node1.query("select * from hdfs('hdfs://hdfs1:9000/simple_table_function.gz', 'TSV', 'id UInt64, text String, number Float64')") == data
def test_read_write_gzip_table_with_parameter_gzip(started_cluster):
hdfs_api = HDFSApi("root")
data = "1\tHello Jessica\t555.222\n2\tI rolled a joint\t777.333\n"
hdfs_api.write_gzip_data("/simple_table_function", data)
assert hdfs_api.read_gzip_data("/simple_table_function") == data
assert node1.query("select * from hdfs('hdfs://hdfs1:9000/simple_table_function', 'TSV', 'id UInt64, text String, number Float64', 'gzip')") == data
def test_read_write_table_with_parameter_none(started_cluster):
hdfs_api = HDFSApi("root")
data = "1\tHello Jessica\t555.222\n2\tI rolled a joint\t777.333\n"
hdfs_api.write_data("/simple_table_function.gz", data)
assert hdfs_api.read_data("/simple_table_function.gz") == data
assert node1.query("select * from hdfs('hdfs://hdfs1:9000/simple_table_function.gz', 'TSV', 'id UInt64, text String, number Float64', 'none')") == data
def test_read_write_gzip_table_with_parameter_auto_gz(started_cluster):
hdfs_api = HDFSApi("root")
data = "1\tHello Jessica\t555.222\n2\tI rolled a joint\t777.333\n"
hdfs_api.write_gzip_data("/simple_table_function.gz", data)
assert hdfs_api.read_gzip_data("/simple_table_function.gz") == data
assert node1.query("select * from hdfs('hdfs://hdfs1:9000/simple_table_function.gz', 'TSV', 'id UInt64, text String, number Float64', 'auto')") == data
def test_write_gz_storage(started_cluster):
hdfs_api = HDFSApi("root")
node1.query("create table GZHDFSStorage (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/storage.gz', 'TSV')")
node1.query("insert into GZHDFSStorage values (1, 'Mark', 72.53)")
assert hdfs_api.read_gzip_data("/storage.gz") == "1\tMark\t72.53\n"
assert node1.query("select * from GZHDFSStorage") == "1\tMark\t72.53\n"
def test_write_gzip_storage(started_cluster):
hdfs_api = HDFSApi("root")
node1.query("create table GZIPHDFSStorage (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/gzip_storage', 'TSV', 'gzip')")
node1.query("insert into GZIPHDFSStorage values (1, 'Mark', 72.53)")
assert hdfs_api.read_gzip_data("/gzip_storage") == "1\tMark\t72.53\n"
assert node1.query("select * from GZIPHDFSStorage") == "1\tMark\t72.53\n"

View File

@ -2,7 +2,7 @@ drop table if exists test_table_hdfs_syntax
; ;
create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('') create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('')
; -- { serverError 42 } ; -- { serverError 42 }
create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('','','') create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('','','', '')
; -- { serverError 42 } ; -- { serverError 42 }
drop table if exists test_table_hdfs_syntax drop table if exists test_table_hdfs_syntax
; ;

View File

@ -2,7 +2,7 @@ drop table if exists test_table_s3_syntax
; ;
create table test_table_s3_syntax (id UInt32) ENGINE = S3('') create table test_table_s3_syntax (id UInt32) ENGINE = S3('')
; -- { serverError 42 } ; -- { serverError 42 }
create table test_table_s3_syntax (id UInt32) ENGINE = S3('','','') create table test_table_s3_syntax (id UInt32) ENGINE = S3('','','','')
; -- { serverError 42 } ; -- { serverError 42 }
drop table if exists test_table_s3_syntax drop table if exists test_table_s3_syntax
; ;

View File

@ -2,7 +2,7 @@ drop table if exists test_table_url_syntax
; ;
create table test_table_url_syntax (id UInt32) ENGINE = URL('') create table test_table_url_syntax (id UInt32) ENGINE = URL('')
; -- { serverError 42 } ; -- { serverError 42 }
create table test_table_url_syntax (id UInt32) ENGINE = URL('','','') create table test_table_url_syntax (id UInt32) ENGINE = URL('','','','')
; -- { serverError 42 } ; -- { serverError 42 }
drop table if exists test_table_url_syntax drop table if exists test_table_url_syntax
; ;