mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
better
This commit is contained in:
parent
a1010d708f
commit
4ff1be6e25
@ -216,7 +216,7 @@ BlockOutputStreamPtr FormatFactory::getOutput(const String & name,
|
|||||||
const Settings & settings = context.getSettingsRef();
|
const Settings & settings = context.getSettingsRef();
|
||||||
bool parallel_formatting = settings.output_format_parallel_formatting;
|
bool parallel_formatting = settings.output_format_parallel_formatting;
|
||||||
|
|
||||||
if (parallel_formatting && name != "PrettyCompactMonoBlock")
|
if (parallel_formatting && getCreators(name).supports_parallel_formatting)
|
||||||
{
|
{
|
||||||
const auto & output_getter = getCreators(name).output_processor_creator;
|
const auto & output_getter = getCreators(name).output_processor_creator;
|
||||||
|
|
||||||
@ -351,6 +351,16 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm
|
|||||||
target = std::move(file_segmentation_engine);
|
target = std::move(file_segmentation_engine);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & name)
|
||||||
|
{
|
||||||
|
auto & target = dict[name].supports_parallel_formatting;
|
||||||
|
if (target)
|
||||||
|
throw Exception("FormatFactory: Output format " + name + " is already marked as supporting parallel formatting.", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
target = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
FormatFactory & FormatFactory::instance()
|
FormatFactory & FormatFactory::instance()
|
||||||
{
|
{
|
||||||
static FormatFactory ret;
|
static FormatFactory ret;
|
||||||
|
@ -100,6 +100,7 @@ private:
|
|||||||
InputProcessorCreator input_processor_creator;
|
InputProcessorCreator input_processor_creator;
|
||||||
OutputProcessorCreator output_processor_creator;
|
OutputProcessorCreator output_processor_creator;
|
||||||
FileSegmentationEngine file_segmentation_engine;
|
FileSegmentationEngine file_segmentation_engine;
|
||||||
|
bool supports_parallel_formatting{false};
|
||||||
};
|
};
|
||||||
|
|
||||||
using FormatsDictionary = std::unordered_map<String, Creators>;
|
using FormatsDictionary = std::unordered_map<String, Creators>;
|
||||||
@ -140,6 +141,8 @@ public:
|
|||||||
void registerInputFormatProcessor(const String & name, InputProcessorCreator input_creator);
|
void registerInputFormatProcessor(const String & name, InputProcessorCreator input_creator);
|
||||||
void registerOutputFormatProcessor(const String & name, OutputProcessorCreator output_creator);
|
void registerOutputFormatProcessor(const String & name, OutputProcessorCreator output_creator);
|
||||||
|
|
||||||
|
void markOutputFormatSupportsParallelFormatting(const String & name);
|
||||||
|
|
||||||
const FormatsDictionary & getAllFormats() const
|
const FormatsDictionary & getAllFormats() const
|
||||||
{
|
{
|
||||||
return dict;
|
return dict;
|
||||||
|
@ -176,7 +176,7 @@ private:
|
|||||||
{
|
{
|
||||||
const size_t prev_size = memory.size();
|
const size_t prev_size = memory.size();
|
||||||
memory.resize(2 * prev_size + 1);
|
memory.resize(2 * prev_size + 1);
|
||||||
Base::set(memory.data(), memory.size(), prev_size + 1);
|
Base::set(memory.data() + prev_size, memory.size() - prev_size, 0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -82,6 +82,7 @@ void registerOutputFormatProcessorCSV(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<CSVRowOutputFormat>(buf, sample, with_names, params, format_settings);
|
return std::make_shared<CSVRowOutputFormat>(buf, sample, with_names, params, format_settings);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting(with_names ? "CSVWithNames" : "CSV");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,6 +108,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, false, false);
|
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, false, false);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting("JSONCompactEachRow");
|
||||||
|
|
||||||
factory.registerOutputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", [](
|
factory.registerOutputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", [](
|
||||||
WriteBuffer &buf,
|
WriteBuffer &buf,
|
||||||
@ -117,6 +118,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, true, false);
|
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, true, false);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting("JSONCompactEachRowWithNamesAndTypes");
|
||||||
|
|
||||||
factory.registerOutputFormatProcessor("JSONCompactStringsEachRow", [](
|
factory.registerOutputFormatProcessor("JSONCompactStringsEachRow", [](
|
||||||
WriteBuffer & buf,
|
WriteBuffer & buf,
|
||||||
@ -126,6 +128,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, false, true);
|
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, false, true);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting("JSONCompactStringsEachRow");
|
||||||
|
|
||||||
factory.registerOutputFormatProcessor("JSONCompactStringsEachRowWithNamesAndTypes", [](
|
factory.registerOutputFormatProcessor("JSONCompactStringsEachRowWithNamesAndTypes", [](
|
||||||
WriteBuffer &buf,
|
WriteBuffer &buf,
|
||||||
@ -135,6 +138,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, true, true);
|
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, true, true);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting("JSONCompactStringsEachRowWithNamesAndTypes");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,6 +138,7 @@ void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory)
|
|||||||
return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
|
return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
|
||||||
settings);
|
settings);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting("JSONEachRow");
|
||||||
|
|
||||||
factory.registerOutputFormatProcessor("JSONStringsEachRow", [](
|
factory.registerOutputFormatProcessor("JSONStringsEachRow", [](
|
||||||
WriteBuffer & buf,
|
WriteBuffer & buf,
|
||||||
@ -150,6 +151,7 @@ void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory)
|
|||||||
return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
|
return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
|
||||||
settings);
|
settings);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting("JSONStringEachRow");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <Formats/FormatFactory.h>
|
#include <Formats/FormatFactory.h>
|
||||||
|
|
||||||
#include <deque>
|
#include <deque>
|
||||||
|
#include <future>
|
||||||
#include <Common/setThreadName.h>
|
#include <Common/setThreadName.h>
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
@ -82,7 +83,6 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
InternalFormatterCreator internal_formatter_creator;
|
InternalFormatterCreator internal_formatter_creator;
|
||||||
|
|
||||||
enum ProcessingUnitStatus
|
enum ProcessingUnitStatus
|
||||||
@ -104,6 +104,7 @@ private:
|
|||||||
|
|
||||||
void addChunk(Chunk chunk, ProcessingUnitType type)
|
void addChunk(Chunk chunk, ProcessingUnitType type)
|
||||||
{
|
{
|
||||||
|
// std::cout << "AddChunk of size " << chunk.getNumRows() << std::endl;
|
||||||
const auto current_unit_number = writer_unit_number % processing_units.size();
|
const auto current_unit_number = writer_unit_number % processing_units.size();
|
||||||
|
|
||||||
auto & unit = processing_units[current_unit_number];
|
auto & unit = processing_units[current_unit_number];
|
||||||
@ -119,7 +120,6 @@ private:
|
|||||||
unit.chunk = std::move(chunk);
|
unit.chunk = std::move(chunk);
|
||||||
|
|
||||||
/// Resize memory without deallocate
|
/// Resize memory without deallocate
|
||||||
unit.segment.resize(0);
|
|
||||||
unit.status = READY_TO_FORMAT;
|
unit.status = READY_TO_FORMAT;
|
||||||
unit.type = type;
|
unit.type = type;
|
||||||
|
|
||||||
@ -143,6 +143,8 @@ private:
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::promise<bool> finalizator{};
|
||||||
|
|
||||||
std::atomic_bool need_flush{false};
|
std::atomic_bool need_flush{false};
|
||||||
|
|
||||||
// There are multiple "formatters", that's why we use thread pool.
|
// There are multiple "formatters", that's why we use thread pool.
|
||||||
@ -165,6 +167,9 @@ private:
|
|||||||
|
|
||||||
void finishAndWait()
|
void finishAndWait()
|
||||||
{
|
{
|
||||||
|
std::future<bool> future_finalizator = finalizator.get_future();
|
||||||
|
future_finalizator.get();
|
||||||
|
|
||||||
formatting_finished = true;
|
formatting_finished = true;
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -230,6 +235,8 @@ private:
|
|||||||
{
|
{
|
||||||
const auto current_unit_number = collector_unit_number % processing_units.size();
|
const auto current_unit_number = collector_unit_number % processing_units.size();
|
||||||
|
|
||||||
|
// std::cout << "collecting " << current_unit_number << std::endl;
|
||||||
|
|
||||||
auto & unit = processing_units[current_unit_number];
|
auto & unit = processing_units[current_unit_number];
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -238,13 +245,11 @@ private:
|
|||||||
[&]{ return unit.status == READY_TO_READ; });
|
[&]{ return unit.status == READY_TO_READ; });
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unit.type == ProcessingUnitType::TOTALS) {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(unit.status == READY_TO_READ);
|
assert(unit.status == READY_TO_READ);
|
||||||
assert(unit.segment.size() > 0);
|
assert(unit.segment.size() > 0);
|
||||||
|
|
||||||
|
auto copy_if_unit_type = unit.type;
|
||||||
|
|
||||||
/// Do main work here.
|
/// Do main work here.
|
||||||
out.write(unit.segment.data(), unit.actual_memory_size);
|
out.write(unit.segment.data(), unit.actual_memory_size);
|
||||||
|
|
||||||
@ -259,8 +264,12 @@ private:
|
|||||||
writer_condvar.notify_all();
|
writer_condvar.notify_all();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unit.type == ProcessingUnitType::FINALIZE)
|
if (copy_if_unit_type == ProcessingUnitType::FINALIZE)
|
||||||
|
{
|
||||||
|
finalizator.set_value(true);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (...)
|
catch (...)
|
||||||
@ -280,9 +289,8 @@ private:
|
|||||||
|
|
||||||
assert(unit.status = READY_TO_FORMAT);
|
assert(unit.status = READY_TO_FORMAT);
|
||||||
|
|
||||||
unit.segment.resize(DBMS_DEFAULT_BUFFER_SIZE);
|
unit.segment.resize(1);
|
||||||
|
|
||||||
/// TODO: Implement proper nextImpl
|
|
||||||
BufferWithOutsideMemory<WriteBuffer> out_buffer(unit.segment);
|
BufferWithOutsideMemory<WriteBuffer> out_buffer(unit.segment);
|
||||||
|
|
||||||
auto formatter = internal_formatter_creator(out_buffer);
|
auto formatter = internal_formatter_creator(out_buffer);
|
||||||
|
@ -49,6 +49,7 @@ void registerOutputFormatProcessorTSKV(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<TSKVRowOutputFormat>(buf, sample, params, settings);
|
return std::make_shared<TSKVRowOutputFormat>(buf, sample, params, settings);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting("TSKV");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -85,6 +85,7 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<TabSeparatedRowOutputFormat>(buf, sample, false, false, params, settings);
|
return std::make_shared<TabSeparatedRowOutputFormat>(buf, sample, false, false, params, settings);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto * name : {"TabSeparatedRaw", "TSVRaw"})
|
for (const auto * name : {"TabSeparatedRaw", "TSVRaw"})
|
||||||
@ -97,6 +98,7 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<TabSeparatedRawRowOutputFormat>(buf, sample, false, false, params, settings);
|
return std::make_shared<TabSeparatedRawRowOutputFormat>(buf, sample, false, false, params, settings);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"})
|
for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"})
|
||||||
@ -109,6 +111,7 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<TabSeparatedRowOutputFormat>(buf, sample, true, false, params, settings);
|
return std::make_shared<TabSeparatedRowOutputFormat>(buf, sample, true, false, params, settings);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto * name : {"TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"})
|
for (const auto * name : {"TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"})
|
||||||
@ -121,6 +124,7 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<TabSeparatedRowOutputFormat>(buf, sample, true, true, params, settings);
|
return std::make_shared<TabSeparatedRowOutputFormat>(buf, sample, true, true, params, settings);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting(name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,6 +51,7 @@ void registerOutputFormatProcessorValues(FormatFactory & factory)
|
|||||||
{
|
{
|
||||||
return std::make_shared<ValuesRowOutputFormat>(buf, sample, params, settings);
|
return std::make_shared<ValuesRowOutputFormat>(buf, sample, params, settings);
|
||||||
});
|
});
|
||||||
|
factory.markOutputFormatSupportsParallelFormatting("Values");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
10000000
|
||||||
|
10000000
|
||||||
|
20000000
|
||||||
|
20000000
|
||||||
|
30000000
|
||||||
|
30000000
|
19
tests/queries/0_stateless/01514_parallel_formatting.sql
Normal file
19
tests/queries/0_stateless/01514_parallel_formatting.sql
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
drop table if exists tsv;
|
||||||
|
create table tsv(a int, b int default 7) engine File(TSV);
|
||||||
|
|
||||||
|
insert into tsv(a) select number from numbers(10000000);
|
||||||
|
select '10000000';
|
||||||
|
select count() from tsv;
|
||||||
|
|
||||||
|
|
||||||
|
insert into tsv(a) select number from numbers(10000000);
|
||||||
|
select '20000000';
|
||||||
|
select count() from tsv;
|
||||||
|
|
||||||
|
|
||||||
|
insert into tsv(a) select number from numbers(10000000);
|
||||||
|
select '30000000';
|
||||||
|
select count() from tsv;
|
||||||
|
|
||||||
|
|
||||||
|
drop table tsv;
|
Loading…
Reference in New Issue
Block a user