From f9b70ea77a3b3059d9c784e2fe4b90000e75d3d4 Mon Sep 17 00:00:00 2001
From: Alexey Korepanov
Date: Sat, 22 Jun 2024 09:30:16 +0200
Subject: [PATCH] Add JSONCompactWithProgressRowOutputFormat

---
Review notes (this section is ignored by `git am`):
 - The copy of this patch under review had lost its line breaks, the targets
   of every `#include`, and the template arguments of `std::make_shared` and
   `std::vector`. They are restored here from the identifiers the code uses;
   please verify the include list against the tree before applying.
 - Removed the per-cell LOG_DEBUG from writeField() and the debug log from
   onProgress(); the logger include is no longer needed.
 - The format is no longer marked as supporting parallel formatting, see the
   comment in registerOutputFormatJSONCompactWithProgress().
 - The `index` lines still carry the blob ids of the original revision.

 src/Formats/registerFormats.cpp               |   2 +
 ...JSONCompactWithProgressRowOutputFormat.cpp | 127 ++++++++++++++++++
 .../JSONCompactWithProgressRowOutputFormat.h  |  64 +++++++++
 3 files changed, 193 insertions(+)
 create mode 100644 src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp
 create mode 100644 src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h

diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp
index 57ca1bb49c8..770b747fafd 100644
--- a/src/Formats/registerFormats.cpp
+++ b/src/Formats/registerFormats.cpp
@@ -95,6 +95,7 @@ void registerOutputFormatMarkdown(FormatFactory & factory);
 void registerOutputFormatPostgreSQLWire(FormatFactory & factory);
 void registerOutputFormatPrometheus(FormatFactory & factory);
 void registerOutputFormatSQLInsert(FormatFactory & factory);
+void registerOutputFormatJSONCompactWithProgress(FormatFactory & factory);
 
 /// Input only formats.
 
@@ -242,6 +243,7 @@ void registerFormats()
     registerOutputFormatCapnProto(factory);
     registerOutputFormatPrometheus(factory);
     registerOutputFormatSQLInsert(factory);
+    registerOutputFormatJSONCompactWithProgress(factory);
 
     registerInputFormatRegexp(factory);
     registerInputFormatJSONAsString(factory);
diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp
new file mode 100644
index 00000000000..78cf5b9a003
--- /dev/null
+++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp
@@ -0,0 +1,127 @@
+#include <Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h>
+#include <Formats/FormatFactory.h>
+#include <Formats/JSONUtils.h>
+
+#include <IO/WriteBufferFromString.h>
+#include <IO/WriteHelpers.h>
+
+
+namespace DB
+{
+
+JSONCompactWithProgressRowOutputFormat::JSONCompactWithProgressRowOutputFormat(
+    WriteBuffer & out_,
+    const Block & header,
+    const FormatSettings & settings_,
+    bool yield_strings_)
+    : JSONRowOutputFormat(out_, header, settings_, yield_strings_)
+{
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num)
+{
+    /// Called for every cell of every row, so no logging here.
+    JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr);
+    ++field_number;
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeFieldDelimiter()
+{
+    JSONUtils::writeFieldCompactDelimiter(*ostr);
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeRowStartDelimiter()
+{
+    /// Emit queued progress lines before opening the next row.
+    if (has_progress)
+        writeProgress();
+    JSONUtils::writeCompactArrayStart(*ostr, 2);
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeRowEndDelimiter()
+{
+    JSONUtils::writeCompactArrayEnd(*ostr);
+    field_number = 0;
+    ++row_count;
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeBeforeTotals()
+{
+    JSONUtils::writeFieldDelimiter(*ostr, 2);
+    JSONUtils::writeCompactArrayStart(*ostr, 1, "totals");
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeTotals(const Columns & columns, size_t row_num)
+{
+    JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr);
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeAfterTotals()
+{
+    JSONUtils::writeCompactArrayEnd(*ostr);
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num)
+{
+    JSONUtils::writeCompactArrayStart(*ostr, 2, title);
+    JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr);
+    JSONUtils::writeCompactArrayEnd(*ostr);
+}
+
+void JSONCompactWithProgressRowOutputFormat::onProgress(const Progress & value)
+{
+    progress.incrementPiecewiseAtomically(value);
+
+    /// Render the line before taking the lock; only the queue update is guarded.
+    String progress_line;
+    WriteBufferFromString buf(progress_line);
+    writeCString("{\"progress\":", buf);
+    progress.writeJSON(buf);
+    writeCString("}\n", buf);
+    buf.finalize();
+
+    std::lock_guard lock(progress_lines_mutex);
+    progress_lines.emplace_back(std::move(progress_line));
+    has_progress = true;
+}
+
+void JSONCompactWithProgressRowOutputFormat::flush()
+{
+    if (has_progress)
+        writeProgress();
+    JSONRowOutputFormat::flush();
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeSuffix()
+{
+    /// Drain any remaining progress lines before the base class writes the suffix.
+    if (has_progress)
+        writeProgress();
+    JSONRowOutputFormat::writeSuffix();
+}
+
+void JSONCompactWithProgressRowOutputFormat::writeProgress()
+{
+    /// The flag is cleared under the same mutex onProgress() sets it under.
+    std::lock_guard lock(progress_lines_mutex);
+    for (const auto & progress_line : progress_lines)
+        writeString(progress_line, *ostr);
+    progress_lines.clear();
+    has_progress = false;
+}
+
+void registerOutputFormatJSONCompactWithProgress(FormatFactory & factory)
+{
+    /// Deliberately not marked as supporting parallel formatting: progress lines are
+    /// queued inside the format object by onProgress() and interleaved with its rows,
+    /// which relies on a single formatter instance producing the whole output.
+    factory.registerOutputFormat("JSONCompactWithProgress", [](
+        WriteBuffer & buf,
+        const Block & sample,
+        const FormatSettings & format_settings)
+    {
+        return std::make_shared<JSONCompactWithProgressRowOutputFormat>(buf, sample, format_settings, false);
+    });
+}
+
+}
diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h
new file mode 100644
index 00000000000..4bc10d41f19
--- /dev/null
+++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <Core/Block.h>
+#include <IO/WriteBuffer.h>
+#include <IO/WriteBufferValidUTF8.h>
+#include <Processors/Formats/Impl/JSONRowOutputFormat.h>
+
+#include <atomic>
+#include <mutex>
+#include <vector>
+
+
+namespace DB
+{
+
+struct FormatSettings;
+
+/** Writes rows as compact JSON arrays and interleaves them with
+  * `{"progress": ...}` lines. Progress received via onProgress() is queued
+  * and written out before the next row, on flush() and before the suffix.
+  */
+class JSONCompactWithProgressRowOutputFormat final : public JSONRowOutputFormat
+{
+public:
+    JSONCompactWithProgressRowOutputFormat(
+        WriteBuffer & out_,
+        const Block & header,
+        const FormatSettings & settings_,
+        bool yield_strings_);
+
+    String getName() const override { return "JSONCompactWithProgressRowOutputFormat"; }
+
+    /// Adds `value` to the accumulated progress and queues one progress line for output.
+    void onProgress(const Progress & value) override;
+    /// Writes queued progress lines, then calls the base class flush().
+    void flush() override;
+
+private:
+    void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override;
+    void writeFieldDelimiter() override;
+    void writeRowStartDelimiter() override;
+    void writeRowEndDelimiter() override;
+    bool supportTotals() const override { return true; }
+    bool supportExtremes() const override { return true; }
+    void writeBeforeTotals() override;
+    void writeAfterTotals() override;
+    void writeExtremesElement(const char * title, const Columns & columns, size_t row_num) override;
+    void writeTotals(const Columns & columns, size_t row_num) override;
+
+    /// Writes all queued progress lines to the output and empties the queue.
+    void writeProgress();
+    void writeSuffix() override;
+
+    /// Progress accumulated over all onProgress() calls.
+    Progress progress;
+    /// Rendered progress lines waiting to be written; guarded by progress_lines_mutex.
+    std::vector<String> progress_lines;
+    std::mutex progress_lines_mutex;
+    /// To not lock mutex and check progress_lines every row,
+    /// we will use atomic flag that progress_lines is not empty.
+    std::atomic_bool has_progress = false;
+};
+
+}