ClickHouse/src/Functions/formatRow.cpp

159 lines
6.2 KiB
C++
Raw Normal View History

2020-07-19 02:55:08 +00:00
#include <memory>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Formats/FormatFactory.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
2021-05-17 07:30:42 +00:00
#include <Functions/IFunction.h>
2020-07-19 02:55:08 +00:00
#include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h>
#include <Processors/Formats/IOutputFormat.h>
2021-11-02 13:26:14 +00:00
#include <Processors/Formats/IRowOutputFormat.h>
2021-10-02 07:13:14 +00:00
#include <base/map.h>
2020-07-19 02:55:08 +00:00
namespace DB
{
/// Error codes thrown by the functions in this file.
/// They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int UNKNOWN_FORMAT;
}
2020-09-07 18:00:37 +00:00
namespace
{
2020-07-19 02:55:08 +00:00
/** formatRow(<format>, x, y, ...) is a function that allows you to use RowOutputFormat over
* several columns to generate a string per row, such as CSV, TSV, JSONEachRow, etc.
* formatRowNoNewline(...) trims the newline character of each row.
2020-07-19 02:55:08 +00:00
*/
template <bool no_newline>
2020-07-19 02:55:08 +00:00
class FunctionFormatRow : public IFunction
{
public:
static constexpr auto name = no_newline ? "formatRowNoNewline" : "formatRow";
2020-07-19 02:55:08 +00:00
2023-02-25 19:16:51 +00:00
FunctionFormatRow(String format_name_, Names arguments_column_names_, ContextPtr context_)
: format_name(std::move(format_name_))
, arguments_column_names(std::move(arguments_column_names_))
, context(std::move(context_))
2020-07-19 02:55:08 +00:00
{
if (!FormatFactory::instance().getAllFormats().contains(format_name))
throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", format_name);
2020-07-19 02:55:08 +00:00
}
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
2020-07-19 02:55:08 +00:00
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
2020-07-19 02:55:08 +00:00
{
auto col_str = ColumnString::create();
ColumnString::Chars & vec = col_str->getChars();
WriteBufferFromVector buffer(vec);
ColumnString::Offsets & offsets = col_str->getOffsets();
offsets.resize(input_rows_count);
2023-02-25 19:16:51 +00:00
2020-10-17 21:41:50 +00:00
Block arg_columns;
2023-02-25 19:16:51 +00:00
size_t arguments_size = arguments.size();
for (size_t i = 1; i < arguments_size; ++i)
{
auto argument_column = arguments[i];
argument_column.name = arguments_column_names[i];
arg_columns.insert(std::move(argument_column));
}
2020-10-14 14:04:50 +00:00
materializeBlockInplace(arg_columns);
auto format_settings = getFormatSettings(context);
auto out = FormatFactory::instance().getOutputFormat(format_name, buffer, arg_columns, context, format_settings);
2021-11-02 13:26:14 +00:00
/// This function make sense only for row output formats.
auto * row_output_format = dynamic_cast<IRowOutputFormat *>(out.get());
if (!row_output_format)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Cannot turn rows into a {} format strings. {} function supports only row output formats",
format_name, getName());
2021-11-02 13:26:14 +00:00
auto columns = arg_columns.getColumns();
for (size_t i = 0; i != input_rows_count; ++i)
{
2022-12-15 19:47:10 +00:00
row_output_format->writePrefixIfNeeded();
row_output_format->writeRow(columns, i);
row_output_format->finalize();
if constexpr (no_newline)
{
// replace '\n' with '\0'
if (buffer.position() != buffer.buffer().begin() && buffer.position()[-1] == '\n')
buffer.position()[-1] = '\0';
}
else
writeChar('\0', buffer);
offsets[i] = buffer.count();
row_output_format->resetFormatter();
}
2020-10-17 21:41:50 +00:00
return col_str;
2020-07-19 02:55:08 +00:00
}
private:
String format_name;
2023-02-25 19:16:51 +00:00
Names arguments_column_names;
2021-06-01 12:20:52 +00:00
ContextPtr context;
2020-07-19 02:55:08 +00:00
};
template <bool no_newline>
2021-05-15 17:33:15 +00:00
class FormatRowOverloadResolver : public IFunctionOverloadResolver
2020-07-19 02:55:08 +00:00
{
public:
static constexpr auto name = no_newline ? "formatRowNoNewline" : "formatRow";
2021-06-01 12:20:52 +00:00
static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique<FormatRowOverloadResolver>(context); }
explicit FormatRowOverloadResolver(ContextPtr context_) : context(context_) { }
2020-07-19 02:55:08 +00:00
String getName() const override { return name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }
bool useDefaultImplementationForNulls() const override { return false; }
2021-05-15 17:33:15 +00:00
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
2020-07-19 02:55:08 +00:00
{
if (arguments.size() < 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires at least two arguments: the format name and its output expression(s)", getName());
2020-07-19 02:55:08 +00:00
2023-02-25 19:16:51 +00:00
Names arguments_column_names;
arguments_column_names.reserve(arguments.size());
for (const auto & argument : arguments)
arguments_column_names.push_back(argument.name);
2020-07-19 02:55:08 +00:00
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments.at(0).column.get()))
2021-05-15 17:33:15 +00:00
return std::make_unique<FunctionToFunctionBaseAdaptor>(
2023-02-25 19:16:51 +00:00
std::make_shared<FunctionFormatRow<no_newline>>(name_col->getValue<String>(), std::move(arguments_column_names), context),
2021-06-15 19:55:21 +00:00
collections::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
2020-07-19 02:55:08 +00:00
return_type);
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument to {} must be a format name", getName());
2020-07-19 02:55:08 +00:00
}
2021-05-15 17:33:15 +00:00
DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared<DataTypeString>(); }
2020-07-19 02:55:08 +00:00
private:
2021-06-01 12:20:52 +00:00
ContextPtr context;
2020-07-19 02:55:08 +00:00
};
2020-09-07 18:00:37 +00:00
}
/// Registers both variants of the function in the function factory:
/// formatRowNoNewline (no_newline = true) and formatRow (no_newline = false).
REGISTER_FUNCTION(FormatRow)
{
    using FormatRowNoNewlineResolver = FormatRowOverloadResolver<true>;
    using FormatRowResolver = FormatRowOverloadResolver<false>;

    factory.registerFunction<FormatRowNoNewlineResolver>();
    factory.registerFunction<FormatRowResolver>();
}
}