2020-07-19 02:55:08 +00:00
# include <memory>
# include <Columns/ColumnString.h>
# include <DataTypes/DataTypeString.h>
# include <Formats/FormatFactory.h>
# include <Functions/FunctionFactory.h>
# include <Functions/FunctionHelpers.h>
2021-05-17 07:30:42 +00:00
# include <Functions/IFunction.h>
2020-07-19 02:55:08 +00:00
# include <IO/WriteBufferFromVector.h>
# include <IO/WriteHelpers.h>
# include <Processors/Formats/IOutputFormat.h>
2021-11-02 13:26:14 +00:00
# include <Processors/Formats/IRowOutputFormat.h>
2021-10-02 07:13:14 +00:00
# include <base/map.h>
2020-07-19 02:55:08 +00:00
namespace DB
{
namespace ErrorCodes
{
    /// Error codes referenced by the exceptions thrown in this file.
    /// They are only declared here (`extern`); the definitions are not part of this file.
    extern const int BAD_ARGUMENTS;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int UNKNOWN_FORMAT;
}
2020-09-07 18:00:37 +00:00
namespace
{
2020-07-19 02:55:08 +00:00
/** formatRow(<format>, x, y, ...) is a function that allows you to use RowOutputFormat over
* several columns to generate a string per row , such as CSV , TSV , JSONEachRow , etc .
* formatRowNoNewline ( . . . ) trims the newline character of each row .
*/
template < bool no_newline >
class FunctionFormatRow : public IFunction
{
public :
static constexpr auto name = no_newline ? " formatRowNoNewline " : " formatRow " ;
2021-06-01 12:20:52 +00:00
FunctionFormatRow ( const String & format_name_ , ContextPtr context_ ) : format_name ( format_name_ ) , context ( context_ )
2020-07-19 02:55:08 +00:00
{
2022-04-18 10:18:43 +00:00
if ( ! FormatFactory : : instance ( ) . getAllFormats ( ) . contains ( format_name ) )
2020-07-19 02:55:08 +00:00
throw Exception ( " Unknown format " + format_name , ErrorCodes : : UNKNOWN_FORMAT ) ;
}
String getName ( ) const override { return name ; }
size_t getNumberOfArguments ( ) const override { return 0 ; }
bool useDefaultImplementationForNulls ( ) const override { return false ; }
bool useDefaultImplementationForConstants ( ) const override { return true ; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution ( const DataTypesWithConstInfo & /*arguments*/ ) const override { return true ; }
2020-07-19 02:55:08 +00:00
ColumnNumbers getArgumentsThatAreAlwaysConstant ( ) const override { return { 0 } ; }
2020-11-17 13:24:45 +00:00
ColumnPtr executeImpl ( const ColumnsWithTypeAndName & arguments , const DataTypePtr & , size_t input_rows_count ) const override
2020-07-19 02:55:08 +00:00
{
auto col_str = ColumnString : : create ( ) ;
ColumnString : : Chars & vec = col_str - > getChars ( ) ;
WriteBufferFromVector buffer ( vec ) ;
ColumnString : : Offsets & offsets = col_str - > getOffsets ( ) ;
offsets . resize ( input_rows_count ) ;
2020-10-17 21:41:50 +00:00
Block arg_columns ;
2020-07-19 02:55:08 +00:00
for ( auto i = 1u ; i < arguments . size ( ) ; + + i )
2020-10-17 21:41:50 +00:00
arg_columns . insert ( arguments [ i ] ) ;
2020-10-14 14:04:50 +00:00
materializeBlockInplace ( arg_columns ) ;
auto out = FormatFactory : : instance ( ) . getOutputFormat ( format_name , buffer , arg_columns , context , [ & ] ( const Columns & , size_t row )
2020-07-19 02:55:08 +00:00
{
if constexpr ( no_newline )
2020-07-22 02:28:20 +00:00
{
// replace '\n' with '\0'
2020-07-25 02:29:47 +00:00
if ( buffer . position ( ) ! = buffer . buffer ( ) . begin ( ) & & buffer . position ( ) [ - 1 ] = = ' \n ' )
buffer . position ( ) [ - 1 ] = ' \0 ' ;
2020-07-22 02:28:20 +00:00
}
2020-07-19 02:55:08 +00:00
else
writeChar ( ' \0 ' , buffer ) ;
offsets [ row ] = buffer . count ( ) ;
} ) ;
2021-11-02 13:26:14 +00:00
/// This function make sense only for row output formats.
if ( ! dynamic_cast < IRowOutputFormat * > ( out . get ( ) ) )
throw Exception ( ErrorCodes : : BAD_ARGUMENTS , " Cannot turn rows into a {} format strings. {} function supports only row output formats " , format_name , getName ( ) ) ;
2021-11-02 13:40:41 +00:00
/// Don't write prefix if any.
out - > doNotWritePrefix ( ) ;
2020-10-14 14:04:50 +00:00
out - > write ( arg_columns ) ;
2020-10-17 21:41:50 +00:00
return col_str ;
2020-07-19 02:55:08 +00:00
}
private :
String format_name ;
2021-06-01 12:20:52 +00:00
ContextPtr context ;
2020-07-19 02:55:08 +00:00
} ;
template < bool no_newline >
2021-05-15 17:33:15 +00:00
class FormatRowOverloadResolver : public IFunctionOverloadResolver
2020-07-19 02:55:08 +00:00
{
public :
static constexpr auto name = no_newline ? " formatRowNoNewline " : " formatRow " ;
2021-06-01 12:20:52 +00:00
static FunctionOverloadResolverPtr create ( ContextPtr context ) { return std : : make_unique < FormatRowOverloadResolver > ( context ) ; }
explicit FormatRowOverloadResolver ( ContextPtr context_ ) : context ( context_ ) { }
2020-07-19 02:55:08 +00:00
String getName ( ) const override { return name ; }
bool isVariadic ( ) const override { return true ; }
size_t getNumberOfArguments ( ) const override { return 0 ; }
ColumnNumbers getArgumentsThatAreAlwaysConstant ( ) const override { return { 0 } ; }
bool useDefaultImplementationForNulls ( ) const override { return false ; }
2021-05-15 17:33:15 +00:00
FunctionBasePtr buildImpl ( const ColumnsWithTypeAndName & arguments , const DataTypePtr & return_type ) const override
2020-07-19 02:55:08 +00:00
{
if ( arguments . size ( ) < 2 )
throw Exception (
" Function " + getName ( ) + " requires at least two arguments: the format name and its output expression(s) " ,
ErrorCodes : : NUMBER_OF_ARGUMENTS_DOESNT_MATCH ) ;
if ( const auto * name_col = checkAndGetColumnConst < ColumnString > ( arguments . at ( 0 ) . column . get ( ) ) )
2021-05-15 17:33:15 +00:00
return std : : make_unique < FunctionToFunctionBaseAdaptor > (
2020-07-19 02:55:08 +00:00
std : : make_shared < FunctionFormatRow < no_newline > > ( name_col - > getValue < String > ( ) , context ) ,
2021-06-15 19:55:21 +00:00
collections : : map < DataTypes > ( arguments , [ ] ( const auto & elem ) { return elem . type ; } ) ,
2020-07-19 02:55:08 +00:00
return_type ) ;
else
throw Exception ( " First argument to " + getName ( ) + " must be a format name " , ErrorCodes : : ILLEGAL_TYPE_OF_ARGUMENT ) ;
}
2021-05-15 17:33:15 +00:00
DataTypePtr getReturnTypeImpl ( const DataTypes & ) const override { return std : : make_shared < DataTypeString > ( ) ; }
2020-07-19 02:55:08 +00:00
private :
2021-06-01 12:20:52 +00:00
ContextPtr context ;
2020-07-19 02:55:08 +00:00
} ;
2020-09-07 18:00:37 +00:00
}
2022-07-04 07:01:39 +00:00
/// Registers both variants of the function: formatRowNoNewline first, then formatRow.
/// NOTE(review): `factory` is presumably introduced by the REGISTER_FUNCTION macro - it is not declared in this file.
REGISTER_FUNCTION(FormatRow)
{
    using NoNewlineResolver = FormatRowOverloadResolver<true>;
    using WithNewlineResolver = FormatRowOverloadResolver<false>;

    factory.registerFunction<NoNewlineResolver>();
    factory.registerFunction<WithNewlineResolver>();
}
}