Merge pull request #55239 from aiven-sal/aiven-sal/format_query

Add formatQuery function
This commit is contained in:
Robert Schulze 2023-10-26 22:46:01 +02:00 committed by GitHub
commit 325ff33c3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 247 additions and 0 deletions

View File

@ -2794,3 +2794,71 @@ message Root
uint32 column2 = 2;
}
```
## formatQuery
Returns a formatted, possibly multi-line, version of the given SQL query.
**Syntax**
```sql
formatQuery(query)
```
**Arguments**
- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)
**Returned value**
- The formatted query. [String](../../sql-reference/data-types/string.md).
**Example**
```sql
SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');
```
Result:
```result
┌─formatQuery('select a, b FRom tab WHERE a > 3 and b < 3')
│ SELECT
a,
b
FROM tab
WHERE (a > 3) AND (b < 3)
└───────────────────────────────────────────────────────────────┘
```
## formatQuerySingleLine
Like formatQuery() but the returned formatted string contains no line breaks.
**Syntax**
```sql
formatQuerySingleLine(query)
```
**Arguments**
- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)
**Returned value**
- The formatted query. [String](../../sql-reference/data-types/string.md).
**Example**
```sql
SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');
```
Result:
```result
┌─formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3')
│ SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)
└─────────────────────────────────────────────────────────────────────────┘
```

View File

@ -0,0 +1,137 @@
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <IO/WriteBufferFromVector.h>
#include <Interpreters/Context.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
template <bool one_line, typename Name>
class FunctionFormatQuery : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(ContextPtr context)
{
const auto & settings = context->getSettings();
return std::make_shared<FunctionFormatQuery>(settings.max_query_size, settings.max_parser_depth);
}
FunctionFormatQuery(size_t max_query_size_, size_t max_parser_depth_)
: max_query_size(max_query_size_), max_parser_depth(max_parser_depth_)
{
}
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors mandatory_args{{"query", &isString<IDataType>, nullptr, "String"}};
validateFunctionArgumentTypes(*this, arguments, mandatory_args);
return arguments[0].type;
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
const ColumnPtr column = arguments[0].column;
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
{
auto col_res = ColumnString::create();
formatVector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
return col_res;
}
else
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName());
}
private:
void formatQueryImpl(const char * begin, const char * end, ColumnString::Chars & output) const
{
ParserQuery parser{end};
auto ast = parseQuery(parser, begin, end, {}, max_query_size, max_parser_depth);
WriteBufferFromVector buf(output, AppendModeTag{});
formatAST(*ast, buf, /* hilite */ false, /* one_line */ one_line);
buf.finalize();
}
void formatVector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets) const
{
const size_t size = offsets.size();
res_offsets.resize(size);
res_data.reserve(data.size());
size_t prev_in_offset = 0;
for (size_t i = 0; i < size; ++i)
{
const auto * begin = reinterpret_cast<const char *>(&data[prev_in_offset]);
const char * end = begin + offsets[i] - 1;
formatQueryImpl(begin, end, res_data);
res_offsets[i] = res_data.size() + 1;
prev_in_offset = offsets[i];
}
}
size_t max_query_size;
size_t max_parser_depth;
};
struct NameFormatQuery
{
static constexpr auto name = "formatQuery";
};
struct NameFormatQuerySingleLine
{
static constexpr auto name = "formatQuerySingleLine";
};
REGISTER_FUNCTION(formatQuery)
{
factory.registerFunction<FunctionFormatQuery<false, NameFormatQuery>>(FunctionDocumentation{
.description = "Returns a formatted, possibly multi-line, version of the given SQL query.\n[example:multiline]",
.syntax = "formatQuery(query)",
.arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}},
.returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).",
.examples{
{"multiline",
"SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');",
"SELECT\n"
" a,\n"
" b\n"
"FROM tab\n"
"WHERE (a > 3) AND (b < 3)"}},
.categories{"Other"}});
}
REGISTER_FUNCTION(formatQuerySingleLine)
{
factory.registerFunction<FunctionFormatQuery<true, NameFormatQuerySingleLine>>(FunctionDocumentation{
.description = "Like formatQuery() but the returned formatted string contains no line breaks.\n[example:multiline]",
.syntax = "formatQuerySingleLine(query)",
.arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}},
.returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).",
.examples{
{"multiline",
"SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');",
"SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}},
.categories{"Other"}});
}
}

View File

@ -0,0 +1,19 @@
SELECT 1
SELECT 1
SELECT 1
SELECT 1
1
INSERT INTO tab FORMAT Values
CREATE TABLE default.no_prop_table\n(\n `some_column` UInt64\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
EXPLAIN SYNTAX\nSELECT\n CAST(1, \'INT\'),\n CEIL(1),\n CEILING(1),\n CHAR(49),\n CHAR_LENGTH(\'1\'),\n CHARACTER_LENGTH(\'1\'),\n COALESCE(1),\n CONCAT(\'1\', \'1\'),\n CORR(1, 1),\n COS(1),\n COUNT(1),\n COVAR_POP(1, 1),\n COVAR_SAMP(1, 1),\n DATABASE(),\n SCHEMA(),\n dateDiff(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')),\n EXP(1),\n FLATTEN([[1]]),\n FLOOR(1),\n FQDN(),\n GREATEST(1),\n IF(1, 1, 1),\n IFNULL(1, 1),\n LCASE(\'A\'),\n LEAST(1),\n LENGTH(\'1\'),\n LN(1),\n LOCATE(\'1\', \'1\'),\n LOG(1),\n LOG10(1),\n LOG2(1),\n LOWER(\'A\'),\n MAX(1),\n MID(\'123\', 1, 1),\n MIN(1),\n MOD(1, 1),\n NOT 1,\n NOW(),\n NOW64(),\n NULLIF(1, 1),\n PI(),\n position(\'123\', \'2\'),\n POW(1, 1),\n POWER(1, 1),\n RAND(),\n REPLACE(\'1\', \'1\', \'2\'),\n REVERSE(\'123\'),\n ROUND(1),\n SIN(1),\n SQRT(1),\n STDDEV_POP(1),\n STDDEV_SAMP(1),\n SUBSTR(\'123\', 2),\n substring(\'123\', 2),\n SUM(1),\n TAN(1),\n TANH(1),\n TRUNC(1),\n TRUNCATE(1),\n UCASE(\'A\'),\n UPPER(\'A\'),\n USER(),\n VAR_POP(1),\n VAR_SAMP(1),\n WEEK(toDate(\'2020-10-24\')),\n YEARWEEK(toDate(\'2020-10-24\'))\nFORMAT TSVRaw
1
formatQuerySingleLine
SELECT 1
SELECT 1
SELECT 1
SELECT 1
1
INSERT INTO tab FORMAT Values
CREATE TABLE default.no_prop_table (`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192
EXPLAIN SYNTAX SELECT CAST(1, \'INT\'), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), dateDiff(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT 1, NOW(), NOW64(), NULLIF(1, 1), PI(), position(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), substring(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) FORMAT TSVRaw
1

View File

@ -0,0 +1,21 @@
SELECT formatQuery('select 1;');
SELECT formatQuery('select 1');
SELECT formatQuery('SELECT 1;');
SELECT formatQuery('SELECT 1');
SELECT formatQuery('select 1;') == formatQuery('SeLecT 1');
SELECT formatQuery('INSERT INTO tab VALUES (\'\') (\'test\')');
SELECT formatQuery('CREATE TABLE default.no_prop_table(`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192');
SELECT formatQuery('EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), SUBSTRING(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) format TSVRaw;');
SELECT normalizedQueryHash(formatQuery('select 1')) = normalizedQueryHash(formatQuery('SELECT 1'));
SELECT formatQuery('SEECTwrong'); -- { serverError SYNTAX_ERROR }
SELECT 'formatQuerySingleLine';
SELECT formatQuerySingleLine('select 1;');
SELECT formatQuerySingleLine('select 1');
SELECT formatQuerySingleLine('SELECT 1;');
SELECT formatQuerySingleLine('SELECT 1');
SELECT formatQuerySingleLine('select 1;') == formatQuerySingleLine('SeLecT 1');
SELECT formatQuerySingleLine('INSERT INTO tab VALUES (\'\') (\'test\')');
SELECT formatQuerySingleLine('CREATE TABLE default.no_prop_table(`some_column` UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192');
SELECT formatQuerySingleLine('EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH(\'1\'), CHARACTER_LENGTH(\'1\'), COALESCE(1), CONCAT(\'1\', \'1\'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), SCHEMA(), DATEDIFF(\'DAY\', toDate(\'2020-10-24\'), toDate(\'2019-10-24\')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE(\'A\'), LEAST(1), LENGTH(\'1\'), LN(1), LOCATE(\'1\', \'1\'), LOG(1), LOG10(1), LOG2(1), LOWER(\'A\'), MAX(1), MID(\'123\', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION(\'123\', \'2\'), POW(1, 1), POWER(1, 1), RAND(), REPLACE(\'1\', \'1\', \'2\'), REVERSE(\'123\'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR(\'123\', 2), SUBSTRING(\'123\', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE(\'A\'), UPPER(\'A\'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate(\'2020-10-24\')), YEARWEEK(toDate(\'2020-10-24\')) format TSVRaw;');
SELECT normalizedQueryHash(formatQuerySingleLine('select 1')) = normalizedQueryHash(formatQuerySingleLine('SELECT 1'));
SELECT formatQuerySingleLine('SEECTwrong'); -- { serverError SYNTAX_ERROR }

View File

@ -1479,6 +1479,8 @@ fmtlib
formatDateTime
formatDateTimeInJoda
formatDateTimeInJodaSyntax
formatQuery
formatQuerySingleLine
formatReadableDecimalSize
formatReadableQuantity
formatReadableSize