Merge pull request #52209 from mkmkme/mkmkme/first-line

This commit is contained in:
vdimir 2023-07-19 11:10:13 +02:00 committed by GitHub
commit 92f04d2c53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 147 additions and 17 deletions

View File

@ -1267,3 +1267,36 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
## firstLine
Returns the first line from a multi-line string.
**Syntax**
```sql
firstLine(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- The first line of the input value or the whole value if there are no line
separators. [String](../data-types/string.md)
**Example**
```sql
select firstLine('foo\nbar\nbaz');
```
Result:
```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo │
└────────────────────────────┘
```

View File

@ -1124,3 +1124,39 @@ Do Nothing for 2 Minutes 2:00  
Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
## firstLine
Возвращает первую строку в многострочном тексте.
**Синтаксис**
```sql
firstLine(val)
```
**Аргументы**
- `val` - текст для обработки. [String](../data-types/string.md)
**Возвращаемое значение**
- Первая строка текста или весь текст, если переносы строк отсутствуют.
Тип: [String](../data-types/string.md)
**Пример**
Запрос:
```sql
select firstLine('foo\nbar\nbaz');
```
Результат:
```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo │
└────────────────────────────┘
```

View File

@ -7,8 +7,8 @@
namespace DB
{
/** URL processing functions. See implementation in separate .cpp files.
* All functions are not strictly follow RFC, instead they are maximally simplified for performance reasons.
/** These helpers are used by URL processing functions. See implementation in separate .cpp files.
* All functions do not strictly follow RFC, instead they are maximally simplified for performance reasons.
*
* Functions for extraction parts of URL.
* If the URL has no such part, an empty string is returned.
@ -101,7 +101,7 @@ struct ExtractSubstringImpl
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}
};
@ -156,7 +156,7 @@ struct CutSubstringImpl
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}
};

View File

@ -1,8 +1,8 @@
#pragma once
#include <Functions/FunctionFactory.h>
#include <Functions/URL/FunctionsURL.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/StringHelpers.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>

View File

@ -1,7 +1,7 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include <base/find_symbols.h>
#include "FunctionsURL.h"
namespace DB
{

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB
{

View File

@ -1,7 +1,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/URL/FunctionsURL.h>
#include <Functions/StringHelpers.h>
namespace DB
@ -154,4 +154,3 @@ REGISTER_FUNCTION(Netloc)
}
}

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h"
#include <Functions/StringHelpers.h>
#include "path.h"
#include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#pragma once
#include <base/find_symbols.h>
#include <Functions/URL/FunctionsURL.h>
#include <Functions/StringHelpers.h>
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h"
#include <Functions/StringHelpers.h>
#include "path.h"
#include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <Common/StringUtils/StringUtils.h>
#include <Functions/StringHelpers.h>
namespace DB
@ -54,4 +54,3 @@ struct ExtractProtocol
};
}

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB
@ -34,4 +34,3 @@ struct ExtractQueryStringAndFragment
};
}

View File

@ -0,0 +1,42 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include <base/find_symbols.h>
namespace DB
{
struct FirstLine
{
static size_t getReserveLengthForElement() { return 16; }
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
res_data = data;
const Pos end = data + size;
const Pos pos = find_first_symbols<'\r', '\n'>(data, end);
res_size = pos - data;
}
};
/// Carries the SQL-visible name of the function.
struct NameFirstLine
{
    static constexpr auto name = "firstLine";
};

/// `firstLine` is assembled from the generic String -> String function template,
/// the substring-extraction wrapper and the FirstLine extractor.
using FunctionFirstLine = FunctionStringToString<ExtractSubstringImpl<FirstLine>, NameFirstLine>;

REGISTER_FUNCTION(FirstLine)
{
    /// Embedded documentation handed to the factory together with the function.
    FunctionDocumentation documentation{
        .description = "Returns first line of a multi-line string.",
        .syntax = "firstLine(string)",
        .arguments = {{.name = "string", .description = "The string to process."}},
        .returned_value = {"The first line of the string or the whole string if there is no line separators."},
        .examples = {
            {.name = "Return first line", .query = "firstLine('Hello\\nWorld')", .result = "'Hello'"},
            {.name = "Return whole string", .query = "firstLine('Hello World')", .result = "'Hello World'"},
        }};

    factory.registerFunction<FunctionFirstLine>(documentation);
}
}

View File

@ -0,0 +1,9 @@
foo
foo
foo
foobarbaz
== vector
1 foo
2 quux
3 single line
4 windows

View File

@ -0,0 +1,12 @@
-- Constant arguments: one case per line-break style, then a string with no line break at all.
-- Line feed only.
select firstLine('foo\nbar\nbaz');
-- Carriage return only.
select firstLine('foo\rbar\rbaz');
-- Carriage return + line feed pairs.
select firstLine('foo\r\nbar\r\nbaz');
-- No line break: the reference output expects the whole string back.
select firstLine('foobarbaz');
select '== vector';
-- Same checks with the argument read from a table column instead of a literal.
drop table if exists 02815_first_line_vector;
create table 02815_first_line_vector (n Int32, text String) engine = MergeTree order by n;
-- Rows cover: several lines, a trailing line feed, a single line, and CRLF line breaks.
insert into 02815_first_line_vector values (1, 'foo\nbar\nbaz'), (2, 'quux\n'), (3, 'single line'), (4, 'windows\r\nline breaks');
select n, firstLine(text) from 02815_first_line_vector order by n;

View File

@ -1428,6 +1428,7 @@ filesystemFree
filesystems
finalizeAggregation
fips
firstLine
firstSignificantSubdomain
firstSignificantSubdomainCustom
fixedstring