Merge pull request #39218 from evillique/file_default_value

Add default argument to the function `file`
This commit is contained in:
Kruglov Pavel 2022-08-01 13:04:19 +02:00 committed by GitHub
commit dfdfabec94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 123 additions and 10 deletions

View File

@ -12,12 +12,13 @@ Reads file as a String. The file content is not parsed, so any information is re
**Syntax**
``` sql
file(path)
file(path[, default])
```
**Arguments**
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path supports the following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `default` — The value that will be returned in the case when a file does not exist or cannot be accessed. Data types supported: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
**Example**

View File

@ -12,12 +12,13 @@ sidebar_label: "Функции для работы с файлами"
**Синтаксис**
``` sql
file(path)
file(path[, default])
```
**Аргументы**
- `path` — относительный путь до файла от [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Путь к файлу может включать следующие символы подстановки и шаблоны: `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `'abc', 'def'` — строки.
- `default` — Значение, возвращаемое в случае, если указанный файл не существует или недоступен. Поддерживаемые типы данных: [String](../../sql-reference/data-types/string.md) и [NULL](../../sql-reference/syntax.md#null-literal).
**Примеры**

View File

@ -1,7 +1,10 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnConst.h>
#include <Columns/IColumn.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromVector.h>
#include <IO/copyData.h>
@ -19,6 +22,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NOT_IMPLEMENTED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int DATABASE_ACCESS_DENIED;
}
@ -30,21 +34,41 @@ public:
/// Factory entry point: builds a FunctionFile bound to the given context.
static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionFile>(context_); }
/// Forwards the context to WithContext.
explicit FunctionFile(ContextPtr context_) : WithContext(context_) {}
/// The function takes a variable number of arguments; the exact count is validated in getReturnTypeImpl.
bool isVariadic() const override { return true; }
String getName() const override { return name; }
/// NOTE(review): two definitions of getNumberOfArguments appear back to back here — presumably the
/// removed (returns 1) and the added (returns 0) line of the rendered diff. Only one of them can
/// exist in the real file. TODO confirm against the actual source.
size_t getNumberOfArguments() const override { return 1; }
size_t getNumberOfArguments() const override { return 0; }
/// Always reports the function as suitable for short-circuit argument execution; the argument types are ignored.
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
/// Validates the argument list and derives the result type.
///
/// Accepted forms:
///   file(path)            -> String
///   file(path, 'default') -> String
///   file(path, NULL)      -> Nullable(String)
///
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH unless one or two arguments are passed, and
/// NOT_IMPLEMENTED when the path is not a String or the default is neither String nor Null.
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
    const size_t arguments_count = arguments.size();

    if (arguments_count != 1 && arguments_count != 2)
        throw Exception(
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2",
            getName(), toString(arguments_count));

    /// The path is checked before the optional default, so a bad path is reported first.
    if (!isString(arguments[0].type))
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is only implemented for type String", getName());

    const bool has_default = (arguments_count == 2);

    /// A Null default makes the whole result nullable.
    if (has_default && arguments[1].type->onlyNull())
        return makeNullable(std::make_shared<DataTypeString>());

    if (has_default && !isString(arguments[1].type))
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} only accepts String or Null as second argument", getName());

    return std::make_shared<DataTypeString>();
}
/// Declares the argument at index 1 (the optional default value) as always constant.
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
/// Opts out of the default NULL handling; getReturnTypeImpl treats a Null second argument explicitly.
bool useDefaultImplementationForNulls() const override { return false; }
/// Opts in to the default handling of constant arguments.
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const ColumnPtr column = arguments[0].column;
const ColumnString * column_src = checkAndGetColumn<ColumnString>(column.get());
@ -53,6 +77,31 @@ public:
fmt::format("Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()),
ErrorCodes::ILLEGAL_COLUMN);
String default_result;
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr;
if (arguments.size() == 2)
{
if (result_type->isNullable())
{
col_null_map_to = ColumnUInt8::create(input_rows_count, false);
vec_null_map_to = &col_null_map_to->getData();
}
else
{
const auto & default_column = arguments[1].column;
const ColumnConst * default_col = checkAndGetColumn<ColumnConst>(default_column.get());
if (!default_col)
throw Exception(
"Illegal column " + arguments[1].column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
default_result = default_col->getValue<String>();
}
}
auto result = ColumnString::create();
auto & res_chars = result->getChars();
auto & res_offsets = result->getOffsets();
@ -77,18 +126,34 @@ public:
/// Otherwise it will not allow to work with symlinks in `user_files_path` directory.
file_path = fs::absolute(file_path).lexically_normal();
if (need_check && file_path.string().find(user_files_absolute_path_string) != 0)
throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File is not inside {}", user_files_absolute_path.string());
try
{
if (need_check && file_path.string().find(user_files_absolute_path_string) != 0)
throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File is not inside {}", user_files_absolute_path.string());
ReadBufferFromFile in(file_path);
WriteBufferFromVector out(res_chars, AppendModeTag{});
copyData(in, out);
out.finalize();
ReadBufferFromFile in(file_path);
WriteBufferFromVector out(res_chars, AppendModeTag{});
copyData(in, out);
out.finalize();
}
catch (...)
{
if (arguments.size() == 1)
throw;
if (vec_null_map_to)
(*vec_null_map_to)[row] = true;
else
res_chars.insert(default_result.data(), default_result.data() + default_result.size());
}
res_chars.push_back(0);
res_offsets[row] = res_chars.size();
}
if (vec_null_map_to)
return ColumnNullable::create(std::move(result), std::move(col_null_map_to));
return result;
}
};

View File

@ -0,0 +1,2 @@
default
\N

View File

@ -0,0 +1,3 @@
-- Without a default, reading a missing file must fail with server error 107
-- (presumably FILE_DOESNT_EXIST; verify against the error code list).
SELECT file('nonexistent.txt'); -- { serverError 107 }
-- With a String default, the default is returned instead of an error.
SELECT file('nonexistent.txt', 'default');
-- With a NULL default, the result is NULL instead of an error.
SELECT file('nonexistent.txt', NULL);

View File

@ -0,0 +1,20 @@
text_0\n text_0\n
default \N
text_2\n text_2\n
default \N
default \N
text_5\n text_5\n
text_6\n text_6\n
text_7\n text_7\n
default \N
text_9\n text_9\n
default \N
default \N
text_5\n text_5\n
text_6\n text_6\n
text_7\n text_7\n
default \N
text_9\n text_9\n
default \N
default \N
default \N

View File

@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Checks file(path, default) on a mix of existing and missing files:
# existing files yield their content, missing ones yield the String default
# or NULL, depending on the second argument.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Provoke an error on a missing file and take the 9th whitespace-separated field
# of the Exception line with the "/nonexist.txt" suffix stripped
# (presumably the absolute user_files directory; depends on the message format).
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')

# Refuse to continue with an empty component: otherwise the `rm -rf` below would
# target "/<database>" or the whole user_files directory.
if [ -z "${USER_FILES_PATH}" ] || [ -z "${CLICKHOUSE_DATABASE}" ]; then
    echo "Cannot determine the working folder: user_files path or database name is empty" >&2
    exit 1
fi

WORKING_FOLDER_02357="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}"

# Start from a clean per-database folder.
rm -rf "${WORKING_FOLDER_02357}"
mkdir "${WORKING_FOLDER_02357}"

# Create files only for some indices so that both outcomes occur in each query.
for i in 0 2 5 6 7 9
do
    echo "text_${i}" > "${WORKING_FOLDER_02357}/file_${i}"
done

# numbers(10) covers file_0..file_9; numbers(3, 10) covers file_3..file_12.
${CLICKHOUSE_CLIENT} --query "WITH '${WORKING_FOLDER_02357}/file_' || toString(number) AS path SELECT file(path, 'default'), file(path, NULL) from numbers(10);"
${CLICKHOUSE_CLIENT} --query "WITH '${WORKING_FOLDER_02357}/file_' || toString(number) AS path SELECT file(path, 'default'), file(path, NULL) from numbers(3, 10);"

rm -rf "${WORKING_FOLDER_02357}"