mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
refactor function
This commit is contained in:
parent
812641f5a7
commit
7c5d845866
@ -11,93 +11,124 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Error codes used by the `file` function below.
/// They are only declared here (`extern`); the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;          /// Argument column is not a ColumnString.
    extern const int NOT_IMPLEMENTED;         /// Argument type is not String.
    extern const int INCORRECT_FILE_NAME;     /// The path names a directory.
    extern const int DATABASE_ACCESS_DENIED;  /// The path is outside the user files directory.
    extern const int FILE_DOESNT_EXIST;       /// The path does not name an existing file.
}
|
||||
|
||||
/// A function to read file as a string.
|
||||
class FunctionFile : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "file";
|
||||
static FunctionPtr create(const Context &context) { return std::make_shared<FunctionFile>(context); }
|
||||
explicit FunctionFile(const Context &context_) : context(context_) {}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int INCORRECT_FILE_NAME;
|
||||
extern const int DATABASE_ACCESS_DENIED;
|
||||
if (!isString(arguments[0].type))
|
||||
throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED);
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
/** A function to read file as a string.
|
||||
*/
|
||||
class FunctionFile : public IFunction
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "file";
|
||||
static FunctionPtr create(const Context &context) { return std::make_shared<FunctionFile>(context); }
|
||||
explicit FunctionFile(const Context &context_) : context(context_) {}
|
||||
const ColumnPtr column = arguments[0].column;
|
||||
const ColumnString * expected = checkAndGetColumn<ColumnString>(column.get());
|
||||
if (!expected)
|
||||
throw Exception(
|
||||
fmt::format("Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
String getName() const override { return name; }
|
||||
const ColumnString::Chars & chars = expected->getChars();
|
||||
const ColumnString::Offsets & offsets = expected->getOffsets();
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
|
||||
std::vector<String> checked_filenames(input_rows_count);
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
auto result = ColumnString::create();
|
||||
auto & res_chars = result->getChars();
|
||||
auto & res_offsets = result->getOffsets();
|
||||
|
||||
res_offsets.resize(input_rows_count);
|
||||
|
||||
size_t source_offset = 0;
|
||||
size_t result_offset = 0;
|
||||
for (size_t row = 0; row < input_rows_count; ++row)
|
||||
{
|
||||
if (!isString(arguments[0].type))
|
||||
throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED);
|
||||
return std::make_shared<DataTypeString>();
|
||||
const char * filename = reinterpret_cast<const char *>(&chars[source_offset]);
|
||||
|
||||
const String user_files_path = context.getUserFilesPath();
|
||||
String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
|
||||
Poco::Path poco_filepath = Poco::Path(filename);
|
||||
if (poco_filepath.isRelative())
|
||||
poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath);
|
||||
const String file_absolute_path = poco_filepath.absolute().toString();
|
||||
checkReadIsAllowedOrThrow(user_files_absolute_path, file_absolute_path);
|
||||
|
||||
checked_filenames[row] = file_absolute_path;
|
||||
auto file = Poco::File(file_absolute_path);
|
||||
|
||||
if (!file.exists())
|
||||
throw Exception(fmt::format("File {} doesn't exist.", file_absolute_path), ErrorCodes::FILE_DOESNT_EXIST);
|
||||
|
||||
const auto current_file_size = Poco::File(file_absolute_path).getSize();
|
||||
|
||||
result_offset += current_file_size + 1;
|
||||
res_offsets[row] = result_offset;
|
||||
source_offset = offsets[row];
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
res_chars.resize(result_offset);
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
size_t prev_offset = 0;
|
||||
|
||||
for (size_t row = 0; row < input_rows_count; ++row)
|
||||
{
|
||||
const auto & column = arguments[0].column;
|
||||
const char * filename = nullptr;
|
||||
if (const auto * column_string = checkAndGetColumn<ColumnString>(column.get()))
|
||||
{
|
||||
const auto & filename_chars = column_string->getChars();
|
||||
filename = reinterpret_cast<const char *>(&filename_chars[0]);
|
||||
auto res = ColumnString::create();
|
||||
auto & res_chars = res->getChars();
|
||||
auto & res_offsets = res->getOffsets();
|
||||
auto file_absolute_path = checked_filenames[row];
|
||||
ReadBufferFromFile in(file_absolute_path);
|
||||
char * res_buf = reinterpret_cast<char *>(&res_chars[prev_offset]);
|
||||
|
||||
const String user_files_path = context.getUserFilesPath();
|
||||
String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
|
||||
Poco::Path poco_filepath = Poco::Path(filename);
|
||||
if (poco_filepath.isRelative())
|
||||
poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath);
|
||||
const String file_absolute_path = poco_filepath.absolute().toString();
|
||||
checkReadIsAllowed(user_files_absolute_path, file_absolute_path);
|
||||
|
||||
ReadBufferFromFile in(file_absolute_path);
|
||||
ssize_t file_len = Poco::File(file_absolute_path).getSize();
|
||||
res_chars.resize_exact(file_len + 1);
|
||||
char *res_buf = reinterpret_cast<char *>(&res_chars[0]);
|
||||
in.readStrict(res_buf, file_len);
|
||||
res_offsets.push_back(file_len + 1);
|
||||
res_buf[file_len] = '\0';
|
||||
|
||||
return res;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
const size_t file_lenght = res_offsets[row] - prev_offset - 1;
|
||||
prev_offset = res_offsets[row];
|
||||
in.readStrict(res_buf, file_lenght);
|
||||
res_buf[file_lenght] = '\0';
|
||||
}
|
||||
|
||||
private:
|
||||
void checkReadIsAllowed(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const
|
||||
{
|
||||
// If run in Local mode, no need for path checking.
|
||||
if (context.getApplicationType() != Context::ApplicationType::LOCAL)
|
||||
if (file_absolute_path.find(user_files_absolute_path) != 0)
|
||||
throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED);
|
||||
|
||||
Poco::File path_poco_file = Poco::File(file_absolute_path);
|
||||
if (path_poco_file.exists() && path_poco_file.isDirectory())
|
||||
throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME);
|
||||
}
|
||||
|
||||
const Context & context;
|
||||
};
|
||||
|
||||
|
||||
void registerFunctionFile(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionFile>();
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void checkReadIsAllowedOrThrow(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const
|
||||
{
|
||||
// If run in Local mode, no need for path checking.
|
||||
if (context.getApplicationType() != Context::ApplicationType::LOCAL)
|
||||
if (file_absolute_path.find(user_files_absolute_path) != 0)
|
||||
throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED);
|
||||
|
||||
Poco::File path_poco_file = Poco::File(file_absolute_path);
|
||||
if (path_poco_file.exists() && path_poco_file.isDirectory())
|
||||
throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME);
|
||||
}
|
||||
|
||||
const Context & context;
|
||||
};
|
||||
|
||||
|
||||
/// Registers FunctionFile in the given function factory.
void registerFunctionFile(FunctionFactory & factory)
{
    factory.registerFunction<FunctionFile>();
}
|
||||
|
||||
}
|
||||
|
@ -25,11 +25,11 @@ protected:
|
||||
CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};
|
||||
|
||||
public:
|
||||
ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
|
||||
explicit ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
|
||||
char * existing_memory = nullptr, size_t alignment = 0);
|
||||
|
||||
/// Use pre-opened file descriptor.
|
||||
ReadBufferFromFile(
|
||||
explicit ReadBufferFromFile(
|
||||
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
|
||||
const std::string & original_file_name = {},
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
|
@ -5,6 +5,9 @@ aaaaaaaaa bbbbbbbbb
|
||||
ccccccccc aaaaaaaaa bbbbbbbbb
|
||||
ccccccccc aaaaaaaaa bbbbbbbbb
|
||||
:0
|
||||
aaaaaaaaa
|
||||
bbbbbbbbb
|
||||
ccccccccc
|
||||
:107
|
||||
:79
|
||||
:35
|
||||
|
@ -28,7 +28,11 @@ ${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/a.txt'), file('${u
|
||||
${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$?
|
||||
${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$?
|
||||
${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/c.txt'), * from data";echo ":"$?
|
||||
|
||||
${CLICKHOUSE_CLIENT} --multiquery --query "
|
||||
create table filenames(name String) engine=MergeTree() order by tuple();
|
||||
insert into filenames values ('a.txt'), ('b.txt'), ('c.txt');
|
||||
select file(name) from filenames format TSV;
|
||||
"
|
||||
|
||||
# Invalid cases: (a sub-shell is used here to catch the exception so that the test does not quit)
|
||||
# Test a non-existent file
|
||||
|
Loading…
Reference in New Issue
Block a user