diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp
index e4327862982..f477f6123c3 100644
--- a/src/Functions/FunctionFile.cpp
+++ b/src/Functions/FunctionFile.cpp
@@ -11,93 +11,133 @@
 namespace DB
 {
 
-    namespace ErrorCodes
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int NOT_IMPLEMENTED;
+    extern const int INCORRECT_FILE_NAME;
+    extern const int DATABASE_ACCESS_DENIED;
+    extern const int FILE_DOESNT_EXIST;
+}
+
+/// A function to read file as a string.
+/// For each row it takes a path (relative paths are resolved against user_files_path)
+/// and returns the content of that file as a String value.
+class FunctionFile : public IFunction
+{
+public:
+    static constexpr auto name = "file";
+    static FunctionPtr create(const Context &context) { return std::make_shared<FunctionFile>(context); }
+    explicit FunctionFile(const Context &context_) : context(context_) {}
+
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 1; }
+    /// Not injective: distinct arguments (e.g. a relative and an absolute path to the same file) give the same result.
+    bool isInjective(const ColumnsWithTypeAndName &) const override { return false; }
+
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
-        extern const int ILLEGAL_COLUMN;
-        extern const int NOT_IMPLEMENTED;
-        extern const int INCORRECT_FILE_NAME;
-        extern const int DATABASE_ACCESS_DENIED;
+        if (!isString(arguments[0].type))
+            throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED);
+        return std::make_shared<DataTypeString>();
     }
 
-    /** A function to read file as a string.
-      */
-    class FunctionFile : public IFunction
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
-    public:
-        static constexpr auto name = "file";
-        static FunctionPtr create(const Context &context) { return std::make_shared<FunctionFile>(context); }
-        explicit FunctionFile(const Context &context_) : context(context_) {}
+        const ColumnPtr column = arguments[0].column;
+        const ColumnString * expected = checkAndGetColumn<ColumnString>(column.get());
+        if (!expected)
+            throw Exception(
+                fmt::format("Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()),
+                ErrorCodes::ILLEGAL_COLUMN);
 
-        String getName() const override { return name; }
+        const ColumnString::Chars & chars = expected->getChars();
+        const ColumnString::Offsets & offsets = expected->getOffsets();
 
-        size_t getNumberOfArguments() const override { return 1; }
-        bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
+        std::vector<String> checked_filenames(input_rows_count);
 
-        DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+        auto result = ColumnString::create();
+        auto & res_chars = result->getChars();
+        auto & res_offsets = result->getOffsets();
+
+        res_offsets.resize(input_rows_count);
+
+        /// The allowed root is the same for every row, so resolve it once.
+        const String user_files_path = context.getUserFilesPath();
+        const String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
+
+        /// First pass: validate each path and sum the file sizes so the result buffer is resized once.
+        size_t source_offset = 0;
+        size_t result_offset = 0;
+        for (size_t row = 0; row < input_rows_count; ++row)
         {
-            if (!isString(arguments[0].type))
-                throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED);
-            return std::make_shared<DataTypeString>();
+            const char * filename = reinterpret_cast<const char *>(&chars[source_offset]);
+
+            Poco::Path poco_filepath = Poco::Path(filename);
+            if (poco_filepath.isRelative())
+                poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath);
+            const String file_absolute_path = poco_filepath.absolute().toString();
+            checkReadIsAllowedOrThrow(user_files_absolute_path, file_absolute_path);
+
+            checked_filenames[row] = file_absolute_path;
+            auto file = Poco::File(file_absolute_path);
+
+            if (!file.exists())
+                throw Exception(fmt::format("File {} doesn't exist.", file_absolute_path), ErrorCodes::FILE_DOESNT_EXIST);
+
+            const auto current_file_size = file.getSize();
+
+            /// Each value is followed by a terminating zero byte (written in the second pass), hence the extra byte.
+            result_offset += current_file_size + 1;
+            res_offsets[row] = result_offset;
+            source_offset = offsets[row];
         }
 
-        bool useDefaultImplementationForConstants() const override { return true; }
+        res_chars.resize(result_offset);
 
-        ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+        /// Second pass: read every file into its slot of the result buffer.
+        size_t prev_offset = 0;
+
+        for (size_t row = 0; row < input_rows_count; ++row)
         {
-            const auto & column = arguments[0].column;
-            const char * filename = nullptr;
-            if (const auto * column_string = checkAndGetColumn<ColumnString>(column.get()))
-            {
-                const auto & filename_chars = column_string->getChars();
-                filename = reinterpret_cast<const char *>(&filename_chars[0]);
-                auto res = ColumnString::create();
-                auto & res_chars = res->getChars();
-                auto & res_offsets = res->getOffsets();
+            const auto & file_absolute_path = checked_filenames[row];
+            ReadBufferFromFile in(file_absolute_path);
+            char * res_buf = reinterpret_cast<char *>(&res_chars[prev_offset]);
 
-                const String user_files_path = context.getUserFilesPath();
-                String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString();
-                Poco::Path poco_filepath = Poco::Path(filename);
-                if (poco_filepath.isRelative())
-                    poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath);
-                const String file_absolute_path = poco_filepath.absolute().toString();
-                checkReadIsAllowed(user_files_absolute_path, file_absolute_path);
-
-                ReadBufferFromFile in(file_absolute_path);
-                ssize_t file_len = Poco::File(file_absolute_path).getSize();
-                res_chars.resize_exact(file_len + 1);
-                char *res_buf = reinterpret_cast<char *>(&res_chars[0]);
-                in.readStrict(res_buf, file_len);
-                res_offsets.push_back(file_len + 1);
-                res_buf[file_len] = '\0';
-
-                return res;
-            }
-            else
-            {
-                throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN);
-            }
+            const size_t file_length = res_offsets[row] - prev_offset - 1;
+            prev_offset = res_offsets[row];
+            in.readStrict(res_buf, file_length);
+            res_buf[file_length] = '\0';
         }
 
-    private:
-        void checkReadIsAllowed(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const
-        {
-            // If run in Local mode, no need for path checking.
-            if (context.getApplicationType() != Context::ApplicationType::LOCAL)
-                if (file_absolute_path.find(user_files_absolute_path) != 0)
-                    throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED);
-
-            Poco::File path_poco_file = Poco::File(file_absolute_path);
-            if (path_poco_file.exists() && path_poco_file.isDirectory())
-                throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME);
-        }
-
-        const Context & context;
-    };
-
-
-    void registerFunctionFile(FunctionFactory & factory)
-    {
-        factory.registerFunction<FunctionFile>();
+        return result;
     }
 
+private:
+
+    /// Throws DATABASE_ACCESS_DENIED if the path does not start with user_files_absolute_path (skipped in LOCAL mode), INCORRECT_FILE_NAME if it is a directory.
+    void checkReadIsAllowedOrThrow(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const
+    {
+        // If run in Local mode, no need for path checking.
+        if (context.getApplicationType() != Context::ApplicationType::LOCAL)
+            if (file_absolute_path.find(user_files_absolute_path) != 0)
+                throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED);
+
+        Poco::File path_poco_file = Poco::File(file_absolute_path);
+        if (path_poco_file.exists() && path_poco_file.isDirectory())
+            throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME);
+    }
+
+    const Context & context;
+};
+
+
+void registerFunctionFile(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionFile>();
+}
+
 }
diff --git a/src/IO/ReadBufferFromFile.h b/src/IO/ReadBufferFromFile.h
index cebda605b21..33365bc7ceb 100644
--- a/src/IO/ReadBufferFromFile.h
+++ b/src/IO/ReadBufferFromFile.h
@@ -25,11 +25,11 @@ protected:
     CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};
 
 public:
-    ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
+    explicit ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
         char * existing_memory = nullptr, size_t alignment = 0);
 
     /// Use pre-opened file descriptor.
-    ReadBufferFromFile(
+    explicit ReadBufferFromFile(
         int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
         const std::string & original_file_name = {},
         size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
index a22076de920..87659c32e39 100644
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference
@@ -5,6 +5,9 @@ aaaaaaaaa	bbbbbbbbb
 ccccccccc	aaaaaaaaa	bbbbbbbbb
 ccccccccc	aaaaaaaaa	bbbbbbbbb
 :0
+aaaaaaaaa
+bbbbbbbbb
+ccccccccc
 :107
 :79
 :35
diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
index 43e1e11a193..0359d803a23 100755
--- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
+++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh
@@ -28,7 +28,11 @@ ${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/a.txt'), file('${u
 ${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$?
 ${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$?
 ${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/c.txt'), * from data";echo ":"$?
-
+${CLICKHOUSE_CLIENT} --multiquery --query "
+    create table filenames(name String) engine=MergeTree() order by tuple();
+    insert into filenames values ('a.txt'), ('b.txt'), ('c.txt');
+    select file(name) from filenames format TSV;
+"
 # Invalid cases: (Here using sub-shell to catch exception avoiding the test quit)
 # Test non-exists file