2021-01-13 16:46:55 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Columns/IColumn.h>
|
|
|
|
#include <Functions/FunctionFactory.h>
|
2021-01-14 15:48:38 +00:00
|
|
|
#include <DataTypes/DataTypeString.h>
|
|
|
|
#include <IO/ReadBufferFromFile.h>
|
2021-01-16 03:27:31 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2021-01-16 10:43:56 +00:00
|
|
|
#include <unistd.h>
|
2021-04-28 20:48:34 +00:00
|
|
|
#include <filesystem>
|
|
|
|
|
|
|
|
namespace fs = std::filesystem;
|
2021-01-13 16:46:55 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
/// Error codes thrown by the code in this file. They are only declared here
/// (`extern`); the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int DATABASE_ACCESS_DENIED;
    extern const int FILE_DOESNT_EXIST;
    extern const int ILLEGAL_COLUMN;
    extern const int INCORRECT_FILE_NAME;
    extern const int NOT_IMPLEMENTED;
}
|
|
|
|
|
|
|
|
/// A function to read file as a string.
|
2021-06-01 12:20:52 +00:00
|
|
|
class FunctionFile : public IFunction, WithContext
|
2021-02-16 12:37:49 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
static constexpr auto name = "file";
|
2021-06-01 12:20:52 +00:00
|
|
|
static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionFile>(context_); }
|
|
|
|
explicit FunctionFile(ContextPtr context_) : WithContext(context_) {}
|
2021-02-16 12:37:49 +00:00
|
|
|
|
|
|
|
String getName() const override { return name; }
|
|
|
|
|
|
|
|
size_t getNumberOfArguments() const override { return 1; }
|
|
|
|
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
|
2021-05-17 13:06:11 +00:00
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(ColumnsWithTypeAndName & /*arguments*/) const override { return true; }
|
2021-02-16 12:37:49 +00:00
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
2021-01-14 15:48:38 +00:00
|
|
|
{
|
2021-02-16 12:37:49 +00:00
|
|
|
if (!isString(arguments[0].type))
|
|
|
|
throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
return std::make_shared<DataTypeString>();
|
2021-01-16 06:55:59 +00:00
|
|
|
}
|
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
bool useDefaultImplementationForConstants() const override { return true; }
|
|
|
|
|
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
2021-01-13 16:46:55 +00:00
|
|
|
{
|
2021-02-16 12:37:49 +00:00
|
|
|
const ColumnPtr column = arguments[0].column;
|
|
|
|
const ColumnString * expected = checkAndGetColumn<ColumnString>(column.get());
|
|
|
|
if (!expected)
|
|
|
|
throw Exception(
|
|
|
|
fmt::format("Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()),
|
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
2021-01-13 16:46:55 +00:00
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
const ColumnString::Chars & chars = expected->getChars();
|
|
|
|
const ColumnString::Offsets & offsets = expected->getOffsets();
|
2021-01-14 10:44:16 +00:00
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
std::vector<String> checked_filenames(input_rows_count);
|
2021-01-14 10:44:16 +00:00
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
auto result = ColumnString::create();
|
|
|
|
auto & res_chars = result->getChars();
|
|
|
|
auto & res_offsets = result->getOffsets();
|
2021-01-14 10:44:16 +00:00
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
res_offsets.resize(input_rows_count);
|
2021-01-14 12:09:13 +00:00
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
size_t source_offset = 0;
|
|
|
|
size_t result_offset = 0;
|
|
|
|
for (size_t row = 0; row < input_rows_count; ++row)
|
2021-01-13 16:46:55 +00:00
|
|
|
{
|
2021-02-16 12:37:49 +00:00
|
|
|
const char * filename = reinterpret_cast<const char *>(&chars[source_offset]);
|
|
|
|
|
2021-04-28 20:48:34 +00:00
|
|
|
fs::path user_files_absolute_path = fs::canonical(fs::path(getContext()->getUserFilesPath()));
|
|
|
|
fs::path file_path(filename);
|
|
|
|
if (file_path.is_relative())
|
|
|
|
file_path = user_files_absolute_path / file_path;
|
2021-04-28 23:00:04 +00:00
|
|
|
fs::path file_absolute_path = fs::canonical(file_path);
|
2021-04-28 20:48:34 +00:00
|
|
|
checkReadIsAllowedOrThrow(user_files_absolute_path.string(), file_absolute_path);
|
2021-02-16 12:37:49 +00:00
|
|
|
|
2021-04-28 20:48:34 +00:00
|
|
|
checked_filenames[row] = file_absolute_path.string();
|
2021-02-16 12:37:49 +00:00
|
|
|
|
2021-04-28 20:48:34 +00:00
|
|
|
if (!fs::exists(file_absolute_path))
|
|
|
|
throw Exception(fmt::format("File {} doesn't exist.", file_absolute_path.string()), ErrorCodes::FILE_DOESNT_EXIST);
|
2021-02-16 12:37:49 +00:00
|
|
|
|
2021-04-28 20:48:34 +00:00
|
|
|
const auto current_file_size = fs::file_size(file_absolute_path);
|
2021-02-16 12:37:49 +00:00
|
|
|
|
|
|
|
result_offset += current_file_size + 1;
|
|
|
|
res_offsets[row] = result_offset;
|
|
|
|
source_offset = offsets[row];
|
2021-01-13 16:46:55 +00:00
|
|
|
}
|
2021-01-16 03:27:31 +00:00
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
res_chars.resize(result_offset);
|
|
|
|
|
|
|
|
size_t prev_offset = 0;
|
|
|
|
|
|
|
|
for (size_t row = 0; row < input_rows_count; ++row)
|
2021-01-16 10:43:56 +00:00
|
|
|
{
|
2021-02-16 12:37:49 +00:00
|
|
|
auto file_absolute_path = checked_filenames[row];
|
|
|
|
ReadBufferFromFile in(file_absolute_path);
|
|
|
|
char * res_buf = reinterpret_cast<char *>(&res_chars[prev_offset]);
|
|
|
|
|
|
|
|
const size_t file_lenght = res_offsets[row] - prev_offset - 1;
|
|
|
|
prev_offset = res_offsets[row];
|
|
|
|
in.readStrict(res_buf, file_lenght);
|
|
|
|
res_buf[file_lenght] = '\0';
|
2021-01-16 10:43:56 +00:00
|
|
|
}
|
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
return result;
|
|
|
|
}
|
2021-01-13 16:46:55 +00:00
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
private:
|
2021-01-16 10:43:56 +00:00
|
|
|
|
2021-02-16 12:37:49 +00:00
|
|
|
void checkReadIsAllowedOrThrow(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const
|
2021-01-14 15:48:38 +00:00
|
|
|
{
|
2021-02-16 12:37:49 +00:00
|
|
|
// If run in Local mode, no need for path checking.
|
2021-04-10 23:33:54 +00:00
|
|
|
if (getContext()->getApplicationType() != Context::ApplicationType::LOCAL)
|
2021-02-16 12:37:49 +00:00
|
|
|
if (file_absolute_path.find(user_files_absolute_path) != 0)
|
|
|
|
throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED);
|
|
|
|
|
2021-04-28 20:48:34 +00:00
|
|
|
fs::path fs_path(file_absolute_path);
|
|
|
|
if (fs::exists(fs_path) && fs::is_directory(fs_path))
|
2021-02-16 12:37:49 +00:00
|
|
|
throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME);
|
2021-01-14 15:48:38 +00:00
|
|
|
}
|
2021-02-16 12:37:49 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// Registers FunctionFile in the given function factory.
void registerFunctionFile(FunctionFactory & factory)
{
    factory.registerFunction<FunctionFile>();
}
|
|
|
|
|
2021-01-13 16:46:55 +00:00
|
|
|
}
|