2020-10-10 18:37:02 +00:00
|
|
|
#pragma once
|
2022-09-28 12:35:02 +00:00
|
|
|
#include "config.h"
|
2020-04-16 12:31:57 +00:00
|
|
|
|
2018-10-12 08:46:53 +00:00
|
|
|
#if USE_BASE64
|
2022-10-13 22:28:25 +00:00
|
|
|
# include <Columns/ColumnFixedString.h>
|
2020-04-16 12:31:57 +00:00
|
|
|
# include <Columns/ColumnString.h>
|
|
|
|
# include <DataTypes/DataTypeString.h>
|
|
|
|
# include <Functions/FunctionHelpers.h>
|
2022-10-13 22:28:25 +00:00
|
|
|
# include <Functions/IFunction.h>
|
|
|
|
# include <Interpreters/Context_fwd.h>
|
2020-04-16 12:31:57 +00:00
|
|
|
# include <turbob64.h>
|
2022-10-13 22:28:25 +00:00
|
|
|
# include <Common/MemorySanitizer.h>
|
2018-10-10 01:04:07 +00:00
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
# include <span>
|
2018-10-10 01:04:07 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2022-09-27 11:54:55 +00:00
|
|
|
extern const int BAD_ARGUMENTS;
|
2018-10-10 01:04:07 +00:00
|
|
|
extern const int ILLEGAL_COLUMN;
|
|
|
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
|
|
|
extern const int INCORRECT_DATA;
|
|
|
|
}
|
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
namespace Detail
|
|
|
|
{
|
|
|
|
inline size_t base64Decode(const std::span<const UInt8> src, UInt8 * dst)
|
|
|
|
{
|
|
|
|
# if defined(__aarch64__)
|
|
|
|
return tb64sdec(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
|
|
|
|
# else
|
|
|
|
return _tb64d(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
|
|
|
|
# endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-10 01:04:07 +00:00
|
|
|
struct Base64Encode
|
|
|
|
{
|
|
|
|
static constexpr auto name = "base64Encode";
|
2022-10-13 22:28:25 +00:00
|
|
|
|
|
|
|
static size_t getBufferSize(const size_t string_length, const size_t string_count)
|
2018-11-02 19:06:05 +00:00
|
|
|
{
|
2018-11-24 01:48:06 +00:00
|
|
|
return ((string_length - string_count) / 3 + string_count) * 4 + string_count;
|
2018-11-02 19:06:05 +00:00
|
|
|
}
|
2022-10-13 22:28:25 +00:00
|
|
|
|
|
|
|
static size_t performCoding(const std::span<const UInt8> src, UInt8 * dst)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Some bug in sse arm64 implementation?
|
|
|
|
* `base64Encode(repeat('a', 46))` returns wrong padding character
|
|
|
|
*/
|
|
|
|
# if defined(__aarch64__)
|
|
|
|
return tb64senc(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
|
|
|
|
# else
|
|
|
|
return _tb64e(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
|
|
|
|
# endif
|
|
|
|
}
|
2018-10-10 01:04:07 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct Base64Decode
|
|
|
|
{
|
|
|
|
static constexpr auto name = "base64Decode";
|
2018-11-02 19:06:05 +00:00
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
static size_t getBufferSize(const size_t string_length, const size_t string_count)
|
2018-11-02 19:06:05 +00:00
|
|
|
{
|
2018-11-24 01:48:06 +00:00
|
|
|
return ((string_length - string_count) / 4 + string_count) * 3 + string_count;
|
2018-11-02 19:06:05 +00:00
|
|
|
}
|
2022-10-13 22:28:25 +00:00
|
|
|
|
|
|
|
static size_t performCoding(const std::span<const UInt8> src, UInt8 * dst)
|
|
|
|
{
|
|
|
|
const auto outlen = Detail::base64Decode(src, dst);
|
|
|
|
if (src.size() > 0 && !outlen)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INCORRECT_DATA,
|
|
|
|
"Failed to {} input '{}'",
|
|
|
|
name,
|
|
|
|
String(reinterpret_cast<const char *>(src.data()), src.size()));
|
|
|
|
|
|
|
|
return outlen;
|
|
|
|
}
|
2018-10-10 01:04:07 +00:00
|
|
|
};
|
|
|
|
|
2018-11-02 19:06:05 +00:00
|
|
|
struct TryBase64Decode
|
|
|
|
{
|
|
|
|
static constexpr auto name = "tryBase64Decode";
|
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
static size_t getBufferSize(const size_t string_length, const size_t string_count)
|
2018-11-02 19:06:05 +00:00
|
|
|
{
|
|
|
|
return Base64Decode::getBufferSize(string_length, string_count);
|
|
|
|
}
|
2022-10-13 22:28:25 +00:00
|
|
|
|
|
|
|
static size_t performCoding(const std::span<const UInt8> src, UInt8 * dst)
|
|
|
|
{
|
|
|
|
if (src.empty())
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
const auto outlen = Detail::base64Decode(src, dst);
|
|
|
|
// during decoding character array can be partially polluted
|
|
|
|
// if fail, revert back and clean
|
|
|
|
if (!outlen)
|
|
|
|
*dst = 0;
|
|
|
|
|
|
|
|
return outlen;
|
|
|
|
}
|
2018-11-02 19:06:05 +00:00
|
|
|
};
|
2018-10-10 01:04:07 +00:00
|
|
|
|
|
|
|
template <typename Func>
|
|
|
|
class FunctionBase64Conversion : public IFunction
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static constexpr auto name = Func::name;
|
|
|
|
|
2022-09-27 08:52:08 +00:00
|
|
|
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionBase64Conversion>(); }
|
|
|
|
String getName() const override { return Func::name; }
|
|
|
|
size_t getNumberOfArguments() const override { return 1; }
|
2021-06-22 16:21:23 +00:00
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
2022-09-27 08:52:08 +00:00
|
|
|
bool useDefaultImplementationForConstants() const override { return true; }
|
2018-10-10 01:04:07 +00:00
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
|
|
|
{
|
2022-09-27 08:52:08 +00:00
|
|
|
if (arguments.size() != 1)
|
|
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of arguments for function {}: 1 expected.", getName());
|
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
if (!WhichDataType(arguments[0].type).isStringOrFixedString())
|
2018-10-10 01:04:07 +00:00
|
|
|
throw Exception(
|
2022-09-27 08:52:08 +00:00
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
2022-10-13 22:28:25 +00:00
|
|
|
"Illegal type {} of 1st argument of function {}. Must be FixedString or String.",
|
|
|
|
arguments[0].type->getName(),
|
|
|
|
getName());
|
2018-10-10 01:04:07 +00:00
|
|
|
|
|
|
|
return std::make_shared<DataTypeString>();
|
|
|
|
}
|
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const override
|
2018-10-10 01:04:07 +00:00
|
|
|
{
|
2022-10-13 22:28:25 +00:00
|
|
|
const auto & input_column = arguments[0].column;
|
|
|
|
if (const auto * src_column_as_fixed_string = checkAndGetColumn<ColumnFixedString>(*input_column))
|
|
|
|
return execute(*src_column_as_fixed_string, input_rows_count);
|
|
|
|
else if (const auto * src_column_as_string = checkAndGetColumn<ColumnString>(*input_column))
|
|
|
|
return execute(*src_column_as_string, input_rows_count);
|
|
|
|
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::ILLEGAL_COLUMN,
|
|
|
|
"Illegal column {} of first argument of function {}, must be of type FixedString or String.",
|
|
|
|
input_column->getName(),
|
|
|
|
getName());
|
|
|
|
}
|
2018-10-10 01:04:07 +00:00
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
private:
|
|
|
|
static ColumnPtr execute(const ColumnString & src_column, const size_t src_row_count)
|
|
|
|
{
|
2018-10-10 01:04:07 +00:00
|
|
|
auto dst_column = ColumnString::create();
|
2022-10-13 22:28:25 +00:00
|
|
|
auto & dst_chars = dst_column->getChars();
|
2018-10-10 01:04:07 +00:00
|
|
|
auto & dst_offsets = dst_column->getOffsets();
|
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
const auto reserve = Func::getBufferSize(src_column.byteSize(), src_column.size());
|
|
|
|
dst_chars.resize(reserve);
|
|
|
|
dst_offsets.resize(src_row_count);
|
2018-10-10 01:04:07 +00:00
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
const auto & src_chars = src_column.getChars();
|
|
|
|
const auto & src_offsets = src_column.getOffsets();
|
2018-10-10 01:04:07 +00:00
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
auto * dst = dst_chars.data();
|
2020-10-17 15:22:42 +00:00
|
|
|
auto * dst_pos = dst;
|
2022-10-13 22:28:25 +00:00
|
|
|
const auto * src = src_chars.data();
|
2018-10-10 01:04:07 +00:00
|
|
|
|
|
|
|
size_t src_offset_prev = 0;
|
2022-10-13 22:28:25 +00:00
|
|
|
for (size_t row = 0; row < src_row_count; ++row)
|
2018-10-10 01:04:07 +00:00
|
|
|
{
|
2022-10-13 22:28:25 +00:00
|
|
|
const size_t src_length = src_offsets[row] - src_offset_prev - 1;
|
|
|
|
const auto outlen = Func::performCoding({src, src_length}, dst_pos);
|
2018-10-10 01:04:07 +00:00
|
|
|
|
2020-09-19 19:43:26 +00:00
|
|
|
/// Base64 library is using AVX-512 with some shuffle operations.
|
|
|
|
/// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle.
|
|
|
|
__msan_unpoison(dst_pos, outlen);
|
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
src += src_length + 1;
|
2020-12-17 02:20:56 +00:00
|
|
|
dst_pos += outlen;
|
|
|
|
*dst_pos = '\0';
|
|
|
|
dst_pos += 1;
|
2018-10-10 01:04:07 +00:00
|
|
|
|
|
|
|
dst_offsets[row] = dst_pos - dst;
|
|
|
|
src_offset_prev = src_offsets[row];
|
|
|
|
}
|
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
dst_chars.resize(dst_pos - dst);
|
|
|
|
return dst_column;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ColumnPtr execute(const ColumnFixedString & src_column, const size_t src_row_count)
|
|
|
|
{
|
|
|
|
auto dst_column = ColumnString::create();
|
|
|
|
auto & dst_chars = dst_column->getChars();
|
|
|
|
auto & dst_offsets = dst_column->getOffsets();
|
|
|
|
|
|
|
|
const auto reserve = Func::getBufferSize(src_column.byteSize(), src_column.size());
|
|
|
|
dst_chars.resize(reserve);
|
|
|
|
dst_offsets.resize(src_row_count);
|
|
|
|
|
|
|
|
const auto & src_chars = src_column.getChars();
|
|
|
|
const auto & src_n = src_column.getN();
|
|
|
|
|
|
|
|
auto * dst = dst_chars.data();
|
|
|
|
auto * dst_pos = dst;
|
|
|
|
const auto * src = src_chars.data();
|
|
|
|
|
|
|
|
for (size_t row = 0; row < src_row_count; ++row)
|
|
|
|
{
|
|
|
|
const auto outlen = Func::performCoding({src, src_n}, dst_pos);
|
|
|
|
|
|
|
|
/// Base64 library is using AVX-512 with some shuffle operations.
|
|
|
|
/// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle.
|
|
|
|
__msan_unpoison(dst_pos, outlen);
|
|
|
|
|
|
|
|
src += src_n;
|
|
|
|
dst_pos += outlen;
|
|
|
|
*dst_pos = '\0';
|
|
|
|
dst_pos += 1;
|
|
|
|
|
|
|
|
dst_offsets[row] = dst_pos - dst;
|
|
|
|
}
|
2018-10-10 01:04:07 +00:00
|
|
|
|
2022-10-13 22:28:25 +00:00
|
|
|
dst_chars.resize(dst_pos - dst);
|
2020-10-17 15:22:42 +00:00
|
|
|
return dst_column;
|
2018-10-10 01:04:07 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
2019-12-26 01:42:51 +00:00
|
|
|
|
2018-11-23 18:54:23 +00:00
|
|
|
#endif
|