2022-06-15 06:49:55 +00:00
|
|
|
#pragma once
|
|
|
|
#include "config_functions.h"
|
|
|
|
|
2022-06-20 18:53:47 +00:00
|
|
|
#if USE_BASEX
|
2022-06-15 06:49:55 +00:00
|
|
|
# include <Columns/ColumnConst.h>
|
|
|
|
# include <Common/MemorySanitizer.h>
|
|
|
|
# include <Columns/ColumnString.h>
|
|
|
|
# include <DataTypes/DataTypeString.h>
|
|
|
|
# include <Functions/FunctionFactory.h>
|
|
|
|
# include <Functions/FunctionHelpers.h>
|
|
|
|
# include <IO/WriteHelpers.h>
|
|
|
|
# include <base_x.hh>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int ILLEGAL_COLUMN;
|
|
|
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
|
|
|
extern const int BAD_ARGUMENTS;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct Base58Encode
|
|
|
|
{
|
|
|
|
static constexpr auto name = "base58Encode";
|
|
|
|
|
2022-06-20 14:30:54 +00:00
|
|
|
static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const std::string & alphabet, size_t input_rows_count)
|
2022-06-15 06:49:55 +00:00
|
|
|
{
|
2022-06-16 10:11:41 +00:00
|
|
|
auto & dst_data = dst_column->getChars();
|
|
|
|
auto & dst_offsets = dst_column->getOffsets();
|
|
|
|
|
2022-06-20 14:30:54 +00:00
|
|
|
/// Wikipedia states Base58 has efficiency of 73%, and we take 1.5 scale to avoid reallocation in most cases
|
|
|
|
size_t current_allocated_size = ceil(1.5 * input.getChars().size());
|
2022-06-16 10:11:41 +00:00
|
|
|
|
|
|
|
dst_data.resize(current_allocated_size);
|
|
|
|
dst_offsets.resize(input_rows_count);
|
|
|
|
|
2022-06-19 18:10:28 +00:00
|
|
|
const ColumnString::Offsets & src_offsets = input.getOffsets();
|
2022-06-16 10:11:41 +00:00
|
|
|
|
2022-06-19 18:10:28 +00:00
|
|
|
const auto * source = input.getChars().raw_data();
|
2022-06-16 10:11:41 +00:00
|
|
|
auto * dst = dst_data.data();
|
|
|
|
auto * dst_pos = dst;
|
|
|
|
|
|
|
|
size_t src_offset_prev = 0;
|
|
|
|
size_t processed_size = 0;
|
|
|
|
|
|
|
|
const auto& encoder = (alphabet == "bitcoin") ? Base58::bitcoin() :
|
|
|
|
((alphabet == "flickr") ? Base58::flickr() :
|
2022-06-16 20:52:45 +00:00
|
|
|
((alphabet == "ripple") ? Base58::ripple() :
|
2022-06-19 18:10:28 +00:00
|
|
|
Base58::base58())); //GMP
|
2022-06-15 06:49:55 +00:00
|
|
|
|
2022-06-19 18:10:28 +00:00
|
|
|
std::string encoded;
|
2022-06-16 10:11:41 +00:00
|
|
|
for (size_t row = 0; row < input_rows_count; ++row)
|
|
|
|
{
|
|
|
|
size_t srclen = src_offsets[row] - src_offset_prev - 1;
|
2022-06-16 20:52:45 +00:00
|
|
|
/// Why we didn't use char* here?
|
2022-06-16 10:11:41 +00:00
|
|
|
/// We don't know the size of the result string beforehand (it's not byte-to-byte encoding),
|
|
|
|
/// so we may need to do many resizes (the worst case -- we'll do it for each row)
|
2022-06-16 20:52:45 +00:00
|
|
|
/// This way we do exponential resizes and one final resize after whole operation is complete
|
2022-06-19 18:10:28 +00:00
|
|
|
encoded.clear();
|
2022-06-17 18:48:14 +00:00
|
|
|
if (srclen)
|
2022-06-23 19:36:57 +00:00
|
|
|
try
|
|
|
|
{
|
|
|
|
encoder.encode(encoded, source, srclen);
|
|
|
|
}
|
|
|
|
catch (const std::invalid_argument& e)
|
|
|
|
{
|
|
|
|
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
}
|
|
|
|
catch (const std::domain_error& e)
|
|
|
|
{
|
|
|
|
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
}
|
2022-06-16 10:11:41 +00:00
|
|
|
size_t outlen = encoded.size();
|
|
|
|
|
|
|
|
if (processed_size + outlen >= current_allocated_size)
|
|
|
|
{
|
|
|
|
current_allocated_size += current_allocated_size;
|
|
|
|
dst_data.resize(current_allocated_size);
|
2022-06-19 18:10:28 +00:00
|
|
|
auto processed_offset = dst_pos - dst;
|
|
|
|
dst = dst_data.data();
|
|
|
|
dst_pos = dst;
|
|
|
|
dst_pos += processed_offset;
|
2022-06-16 10:11:41 +00:00
|
|
|
}
|
2022-06-19 18:10:28 +00:00
|
|
|
std::memcpy(dst_pos, encoded.c_str(), ++outlen);
|
2022-06-16 10:11:41 +00:00
|
|
|
|
|
|
|
source += srclen + 1;
|
2022-06-17 18:48:14 +00:00
|
|
|
dst_pos += outlen;
|
2022-06-16 10:11:41 +00:00
|
|
|
|
|
|
|
dst_offsets[row] = dst_pos - dst;
|
|
|
|
src_offset_prev = src_offsets[row];
|
|
|
|
processed_size += outlen;
|
|
|
|
}
|
|
|
|
|
|
|
|
dst_data.resize(dst_pos - dst);
|
2022-06-15 06:49:55 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Base58Decode
|
|
|
|
{
|
|
|
|
static constexpr auto name = "base58Decode";
|
|
|
|
|
2022-06-20 14:30:54 +00:00
|
|
|
static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const std::string & alphabet, size_t input_rows_count)
|
2022-06-15 06:49:55 +00:00
|
|
|
{
|
2022-06-16 20:52:45 +00:00
|
|
|
auto & dst_data = dst_column->getChars();
|
|
|
|
auto & dst_offsets = dst_column->getOffsets();
|
2022-06-15 06:49:55 +00:00
|
|
|
|
2022-06-20 14:30:54 +00:00
|
|
|
/// We allocate probably even more then needed to avoid many resizes
|
2022-06-19 18:10:28 +00:00
|
|
|
size_t current_allocated_size = input.getChars().size();
|
2022-06-15 06:49:55 +00:00
|
|
|
|
2022-06-16 20:52:45 +00:00
|
|
|
dst_data.resize(current_allocated_size);
|
|
|
|
dst_offsets.resize(input_rows_count);
|
|
|
|
|
2022-06-19 18:10:28 +00:00
|
|
|
const ColumnString::Offsets & src_offsets = input.getOffsets();
|
2022-06-16 20:52:45 +00:00
|
|
|
|
2022-06-19 18:10:28 +00:00
|
|
|
const auto * source = input.getChars().raw_data();
|
2022-06-16 20:52:45 +00:00
|
|
|
auto * dst = dst_data.data();
|
|
|
|
auto * dst_pos = dst;
|
|
|
|
|
|
|
|
size_t src_offset_prev = 0;
|
|
|
|
size_t processed_size = 0;
|
|
|
|
|
|
|
|
const auto& decoder = (alphabet == "bitcoin") ? Base58::bitcoin() :
|
|
|
|
((alphabet == "flickr") ? Base58::flickr() :
|
|
|
|
((alphabet == "ripple") ? Base58::ripple() :
|
|
|
|
Base58::base58()));
|
|
|
|
|
2022-06-19 18:10:28 +00:00
|
|
|
std::string decoded;
|
2022-06-16 20:52:45 +00:00
|
|
|
for (size_t row = 0; row < input_rows_count; ++row)
|
|
|
|
{
|
|
|
|
size_t srclen = src_offsets[row] - src_offset_prev - 1;
|
|
|
|
/// Why we didn't use char* here?
|
|
|
|
/// We don't know the size of the result string beforehand (it's not byte-to-byte encoding),
|
|
|
|
/// so we may need to do many resizes (the worst case -- we'll do it for each row)
|
|
|
|
/// This way we do exponential resizes and one final resize after whole operation is complete
|
2022-06-19 18:10:28 +00:00
|
|
|
decoded.clear();
|
|
|
|
if (srclen)
|
2022-06-23 19:36:57 +00:00
|
|
|
try
|
|
|
|
{
|
|
|
|
decoder.decode(decoded, source, srclen);
|
|
|
|
}
|
|
|
|
catch (const std::invalid_argument& e)
|
|
|
|
{
|
|
|
|
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
}
|
|
|
|
catch (const std::domain_error& e)
|
|
|
|
{
|
|
|
|
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
}
|
2022-06-16 20:52:45 +00:00
|
|
|
size_t outlen = decoded.size();
|
|
|
|
|
|
|
|
if (processed_size + outlen >= current_allocated_size)
|
|
|
|
{
|
|
|
|
current_allocated_size += current_allocated_size;
|
|
|
|
dst_data.resize(current_allocated_size);
|
2022-06-19 18:10:28 +00:00
|
|
|
auto processed_offset = dst_pos - dst;
|
|
|
|
dst = dst_data.data();
|
|
|
|
dst_pos = dst;
|
|
|
|
dst_pos += processed_offset;
|
2022-06-16 20:52:45 +00:00
|
|
|
}
|
2022-06-19 18:10:28 +00:00
|
|
|
std::memcpy(dst_pos, decoded.c_str(), ++outlen);
|
2022-06-16 20:52:45 +00:00
|
|
|
|
|
|
|
source += srclen + 1;
|
2022-06-17 18:48:14 +00:00
|
|
|
dst_pos += outlen;
|
2022-06-16 20:52:45 +00:00
|
|
|
|
|
|
|
dst_offsets[row] = dst_pos - dst;
|
|
|
|
src_offset_prev = src_offsets[row];
|
|
|
|
processed_size += outlen;
|
|
|
|
}
|
|
|
|
|
|
|
|
dst_data.resize(dst_pos - dst);
|
2022-06-15 06:49:55 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename Func>
|
|
|
|
class FunctionBase58Conversion : public IFunction
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static constexpr auto name = Func::name;
|
|
|
|
|
|
|
|
static FunctionPtr create(ContextPtr)
|
|
|
|
{
|
|
|
|
return std::make_shared<FunctionBase58Conversion>();
|
|
|
|
}
|
|
|
|
|
|
|
|
String getName() const override
|
|
|
|
{
|
|
|
|
return Func::name;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool isVariadic() const override { return true; }
|
|
|
|
|
|
|
|
size_t getNumberOfArguments() const override { return 0; }
|
|
|
|
|
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
|
|
|
|
|
|
|
bool useDefaultImplementationForConstants() const override { return true; }
|
|
|
|
|
|
|
|
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
|
|
|
{
|
2022-06-16 20:52:45 +00:00
|
|
|
if (arguments.size() != 1 && arguments.size() != 2)
|
2022-06-15 06:49:55 +00:00
|
|
|
throw Exception(
|
2022-06-16 20:52:45 +00:00
|
|
|
"Wrong number of arguments for function " + getName() + ": 1 or 2 expected.",
|
2022-06-15 06:49:55 +00:00
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
|
|
|
if (!isString(arguments[0].type))
|
|
|
|
throw Exception(
|
|
|
|
"Illegal type " + arguments[0].type->getName() + " of 1st argument of function " + getName() + ". Must be String.",
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
2022-06-17 18:48:14 +00:00
|
|
|
if (arguments.size() == 2 && !isString(arguments[1].type))
|
2022-06-15 06:49:55 +00:00
|
|
|
throw Exception(
|
|
|
|
"Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName() + ". Must be String.",
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
|
|
|
return std::make_shared<DataTypeString>();
|
|
|
|
}
|
|
|
|
|
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
|
|
|
{
|
|
|
|
const ColumnPtr column_string = arguments[0].column;
|
|
|
|
const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get());
|
|
|
|
if (!input)
|
|
|
|
throw Exception(
|
2022-06-20 14:30:54 +00:00
|
|
|
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName() + ", must be String",
|
2022-06-15 06:49:55 +00:00
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
|
|
|
std::string alphabet = "bitcoin";
|
|
|
|
|
|
|
|
if (arguments.size() == 2)
|
|
|
|
{
|
|
|
|
const auto * alphabet_column = checkAndGetColumn<ColumnConst>(arguments[1].column.get());
|
|
|
|
|
|
|
|
if (!alphabet_column)
|
|
|
|
throw Exception("Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
2022-06-20 14:30:54 +00:00
|
|
|
alphabet = alphabet_column->getValue<DB::String>();
|
|
|
|
if (alphabet != "bitcoin" && alphabet != "ripple" && alphabet != "flickr" && alphabet != "gmp")
|
2022-06-16 10:11:41 +00:00
|
|
|
throw Exception("Second argument for function " + getName() + " must be 'bitcoin', 'ripple', 'gmp' or 'flickr'", ErrorCodes::ILLEGAL_COLUMN);
|
2022-06-15 06:49:55 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
auto dst_column = ColumnString::create();
|
|
|
|
|
2022-06-19 18:10:28 +00:00
|
|
|
Func::process(*input, dst_column, alphabet, input_rows_count);
|
2022-06-15 06:49:55 +00:00
|
|
|
|
|
|
|
return dst_column;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|