added new DataType + fixes

This commit is contained in:
zvonand 2022-07-16 18:58:47 +02:00
parent f8e3bdecaf
commit 4ab52b6873
28 changed files with 192 additions and 359 deletions

3
.gitmodules vendored
View File

@ -271,6 +271,3 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing.git
[submodule "contrib/base-x"]
path = contrib/base-x
url = https://github.com/ClickHouse/base-x.git

View File

@ -1,9 +0,0 @@
#pragma once
#include <base/strong_typedef.h>
#include <base/extended_types.h>
namespace DB
{
using Base58 = StrongTypedef<DB::String, struct Base58Type>;
}

View File

@ -155,7 +155,6 @@ endif()
add_contrib (sqlite-cmake sqlite-amalgamation)
add_contrib (s2geometry-cmake s2geometry)
add_contrib (base-x-cmake base-x)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear

1
contrib/base-x vendored

@ -1 +0,0 @@
Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5

View File

@ -1,28 +0,0 @@
option (ENABLE_BASEX "Enable base-x" ${ENABLE_LIBRARIES})
if (NOT ENABLE_BASEX)
message(STATUS "Not using base-x")
return()
endif()
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base-x")
set (SRCS
${LIBRARY_DIR}/base_x.hh
${LIBRARY_DIR}/uinteger_t.hh
)
add_library(_base-x INTERFACE)
target_include_directories(_base-x SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/base-x")
if (XCODE OR XCODE_VERSION)
# https://gitlab.kitware.com/cmake/cmake/issues/17457
# Some native build systems may not like targets that have only object files, so consider adding at least one real source file
# This applies to Xcode.
if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
endif ()
target_sources(_base-x PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
endif ()
add_library(ch_contrib::base-x ALIAS _base-x)

View File

@ -18,7 +18,6 @@ The list of third-party libraries:
| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) |
| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) |
| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) |
| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) |
| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) |
| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) |
| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) |

View File

@ -494,22 +494,21 @@ If the s string is non-empty and does not contain the c character at
Returns the string s that was converted from the encoding in from to the encoding in to.
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(encoded_text[, alphabet_name])
## Base58Encode(plaintext), Base58Decode(encoded_text)
Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using specified alphabet.
Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet.
**Syntax**
```sql
base58Encode(decoded[, alphabet_name])
base58Decode(encoded[, alphabet_name])
encodeBase58(decoded)
decodeBase58(encoded)
```
**Arguments**
- `decoded` — [String](../../sql-reference/data-types/string.md) column or constant.
- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown.
- `alphabet_name` — String constant. Specifies alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `bitcoin`.
**Returned value**
@ -522,16 +521,16 @@ Type: [String](../../sql-reference/data-types/string.md).
Query:
``` sql
SELECT base58Encode('encode', 'flickr');
SELECT base58Decode('izCFiDUY', 'ripple');
SELECT encodeBase58('encode');
SELECT decodeBase58('izCFiDUY');
```
Result:
```text
┌─base58Encode('encode', 'flickr')─┐
┌─encodeBase58('encode', 'flickr')─┐
│ SvyTHb1D │
└──────────────────────────────────┘
┌─base58Decode('izCFiDUY', 'ripple')─┐
┌─decodeBase58('izCFiDUY', 'ripple')─┐
│ decode │
└────────────────────────────────────┘
```

View File

@ -18,7 +18,6 @@ sidebar_label: "Используемые сторонние библиотеки
| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) |
| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) |
| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) |
| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) |
| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) |
| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) |
| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) |

View File

@ -490,22 +490,21 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2);
Возвращает сконвертированную из кодировки from в кодировку to строку s.
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(plaintext[, alphabet_name]) {#base58}
## Base58Encode(plaintext), Base58Decode(encoded_text) {#base58}
Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием указанного алфавита.
Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием стандартного алфавита Bitcoin.
**Синтаксис**
```sql
base58Encode(decoded[, alphabet_name])
base58Decode(encoded[, alphabet_name])
encodeBase58(decoded)
decodeBase58(encoded)
```
**Аргументы**
- `decoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md).
- `encoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). Если входная строка не является корректным кодом для какой-либо другой строки, возникнет исключение `1001`.
- `alphabet_name` — Строковая константа. Указывает алфавит, для которого необходимо получить код. Может принимать одно из следующих значений: `gmp`, `bitcoin`, `ripple`, `flickr`. По умолчанию: `bitcoin`.
**Возвращаемое значение**
@ -518,16 +517,16 @@ base58Decode(encoded[, alphabet_name])
Запрос:
``` sql
SELECT base58Encode('encode', 'flickr');
SELECT base58Decode('izCFiDUY', 'ripple');
SELECT encodeBase58('encode');
SELECT decodeBase58('izCFiDUY');
```
Результат:
```text
┌─base58Encode('encode', 'flickr')─┐
┌─encodeBase58('encode', 'flickr')─┐
│ SvyTHb1D │
└──────────────────────────────────┘
┌─base58Decode('izCFiDUY', 'ripple')─┐
┌─decodeBase58('izCFiDUY', 'ripple')─┐
│ decode │
└────────────────────────────────────┘
```

View File

@ -68,6 +68,7 @@ String FieldVisitorToString::operator() (const UUID & x) const { return formatQu
String FieldVisitorToString::operator() (const AggregateFunctionStateData & x) const { return formatQuoted(x.data); }
String FieldVisitorToString::operator() (const bool & x) const { return x ? "true" : "false"; }
String FieldVisitorToString::operator() (const Array & x) const
{
WriteBufferFromOwnString wb;

85
src/Common/base58.h Normal file
View File

@ -0,0 +1,85 @@
#pragma once
#include <climits>
#include <cstring>
namespace DB
{
inline bool encodeBase58(const char8_t * src, char8_t * dst)
{
const char * base58_encoding_alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
size_t idx = 0;
for (; *src; ++src)
{
unsigned int carry = static_cast<unsigned char>(*src);
for (size_t j = 0; j < idx; ++j)
{
carry += static_cast<unsigned int>(dst[j] << 8);
dst[j] = static_cast<unsigned char>(carry % 58);
carry /= 58;
}
while (carry > 0)
{
dst[idx++] = static_cast<unsigned char>(carry % 58);
carry /= 58;
}
}
size_t c_idx = idx >> 1;
for (size_t i = 0; i < c_idx; ++i)
{
char s = base58_encoding_alphabet[static_cast<unsigned char>(dst[i])];
dst[i] = base58_encoding_alphabet[static_cast<unsigned char>(dst[idx - (i + 1)])];
dst[idx - (i + 1)] = s;
}
if ((idx & 1))
{
dst[c_idx] = base58_encoding_alphabet[static_cast<unsigned char>(dst[c_idx])];
}
dst[idx] = '\0';
return true;
}
inline bool decodeBase58(const char8_t * src, char8_t * dst)
{
const char map_digits[128]
= {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, -1, -1, -1,
-1, 9, 10, 11, 12, 13, 14, 15, 16, -1, 17, 18, 19, 20, 21, -1, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, -1, -1, -1, -1, -1,
-1, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, -1, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1};
size_t idx = 0;
for (; *src; ++src)
{
unsigned int carry = static_cast<unsigned int>(map_digits[static_cast<unsigned char>(*src)]);
if (carry == UINT_MAX || *src < '1' || map_digits[static_cast<unsigned char>(*src)] == map_digits[0])
{
return false;
}
for (size_t j = 0; j < idx; j++)
{
carry += static_cast<unsigned char>(dst[j]) * 58;
dst[j] = static_cast<unsigned char>(carry & 0xff);
carry >>= 8;
}
while (carry > 0)
{
dst[idx++] = static_cast<unsigned char>(carry & 0xff);
carry >>= 8;
}
}
size_t c_idx = idx >> 1;
for (size_t i = 0; i < c_idx; ++i)
{
char s = dst[i];
dst[i] = dst[idx - (i + 1)];
dst[idx - (i + 1)] = s;
}
dst[idx] = '\0';
return true;
}
}

View File

@ -1,14 +0,0 @@
#pragma once
#include <Core/Types.h>
namespace DB
{
namespace Base58Helpers
{
const Base58 Nil{};
}
}

View File

@ -7,7 +7,6 @@
#include <base/Decimal.h>
#include <base/defines.h>
#include <base/UUID.h>
#include <base/Base58.h>
namespace DB
@ -89,7 +88,6 @@ enum class TypeIndex
LowCardinality,
Map,
Object,
Base58,
};
#if !defined(__clang__)
#pragma GCC diagnostic pop

View File

@ -1,34 +0,0 @@
#include <DataTypes/DataTypeBase58.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationBase58.h>
namespace DB
{
bool DataTypeBase58::equals(const IDataType & rhs) const
{
return typeid(rhs) == typeid(*this);
}
SerializationPtr DataTypeBase58::doGetDefaultSerialization() const
{
return std::make_shared<SerializationUUID>();
}
Field DataTypeUUID::getDefault() const
{
return UUID{};
}
MutableColumnPtr DataTypeUUID::createColumn() const
{
return ColumnVector<UUID>::create();
}
void registerDataTypeUUID(DataTypeFactory & factory)
{
factory.registerSimpleDataType("UUID", [] { return DataTypePtr(std::make_shared<DataTypeUUID>()); });
}
}

View File

@ -1,48 +0,0 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <Columns/ColumnVector.h>
#include <Core/Base58.h>
namespace DB
{
class DataTypeBase58 : public IDataType
{
public:
static constexpr bool is_parametric = false;
using FieldType = Base58;
using ColumnType = ColumnVector<Base58>;
static constexpr auto type_id = TypeIndex::Base58;
const char * getFamilyName() const override { return "Base58"; }
TypeIndex getTypeId() const override { return type_id; }
Field getDefault() const override;
MutableColumnPtr createColumn() const override;
bool isParametric() const override { return false; }
bool haveSubtypes() const override { return false; }
bool equals(const IDataType & rhs) const override;
bool canBeUsedInBitOperations() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool canBePromoted() const override { return false; }
bool shouldAlignRightInPrettyFormats() const override { return false; }
bool textCanContainOnlyValidUTF8() const override { return true; }
bool isComparable() const override { return true; }
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
bool haveMaximumSizeOfValue() const override { return true; }
size_t getSizeOfValueInMemory() const override { return sizeof(Base58); }
bool isCategorial() const override { return true; }
bool canBeInsideLowCardinality() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override;
};
}

View File

@ -0,0 +1,19 @@
#include <DataTypes/Serializations/SerializationBase58.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeCustom.h>
namespace DB
{
void registerDataTypeBase58(DataTypeFactory & factory)
{
factory.registerSimpleDataTypeCustom("Base58", []
{
auto type = DataTypeFactory::instance().get("String");
return std::make_pair(type, std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>("Base58"), std::make_unique<SerializationBase58>(type->getDefaultSerialization())));
});
}
}

View File

@ -219,6 +219,7 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeDomainGeo(*this);
registerDataTypeMap(*this);
registerDataTypeObject(*this);
registerDataTypeBase58(*this);
}
DataTypeFactory & DataTypeFactory::instance()

View File

@ -88,5 +88,6 @@ void registerDataTypeDomainBool(DataTypeFactory & factory);
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
void registerDataTypeDomainGeo(DataTypeFactory & factory);
void registerDataTypeObject(DataTypeFactory & factory);
void registerDataTypeBase58(DataTypeFactory & factory);
}

View File

@ -1,11 +1,11 @@
#include <DataTypes/Serializations/SerializationBase58.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <Common/Exception.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
#include <Formats/FormatSettings.h>
#include <Common/base58.h>
namespace DB
@ -17,47 +17,56 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
}
SerializationBase58::SerializationBase58(const SerializationPtr & nested_)
: SerializationCustomSimpleText(nested_)
SerializationBase58::SerializationBase58(const SerializationPtr & nested_) : SerializationCustomSimpleText(nested_)
{
}
void SerializationBase58::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
const auto * col = checkAndGetColumn<ColumnUInt32>(&column);
const ColumnString * col = checkAndGetColumn<ColumnString>(&column);
if (!col)
{
throw Exception("IPv4 type can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("Base58 type can only serialize columns of type String." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
auto
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
auto value = col->getDataAtWithTerminatingZero(row_num);
char buffer[value.size * 2 + 1];
char * ptr = buffer;
formatIPv4(reinterpret_cast<const unsigned char *>(&col->getData()[row_num]), ptr);
encodeBase58(reinterpret_cast<const char8_t *>(value.data), reinterpret_cast<char8_t *>(ptr));
ostr.write(buffer, strlen(buffer));
}
void SerializationBase58::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
{
ColumnUInt32 * col = typeid_cast<ColumnUInt32 *>(&column);
ColumnString * col = typeid_cast<ColumnString *>(&column);
if (!col)
{
throw Exception("IPv4 type can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
throw Exception("Base58 type can only deserialize columns of type String." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
istr.read(buffer, sizeof(buffer) - 1);
UInt32 ipv4_value = 0;
size_t allocated = 32;
std::string encoded(allocated, '\0');
bool parse_result = parseIPv4(buffer, reinterpret_cast<unsigned char *>(&ipv4_value));
if (!parse_result && !settings.input_format_ipv4_default_on_conversion_error)
size_t read_position = 0;
while (istr.read(encoded[read_position]))
{
throw Exception("Invalid IPv4 value", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
++read_position;
if (read_position == allocated)
{
allocated *= 2;
encoded.resize(allocated, '\0');
}
}
col->insert(ipv4_value);
char buffer[read_position + 1];
if (!decodeBase58(reinterpret_cast<const char8_t *>(encoded.c_str()), reinterpret_cast<char8_t *>(buffer)))
{
throw Exception("Invalid Base58 encoded value, cannot parse." + column.getName(), ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
col->insertDataWithTerminatingZero(buffer, read_position+1);
if (whole && !istr.eof())
throwUnexpectedDataAfterParsedValue(column, istr, settings, "IPv4");
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Base58");
}
}

View File

@ -72,10 +72,6 @@ if (TARGET ch_contrib::llvm)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::llvm)
endif ()
if (TARGET ch_contrib::base-x)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base-x)
endif()
if (TARGET ch_contrib::base64)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base64)
endif()

View File

@ -1,15 +1,13 @@
#pragma once
#include "config_functions.h"
#if USE_BASEX
# include <Columns/ColumnConst.h>
# include <Common/MemorySanitizer.h>
# include <Columns/ColumnString.h>
# include <DataTypes/DataTypeString.h>
# include <Functions/FunctionFactory.h>
# include <Functions/FunctionHelpers.h>
# include <IO/WriteHelpers.h>
# include <base_x.hh>
#include <Columns/ColumnConst.h>
#include <Common/MemorySanitizer.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/base58.h>
#include <cstring>
namespace DB
@ -26,72 +24,39 @@ struct Base58Encode
{
static constexpr auto name = "base58Encode";
static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const std::string & alphabet, size_t input_rows_count)
static void process(const ColumnString & src_column, ColumnString::MutablePtr & dst_column, size_t input_rows_count)
{
auto & dst_data = dst_column->getChars();
auto & dst_offsets = dst_column->getOffsets();
/// Wikipedia states Base58 has efficiency of 73%, and we take 1.5 scale to avoid reallocation in most cases
size_t current_allocated_size = ceil(1.5 * input.getChars().size());
/// Base58 has efficiency of 73% (8/11) [https://monerodocs.org/cryptography/base58/],
/// and we take double scale to avoid any reallocation.
dst_data.resize(current_allocated_size);
size_t max_result_size = ceil(2 * src_column.getChars().size() + 1);
dst_data.resize(max_result_size);
dst_offsets.resize(input_rows_count);
const ColumnString::Offsets & src_offsets = input.getOffsets();
const ColumnString::Offsets & src_offsets = src_column.getOffsets();
const auto * source = input.getChars().raw_data();
const auto * source = src_column.getChars().data();
auto * dst = dst_data.data();
auto * dst_pos = dst;
size_t src_offset_prev = 0;
size_t processed_size = 0;
const auto& encoder = (alphabet == "bitcoin") ? Base58::bitcoin() :
((alphabet == "flickr") ? Base58::flickr() :
((alphabet == "ripple") ? Base58::ripple() :
Base58::base58())); //GMP
std::string encoded;
for (size_t row = 0; row < input_rows_count; ++row)
{
size_t srclen = src_offsets[row] - src_offset_prev - 1;
/// Why we didn't use char* here?
/// We don't know the size of the result string beforehand (it's not byte-to-byte encoding),
/// so we may need to do many resizes (the worst case -- we'll do it for each row)
/// This way we do exponential resizes and one final resize after whole operation is complete
encoded.clear();
if (srclen)
try
{
encoder.encode(encoded, source, srclen);
}
catch (const std::invalid_argument& e)
{
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
}
catch (const std::domain_error& e)
{
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
}
size_t outlen = encoded.size();
encodeBase58(source, dst_pos);
if (processed_size + outlen >= current_allocated_size)
{
current_allocated_size += current_allocated_size;
dst_data.resize(current_allocated_size);
auto processed_offset = dst_pos - dst;
dst = dst_data.data();
dst_pos = dst;
dst_pos += processed_offset;
}
std::memcpy(dst_pos, encoded.c_str(), ++outlen);
size_t encoded_length = strlen(reinterpret_cast<const char *>(dst_pos));
source += srclen + 1;
dst_pos += outlen;
dst_pos += encoded_length + 1;
dst_offsets[row] = dst_pos - dst;
src_offset_prev = src_offsets[row];
processed_size += outlen;
}
dst_data.resize(dst_pos - dst);
@ -102,72 +67,40 @@ struct Base58Decode
{
static constexpr auto name = "base58Decode";
static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const std::string & alphabet, size_t input_rows_count)
static void process(const ColumnString & src_column, ColumnString::MutablePtr & dst_column, size_t input_rows_count)
{
auto & dst_data = dst_column->getChars();
auto & dst_offsets = dst_column->getOffsets();
/// We allocate probably even more then needed to avoid many resizes
size_t current_allocated_size = input.getChars().size();
/// Base58 has efficiency of 73% (8/11) [https://monerodocs.org/cryptography/base58/],
/// and decoded value will be no longer than source.
dst_data.resize(current_allocated_size);
size_t max_result_size = src_column.getChars().size() + 1;
dst_data.resize(max_result_size);
dst_offsets.resize(input_rows_count);
const ColumnString::Offsets & src_offsets = input.getOffsets();
const ColumnString::Offsets & src_offsets = src_column.getOffsets();
const auto * source = input.getChars().raw_data();
const auto * source = src_column.getChars().data();
auto * dst = dst_data.data();
auto * dst_pos = dst;
size_t src_offset_prev = 0;
size_t processed_size = 0;
const auto& decoder = (alphabet == "bitcoin") ? Base58::bitcoin() :
((alphabet == "flickr") ? Base58::flickr() :
((alphabet == "ripple") ? Base58::ripple() :
Base58::base58()));
std::string decoded;
for (size_t row = 0; row < input_rows_count; ++row)
{
size_t srclen = src_offsets[row] - src_offset_prev - 1;
/// Why we didn't use char* here?
/// We don't know the size of the result string beforehand (it's not byte-to-byte encoding),
/// so we may need to do many resizes (the worst case -- we'll do it for each row)
/// This way we do exponential resizes and one final resize after whole operation is complete
decoded.clear();
if (srclen)
try
{
decoder.decode(decoded, source, srclen);
}
catch (const std::invalid_argument& e)
{
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
}
catch (const std::domain_error& e)
{
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
}
size_t outlen = decoded.size();
if (!decodeBase58(source, dst_pos))
throw Exception("Invalid Base58 value, cannot be decoded", ErrorCodes::BAD_ARGUMENTS);
if (processed_size + outlen >= current_allocated_size)
{
current_allocated_size += current_allocated_size;
dst_data.resize(current_allocated_size);
auto processed_offset = dst_pos - dst;
dst = dst_data.data();
dst_pos = dst;
dst_pos += processed_offset;
}
std::memcpy(dst_pos, decoded.c_str(), ++outlen);
size_t encoded_length = strlen(reinterpret_cast<const char *>(dst_pos));
source += srclen + 1;
dst_pos += outlen;
dst_pos += encoded_length + 1;
dst_offsets[row] = dst_pos - dst;
src_offset_prev = src_offsets[row];
processed_size += outlen;
}
dst_data.resize(dst_pos - dst);
@ -190,9 +123,7 @@ public:
return Func::name;
}
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
size_t getNumberOfArguments() const override { return 1; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
@ -202,19 +133,12 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 1 && arguments.size() != 2)
throw Exception(
"Wrong number of arguments for function " + getName() + ": 1 or 2 expected.",
ErrorCodes::BAD_ARGUMENTS);
if (arguments.size() != 1)
throw Exception("Wrong number of arguments for function " + getName() + ": 1 expected.", ErrorCodes::BAD_ARGUMENTS);
if (!isString(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of 1st argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 2 && !isString(arguments[1].type))
throw Exception(
"Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName() + ". Must be String.",
"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeString>();
@ -229,28 +153,11 @@ public:
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName() + ", must be String",
ErrorCodes::ILLEGAL_COLUMN);
std::string alphabet = "bitcoin";
if (arguments.size() == 2)
{
const auto * alphabet_column = checkAndGetColumn<ColumnConst>(arguments[1].column.get());
if (!alphabet_column)
throw Exception("Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
alphabet = alphabet_column->getValue<DB::String>();
if (alphabet != "bitcoin" && alphabet != "ripple" && alphabet != "flickr" && alphabet != "gmp")
throw Exception("Second argument for function " + getName() + " must be 'bitcoin', 'ripple', 'gmp' or 'flickr'", ErrorCodes::ILLEGAL_COLUMN);
}
auto dst_column = ColumnString::create();
Func::process(*input, dst_column, alphabet, input_rows_count);
Func::process(*input, dst_column, input_rows_count);
return dst_column;
}
};
}
#endif

View File

@ -1,5 +1,4 @@
#include <Functions/FunctionBase58Conversion.h>
#if USE_BASEX
#include <Functions/FunctionFactory.h>
namespace DB
@ -14,4 +13,3 @@ void registerFunctionBase58Decode(FunctionFactory & factory)
factory.registerFunction<FunctionBase58Conversion<Base58Decode>>();
}
}
#endif

View File

@ -2,7 +2,6 @@
// .h autogenerated by cmake!
#cmakedefine01 USE_BASEX
#cmakedefine01 USE_BASE64
#cmakedefine01 USE_SIMDJSON
#cmakedefine01 USE_RAPIDJSON

View File

@ -1,9 +1,6 @@
if (TARGET ch_contrib::fastops)
set(USE_FASTOPS 1)
endif()
if (TARGET ch_contrib::base-x)
set(USE_BASEX 1)
endif()
if (TARGET ch_contrib::base64)
set(USE_BASE64 1)
endif()

View File

@ -49,10 +49,8 @@ void registerFunctionBase64Decode(FunctionFactory &);
void registerFunctionTryBase64Decode(FunctionFactory &);
#endif
#if USE_BASEX
void registerFunctionBase58Encode(FunctionFactory &);
void registerFunctionBase58Decode(FunctionFactory &);
#endif
#if USE_NLP
void registerFunctionStem(FunctionFactory &);
@ -110,10 +108,8 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionTryBase64Decode(factory);
#endif
#if USE_BASEX
registerFunctionBase58Encode(factory);
registerFunctionBase58Decode(factory);
#endif
#if USE_NLP
registerFunctionStem(factory);

View File

@ -55,9 +55,6 @@ endif()
if (TARGET ch_contrib::base64)
set(USE_BASE64 1)
endif()
if (TARGET ch_contrib::base-x)
set(USE_BASEX 1)
endif()
if (TARGET ch_contrib::yaml_cpp)
set(USE_YAML_CPP 1)
endif()

View File

@ -8,30 +8,6 @@ fooba
foobar
Hello world!
f
fo
foo
foob
fooba
foobar
Hello world!
f
fo
foo
foob
fooba
foobar
Hello world!
f
fo
foo
foob
fooba
foobar
Hello world!
2m
8o8
bQbp

View File

@ -3,15 +3,10 @@
SET send_logs_level = 'fatal';
SELECT base58Encode('Hold my beer...');
SELECT base58Encode('Hold my beer...', ''); -- { serverError 44 }
SELECT base58Encode('Hold my beer...', 'gmp', 'third'); -- { serverError 36 }
SELECT base58Encode('Hold my beer...', 'Second arg'); -- { serverError 42 }
SELECT base58Decode('Hold my beer...'); -- { serverError 36 }
SELECT base58Decode(encoded, 'gmp') FROM (SELECT base58Encode(val, 'gmp') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
SELECT base58Decode(encoded, 'ripple') FROM (SELECT base58Encode(val, 'ripple') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
SELECT base58Decode(encoded, 'flickr') FROM (SELECT base58Encode(val, 'flickr') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
SELECT base58Decode(encoded, 'bitcoin') FROM (SELECT base58Encode(val, 'bitcoin') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
SELECT base58Decode(encoded) FROM (SELECT base58Encode(val) as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
SELECT base58Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val);
SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ']) val);
SELECT base58Decode('Why_not?'); -- { serverError 36 }