mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
added new DataType + fixes
This commit is contained in:
parent
f8e3bdecaf
commit
4ab52b6873
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -271,6 +271,3 @@
|
||||
[submodule "contrib/liburing"]
|
||||
path = contrib/liburing
|
||||
url = https://github.com/axboe/liburing.git
|
||||
[submodule "contrib/base-x"]
|
||||
path = contrib/base-x
|
||||
url = https://github.com/ClickHouse/base-x.git
|
||||
|
@ -1,9 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/strong_typedef.h>
|
||||
#include <base/extended_types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using Base58 = StrongTypedef<DB::String, struct Base58Type>;
|
||||
}
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -155,7 +155,6 @@ endif()
|
||||
|
||||
add_contrib (sqlite-cmake sqlite-amalgamation)
|
||||
add_contrib (s2geometry-cmake s2geometry)
|
||||
add_contrib (base-x-cmake base-x)
|
||||
|
||||
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
|
||||
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
|
||||
|
1
contrib/base-x
vendored
1
contrib/base-x
vendored
@ -1 +0,0 @@
|
||||
Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5
|
@ -1,28 +0,0 @@
|
||||
# Build glue for the header-only base-x library (it ships base_x.hh and uinteger_t.hh).
# Defines the INTERFACE target `_base-x` and its public alias `ch_contrib::base-x`;
# consumers only inherit an include directory, nothing is compiled here.
#
# ENABLE_BASEX - switch for this library, defaults to the value of ENABLE_LIBRARIES.
option (ENABLE_BASEX "Enable base-x" ${ENABLE_LIBRARIES})

if (NOT ENABLE_BASEX)
    message(STATUS "Not using base-x")
    # Leave this directory scope without defining any target, so that
    # `if (TARGET ch_contrib::base-x)` checks elsewhere evaluate to false.
    return()
endif()

set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base-x")

# The library consists of headers only, so no source list is needed:
# an INTERFACE target carrying the include path is sufficient.
add_library(_base-x INTERFACE)
target_include_directories(_base-x SYSTEM BEFORE INTERFACE "${LIBRARY_DIR}")

if (XCODE OR XCODE_VERSION)
    # https://gitlab.kitware.com/cmake/cmake/issues/17457
    # Some native build systems may not like targets that have only object files, so consider adding at least one real source file
    # This applies to Xcode.
    if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
        file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
    endif ()
    # NOTE(review): PRIVATE sources on an INTERFACE library are only accepted by
    # newer CMake releases - confirm against the project's minimum CMake version.
    target_sources(_base-x PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
endif ()

add_library(ch_contrib::base-x ALIAS _base-x)
|
@ -18,7 +18,6 @@ The list of third-party libraries:
|
||||
| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) |
|
||||
| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) |
|
||||
| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) |
|
||||
| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) |
|
||||
| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) |
|
||||
| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) |
|
||||
| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) |
|
||||
|
@ -494,22 +494,21 @@ If the ‘s’ string is non-empty and does not contain the ‘c’ character at
|
||||
|
||||
Returns the string ‘s’ that was converted from the encoding in ‘from’ to the encoding in ‘to’.
|
||||
|
||||
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(encoded_text[, alphabet_name])
|
||||
## Base58Encode(plaintext), Base58Decode(encoded_text)
|
||||
|
||||
Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using specified alphabet.
|
||||
Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
base58Encode(decoded[, alphabet_name])
|
||||
base58Decode(encoded[, alphabet_name])
|
||||
base58Encode(decoded)
|
||||
base58Decode(encoded)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `decoded` — [String](../../sql-reference/data-types/string.md) column or constant.
|
||||
- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown.
|
||||
- `alphabet_name` — String constant. Specifies alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `bitcoin`.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -522,16 +521,16 @@ Type: [String](../../sql-reference/data-types/string.md).
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT base58Encode('encode', 'flickr');
|
||||
SELECT base58Decode('izCFiDUY', 'ripple');
|
||||
SELECT base58Encode('encode');
|
||||
SELECT base58Decode('izCFiDUY');
|
||||
```
|
||||
|
||||
Result:
|
||||
```text
|
||||
┌─base58Encode('encode', 'flickr')─┐
|
||||
┌─encodeBase58('encode', 'flickr')─┐
|
||||
│ SvyTHb1D │
|
||||
└──────────────────────────────────┘
|
||||
┌─base58Decode('izCFiDUY', 'ripple')─┐
|
||||
┌─decodeBase58('izCFiDUY', 'ripple')─┐
|
||||
│ decode │
|
||||
└────────────────────────────────────┘
|
||||
```
|
||||
|
@ -18,7 +18,6 @@ sidebar_label: "Используемые сторонние библиотеки
|
||||
| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) |
|
||||
| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) |
|
||||
| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) |
|
||||
| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) |
|
||||
| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) |
|
||||
| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) |
|
||||
| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) |
|
||||
|
@ -490,22 +490,21 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2);
|
||||
|
||||
Возвращает сконвертированную из кодировки from в кодировку to строку s.
|
||||
|
||||
## Base58Encode(plaintext[, alphabet_name]), Base58Decode(plaintext[, alphabet_name]) {#base58}
|
||||
## Base58Encode(plaintext), Base58Decode(encoded_text) {#base58}
|
||||
|
||||
Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием указанного алфавита.
|
||||
Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием стандартного алфавита Bitcoin.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
base58Encode(decoded[, alphabet_name])
|
||||
base58Decode(encoded[, alphabet_name])
|
||||
base58Encode(decoded)
|
||||
base58Decode(encoded)
|
||||
```
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `decoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md).
|
||||
- `encoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). Если входная строка не является корректным кодом для какой-либо другой строки, возникнет исключение `1001`.
|
||||
- `alphabet_name` — Строковая константа. Указывает алфавит, для которого необходимо получить код. Может принимать одно из следующих значений: `gmp`, `bitcoin`, `ripple`, `flickr`. По умолчанию: `bitcoin`.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
@ -518,16 +517,16 @@ base58Decode(encoded[, alphabet_name])
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT base58Encode('encode', 'flickr');
|
||||
SELECT base58Decode('izCFiDUY', 'ripple');
|
||||
SELECT base58Encode('encode');
|
||||
SELECT base58Decode('izCFiDUY');
|
||||
```
|
||||
|
||||
Результат:
|
||||
```text
|
||||
┌─base58Encode('encode', 'flickr')─┐
|
||||
┌─encodeBase58('encode', 'flickr')─┐
|
||||
│ SvyTHb1D │
|
||||
└──────────────────────────────────┘
|
||||
┌─base58Decode('izCFiDUY', 'ripple')─┐
|
||||
┌─decodeBase58('izCFiDUY', 'ripple')─┐
|
||||
│ decode │
|
||||
└────────────────────────────────────┘
|
||||
```
|
||||
|
@ -68,6 +68,7 @@ String FieldVisitorToString::operator() (const UUID & x) const { return formatQu
|
||||
String FieldVisitorToString::operator() (const AggregateFunctionStateData & x) const { return formatQuoted(x.data); }
|
||||
String FieldVisitorToString::operator() (const bool & x) const { return x ? "true" : "false"; }
|
||||
|
||||
|
||||
String FieldVisitorToString::operator() (const Array & x) const
|
||||
{
|
||||
WriteBufferFromOwnString wb;
|
||||
|
85
src/Common/base58.h
Normal file
85
src/Common/base58.h
Normal file
@ -0,0 +1,85 @@
|
||||
#pragma once
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Encodes a NUL-terminated byte string into Base58 text using the Bitcoin alphabet.
///
/// @param src  Input bytes; reading stops at the first zero byte.
/// @param dst  Output buffer; receives the encoded characters followed by a
///             terminating '\0'. It is also used as scratch space for the
///             intermediate base-58 digits, so it must be large enough for the
///             whole result plus the terminator (callers in this file reserve
///             about twice the input size).
/// @return     Always true.
inline bool encodeBase58(const char8_t * src, char8_t * dst)
{
    static constexpr char alphabet[] = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";

    /// Number of base-58 digits accumulated so far; they are kept in dst
    /// least significant digit first.
    size_t digits = 0;

    while (*src)
    {
        /// Multiply the number held in dst by 256 and add the next input byte.
        unsigned int acc = static_cast<unsigned char>(*src);
        ++src;

        for (size_t pos = 0; pos < digits; ++pos)
        {
            acc += static_cast<unsigned int>(dst[pos] << 8);
            dst[pos] = static_cast<unsigned char>(acc % 58);
            acc /= 58;
        }

        /// Whatever is left of the carry extends the number with new digits.
        for (; acc > 0; acc /= 58)
            dst[digits++] = static_cast<unsigned char>(acc % 58);
    }

    /// Translate digit values into alphabet characters while reversing the
    /// sequence into most-significant-first order. When both cursors meet on
    /// the middle element it is simply translated in place.
    size_t front = 0;
    size_t back = digits;
    while (front < back)
    {
        --back;
        const char front_symbol = alphabet[static_cast<unsigned char>(dst[front])];
        const char back_symbol = alphabet[static_cast<unsigned char>(dst[back])];
        dst[front] = back_symbol;
        dst[back] = front_symbol;
        ++front;
    }

    dst[digits] = '\0';
    return true;
}
|
||||
|
||||
inline bool decodeBase58(const char8_t * src, char8_t * dst)
|
||||
{
|
||||
const char map_digits[128]
|
||||
= {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, -1, -1, -1, -1, -1,
|
||||
-1, 9, 10, 11, 12, 13, 14, 15, 16, -1, 17, 18, 19, 20, 21, -1, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, -1, -1, -1, -1, -1,
|
||||
-1, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, -1, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1};
|
||||
|
||||
size_t idx = 0;
|
||||
|
||||
for (; *src; ++src)
|
||||
{
|
||||
unsigned int carry = static_cast<unsigned int>(map_digits[static_cast<unsigned char>(*src)]);
|
||||
if (carry == UINT_MAX || *src < '1' || map_digits[static_cast<unsigned char>(*src)] == map_digits[0])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
for (size_t j = 0; j < idx; j++)
|
||||
{
|
||||
carry += static_cast<unsigned char>(dst[j]) * 58;
|
||||
dst[j] = static_cast<unsigned char>(carry & 0xff);
|
||||
carry >>= 8;
|
||||
}
|
||||
while (carry > 0)
|
||||
{
|
||||
dst[idx++] = static_cast<unsigned char>(carry & 0xff);
|
||||
carry >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
size_t c_idx = idx >> 1;
|
||||
for (size_t i = 0; i < c_idx; ++i)
|
||||
{
|
||||
char s = dst[i];
|
||||
dst[i] = dst[idx - (i + 1)];
|
||||
dst[idx - (i + 1)] = s;
|
||||
}
|
||||
dst[idx] = '\0';
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Types.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace Base58Helpers
|
||||
{
|
||||
const Base58 Nil{};
|
||||
}
|
||||
|
||||
}
|
@ -7,7 +7,6 @@
|
||||
#include <base/Decimal.h>
|
||||
#include <base/defines.h>
|
||||
#include <base/UUID.h>
|
||||
#include <base/Base58.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -89,7 +88,6 @@ enum class TypeIndex
|
||||
LowCardinality,
|
||||
Map,
|
||||
Object,
|
||||
Base58,
|
||||
};
|
||||
#if !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
|
@ -1,34 +0,0 @@
|
||||
#include <DataTypes/DataTypeBase58.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/Serializations/SerializationBase58.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
bool DataTypeBase58::equals(const IDataType & rhs) const
|
||||
{
|
||||
return typeid(rhs) == typeid(*this);
|
||||
}
|
||||
|
||||
SerializationPtr DataTypeBase58::doGetDefaultSerialization() const
|
||||
{
|
||||
return std::make_shared<SerializationUUID>();
|
||||
}
|
||||
|
||||
Field DataTypeUUID::getDefault() const
|
||||
{
|
||||
return UUID{};
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeUUID::createColumn() const
|
||||
{
|
||||
return ColumnVector<UUID>::create();
|
||||
}
|
||||
|
||||
void registerDataTypeUUID(DataTypeFactory & factory)
|
||||
{
|
||||
factory.registerSimpleDataType("UUID", [] { return DataTypePtr(std::make_shared<DataTypeUUID>()); });
|
||||
}
|
||||
|
||||
}
|
@ -1,48 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Core/Base58.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class DataTypeBase58 : public IDataType
|
||||
{
|
||||
public:
|
||||
static constexpr bool is_parametric = false;
|
||||
|
||||
using FieldType = Base58;
|
||||
using ColumnType = ColumnVector<Base58>;
|
||||
static constexpr auto type_id = TypeIndex::Base58;
|
||||
|
||||
const char * getFamilyName() const override { return "Base58"; }
|
||||
TypeIndex getTypeId() const override { return type_id; }
|
||||
|
||||
Field getDefault() const override;
|
||||
|
||||
MutableColumnPtr createColumn() const override;
|
||||
|
||||
bool isParametric() const override { return false; }
|
||||
bool haveSubtypes() const override { return false; }
|
||||
|
||||
bool equals(const IDataType & rhs) const override;
|
||||
|
||||
bool canBeUsedInBitOperations() const override { return true; }
|
||||
bool canBeInsideNullable() const override { return true; }
|
||||
bool canBePromoted() const override { return false; }
|
||||
bool shouldAlignRightInPrettyFormats() const override { return false; }
|
||||
bool textCanContainOnlyValidUTF8() const override { return true; }
|
||||
bool isComparable() const override { return true; }
|
||||
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
|
||||
bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
|
||||
bool haveMaximumSizeOfValue() const override { return true; }
|
||||
size_t getSizeOfValueInMemory() const override { return sizeof(Base58); }
|
||||
bool isCategorial() const override { return true; }
|
||||
bool canBeInsideLowCardinality() const override { return true; }
|
||||
|
||||
SerializationPtr doGetDefaultSerialization() const override;
|
||||
};
|
||||
|
||||
}
|
19
src/DataTypes/DataTypeCustomBase58.cpp
Normal file
19
src/DataTypes/DataTypeCustomBase58.cpp
Normal file
@ -0,0 +1,19 @@
|
||||
#include <DataTypes/Serializations/SerializationBase58.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Registers the custom data type "Base58" in the given factory.
/// The creator obtains the "String" type from the factory singleton and pairs it
/// with a custom description consisting of the fixed name "Base58" and a
/// SerializationBase58 wrapped around String's default serialization.
void registerDataTypeBase58(DataTypeFactory & factory)
{
    auto create_base58 = []
    {
        /// Looked up lazily, when the creator runs, not at registration time.
        auto string_type = DataTypeFactory::instance().get("String");

        auto custom_desc = std::make_unique<DataTypeCustomDesc>(
            std::make_unique<DataTypeCustomFixedName>("Base58"),
            std::make_unique<SerializationBase58>(string_type->getDefaultSerialization()));

        return std::make_pair(string_type, std::move(custom_desc));
    };

    factory.registerSimpleDataTypeCustom("Base58", create_base58);
}
|
||||
|
||||
}
|
@ -219,6 +219,7 @@ DataTypeFactory::DataTypeFactory()
|
||||
registerDataTypeDomainGeo(*this);
|
||||
registerDataTypeMap(*this);
|
||||
registerDataTypeObject(*this);
|
||||
registerDataTypeBase58(*this);
|
||||
}
|
||||
|
||||
DataTypeFactory & DataTypeFactory::instance()
|
||||
|
@ -88,5 +88,6 @@ void registerDataTypeDomainBool(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainGeo(DataTypeFactory & factory);
|
||||
void registerDataTypeObject(DataTypeFactory & factory);
|
||||
void registerDataTypeBase58(DataTypeFactory & factory);
|
||||
|
||||
}
|
||||
|
@ -1,11 +1,11 @@
|
||||
#include <DataTypes/Serializations/SerializationBase58.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Common/base58.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -17,47 +17,56 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
SerializationBase58::SerializationBase58(const SerializationPtr & nested_)
|
||||
: SerializationCustomSimpleText(nested_)
|
||||
SerializationBase58::SerializationBase58(const SerializationPtr & nested_) : SerializationCustomSimpleText(nested_)
|
||||
{
|
||||
}
|
||||
|
||||
void SerializationBase58::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
const auto * col = checkAndGetColumn<ColumnUInt32>(&column);
|
||||
const ColumnString * col = checkAndGetColumn<ColumnString>(&column);
|
||||
if (!col)
|
||||
{
|
||||
throw Exception("IPv4 type can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception("Base58 type can only serialize columns of type String." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
auto
|
||||
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
|
||||
auto value = col->getDataAtWithTerminatingZero(row_num);
|
||||
char buffer[value.size * 2 + 1];
|
||||
char * ptr = buffer;
|
||||
formatIPv4(reinterpret_cast<const unsigned char *>(&col->getData()[row_num]), ptr);
|
||||
|
||||
encodeBase58(reinterpret_cast<const char8_t *>(value.data), reinterpret_cast<char8_t *>(ptr));
|
||||
ostr.write(buffer, strlen(buffer));
|
||||
}
|
||||
|
||||
void SerializationBase58::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
|
||||
{
|
||||
ColumnUInt32 * col = typeid_cast<ColumnUInt32 *>(&column);
|
||||
ColumnString * col = typeid_cast<ColumnString *>(&column);
|
||||
if (!col)
|
||||
{
|
||||
throw Exception("IPv4 type can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception("Base58 type can only deserialize columns of type String." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
|
||||
istr.read(buffer, sizeof(buffer) - 1);
|
||||
UInt32 ipv4_value = 0;
|
||||
size_t allocated = 32;
|
||||
std::string encoded(allocated, '\0');
|
||||
|
||||
bool parse_result = parseIPv4(buffer, reinterpret_cast<unsigned char *>(&ipv4_value));
|
||||
if (!parse_result && !settings.input_format_ipv4_default_on_conversion_error)
|
||||
size_t read_position = 0;
|
||||
while (istr.read(encoded[read_position]))
|
||||
{
|
||||
throw Exception("Invalid IPv4 value", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
|
||||
++read_position;
|
||||
if (read_position == allocated)
|
||||
{
|
||||
allocated *= 2;
|
||||
encoded.resize(allocated, '\0');
|
||||
}
|
||||
}
|
||||
|
||||
col->insert(ipv4_value);
|
||||
char buffer[read_position + 1];
|
||||
if (!decodeBase58(reinterpret_cast<const char8_t *>(encoded.c_str()), reinterpret_cast<char8_t *>(buffer)))
|
||||
{
|
||||
throw Exception("Invalid Base58 encoded value, cannot parse." + column.getName(), ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
|
||||
}
|
||||
|
||||
col->insertDataWithTerminatingZero(buffer, read_position+1);
|
||||
|
||||
if (whole && !istr.eof())
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "IPv4");
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Base58");
|
||||
}
|
||||
}
|
||||
|
@ -72,10 +72,6 @@ if (TARGET ch_contrib::llvm)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::llvm)
|
||||
endif ()
|
||||
|
||||
if (TARGET ch_contrib::base-x)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base-x)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::base64)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base64)
|
||||
endif()
|
||||
|
@ -1,15 +1,13 @@
|
||||
#pragma once
|
||||
#include "config_functions.h"
|
||||
|
||||
#if USE_BASEX
|
||||
# include <Columns/ColumnConst.h>
|
||||
# include <Common/MemorySanitizer.h>
|
||||
# include <Columns/ColumnString.h>
|
||||
# include <DataTypes/DataTypeString.h>
|
||||
# include <Functions/FunctionFactory.h>
|
||||
# include <Functions/FunctionHelpers.h>
|
||||
# include <IO/WriteHelpers.h>
|
||||
# include <base_x.hh>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Common/MemorySanitizer.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/base58.h>
|
||||
#include <cstring>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -26,72 +24,39 @@ struct Base58Encode
|
||||
{
|
||||
static constexpr auto name = "base58Encode";
|
||||
|
||||
static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const std::string & alphabet, size_t input_rows_count)
|
||||
static void process(const ColumnString & src_column, ColumnString::MutablePtr & dst_column, size_t input_rows_count)
|
||||
{
|
||||
auto & dst_data = dst_column->getChars();
|
||||
auto & dst_offsets = dst_column->getOffsets();
|
||||
|
||||
/// Wikipedia states Base58 has efficiency of 73%, and we take 1.5 scale to avoid reallocation in most cases
|
||||
size_t current_allocated_size = ceil(1.5 * input.getChars().size());
|
||||
/// Base58 has efficiency of 73% (8/11) [https://monerodocs.org/cryptography/base58/],
|
||||
/// and we take double scale to avoid any reallocation.
|
||||
|
||||
dst_data.resize(current_allocated_size);
|
||||
size_t max_result_size = ceil(2 * src_column.getChars().size() + 1);
|
||||
|
||||
dst_data.resize(max_result_size);
|
||||
dst_offsets.resize(input_rows_count);
|
||||
|
||||
const ColumnString::Offsets & src_offsets = input.getOffsets();
|
||||
const ColumnString::Offsets & src_offsets = src_column.getOffsets();
|
||||
|
||||
const auto * source = input.getChars().raw_data();
|
||||
const auto * source = src_column.getChars().data();
|
||||
auto * dst = dst_data.data();
|
||||
auto * dst_pos = dst;
|
||||
|
||||
size_t src_offset_prev = 0;
|
||||
size_t processed_size = 0;
|
||||
|
||||
const auto& encoder = (alphabet == "bitcoin") ? Base58::bitcoin() :
|
||||
((alphabet == "flickr") ? Base58::flickr() :
|
||||
((alphabet == "ripple") ? Base58::ripple() :
|
||||
Base58::base58())); //GMP
|
||||
|
||||
std::string encoded;
|
||||
for (size_t row = 0; row < input_rows_count; ++row)
|
||||
{
|
||||
size_t srclen = src_offsets[row] - src_offset_prev - 1;
|
||||
/// Why we didn't use char* here?
|
||||
/// We don't know the size of the result string beforehand (it's not byte-to-byte encoding),
|
||||
/// so we may need to do many resizes (the worst case -- we'll do it for each row)
|
||||
/// This way we do exponential resizes and one final resize after whole operation is complete
|
||||
encoded.clear();
|
||||
if (srclen)
|
||||
try
|
||||
{
|
||||
encoder.encode(encoded, source, srclen);
|
||||
}
|
||||
catch (const std::invalid_argument& e)
|
||||
{
|
||||
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
catch (const std::domain_error& e)
|
||||
{
|
||||
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
size_t outlen = encoded.size();
|
||||
encodeBase58(source, dst_pos);
|
||||
|
||||
if (processed_size + outlen >= current_allocated_size)
|
||||
{
|
||||
current_allocated_size += current_allocated_size;
|
||||
dst_data.resize(current_allocated_size);
|
||||
auto processed_offset = dst_pos - dst;
|
||||
dst = dst_data.data();
|
||||
dst_pos = dst;
|
||||
dst_pos += processed_offset;
|
||||
}
|
||||
std::memcpy(dst_pos, encoded.c_str(), ++outlen);
|
||||
size_t encoded_length = strlen(reinterpret_cast<const char *>(dst_pos));
|
||||
|
||||
source += srclen + 1;
|
||||
dst_pos += outlen;
|
||||
dst_pos += encoded_length + 1;
|
||||
|
||||
dst_offsets[row] = dst_pos - dst;
|
||||
src_offset_prev = src_offsets[row];
|
||||
processed_size += outlen;
|
||||
}
|
||||
|
||||
dst_data.resize(dst_pos - dst);
|
||||
@ -102,72 +67,40 @@ struct Base58Decode
|
||||
{
|
||||
static constexpr auto name = "base58Decode";
|
||||
|
||||
static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const std::string & alphabet, size_t input_rows_count)
|
||||
static void process(const ColumnString & src_column, ColumnString::MutablePtr & dst_column, size_t input_rows_count)
|
||||
{
|
||||
auto & dst_data = dst_column->getChars();
|
||||
auto & dst_offsets = dst_column->getOffsets();
|
||||
|
||||
/// We allocate probably even more then needed to avoid many resizes
|
||||
size_t current_allocated_size = input.getChars().size();
|
||||
/// Base58 has efficiency of 73% (8/11) [https://monerodocs.org/cryptography/base58/],
|
||||
/// and decoded value will be no longer than source.
|
||||
|
||||
dst_data.resize(current_allocated_size);
|
||||
size_t max_result_size = src_column.getChars().size() + 1;
|
||||
|
||||
dst_data.resize(max_result_size);
|
||||
dst_offsets.resize(input_rows_count);
|
||||
|
||||
const ColumnString::Offsets & src_offsets = input.getOffsets();
|
||||
const ColumnString::Offsets & src_offsets = src_column.getOffsets();
|
||||
|
||||
const auto * source = input.getChars().raw_data();
|
||||
const auto * source = src_column.getChars().data();
|
||||
auto * dst = dst_data.data();
|
||||
auto * dst_pos = dst;
|
||||
|
||||
size_t src_offset_prev = 0;
|
||||
size_t processed_size = 0;
|
||||
|
||||
const auto& decoder = (alphabet == "bitcoin") ? Base58::bitcoin() :
|
||||
((alphabet == "flickr") ? Base58::flickr() :
|
||||
((alphabet == "ripple") ? Base58::ripple() :
|
||||
Base58::base58()));
|
||||
|
||||
std::string decoded;
|
||||
for (size_t row = 0; row < input_rows_count; ++row)
|
||||
{
|
||||
size_t srclen = src_offsets[row] - src_offset_prev - 1;
|
||||
/// Why we didn't use char* here?
|
||||
/// We don't know the size of the result string beforehand (it's not byte-to-byte encoding),
|
||||
/// so we may need to do many resizes (the worst case -- we'll do it for each row)
|
||||
/// This way we do exponential resizes and one final resize after whole operation is complete
|
||||
decoded.clear();
|
||||
if (srclen)
|
||||
try
|
||||
{
|
||||
decoder.decode(decoded, source, srclen);
|
||||
}
|
||||
catch (const std::invalid_argument& e)
|
||||
{
|
||||
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
catch (const std::domain_error& e)
|
||||
{
|
||||
throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
size_t outlen = decoded.size();
|
||||
if (!decodeBase58(source, dst_pos))
|
||||
throw Exception("Invalid Base58 value, cannot be decoded", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (processed_size + outlen >= current_allocated_size)
|
||||
{
|
||||
current_allocated_size += current_allocated_size;
|
||||
dst_data.resize(current_allocated_size);
|
||||
auto processed_offset = dst_pos - dst;
|
||||
dst = dst_data.data();
|
||||
dst_pos = dst;
|
||||
dst_pos += processed_offset;
|
||||
}
|
||||
std::memcpy(dst_pos, decoded.c_str(), ++outlen);
|
||||
size_t encoded_length = strlen(reinterpret_cast<const char *>(dst_pos));
|
||||
|
||||
source += srclen + 1;
|
||||
dst_pos += outlen;
|
||||
dst_pos += encoded_length + 1;
|
||||
|
||||
dst_offsets[row] = dst_pos - dst;
|
||||
src_offset_prev = src_offsets[row];
|
||||
processed_size += outlen;
|
||||
}
|
||||
|
||||
dst_data.resize(dst_pos - dst);
|
||||
@ -190,9 +123,7 @@ public:
|
||||
return Func::name;
|
||||
}
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
@ -202,19 +133,12 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (arguments.size() != 1 && arguments.size() != 2)
|
||||
throw Exception(
|
||||
"Wrong number of arguments for function " + getName() + ": 1 or 2 expected.",
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
if (arguments.size() != 1)
|
||||
throw Exception("Wrong number of arguments for function " + getName() + ": 1 expected.", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (!isString(arguments[0].type))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[0].type->getName() + " of 1st argument of function " + getName() + ". Must be String.",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (arguments.size() == 2 && !isString(arguments[1].type))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName() + ". Must be String.",
|
||||
"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + ". Must be String.",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
@ -229,28 +153,11 @@ public:
|
||||
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName() + ", must be String",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
std::string alphabet = "bitcoin";
|
||||
|
||||
if (arguments.size() == 2)
|
||||
{
|
||||
const auto * alphabet_column = checkAndGetColumn<ColumnConst>(arguments[1].column.get());
|
||||
|
||||
if (!alphabet_column)
|
||||
throw Exception("Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
alphabet = alphabet_column->getValue<DB::String>();
|
||||
if (alphabet != "bitcoin" && alphabet != "ripple" && alphabet != "flickr" && alphabet != "gmp")
|
||||
throw Exception("Second argument for function " + getName() + " must be 'bitcoin', 'ripple', 'gmp' or 'flickr'", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
}
|
||||
|
||||
auto dst_column = ColumnString::create();
|
||||
|
||||
Func::process(*input, dst_column, alphabet, input_rows_count);
|
||||
Func::process(*input, dst_column, input_rows_count);
|
||||
|
||||
return dst_column;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,4 @@
|
||||
#include <Functions/FunctionBase58Conversion.h>
|
||||
#if USE_BASEX
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
namespace DB
|
||||
@ -14,4 +13,3 @@ void registerFunctionBase58Decode(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionBase58Conversion<Base58Decode>>();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
// .h autogenerated by cmake!
|
||||
|
||||
#cmakedefine01 USE_BASEX
|
||||
#cmakedefine01 USE_BASE64
|
||||
#cmakedefine01 USE_SIMDJSON
|
||||
#cmakedefine01 USE_RAPIDJSON
|
||||
|
@ -1,9 +1,6 @@
|
||||
if (TARGET ch_contrib::fastops)
|
||||
set(USE_FASTOPS 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::base-x)
|
||||
set(USE_BASEX 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::base64)
|
||||
set(USE_BASE64 1)
|
||||
endif()
|
||||
|
@ -49,10 +49,8 @@ void registerFunctionBase64Decode(FunctionFactory &);
|
||||
void registerFunctionTryBase64Decode(FunctionFactory &);
|
||||
#endif
|
||||
|
||||
#if USE_BASEX
|
||||
void registerFunctionBase58Encode(FunctionFactory &);
|
||||
void registerFunctionBase58Decode(FunctionFactory &);
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
void registerFunctionStem(FunctionFactory &);
|
||||
@ -110,10 +108,8 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
registerFunctionTryBase64Decode(factory);
|
||||
#endif
|
||||
|
||||
#if USE_BASEX
|
||||
registerFunctionBase58Encode(factory);
|
||||
registerFunctionBase58Decode(factory);
|
||||
#endif
|
||||
|
||||
#if USE_NLP
|
||||
registerFunctionStem(factory);
|
||||
|
@ -55,9 +55,6 @@ endif()
|
||||
if (TARGET ch_contrib::base64)
|
||||
set(USE_BASE64 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::base-x)
|
||||
set(USE_BASEX 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::yaml_cpp)
|
||||
set(USE_YAML_CPP 1)
|
||||
endif()
|
||||
|
@ -8,30 +8,6 @@ fooba
|
||||
foobar
|
||||
Hello world!
|
||||
|
||||
f
|
||||
fo
|
||||
foo
|
||||
foob
|
||||
fooba
|
||||
foobar
|
||||
Hello world!
|
||||
|
||||
f
|
||||
fo
|
||||
foo
|
||||
foob
|
||||
fooba
|
||||
foobar
|
||||
Hello world!
|
||||
|
||||
f
|
||||
fo
|
||||
foo
|
||||
foob
|
||||
fooba
|
||||
foobar
|
||||
Hello world!
|
||||
|
||||
2m
|
||||
8o8
|
||||
bQbp
|
||||
|
@ -3,15 +3,10 @@
|
||||
SET send_logs_level = 'fatal';
|
||||
|
||||
SELECT base58Encode('Hold my beer...');
|
||||
SELECT base58Encode('Hold my beer...', ''); -- { serverError 44 }
|
||||
SELECT base58Encode('Hold my beer...', 'gmp', 'third'); -- { serverError 36 }
|
||||
SELECT base58Encode('Hold my beer...', 'Second arg'); -- { serverError 42 }
|
||||
SELECT base58Decode('Hold my beer...'); -- { serverError 36 }
|
||||
|
||||
SELECT base58Decode(encoded, 'gmp') FROM (SELECT base58Encode(val, 'gmp') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
|
||||
SELECT base58Decode(encoded, 'ripple') FROM (SELECT base58Encode(val, 'ripple') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
|
||||
SELECT base58Decode(encoded, 'flickr') FROM (SELECT base58Encode(val, 'flickr') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
|
||||
SELECT base58Decode(encoded, 'bitcoin') FROM (SELECT base58Encode(val, 'bitcoin') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
|
||||
SELECT base58Decode(encoded) FROM (SELECT base58Encode(val) as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val));
|
||||
|
||||
SELECT base58Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val);
|
||||
SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ']) val);
|
||||
|
||||
SELECT base58Decode('Why_not?'); -- { serverError 36 }
|
||||
|
Loading…
Reference in New Issue
Block a user