Fix base58 trash

This commit is contained in:
Alexey Milovidov 2022-09-11 08:09:14 +02:00
parent 3d8a2130cf
commit e0a9ae0496
3 changed files with 48 additions and 40 deletions

View File

@ -9,9 +9,10 @@ size_t encodeBase58(const UInt8 * src, size_t src_length, UInt8 * dst)
const char * base58_encoding_alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
size_t processed = 0;
size_t idx = 0;
size_t zeros = 0;
while (*src == '\0' && processed < src_length - 1)
while (processed < src_length && *src == 0)
{
++processed;
++zeros;
@ -20,15 +21,13 @@ size_t encodeBase58(const UInt8 * src, size_t src_length, UInt8 * dst)
++src;
}
size_t idx = 0;
while (processed < src_length - 1)
while (processed < src_length)
{
UInt8 carry = static_cast<UInt8>(*src);
UInt32 carry = *src;
for (size_t j = 0; j < idx; ++j)
{
carry += static_cast<UInt32>(dst[j] << 8);
carry += static_cast<UInt32>(dst[j]) << 8;
dst[j] = static_cast<UInt8>(carry % 58);
carry /= 58;
}
@ -39,6 +38,7 @@ size_t encodeBase58(const UInt8 * src, size_t src_length, UInt8 * dst)
++idx;
carry /= 58;
}
++src;
++processed;
}
@ -56,14 +56,13 @@ size_t encodeBase58(const UInt8 * src, size_t src_length, UInt8 * dst)
dst[c_idx] = base58_encoding_alphabet[static_cast<UInt8>(dst[c_idx])];
}
dst[idx] = '\0';
return zeros + idx + 1;
return zeros + idx;
}
size_t decodeBase58(const UInt8 * src, size_t srclen, UInt8 * dst)
std::optional<size_t> decodeBase58(const UInt8 * src, size_t src_length, UInt8 * dst)
{
static const Int8 map_digits[128] =
static const Int8 map_digits[256] =
{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@ -72,13 +71,22 @@ size_t decodeBase58(const UInt8 * src, size_t srclen, UInt8 * dst)
-1, 9, 10, 11, 12, 13, 14, 15, 16, -1, 17, 18, 19, 20, 21, -1,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, -1, -1, -1, -1, -1,
-1, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, -1, 44, 45, 46,
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
size_t processed = 0;
size_t idx = 0;
size_t zeros = 0;
while (*src == '1' && processed < srclen - 1)
while (processed < src_length && *src == '1')
{
++processed;
++zeros;
@ -87,14 +95,12 @@ size_t decodeBase58(const UInt8 * src, size_t srclen, UInt8 * dst)
++src;
}
size_t idx = 0;
while (processed < srclen-1)
while (processed < src_length)
{
UInt32 carry = map_digits[*src];
if (carry == static_cast<UInt32>(-1))
{
return 0;
return {};
}
for (size_t j = 0; j < idx; ++j)
{
@ -115,13 +121,12 @@ size_t decodeBase58(const UInt8 * src, size_t srclen, UInt8 * dst)
size_t c_idx = idx >> 1;
for (size_t i = 0; i < c_idx; ++i)
{
char s = dst[i];
UInt8 s = dst[i];
dst[i] = dst[idx - (i + 1)];
dst[idx - (i + 1)] = s;
}
dst[idx] = '\0';
return zeros + idx + 1;
return zeros + idx;
}
}

View File

@ -1,12 +1,13 @@
#pragma once
#include <Core/Types.h>
#include <optional>
namespace DB
{
size_t encodeBase58(const UInt8 * src, size_t src_length, UInt8 * dst);
size_t decodeBase58(const UInt8 * src, size_t src_length, UInt8 * dst);
std::optional<size_t> decodeBase58(const UInt8 * src, size_t src_length, UInt8 * dst);
}

View File

@ -41,23 +41,24 @@ struct Base58Encode
const auto * src = src_column.getChars().data();
auto * dst = dst_data.data();
auto * dst_pos = dst;
size_t src_offset_prev = 0;
size_t prev_src_offset = 0;
size_t current_dst_offset = 0;
for (size_t row = 0; row < input_rows_count; ++row)
{
size_t srclen = src_offsets[row] - src_offset_prev;
auto encoded_size = encodeBase58(src, srclen, dst_pos);
size_t current_src_offset = src_offsets[row];
size_t src_length = current_src_offset - prev_src_offset - 1;
size_t encoded_size = encodeBase58(&src[prev_src_offset], src_length, &dst[current_dst_offset]);
prev_src_offset = current_src_offset;
current_dst_offset += encoded_size;
dst[current_dst_offset] = 0;
++current_dst_offset;
src += srclen;
dst_pos += encoded_size;
dst_offsets[row] = dst_pos - dst;
src_offset_prev = src_offsets[row];
dst_offsets[row] = current_dst_offset;
}
dst_data.resize(dst_pos - dst);
dst_data.resize(current_dst_offset);
}
};
@ -82,26 +83,27 @@ struct Base58Decode
const auto * src = src_column.getChars().data();
auto * dst = dst_data.data();
auto * dst_pos = dst;
size_t src_offset_prev = 0;
size_t prev_src_offset = 0;
size_t current_dst_offset = 0;
for (size_t row = 0; row < input_rows_count; ++row)
{
size_t srclen = src_offsets[row] - src_offset_prev;
auto decoded_size = decodeBase58(src, srclen, dst_pos);
size_t current_src_offset = src_offsets[row];
size_t src_length = current_src_offset - prev_src_offset - 1;
std::optional<size_t> decoded_size = decodeBase58(&src[prev_src_offset], src_length, &dst[current_dst_offset]);
if (!decoded_size)
throw Exception("Invalid Base58 value, cannot be decoded", ErrorCodes::BAD_ARGUMENTS);
src += srclen;
dst_pos += decoded_size;
prev_src_offset = current_src_offset;
current_dst_offset += *decoded_size;
dst[current_dst_offset] = 0;
++current_dst_offset;
dst_offsets[row] = dst_pos - dst;
src_offset_prev = src_offsets[row];
dst_offsets[row] = current_dst_offset;
}
dst_data.resize(dst_pos - dst);
dst_data.resize(current_dst_offset);
}
};