mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-29 05:00:47 +00:00
Merge pull request #3350 from alex-krash/base64_functions
Base64 functions
This commit is contained in:
commit
18400ad4e5
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -46,3 +46,6 @@
|
||||
[submodule "contrib/unixodbc"]
|
||||
path = contrib/unixodbc
|
||||
url = https://github.com/ClickHouse-Extras/UnixODBC.git
|
||||
[submodule "contrib/base64"]
|
||||
path = contrib/base64
|
||||
url = https://github.com/aklomp/base64.git
|
||||
|
@ -256,6 +256,7 @@ include (cmake/find_capnp.cmake)
|
||||
include (cmake/find_llvm.cmake)
|
||||
include (cmake/find_cpuid.cmake)
|
||||
include (cmake/find_consistent-hashing.cmake)
|
||||
include (cmake/find_base64.cmake)
|
||||
if (ENABLE_TESTS)
|
||||
include (cmake/find_gtest.cmake)
|
||||
endif ()
|
||||
|
12
cmake/find_base64.cmake
Normal file
12
cmake/find_base64.cmake
Normal file
@ -0,0 +1,12 @@
|
||||
# Detects the bundled base64 library (git submodule contrib/base64).
#
# Input:
#   ENABLE_BASE64       - user option, ON by default.
# Output (set only when the option is ON and the submodule is checked out):
#   BASE64_INCLUDE_DIR  - directory with the library's public header
#   BASE64_LIBRARY      - name of the CMake target to link against
#   USE_BASE64          - 1
option (ENABLE_BASE64 "Enable base64" ON)

if (ENABLE_BASE64)
    # An uninitialised git submodule still leaves an (empty) contrib/base64 directory behind,
    # so testing the directory itself would report the library as present and the build
    # would fail later on missing sources. Probe a source file the build actually compiles.
    if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/base64/lib/lib.c")
        message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init --recursive")
    else ()
        set (BASE64_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/base64/include)
        set (BASE64_LIBRARY base64)
        set (USE_BASE64 1)
    endif ()
endif ()
|
||||
|
@ -45,6 +45,38 @@ if (HAVE_SSE42)
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
|
||||
endif ()
|
||||
|
||||
set (TEST_FLAG "-mssse3")
|
||||
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
|
||||
check_cxx_source_compiles("
|
||||
#include <tmmintrin.h>
|
||||
int main() {
|
||||
__m64 a = _mm_abs_pi8(__m64());
|
||||
(void)a;
|
||||
return 0;
|
||||
}
|
||||
" HAVE_SSSE3)
|
||||
|
||||
set (TEST_FLAG "-mavx")
|
||||
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
|
||||
check_cxx_source_compiles("
|
||||
#include <immintrin.h>
|
||||
int main() {
|
||||
auto a = _mm256_insert_epi8(__m256i(), 0, 0);
|
||||
(void)a;
|
||||
return 0;
|
||||
}
|
||||
" HAVE_AVX)
|
||||
|
||||
set (TEST_FLAG "-mavx2")
|
||||
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
|
||||
check_cxx_source_compiles("
|
||||
#include <immintrin.h>
|
||||
int main() {
|
||||
auto a = _mm256_add_epi16(__m256i(), __m256i());
|
||||
(void)a;
|
||||
return 0;
|
||||
}
|
||||
" HAVE_AVX2)
|
||||
|
||||
# gcc -dM -E -mpopcnt - < /dev/null | sort > gcc-dump-popcnt
|
||||
#define __POPCNT__ 1
|
||||
@ -65,5 +97,3 @@ if (HAVE_POPCNT AND NOT ARCH_AARCH64)
|
||||
endif ()
|
||||
|
||||
cmake_pop_check_state ()
|
||||
|
||||
# TODO: add here sse3 test if you want use it
|
||||
|
3
contrib/CMakeLists.txt
vendored
3
contrib/CMakeLists.txt
vendored
@ -191,3 +191,6 @@ if (USE_INTERNAL_LLVM_LIBRARY)
|
||||
add_subdirectory (llvm/llvm)
|
||||
endif ()
|
||||
|
||||
if (USE_BASE64)
|
||||
add_subdirectory (base64-cmake)
|
||||
endif()
|
||||
|
1
contrib/base64
vendored
Submodule
1
contrib/base64
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit a27c565d1b6c676beaf297fe503c4518185666f7
|
1
contrib/base64-cmake/.gitignore
vendored
Normal file
1
contrib/base64-cmake/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
config.h
|
52
contrib/base64-cmake/CMakeLists.txt
Normal file
52
contrib/base64-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,52 @@
|
||||
# Build script for the bundled base64 library; the sources live in the contrib/base64 submodule.
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/base64)

# Translates the result of a CPU feature check (HAVE_<var>) into:
#   base64_<var>      - 1 or 0, used for the "#define HAVE_<var> N" line of config.h
#   base64_<var>_opt  - the compiler flag enabling that instruction set
#                       (left unset when the feature is not available)
macro (cast_to_bool var instruction)
    if (HAVE_${var})
        set (base64_${var} 1)
        set (base64_${var}_opt ${instruction})
    else ()
        set (base64_${var} 0)
    endif ()
endmacro ()

cast_to_bool (SSSE3 "-mssse3")
cast_to_bool (SSE41 "-msse4.1")
cast_to_bool (SSE42 "-msse4.2")
cast_to_bool (AVX "-mavx")
cast_to_bool (AVX2 "-mavx2")

set (HAVE_FAST_UNALIGNED_ACCESS 0)
if (base64_SSSE3 OR base64_SSE41 OR base64_SSE42 OR base64_AVX OR base64_AVX2)
    set (HAVE_FAST_UNALIGNED_ACCESS 1)
endif ()

# Write config.h for the library. It is generated into the build tree so that configuring
# never modifies the source tree and separate build directories do not overwrite each
# other's configuration.
set (base64_config_h ${CMAKE_CURRENT_BINARY_DIR}/config.h)
file (READ ${CMAKE_CURRENT_SOURCE_DIR}/config-header.tpl header)
file (WRITE ${base64_config_h} "${header}")
file (APPEND ${base64_config_h} "#define HAVE_SSSE3 ${base64_SSSE3}\n")
file (APPEND ${base64_config_h} "#define HAVE_SSE41 ${base64_SSE41}\n")
file (APPEND ${base64_config_h} "#define HAVE_SSE42 ${base64_SSE42}\n")
file (APPEND ${base64_config_h} "#define HAVE_AVX ${base64_AVX}\n")
file (APPEND ${base64_config_h} "#define HAVE_AVX2 ${base64_AVX2}\n")
file (APPEND ${base64_config_h} "#define HAVE_FAST_UNALIGNED_ACCESS ${HAVE_FAST_UNALIGNED_ACCESS}\n")

add_library (base64 ${LINK_MODE}
    ${LIBRARY_DIR}/lib/lib.c
    ${LIBRARY_DIR}/lib/codec_choose.c
    ${LIBRARY_DIR}/lib/arch/avx/codec.c
    ${LIBRARY_DIR}/lib/arch/avx2/codec.c
    ${LIBRARY_DIR}/lib/arch/generic/codec.c
    ${LIBRARY_DIR}/lib/arch/neon32/codec.c
    ${LIBRARY_DIR}/lib/arch/neon64/codec.c
    ${LIBRARY_DIR}/lib/arch/sse41/codec.c
    ${LIBRARY_DIR}/lib/arch/sse42/codec.c
    ${LIBRARY_DIR}/lib/arch/ssse3/codec.c

    ${LIBRARY_DIR}/lib/codecs.h
    ${base64_config_h})

# Enable each instruction set only for the codec source that implements it. Passing every
# flag to the whole target would allow the compiler to emit e.g. AVX2 instructions in the
# rest of the library, which then faults on CPUs that lack them.
# The codec directory name is the lower-cased feature name (SSSE3 -> lib/arch/ssse3).
foreach (isa SSSE3 SSE41 SSE42 AVX AVX2)
    if (base64_${isa})
        string (TOLOWER "${isa}" isa_dir)
        set_source_files_properties (${LIBRARY_DIR}/lib/arch/${isa_dir}/codec.c
            PROPERTIES COMPILE_FLAGS "${base64_${isa}_opt}")
    endif ()
endforeach ()

# The build directory is on the include path so that the generated config.h is found.
target_include_directories (base64 PRIVATE ${LIBRARY_DIR}/include ${CMAKE_CURRENT_BINARY_DIR})
|
2
contrib/base64-cmake/config-header.tpl
Normal file
2
contrib/base64-cmake/config-header.tpl
Normal file
@ -0,0 +1,2 @@
|
||||
#define HAVE_NEON32 0
|
||||
#define HAVE_NEON64 0
|
@ -15,3 +15,4 @@
|
||||
#cmakedefine01 USE_POCO_MONGODB
|
||||
#cmakedefine01 USE_POCO_NETSSL
|
||||
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
|
||||
#cmakedefine01 USE_BASE64
|
||||
|
@ -52,7 +52,7 @@ list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h Func
|
||||
|
||||
add_library(clickhouse_functions ${LINK_MODE} ${clickhouse_functions_sources})
|
||||
|
||||
target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE ${CONSISTENT_HASHING_LIBRARY} consistent-hashing-sumbur ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash)
|
||||
target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE ${CONSISTENT_HASHING_LIBRARY} consistent-hashing-sumbur ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash ${BASE64_LIBRARY})
|
||||
|
||||
target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR})
|
||||
|
||||
@ -81,3 +81,7 @@ endif ()
|
||||
if (USE_EMBEDDED_COMPILER)
|
||||
target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
|
||||
endif ()
|
||||
|
||||
if (USE_BASE64)
|
||||
target_include_directories (clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR})
|
||||
endif()
|
166
dbms/src/Functions/FunctionBase64Conversion.h
Normal file
166
dbms/src/Functions/FunctionBase64Conversion.h
Normal file
@ -0,0 +1,166 @@
|
||||
#include <Common/config.h>
|
||||
#if USE_BASE64
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/GatherUtils/Algorithms.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <libbase64.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using namespace GatherUtils;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int INCORRECT_DATA;
|
||||
}
|
||||
|
||||
/// Traits of the base64Encode function: its SQL name and an upper bound for the output buffer.
struct Base64Encode
{
    static constexpr auto name = "base64Encode";

    /** Upper bound, in bytes, of the encoded output for a whole column.
      * string_length - size of the source chars buffer; the caller passes a size that
      *                 contains one terminator byte per string.
      * string_count  - number of strings.
      * Every started group of 3 payload bytes produces 4 output bytes, and each string
      * additionally needs one terminator byte in the output.
      */
    static size_t getBufferSize(size_t string_length, size_t string_count)
    {
        const size_t payload_bytes = string_length - string_count;
        const size_t groups = payload_bytes / 3 + string_count;
        return groups * 4 + string_count;
    }
};
|
||||
|
||||
/// Traits of the base64Decode function: its SQL name and an upper bound for the output buffer.
struct Base64Decode
{
    static constexpr auto name = "base64Decode";

    /** Upper bound, in bytes, of the decoded output for a whole column.
      * string_length - size of the source chars buffer; the caller passes a size that
      *                 contains one terminator byte per string.
      * string_count  - number of strings.
      * Every started group of 4 payload bytes produces at most 3 output bytes, and each
      * string additionally needs one terminator byte in the output.
      */
    static size_t getBufferSize(size_t string_length, size_t string_count)
    {
        const size_t payload_bytes = string_length - string_count;
        const size_t groups = payload_bytes / 4 + string_count;
        return groups * 3 + string_count;
    }
};
|
||||
|
||||
struct TryBase64Decode
|
||||
{
|
||||
static constexpr auto name = "tryBase64Decode";
|
||||
|
||||
static size_t getBufferSize(size_t string_length, size_t string_count)
|
||||
{
|
||||
return Base64Decode::getBufferSize(string_length, string_count);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Func>
|
||||
class FunctionBase64Conversion : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Func::name;
|
||||
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionBase64Conversion>();
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return Func::name;
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (!WhichDataType(arguments[0].type).isString())
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String.",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
|
||||
{
|
||||
const ColumnPtr column_string = block.getByPosition(arguments[0]).column;
|
||||
const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get());
|
||||
|
||||
if (!input)
|
||||
throw Exception(
|
||||
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
auto dst_column = ColumnString::create();
|
||||
auto & dst_data = dst_column->getChars();
|
||||
auto & dst_offsets = dst_column->getOffsets();
|
||||
|
||||
size_t reserve = Func::getBufferSize(input->getChars().size(), input->size());
|
||||
dst_data.resize(reserve);
|
||||
dst_offsets.resize(input_rows_count);
|
||||
|
||||
const ColumnString::Offsets & src_offsets = input->getOffsets();
|
||||
|
||||
auto source = reinterpret_cast<const char *>(input->getChars().data());
|
||||
auto dst = reinterpret_cast<char *>(dst_data.data());
|
||||
auto dst_pos = dst;
|
||||
|
||||
size_t src_offset_prev = 0;
|
||||
|
||||
int codec = getCodec();
|
||||
for (size_t row = 0; row < input_rows_count; ++row)
|
||||
{
|
||||
size_t srclen = src_offsets[row] - src_offset_prev - 1;
|
||||
size_t outlen = 0;
|
||||
|
||||
if constexpr (std::is_same_v<Func, Base64Encode>)
|
||||
{
|
||||
base64_encode(source, srclen, dst_pos, &outlen, codec);
|
||||
}
|
||||
else if constexpr (std::is_same_v<Func, Base64Decode>)
|
||||
{
|
||||
if (!base64_decode(source, srclen, dst_pos, &outlen, codec))
|
||||
{
|
||||
throw Exception("Failed to " + getName() + " input '" + String(source, srclen) + "'", ErrorCodes::INCORRECT_DATA);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// during decoding character array can be partially polluted
|
||||
// if fail, revert back and clean
|
||||
auto savepoint = dst_pos;
|
||||
if (!base64_decode(source, srclen, dst_pos, &outlen, codec))
|
||||
{
|
||||
outlen = 0;
|
||||
dst_pos = savepoint;
|
||||
// clean the symbol
|
||||
dst_pos[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
source += srclen + 1;
|
||||
dst_pos += outlen + 1;
|
||||
|
||||
dst_offsets[row] = dst_pos - dst;
|
||||
src_offset_prev = src_offsets[row];
|
||||
}
|
||||
|
||||
dst_data.resize(dst_pos - dst);
|
||||
|
||||
block.getByPosition(result).column = std::move(dst_column);
|
||||
}
|
||||
|
||||
private:
|
||||
static int getCodec()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
14
dbms/src/Functions/base64Decode.cpp
Normal file
14
dbms/src/Functions/base64Decode.cpp
Normal file
@ -0,0 +1,14 @@
|
||||
#include <Functions/FunctionBase64Conversion.h>
|
||||
#if USE_BASE64
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Adds the base64Decode function to the given factory.
void registerFunctionBase64Decode(FunctionFactory & factory)
{
    using Function = FunctionBase64Conversion<Base64Decode>;
    factory.registerFunction<Function>();
}
|
||||
}
|
||||
#endif
|
14
dbms/src/Functions/base64Encode.cpp
Normal file
14
dbms/src/Functions/base64Encode.cpp
Normal file
@ -0,0 +1,14 @@
|
||||
#include <Functions/FunctionBase64Conversion.h>
|
||||
#if USE_BASE64
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Adds the base64Encode function to the given factory.
void registerFunctionBase64Encode(FunctionFactory & factory)
{
    using Function = FunctionBase64Conversion<Base64Encode>;
    factory.registerFunction<Function>();
}
|
||||
}
|
||||
#endif
|
@ -19,6 +19,11 @@ void registerFunctionSubstringUTF8(FunctionFactory &);
|
||||
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
|
||||
void registerFunctionStartsWith(FunctionFactory &);
|
||||
void registerFunctionEndsWith(FunctionFactory &);
|
||||
#if USE_BASE64
|
||||
void registerFunctionBase64Encode(FunctionFactory &);
|
||||
void registerFunctionBase64Decode(FunctionFactory &);
|
||||
void registerFunctionTryBase64Decode(FunctionFactory &);
|
||||
#endif
|
||||
|
||||
void registerFunctionsString(FunctionFactory & factory)
|
||||
{
|
||||
@ -38,6 +43,11 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
registerFunctionAppendTrailingCharIfAbsent(factory);
|
||||
registerFunctionStartsWith(factory);
|
||||
registerFunctionEndsWith(factory);
|
||||
#if USE_BASE64
|
||||
registerFunctionBase64Encode(factory);
|
||||
registerFunctionBase64Decode(factory);
|
||||
registerFunctionTryBase64Decode(factory);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
14
dbms/src/Functions/tryBase64Decode.cpp
Normal file
14
dbms/src/Functions/tryBase64Decode.cpp
Normal file
@ -0,0 +1,14 @@
|
||||
#include <Functions/FunctionBase64Conversion.h>
|
||||
#if USE_BASE64
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Adds the tryBase64Decode function to the given factory.
void registerFunctionTryBase64Decode(FunctionFactory & factory)
{
    using Function = FunctionBase64Conversion<TryBase64Decode>;
    factory.registerFunction<Function>();
}
|
||||
}
|
||||
#endif
|
@ -39,6 +39,7 @@ const char * auto_config_build[]
|
||||
"USE_POCO_DATAODBC", "@USE_POCO_DATAODBC@",
|
||||
"USE_POCO_MONGODB", "@USE_POCO_MONGODB@",
|
||||
"USE_POCO_NETSSL", "@USE_POCO_NETSSL@",
|
||||
"USE_BASE64", "@USE_BASE64@",
|
||||
|
||||
nullptr, nullptr
|
||||
};
|
||||
|
@ -0,0 +1,16 @@
|
||||
|
||||
Zg==
|
||||
Zm8=
|
||||
Zm9v
|
||||
Zm9vYg==
|
||||
Zm9vYmE=
|
||||
Zm9vYmFy
|
||||
|
||||
f
|
||||
fo
|
||||
foo
|
||||
foob
|
||||
fooba
|
||||
foobar
|
||||
1 1
|
||||
|
@ -0,0 +1,6 @@
|
||||
SET send_logs_level = 'none';

-- Encode inputs of length 0 to 6.
SELECT base64Encode(val) FROM (SELECT arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) AS val);

-- Decode the corresponding base64 strings.
SELECT base64Decode(val) FROM (SELECT arrayJoin(['', 'Zg==', 'Zm8=', 'Zm9v', 'Zm9vYg==', 'Zm9vYmE=', 'Zm9vYmFy']) AS val);

-- Round trips in both directions.
SELECT base64Decode(base64Encode('foo')) = 'foo', base64Encode(base64Decode('Zm9v')) == 'Zm9v';

-- Malformed input: the try-variant still returns a row, the strict variant fails with server error 117.
SELECT tryBase64Decode('Zm9vYmF=Zm9v');
SELECT base64Decode('Zm9vYmF=Zm9v'); -- { serverError 117 }
|
@ -74,5 +74,13 @@ If the 's' string is non-empty and does not contain the 'c' character at the end
|
||||
|
||||
Returns the string 's' that was converted from the encoding in 'from' to the encoding in 'to'.
|
||||
|
||||
## base64Encode(s)
|
||||
Encodes the string 's' into base64.
|
||||
|
||||
## base64Decode(s)
|
||||
Decodes the base64-encoded string 's' into the original string. Raises an exception if decoding fails.
|
||||
|
||||
## tryBase64Decode(s)
|
||||
Similar to base64Decode, but returns an empty string if decoding fails.
|
||||
|
||||
[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) <!--hide-->
|
@ -59,4 +59,13 @@
|
||||
## convertCharset(s, from, to)
|
||||
Возвращает сконвертированную из кодировки from в кодировку to строку s.
|
||||
|
||||
## base64Encode(s)
|
||||
Производит кодирование строки s в base64-представление.
|
||||
|
||||
## base64Decode(s)
|
||||
Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение.
|
||||
|
||||
## tryBase64Decode(s)
|
||||
Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку.
|
||||
|
||||
[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/string_functions/) <!--hide-->
|
Loading…
Reference in New Issue
Block a user