2023-04-09 08:41:11 +00:00
|
|
|
#include <cctype>
|
|
|
|
|
|
|
|
#include <Functions/FunctionFactory.h>
|
|
|
|
#include <Functions/FunctionsHashing.h>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Soundex encoder used as the Impl parameter of FunctionStringHashFixedString.
///
/// The result is always `length` bytes: the first ASCII letter of the input in upper case,
/// followed by up to three digits taken from `soundex_map`, right-padded with '0'.
/// Bytes that are not ASCII letters (digits, punctuation, bytes >= 0x80) are ignored.
/// Input without any ASCII letter yields "0000".
///
/// Every letter coded '0' (vowels as well as H, W and Y) resets duplicate suppression,
/// so e.g. "Ashcraft" is encoded as "A226".
struct SoundexImpl
{
    static constexpr auto name = "soundex";
    static constexpr size_t length = 4;

    /// Digit for each letter, indexed by (upper-case letter - 'A').
    /// '0' marks letters that never emit a digit.
    /*                                   ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    static constexpr auto soundex_map = "01230120022455012623010202";

    /// Locale-independent letter test.
    /// std::isalpha must not be used here: passing a negative `char` to it is undefined
    /// behaviour, and a locale that reports a non-ASCII byte as alphabetic would make
    /// the `soundex_map` lookup read out of bounds.
    static constexpr bool is_ascii_alpha(char c)
    {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /// Locale-independent upper-casing; bytes other than 'a'..'z' are returned unchanged.
    static constexpr char to_ascii_upper(char c)
    {
        return (c >= 'a' && c <= 'z') ? static_cast<char>(c - ('a' - 'A')) : c;
    }

    /// Advances `*ptr` to the next ASCII letter in [*ptr, in_end) and returns its soundex digit.
    ///
    /// @param ptr     In/out cursor. On return it points at the letter that was coded,
    ///                or at `in_end` if no letter was left. The letter itself is not consumed.
    /// @param in_end  One-past-the-end of the input.
    /// @return        A character '0'..'6' from `soundex_map`, or 0 (NUL) when the input is exhausted.
    static char get_scode(const char ** ptr, const char * in_end)
    {
        while (*ptr < in_end && !is_ascii_alpha(**ptr))
            ++(*ptr);

        if (*ptr >= in_end)
            return 0;

        return soundex_map[to_ascii_upper(**ptr) - 'A'];
    }

    /// Writes the soundex code of [begin, begin + size) into `out_char_data`.
    ///
    /// @param begin          Start of the input bytes (not required to be NUL-terminated).
    /// @param size           Number of input bytes.
    /// @param out_char_data  Destination buffer; exactly `length` bytes are written.
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
    {
        const char * in_cur = begin;
        const char * const in_end = begin + size;
        unsigned char * const out_end = out_char_data + length;

        /// Positions in_cur on the first letter (if there is one) and yields its digit,
        /// which seeds duplicate suppression for the letters that follow.
        char last_ch = get_scode(&in_cur, in_end);

        if (in_cur < in_end)
        {
            *out_char_data++ = static_cast<unsigned char>(to_ascii_upper(*in_cur));
            ++in_cur; /// Only step past the first letter when one exists, so the cursor never leaves the input.

            while (out_char_data < out_end)
            {
                const char ch = get_scode(&in_cur, in_end);
                if (ch == 0)
                    break; /// No more letters.

                ++in_cur; /// Consume the letter that was just coded.

                if (ch != '0' && ch != last_ch)
                    *out_char_data++ = static_cast<unsigned char>(ch);

                last_ch = ch;
            }
        }

        /// Pad short codes (and letterless input) with '0'.
        while (out_char_data < out_end)
            *out_char_data++ = '0';
    }
};
|
|
|
|
|
|
|
|
/// Registers the function built from SoundexImpl (SoundexImpl::name is "soundex") in the function factory.
REGISTER_FUNCTION(Soundex)
{
    using FunctionSoundex = FunctionStringHashFixedString<SoundexImpl>;
    factory.registerFunction<FunctionSoundex>();
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|