2020-08-16 19:38:56 +00:00
|
|
|
#include <DataTypes/DataTypeString.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Columns/ColumnsNumber.h>
|
|
|
|
#include <Functions/FunctionFactory.h>
|
2020-12-02 12:08:03 +00:00
|
|
|
#include <Parsers/queryNormalization.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/find_symbols.h>
|
2020-08-16 19:38:56 +00:00
|
|
|
#include <Common/StringUtils/StringUtils.h>
|
|
|
|
#include <Common/SipHash.h>
|
|
|
|
|
|
|
|
|
|
|
|
/** The function returns a 64-bit hash value that is identical for similar queries.
  * See also 'normalizeQuery'. This function is only slightly more efficient.
  */
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
/// Error codes referenced by this translation unit; only declared here.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;            /// thrown from executeImpl
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;  /// thrown from getReturnTypeImpl
}
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2021-01-26 14:51:30 +00:00
|
|
|
template <bool keep_names>
|
2020-08-16 19:38:56 +00:00
|
|
|
struct Impl
|
|
|
|
{
|
|
|
|
static void vector(
|
|
|
|
const ColumnString::Chars & data,
|
|
|
|
const ColumnString::Offsets & offsets,
|
|
|
|
PaddedPODArray<UInt64> & res_data)
|
|
|
|
{
|
|
|
|
size_t size = offsets.size();
|
|
|
|
res_data.resize(size);
|
|
|
|
|
|
|
|
ColumnString::Offset prev_src_offset = 0;
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
{
|
|
|
|
ColumnString::Offset curr_src_offset = offsets[i];
|
2021-01-26 14:51:30 +00:00
|
|
|
res_data[i] = normalizedQueryHash<keep_names>(
|
2020-12-02 12:08:03 +00:00
|
|
|
reinterpret_cast<const char *>(&data[prev_src_offset]), reinterpret_cast<const char *>(&data[curr_src_offset - 1]));
|
2020-08-16 19:38:56 +00:00
|
|
|
prev_src_offset = offsets[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-01-26 14:51:30 +00:00
|
|
|
template <bool keep_names>
|
2020-08-16 19:38:56 +00:00
|
|
|
class FunctionNormalizedQueryHash : public IFunction
|
|
|
|
{
|
|
|
|
public:
|
2021-01-26 14:51:30 +00:00
|
|
|
static constexpr auto name = keep_names ? "normalizedQueryHashKeepNames" : "normalizedQueryHash";
|
2021-06-01 12:20:52 +00:00
|
|
|
static FunctionPtr create(ContextPtr)
|
2020-08-16 19:38:56 +00:00
|
|
|
{
|
|
|
|
return std::make_shared<FunctionNormalizedQueryHash>();
|
|
|
|
}
|
|
|
|
|
|
|
|
String getName() const override
|
|
|
|
{
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t getNumberOfArguments() const override
|
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
|
|
|
{
|
|
|
|
if (!isString(arguments[0]))
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName());
|
2020-08-16 19:38:56 +00:00
|
|
|
|
|
|
|
return std::make_shared<DataTypeUInt64>();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool useDefaultImplementationForConstants() const override { return true; }
|
|
|
|
|
2021-06-22 16:21:23 +00:00
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
2021-04-29 14:48:26 +00:00
|
|
|
|
2020-11-17 13:24:45 +00:00
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
2020-08-16 19:38:56 +00:00
|
|
|
{
|
2020-10-19 15:27:41 +00:00
|
|
|
const ColumnPtr column = arguments[0].column;
|
2020-08-16 19:38:56 +00:00
|
|
|
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
|
|
|
|
{
|
|
|
|
auto col_res = ColumnUInt64::create();
|
|
|
|
typename ColumnUInt64::Container & vec_res = col_res->getData();
|
|
|
|
vec_res.resize(col->size());
|
2021-01-26 14:51:30 +00:00
|
|
|
Impl<keep_names>::vector(col->getChars(), col->getOffsets(), vec_res);
|
2020-10-19 15:27:41 +00:00
|
|
|
return col_res;
|
2020-08-16 19:38:56 +00:00
|
|
|
}
|
|
|
|
else
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
|
|
|
arguments[0].column->getName(), getName());
|
2020-08-16 19:38:56 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2022-07-04 07:01:39 +00:00
|
|
|
/// Registers both instantiations: first the keep_names = true variant
/// ("normalizedQueryHashKeepNames"), then the keep_names = false variant ("normalizedQueryHash").
REGISTER_FUNCTION(NormalizedQueryHash)
{
    using HashKeepingNames = FunctionNormalizedQueryHash<true>;
    using HashWithoutNames = FunctionNormalizedQueryHash<false>;

    factory.registerFunction<HashKeepingNames>();
    factory.registerFunction<HashWithoutNames>();
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|