2013-06-10 15:19:37 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
#include <DataTypes/DataTypeString.h>
|
|
|
|
#include <DataTypes/DataTypeFixedString.h>
|
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Common/Volnitsky.h>
|
2019-12-09 13:12:54 +00:00
|
|
|
#include <Functions/IFunctionImpl.h>
|
2017-07-21 06:35:58 +00:00
|
|
|
#include <Functions/FunctionHelpers.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/ReadBufferFromMemory.h>
|
|
|
|
#include <IO/ReadHelpers.h>
|
2017-03-12 10:13:45 +00:00
|
|
|
|
2013-06-10 15:19:37 +00:00
|
|
|
|
2017-06-01 13:41:58 +00:00
|
|
|
/** Functions for retrieving "visit parameters".
|
|
|
|
* Visit parameters in Yandex.Metrika are a special kind of JSON.
|
|
|
|
* These functions are applicable to almost any JSON.
|
|
|
|
* Implemented via templates from FunctionsStringSearch.h.
|
2014-11-12 17:23:26 +00:00
|
|
|
*
|
2017-05-27 15:45:25 +00:00
|
|
|
* Check if there is a parameter
|
2017-04-01 07:20:54 +00:00
|
|
|
* visitParamHas
|
2014-11-12 17:23:26 +00:00
|
|
|
*
|
2017-05-27 15:45:25 +00:00
|
|
|
* Retrieve the numeric value of the parameter
|
2017-04-01 07:20:54 +00:00
|
|
|
* visitParamExtractUInt
|
|
|
|
* visitParamExtractInt
|
|
|
|
* visitParamExtractFloat
|
|
|
|
* visitParamExtractBool
|
2014-11-12 17:23:26 +00:00
|
|
|
*
|
2017-05-27 15:45:25 +00:00
|
|
|
* Retrieve the string value of the parameter
|
|
|
|
* visitParamExtractString - unescape value
|
2017-04-01 07:20:54 +00:00
|
|
|
* visitParamExtractRaw
|
2013-06-10 15:19:37 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-03-14 23:10:51 +00:00
|
|
|
/// Error codes referenced by this header. Only declared here (`extern`); the definition lives elsewhere.
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int ILLEGAL_COLUMN;
|
|
|
|
}
|
|
|
|
|
2013-06-10 15:19:37 +00:00
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename NumericType>
|
2013-06-11 12:31:08 +00:00
|
|
|
struct ExtractNumericType
|
2013-06-10 15:19:37 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
using ResultType = NumericType;
|
2014-11-12 17:23:26 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
static ResultType extract(const UInt8 * begin, const UInt8 * end)
|
|
|
|
{
|
|
|
|
ReadBufferFromMemory in(begin, end - begin);
|
2014-11-12 17:23:26 +00:00
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/// Read numbers in double quotes
|
2017-04-01 07:20:54 +00:00
|
|
|
if (!in.eof() && *in.position() == '"')
|
|
|
|
++in.position();
|
2014-11-12 17:23:26 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
ResultType x = 0;
|
|
|
|
if (!in.eof())
|
2017-12-22 02:25:03 +00:00
|
|
|
{
|
|
|
|
if constexpr (std::is_floating_point_v<NumericType>)
|
|
|
|
tryReadFloatText(x, in);
|
|
|
|
else
|
|
|
|
tryReadIntText(x, in);
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
return x;
|
|
|
|
}
|
2013-06-10 15:19:37 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/** Searches for occurrences of a field in the visit parameter and calls ParamExtractor
|
|
|
|
* for each occurrence of the field, passing it a pointer to the part of the string,
|
|
|
|
* where the occurrence of the field value begins.
|
|
|
|
* ParamExtractor must parse and return the value of the desired type.
|
2014-11-12 17:23:26 +00:00
|
|
|
*
|
2017-05-27 15:45:25 +00:00
|
|
|
* If a field was not found or an incorrect value is associated with the field,
|
|
|
|
* then the default value used - 0.
|
2013-06-10 15:19:37 +00:00
|
|
|
*/
|
2016-01-27 03:11:28 +00:00
|
|
|
template <typename ParamExtractor>
|
2013-06-10 15:19:37 +00:00
|
|
|
struct ExtractParamImpl
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
using ResultType = typename ParamExtractor::ResultType;
|
|
|
|
|
2020-02-17 18:53:59 +00:00
|
|
|
static constexpr bool use_default_implementation_for_constants = true;
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/// It is assumed that `res` is the correct size and initialized with zeros.
|
2020-03-23 02:12:31 +00:00
|
|
|
static void vectorConstant(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
|
2017-04-01 07:20:54 +00:00
|
|
|
std::string needle,
|
|
|
|
PaddedPODArray<ResultType> & res)
|
|
|
|
{
|
2017-05-27 15:45:25 +00:00
|
|
|
/// We are looking for a parameter simply as a substring of the form "name"
|
2017-04-01 07:20:54 +00:00
|
|
|
needle = "\"" + needle + "\":";
|
|
|
|
|
2018-09-02 03:00:04 +00:00
|
|
|
const UInt8 * begin = data.data();
|
2017-04-01 07:20:54 +00:00
|
|
|
const UInt8 * pos = begin;
|
|
|
|
const UInt8 * end = pos + data.size();
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/// The current index in the string array.
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t i = 0;
|
|
|
|
|
|
|
|
Volnitsky searcher(needle.data(), needle.size(), end - pos);
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/// We will search for the next occurrence in all strings at once.
|
2017-04-01 07:20:54 +00:00
|
|
|
while (pos < end && end != (pos = searcher.search(pos, end - pos)))
|
|
|
|
{
|
2017-05-27 15:45:25 +00:00
|
|
|
/// Let's determine which index it belongs to.
|
2017-04-01 07:20:54 +00:00
|
|
|
while (begin + offsets[i] <= pos)
|
|
|
|
{
|
|
|
|
res[i] = 0;
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/// We check that the entry does not pass through the boundaries of strings.
|
2017-04-01 07:20:54 +00:00
|
|
|
if (pos + needle.size() < begin + offsets[i])
|
2019-07-05 18:36:20 +00:00
|
|
|
res[i] = ParamExtractor::extract(pos + needle.size(), begin + offsets[i] - 1); /// don't include terminating zero
|
2017-04-01 07:20:54 +00:00
|
|
|
else
|
|
|
|
res[i] = 0;
|
|
|
|
|
|
|
|
pos = begin + offsets[i];
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
|
2018-09-02 03:52:04 +00:00
|
|
|
if (res.size() > i)
|
|
|
|
memset(&res[i], 0, (res.size() - i) * sizeof(res[0]));
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2020-03-23 02:12:31 +00:00
|
|
|
template <typename... Args> static void vectorVector(Args &&...)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
throw Exception("Functions 'visitParamHas' and 'visitParamExtract*' doesn't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
}
|
|
|
|
|
2020-03-23 02:12:31 +00:00
|
|
|
template <typename... Args> static void constantVector(Args &&...)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
throw Exception("Functions 'visitParamHas' and 'visitParamExtract*' doesn't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
}
|
2020-03-26 18:55:41 +00:00
|
|
|
|
|
|
|
template <typename... Args>
|
|
|
|
static void vectorFixedConstant(Args &&...)
|
|
|
|
{
|
|
|
|
throw Exception("Functions 'visitParamHas' don't support FixedString haystack argument", ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
}
|
2013-06-10 15:19:37 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/** For the case where the type of field to extract is a string.
|
2013-06-10 15:19:37 +00:00
|
|
|
*/
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename ParamExtractor>
|
2013-06-10 15:19:37 +00:00
|
|
|
struct ExtractParamToStringImpl
|
|
|
|
{
|
2018-11-25 00:08:50 +00:00
|
|
|
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
|
2017-04-01 07:20:54 +00:00
|
|
|
std::string needle,
|
2018-11-25 00:08:50 +00:00
|
|
|
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-05-27 15:45:25 +00:00
|
|
|
/// Constant 5 is taken from a function that performs a similar task FunctionsStringSearch.h::ExtractImpl
|
2018-11-26 00:56:50 +00:00
|
|
|
res_data.reserve(data.size() / 5);
|
2017-04-01 07:20:54 +00:00
|
|
|
res_offsets.resize(offsets.size());
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/// We are looking for a parameter simply as a substring of the form "name"
|
2017-04-01 07:20:54 +00:00
|
|
|
needle = "\"" + needle + "\":";
|
|
|
|
|
2018-09-02 03:00:04 +00:00
|
|
|
const UInt8 * begin = data.data();
|
2017-04-01 07:20:54 +00:00
|
|
|
const UInt8 * pos = begin;
|
|
|
|
const UInt8 * end = pos + data.size();
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/// The current index in the string array.
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t i = 0;
|
|
|
|
|
|
|
|
Volnitsky searcher(needle.data(), needle.size(), end - pos);
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/// We will search for the next occurrence in all strings at once.
|
2017-04-01 07:20:54 +00:00
|
|
|
while (pos < end && end != (pos = searcher.search(pos, end - pos)))
|
|
|
|
{
|
2017-05-27 15:45:25 +00:00
|
|
|
/// Determine which index it belongs to.
|
2017-04-01 07:20:54 +00:00
|
|
|
while (begin + offsets[i] <= pos)
|
|
|
|
{
|
|
|
|
res_data.push_back(0);
|
|
|
|
res_offsets[i] = res_data.size();
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
|
2017-05-27 15:45:25 +00:00
|
|
|
/// We check that the entry does not pass through the boundaries of strings.
|
2017-04-01 07:20:54 +00:00
|
|
|
if (pos + needle.size() < begin + offsets[i])
|
|
|
|
ParamExtractor::extract(pos + needle.size(), begin + offsets[i], res_data);
|
|
|
|
|
|
|
|
pos = begin + offsets[i];
|
|
|
|
|
|
|
|
res_data.push_back(0);
|
|
|
|
res_offsets[i] = res_data.size();
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (i < res_offsets.size())
|
|
|
|
{
|
|
|
|
res_data.push_back(0);
|
|
|
|
res_offsets[i] = res_data.size();
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
}
|
2013-06-10 15:19:37 +00:00
|
|
|
};
|
|
|
|
|
2014-11-12 17:23:26 +00:00
|
|
|
}
|