Merge pull request #50066 from bigo-sg/improve_geohash

Allow geohashEncode to be called with a mix of const and non-const column arguments
This commit is contained in:
Alexey Milovidov 2023-07-09 08:44:35 +03:00 committed by GitHub
commit 84600287a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 44 deletions

View File

@ -4,6 +4,7 @@
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <string>
@ -16,7 +17,6 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_COLUMN;
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
}
@ -37,7 +37,6 @@ public:
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
@ -58,14 +57,25 @@ public:
return std::make_shared<DataTypeString>();
}
template <typename LonType, typename LatType>
bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
const ColumnVector<LonType> * longitude = checkAndGetColumn<ColumnVector<LonType>>(lon_column);
const ColumnVector<LatType> * latitude = checkAndGetColumn<ColumnVector<LatType>>(lat_column);
if (!latitude || !longitude)
return false;
const IColumn * longitude = arguments[0].column.get();
const IColumn * latitude = arguments[1].column.get();
ColumnPtr precision;
if (arguments.size() < 3)
precision = DataTypeUInt8().createColumnConst(longitude->size(), GEOHASH_MAX_TEXT_LENGTH);
else
precision = arguments[2].column;
ColumnPtr res_column;
vector(longitude, latitude, precision.get(), res_column);
return res_column;
}
private:
void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const
{
auto col_str = ColumnString::create();
ColumnString::Chars & out_vec = col_str->getChars();
ColumnString::Offsets & out_offsets = col_str->getOffsets();
@ -80,8 +90,9 @@ public:
for (size_t i = 0; i < size; ++i)
{
const Float64 longitude_value = longitude->getElement(i);
const Float64 latitude_value = latitude->getElement(i);
const Float64 longitude_value = lon_column->getFloat64(i);
const Float64 latitude_value = lat_column->getFloat64(i);
const UInt64 precision_value = std::min<UInt64>(precision_column->get64(i), GEOHASH_MAX_TEXT_LENGTH);
const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos);
@ -95,37 +106,6 @@ public:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)");
result = std::move(col_str);
return true;
}
/// Builds the result column for the function from its argument columns.
/// arguments[0] is used as the longitude column and arguments[1] as the latitude column;
/// an optional arguments[2] supplies the precision.
/// Throws ILLEGAL_COLUMN when none of the attempted longitude/latitude type pairings matches.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
const IColumn * longitude = arguments[0].column.get();
const IColumn * latitude = arguments[1].column.get();
// The precision is read once, from row 0 of the third argument, so a single value applies to
// every row. It is capped at GEOHASH_MAX_TEXT_LENGTH; when the argument is absent the cap is used.
const UInt64 precision_value = std::min<UInt64>(GEOHASH_MAX_TEXT_LENGTH,
arguments.size() == 3 ? arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH);
ColumnPtr res_column;
// Try each Float32/Float64 pairing for (longitude, latitude) in turn; `||` short-circuits, so
// evaluation stops at the first tryExecute call that returns true.
// NOTE(review): tryExecute is expected to have stored its output in res_column by then — confirm
// against its full definition.
if (tryExecute<Float32, Float32>(longitude, latitude, precision_value, res_column) ||
tryExecute<Float64, Float32>(longitude, latitude, precision_value, res_column) ||
tryExecute<Float32, Float64>(longitude, latitude, precision_value, res_column) ||
tryExecute<Float64, Float64>(longitude, latitude, precision_value, res_column))
return res_column;
// No pairing matched: collect the column names of all arguments, comma-separated, for the error text.
std::string arguments_description;
for (size_t i = 0; i < arguments.size(); ++i)
{
if (i != 0)
arguments_description += ", ";
arguments_description += arguments[i].column->getName();
}
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument types: {} for function {}",
arguments_description, getName());
}
};

View File

@ -38,8 +38,8 @@ HDFSFileInfo::~HDFSFileInfo()
}
void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config,
const String & prefix, bool isUser)
void HDFSBuilderWrapper::loadFromConfig(
const Poco::Util::AbstractConfiguration & config, const String & prefix, [[maybe_unused]] bool isUser)
{
Poco::Util::AbstractConfiguration::Keys keys;

View File

@ -9,6 +9,10 @@ default precision:
ezs42d000000
mixing const and non-const-columns:
ezs42d000000
ezs42d000000
ezs42d000000
ezs42d000000
ezs42d000000
from table (with const precision):
1 6 Ok
1 6 Ok

View File

@ -24,7 +24,10 @@ select geohashEncode(-5.60302734375, 42.593994140625);
select 'mixing const and non-const-columns:';
select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), 0);
select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), materialize(0)); -- { serverError 44 }
select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), materialize(0));
select geohashEncode(-5.60302734375, materialize(42.593994140625), 0);
select geohashEncode(materialize(-5.60302734375), 42.593994140625, 0);
select geohashEncode(-5.60302734375, 42.593994140625, 0);
select 'from table (with const precision):';