ClickHouse/src/Functions/greatCircleDistance.cpp

358 lines
13 KiB
C++
Raw Normal View History

2019-06-30 18:20:32 +00:00
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnConst.h>
#include <Common/typeid_cast.h>
2021-05-17 07:30:42 +00:00
#include <Functions/IFunction.h>
2019-06-30 18:20:32 +00:00
#include <Functions/FunctionFactory.h>
2020-05-28 15:35:05 +00:00
#include <Functions/PerformanceAdaptors.h>
#include <Interpreters/castColumn.h>
2022-04-04 12:07:05 +00:00
#include <Common/TargetSpecific.h>
2021-10-02 07:13:14 +00:00
#include <base/range.h>
#include <cmath>
#include <numbers>
2019-06-30 18:20:32 +00:00
namespace DB
{
2020-02-25 18:10:48 +00:00
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
2020-02-25 18:10:48 +00:00
}
2019-06-30 18:20:32 +00:00
2019-12-09 00:02:50 +00:00
/** Calculates the distance between two geographical locations.
* There are three variants:
* greatCircleAngle: calculates the distance on a sphere in degrees: https://en.wikipedia.org/wiki/Great-circle_distance
* greatCircleDistance: calculates the distance on a sphere in meters.
* geoDistance: calculates the distance on WGS-84 ellipsoid in meters.
2019-12-09 00:02:50 +00:00
*
* The function calculates distance between two points on Earth specified by longitude and latitude in degrees.
2019-12-09 00:02:50 +00:00
*
* Latitude must be in [-90, 90], longitude must be [-180, 180].
*
* Original code of this implementation of this function is here:
* https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825.
* Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license.
* Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf
* The main idea of this implementation is optimisations based on Taylor series, trigonometric identity
* and calculated constants once for cosine, arcsine(sqrt) and look up table.
*/
2019-11-27 00:42:46 +00:00
namespace
{
2019-10-16 17:25:46 +00:00
constexpr double PI = std::numbers::pi_v<double>;
constexpr float PI_F = std::numbers::pi_v<float>;
2019-12-09 01:09:03 +00:00
constexpr float RAD_IN_DEG = static_cast<float>(PI / 180.0);
constexpr float RAD_IN_DEG_HALF = static_cast<float>(PI / 360.0);
2019-12-08 23:00:34 +00:00
constexpr size_t COS_LUT_SIZE = 1024; // maxerr 0.00063%
2022-09-11 03:38:59 +00:00
constexpr float COS_LUT_SIZE_F = 1024.0f; // maxerr 0.00063%
2019-12-08 23:00:34 +00:00
constexpr size_t ASIN_SQRT_LUT_SIZE = 512;
constexpr size_t METRIC_LUT_SIZE = 1024;
2019-10-16 17:25:46 +00:00
/** Earth radius in meters using WGS84 authalic radius.
* We use this value to be consistent with H3 library.
2019-12-09 17:25:16 +00:00
*/
2022-09-11 03:38:59 +00:00
constexpr float EARTH_RADIUS = 6371007.180918475f;
2019-12-09 17:25:16 +00:00
constexpr float EARTH_DIAMETER = 2 * EARTH_RADIUS;
2019-12-08 23:09:05 +00:00
2019-12-08 23:00:34 +00:00
float cos_lut[COS_LUT_SIZE + 1]; /// cos(x) table
2019-12-08 23:09:05 +00:00
float asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + 1]; /// asin(sqrt(x)) * earth_diameter table
float sphere_metric_lut[METRIC_LUT_SIZE + 1]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude
float sphere_metric_meters_lut[METRIC_LUT_SIZE + 1]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude
float wgs84_metric_meters_lut[2 * (METRIC_LUT_SIZE + 1)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude
2019-10-16 17:25:46 +00:00
2019-10-16 17:43:44 +00:00
inline double sqr(double v)
{
2019-11-27 00:42:46 +00:00
return v * v;
2019-10-16 17:43:44 +00:00
}
2019-11-27 00:42:46 +00:00
inline float sqrf(float v)
2019-10-16 17:43:44 +00:00
{
2019-11-27 00:42:46 +00:00
return v * v;
2019-10-16 17:43:44 +00:00
}
2019-10-16 17:25:46 +00:00
2019-11-27 00:42:46 +00:00
void geodistInit()
2019-10-16 17:25:46 +00:00
{
2019-12-08 23:00:34 +00:00
for (size_t i = 0; i <= COS_LUT_SIZE; ++i)
cos_lut[i] = static_cast<float>(cos(2 * PI * i / COS_LUT_SIZE)); // [0, 2 * pi] -> [0, COS_LUT_SIZE]
2019-10-16 17:25:46 +00:00
2019-12-08 23:00:34 +00:00
for (size_t i = 0; i <= ASIN_SQRT_LUT_SIZE; ++i)
asin_sqrt_lut[i] = static_cast<float>(asin(
2019-12-08 23:00:34 +00:00
sqrt(static_cast<double>(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE]
2019-10-16 17:25:46 +00:00
2019-12-08 23:00:34 +00:00
for (size_t i = 0; i <= METRIC_LUT_SIZE; ++i)
2019-10-16 17:25:46 +00:00
{
double latitude = i * (PI / METRIC_LUT_SIZE) - PI * 0.5; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE]
/// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees),
/// depending on the latitude (in radians).
2019-12-09 19:53:40 +00:00
/// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67
wgs84_metric_meters_lut[i * 2] = static_cast<float>(sqr(111132.09 - 566.05 * cos(2 * latitude) + 1.20 * cos(4 * latitude)));
wgs84_metric_meters_lut[i * 2 + 1] = static_cast<float>(sqr(111415.13 * cos(latitude) - 94.55 * cos(3 * latitude) + 0.12 * cos(5 * latitude)));
sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
2019-12-08 23:00:34 +00:00
2022-09-11 03:38:59 +00:00
sphere_metric_lut[i] = static_cast<float>(sqr(cos(latitude)));
2019-10-16 17:25:46 +00:00
}
}
2021-01-23 00:14:16 +00:00
inline NO_SANITIZE_UNDEFINED size_t floatToIndex(float x)
{
/// Implementation specific behaviour on overflow or infinite value.
return static_cast<size_t>(x);
}
2019-11-27 00:42:46 +00:00
inline float geodistDegDiff(float f)
2019-10-16 17:25:46 +00:00
{
f = fabsf(f);
2019-10-16 17:25:46 +00:00
if (f > 180)
f = 360 - f;
return f;
}
2019-11-27 00:42:46 +00:00
inline float geodistFastCos(float x)
2019-10-16 17:25:46 +00:00
{
2022-09-11 03:38:59 +00:00
float y = fabsf(x) * (COS_LUT_SIZE_F / PI_F / 2.0f);
2021-01-23 00:14:16 +00:00
size_t i = floatToIndex(y);
2019-10-16 17:25:46 +00:00
y -= i;
2019-12-08 23:00:34 +00:00
i &= (COS_LUT_SIZE - 1);
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
2019-10-16 17:25:46 +00:00
}
2019-11-27 00:42:46 +00:00
inline float geodistFastSin(float x)
2019-10-16 17:25:46 +00:00
{
2022-09-11 03:38:59 +00:00
float y = fabsf(x) * (COS_LUT_SIZE_F / PI_F / 2.0f);
2021-01-23 00:14:16 +00:00
size_t i = floatToIndex(y);
2019-10-16 17:25:46 +00:00
y -= i;
2019-12-08 23:00:34 +00:00
i = (i - COS_LUT_SIZE / 4) & (COS_LUT_SIZE - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2
return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
2019-10-16 17:25:46 +00:00
}
/// fast implementation of asin(sqrt(x))
/// max error in floats 0.00369%, in doubles 0.00072%
2019-11-27 00:42:46 +00:00
inline float geodistFastAsinSqrt(float x)
2019-10-16 17:25:46 +00:00
{
if (x < 0.122f)
2019-10-16 17:25:46 +00:00
{
// distance under 4546 km, Taylor error under 0.00072%
float y = sqrtf(x);
2019-10-16 17:25:46 +00:00
return y + x * y * 0.166666666666666f + x * x * y * 0.075f + x * x * x * y * 0.044642857142857f;
}
if (x < 0.948f)
2019-10-16 17:25:46 +00:00
{
// distance under 17083 km, 512-entry LUT error under 0.00072%
2019-12-08 23:00:34 +00:00
x *= ASIN_SQRT_LUT_SIZE;
2021-01-23 00:14:16 +00:00
size_t i = floatToIndex(x);
2019-12-08 23:00:34 +00:00
return asin_sqrt_lut[i] + (asin_sqrt_lut[i + 1] - asin_sqrt_lut[i]) * (x - i);
2019-10-16 17:25:46 +00:00
}
return asinf(sqrtf(x)); // distance over 17083 km, just compute exact
2019-10-16 17:25:46 +00:00
}
enum class Method
{
SPHERE_DEGREES,
SPHERE_METERS,
WGS84_METERS,
};
2020-05-28 15:35:05 +00:00
}
DECLARE_MULTITARGET_CODE(
namespace
{
template <Method method>
2019-12-08 23:09:05 +00:00
float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
{
float lat_diff = geodistDegDiff(lat1deg - lat2deg);
float lon_diff = geodistDegDiff(lon1deg - lon2deg);
if (lon_diff < 13)
{
// points are close enough; use flat ellipsoid model
// interpolate metric coefficients using latitudes midpoint
2019-12-09 01:02:09 +00:00
/// Why comparing only difference in longitude?
/// If longitudes are different enough, there is a big difference between great circle line and a line with constant latitude.
2022-03-11 22:46:02 +00:00
/// (Remember how a plane flies from Amsterdam to New York)
2019-12-09 01:02:09 +00:00
/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
2021-01-29 02:05:46 +00:00
float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes
2021-01-23 00:14:16 +00:00
size_t latitude_midpoint_index = floatToIndex(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
2019-12-08 23:09:05 +00:00
/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".
2021-05-08 14:54:08 +00:00
float k_lat{};
float k_lon{};
2019-12-08 23:09:05 +00:00
if constexpr (method == Method::SPHERE_DEGREES)
{
k_lat = 1;
2019-12-08 23:59:25 +00:00
k_lon = sphere_metric_lut[latitude_midpoint_index]
+ (sphere_metric_lut[latitude_midpoint_index + 1] - sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
}
else if constexpr (method == Method::SPHERE_METERS)
{
2022-09-11 03:38:59 +00:00
k_lat = sqrf(EARTH_DIAMETER * PI_F / 360.0f);
2019-12-08 23:09:05 +00:00
k_lon = sphere_metric_meters_lut[latitude_midpoint_index]
+ (sphere_metric_meters_lut[latitude_midpoint_index + 1] - sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
}
else if constexpr (method == Method::WGS84_METERS)
{
k_lat = wgs84_metric_meters_lut[latitude_midpoint_index * 2]
+ (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2] - wgs84_metric_meters_lut[latitude_midpoint_index * 2]) * (latitude_midpoint - latitude_midpoint_index);
k_lon = wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]
+ (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2 + 1] - wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]) * (latitude_midpoint - latitude_midpoint_index);
}
2019-12-08 23:09:05 +00:00
/// Metric on a tangent plane: it differs from Euclidean metric only by scale of coordinates.
2019-12-08 23:09:05 +00:00
return sqrtf(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff);
}
else
{
// points too far away; use haversine
2019-12-09 01:09:03 +00:00
float a = sqrf(geodistFastSin(lat_diff * RAD_IN_DEG_HALF))
+ geodistFastCos(lat1deg * RAD_IN_DEG) * geodistFastCos(lat2deg * RAD_IN_DEG) * sqrf(geodistFastSin(lon_diff * RAD_IN_DEG_HALF));
2019-12-08 23:09:05 +00:00
if constexpr (method == Method::SPHERE_DEGREES)
2022-09-11 03:38:59 +00:00
return (360.0f / PI_F) * geodistFastAsinSqrt(a);
else
return EARTH_DIAMETER * geodistFastAsinSqrt(a);
2019-10-16 17:25:46 +00:00
}
}
2019-11-27 00:42:46 +00:00
}
template <Method method>
class FunctionGeoDistance : public IFunction
2019-06-30 18:20:32 +00:00
{
public:
static constexpr auto name =
(method == Method::SPHERE_DEGREES) ? "greatCircleAngle"
: ((method == Method::SPHERE_METERS) ? "greatCircleDistance"
: "geoDistance");
2019-06-30 18:20:32 +00:00
private:
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 4; }
bool useDefaultImplementationForConstants() const override { return true; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
2019-06-30 18:20:32 +00:00
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
2021-06-15 19:55:21 +00:00
for (const auto arg_idx : collections::range(0, arguments.size()))
2019-06-30 18:20:32 +00:00
{
2020-04-22 08:31:10 +00:00
const auto * arg = arguments[arg_idx].get();
2019-12-08 22:09:32 +00:00
if (!isNumber(WhichDataType(arg)))
2019-06-30 18:20:32 +00:00
throw Exception(
2019-12-08 22:09:32 +00:00
"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". Must be numeric",
2019-06-30 18:20:32 +00:00
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
return std::make_shared<DataTypeFloat32>();
2019-06-30 18:20:32 +00:00
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
2019-06-30 18:20:32 +00:00
{
auto dst = ColumnVector<Float32>::create();
auto & dst_data = dst->getData();
dst_data.resize(input_rows_count);
2019-06-30 18:20:32 +00:00
auto arguments_copy = arguments;
2022-05-25 18:59:31 +00:00
for (auto & argument : arguments_copy)
{
2022-05-25 14:19:29 +00:00
argument.column = argument.column->convertToFullColumnIfConst();
argument.column = castColumn(argument, result_type);
argument.type = result_type;
}
const auto * col_lon1 = convertArgumentColumnToFloat32(arguments_copy, 0);
const auto * col_lat1 = convertArgumentColumnToFloat32(arguments_copy, 1);
const auto * col_lon2 = convertArgumentColumnToFloat32(arguments_copy, 2);
const auto * col_lat2 = convertArgumentColumnToFloat32(arguments_copy, 3);
2019-06-30 18:20:32 +00:00
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
{
dst_data[row_num] = distance<method>(
col_lon1->getData()[row_num], col_lat1->getData()[row_num],
col_lon2->getData()[row_num], col_lat2->getData()[row_num]);
}
2019-06-30 18:20:32 +00:00
2020-10-19 13:42:14 +00:00
return dst;
2019-06-30 18:20:32 +00:00
}
const ColumnFloat32 * convertArgumentColumnToFloat32(const ColumnsWithTypeAndName & arguments, size_t argument_index) const
{
const auto * column_typed = checkAndGetColumn<ColumnFloat32>(arguments[argument_index].column.get());
if (!column_typed)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal type {} of argument {} of function {}. Must be Float32.",
arguments[argument_index].type->getName(),
argument_index + 1,
getName());
return column_typed;
}
2019-06-30 18:20:32 +00:00
};
2020-05-28 15:35:05 +00:00
) // DECLARE_MULTITARGET_CODE
template <Method method>
class FunctionGeoDistance : public TargetSpecific::Default::FunctionGeoDistance<method>
{
public:
2021-06-01 12:20:52 +00:00
explicit FunctionGeoDistance(ContextPtr context) : selector(context)
2020-05-28 15:35:05 +00:00
{
selector.registerImplementation<TargetArch::Default,
TargetSpecific::Default::FunctionGeoDistance<method>>();
2020-05-29 09:23:22 +00:00
2020-05-28 15:35:05 +00:00
#if USE_MULTITARGET_CODE
selector.registerImplementation<TargetArch::AVX,
TargetSpecific::AVX::FunctionGeoDistance<method>>();
selector.registerImplementation<TargetArch::AVX2,
TargetSpecific::AVX2::FunctionGeoDistance<method>>();
selector.registerImplementation<TargetArch::AVX512F,
TargetSpecific::AVX512F::FunctionGeoDistance<method>>();
#endif
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
2020-05-28 15:35:05 +00:00
{
2020-10-19 13:42:14 +00:00
return selector.selectAndExecute(arguments, result_type, input_rows_count);
2020-05-28 15:35:05 +00:00
}
2021-06-01 12:20:52 +00:00
static FunctionPtr create(ContextPtr context)
2020-05-28 15:35:05 +00:00
{
return std::make_shared<FunctionGeoDistance<method>>(context);
}
private:
ImplementationSelector<IFunction> selector;
};
2019-06-30 18:20:32 +00:00
REGISTER_FUNCTION(GeoDistance)
2019-06-30 18:20:32 +00:00
{
2019-11-27 00:42:46 +00:00
geodistInit();
factory.registerFunction<FunctionGeoDistance<Method::SPHERE_DEGREES>>();
factory.registerFunction<FunctionGeoDistance<Method::SPHERE_METERS>>();
factory.registerFunction<FunctionGeoDistance<Method::WGS84_METERS>>();
2019-06-30 18:20:32 +00:00
}
}