Merge pull request #12006 from ClickHouse/fix-error-geohash

Avoid floating point error in geohashesInBox #11369
This commit is contained in:
alexey-milovidov 2020-06-28 23:09:39 +03:00 committed by GitHub
commit aaadc8addb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 48 additions and 39 deletions

View File

@ -3,6 +3,7 @@
#include <cassert> #include <cassert>
#include <Functions/GeoHash.h> #include <Functions/GeoHash.h>
namespace DB namespace DB
{ {
@ -260,15 +261,21 @@ void geohashDecode(const char * encoded_string, size_t encoded_len, Float64 * lo
*latitude = decodeCoordinate(lat_encoded, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE)); *latitude = decodeCoordinate(lat_encoded, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE));
} }
GeohashesInBoxPreparedArgs geohashesInBoxPrepare(Float64 longitude_min, GeohashesInBoxPreparedArgs geohashesInBoxPrepare(
Float64 latitude_min, Float64 longitude_min,
Float64 longitude_max, Float64 latitude_min,
Float64 latitude_max, Float64 longitude_max,
uint8_t precision) Float64 latitude_max,
uint8_t precision)
{ {
precision = geohashPrecision(precision); precision = geohashPrecision(precision);
if (longitude_max < longitude_min || latitude_max < latitude_min) if (longitude_max < longitude_min
|| latitude_max < latitude_min
|| std::isnan(longitude_min)
|| std::isnan(longitude_max)
|| std::isnan(latitude_min)
|| std::isnan(latitude_max))
{ {
return {}; return {};
} }
@ -281,51 +288,50 @@ GeohashesInBoxPreparedArgs geohashesInBoxPrepare(Float64 longitude_min,
const auto lon_step = getSpan(precision, LONGITUDE); const auto lon_step = getSpan(precision, LONGITUDE);
const auto lat_step = getSpan(precision, LATITUDE); const auto lat_step = getSpan(precision, LATITUDE);
// align max to the right(or up) border of geohash grid cell to ensure that cell is in result. /// Align max to the right (or up) border of geohash grid cell to ensure that cell is in result.
Float64 lon_min = floor(longitude_min / lon_step) * lon_step; Float64 lon_min = floor(longitude_min / lon_step) * lon_step;
Float64 lat_min = floor(latitude_min / lat_step) * lat_step; Float64 lat_min = floor(latitude_min / lat_step) * lat_step;
Float64 lon_max = ceil(longitude_max / lon_step) * lon_step; Float64 lon_max = ceil(longitude_max / lon_step) * lon_step;
Float64 lat_max = ceil(latitude_max / lat_step) * lat_step; Float64 lat_max = ceil(latitude_max / lat_step) * lat_step;
const auto lon_span = lon_max - lon_min; UInt32 lon_items = (lon_max - lon_min) / lon_step;
const auto lat_span = lat_max - lat_min; UInt32 lat_items = (lat_max - lat_min) / lat_step;
// in case of a very small (or zero) span, produce at least 1 item.
const auto items_count = std::max(size_t{1}, static_cast<size_t>(ceil(lon_span/lon_step * lat_span/lat_step)));
return GeohashesInBoxPreparedArgs{ return GeohashesInBoxPreparedArgs
items_count, {
precision, std::max<UInt64>(1, UInt64(lon_items) * lat_items),
lon_min, lon_items,
lat_min, lat_items,
lon_max, lon_min,
lat_max, lat_min,
lon_step, lon_step,
lat_step lat_step,
precision
}; };
} }
UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & args, char * out) UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & args, char * out)
{ {
if (args.items_count == 0 if (args.precision == 0
|| args.precision == 0 || args.precision > MAX_PRECISION
|| args.precision > MAX_PRECISION || args.longitude_step <= 0
|| args.latitude_min > args.latitude_max || args.latitude_step <= 0)
|| args.longitude_min > args.longitude_max
|| args.longitude_step <= 0
|| args.latitude_step <= 0)
{ {
return 0; return 0;
} }
UInt64 items = 0; UInt64 items = 0;
for (auto lon = args.longitude_min; lon < args.longitude_max; lon += args.longitude_step) // NOLINT for (size_t i = 0; i < args.longitude_items; ++i)
{ {
for (auto lat = args.latitude_min; lat < args.latitude_max; lat += args.latitude_step) // NOLINT for (size_t j = 0; j < args.latitude_items; ++j)
{ {
assert(items <= args.items_count); size_t length = geohashEncodeImpl(
args.longitude_min + args.longitude_step * i,
args.latitude_min + args.latitude_step * j,
args.precision,
out);
size_t l = geohashEncodeImpl(lon, lat, args.precision, out); out += length;
out += l;
*out = '\0'; *out = '\0';
++out; ++out;
@ -335,8 +341,8 @@ UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & args, char * out)
if (items == 0) if (items == 0)
{ {
size_t l = geohashEncodeImpl(args.longitude_min, args.latitude_min, args.precision, out); size_t length = geohashEncodeImpl(args.longitude_min, args.latitude_min, args.precision, out);
out += l; out += length;
*out = '\0'; *out = '\0';
++out; ++out;

View File

@ -23,15 +23,17 @@ std::vector<std::pair<Float64, Float64>> geohashCoverBox(
struct GeohashesInBoxPreparedArgs struct GeohashesInBoxPreparedArgs
{ {
UInt64 items_count = 0; UInt64 items_count = 0;
uint8_t precision = 0;
UInt32 longitude_items = 0;
UInt32 latitude_items = 0;
Float64 longitude_min = 0.0; Float64 longitude_min = 0.0;
Float64 latitude_min = 0.0; Float64 latitude_min = 0.0;
Float64 longitude_max = 0.0;
Float64 latitude_max = 0.0;
Float64 longitude_step = 0.0; Float64 longitude_step = 0.0;
Float64 latitude_step = 0.0; Float64 latitude_step = 0.0;
uint8_t precision = 0;
}; };
GeohashesInBoxPreparedArgs geohashesInBoxPrepare( GeohashesInBoxPreparedArgs geohashesInBoxPrepare(

View File

@ -11,6 +11,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
namespace DB namespace DB
{ {
@ -121,9 +122,7 @@ public:
geohashesInBox(prepared_args, out); geohashesInBox(prepared_args, out);
for (UInt64 i = 1; i <= prepared_args.items_count ; ++i) for (UInt64 i = 1; i <= prepared_args.items_count ; ++i)
{
res_strings_offsets.push_back(starting_offset + (prepared_args.precision + 1) * i); res_strings_offsets.push_back(starting_offset + (prepared_args.precision + 1) * i);
}
res_offsets.push_back((res_offsets.empty() ? 0 : res_offsets.back()) + prepared_args.items_count); res_offsets.push_back((res_offsets.empty() ? 0 : res_offsets.back()) + prepared_args.items_count);
} }
if (!res_strings_offsets.empty() && res_strings_offsets.back() != res_strings_chars.size()) if (!res_strings_offsets.empty() && res_strings_offsets.back() != res_strings_chars.size())

View File

@ -0,0 +1 @@
['7zz','ebp','ebr','ebx','ebz','ecp','ecr','ecx','ecz','efp','efr','efx','efz','egp','egr','egx','egz','eup','eur','eux','euz','evp','evr','evx','evz','eyp','eyr','eyx','eyz','ezp','ezr','ezx','ezz','gbp','gbr','gbx','gbz','gcp','gcr','gcx','gcz','gfp','gfr','gfx','gfz','ggp','ggr','ggx','ggz','gup','gur','gux','guz','gvp','gvr','gvx','gvz','gyp','gyr','gyx','gyz','gzp','gzr','gzx','gzz','kpb','s00','s02','s08','s0b','s10','s12','s18','s1b','s40','s42','s48','s4b','s50','s52','s58','s5b','sh0','sh2','sh8','shb','sj0','sj2','sj8','sjb','sn0','sn2','sn8','snb','sp0','sp2','sp8','spb','u00','u02','u08','u0b','u10','u12','u18','u1b','u40','u42','u48','u4b','u50','u52','u58','u5b','uh0','uh2','uh8','uhb','uj0','uj2','uj8','ujb','un0','un2','un8','unb','up0','up2','up8','upb']

View File

@ -0,0 +1 @@
SELECT arraySort(geohashesInBox(-1., -1., 1., inf, 3));