Fix crash

This commit is contained in:
Nikita Mikhaylov 2023-11-13 21:49:36 +01:00 committed by Robert Schulze
parent 2931bbfe62
commit 0ae79f6d18
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
5 changed files with 48 additions and 3 deletions

View File

@ -1,10 +1,11 @@
#include <Compression/ICompressionCodec.h>
#include <DataTypes/IDataType.h>
#include <Compression/CompressionInfo.h>
#include <Compression/CompressionFactory.h>
#include <Common/Exception.h>
#include <base/arithmeticOverflow.h>
#include <base/unaligned.h>
#include <Parsers/IAST.h>
#include "Common/Exception.h"
#include "DataTypes/IDataType.h"
#include <boost/integer/common_factor.hpp>
#include <libdivide-config.h>
@ -92,6 +93,16 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
unalignedStore<T>(dest, gcd_divider);
dest += sizeof(T);
/// There are two cases:
/// 1) GCD is 0. This may happen, for example, when the source data consists only of zeros.
/// We cannot apply this codec in that case, so let's copy the source data to the destination as is.
/// 2) GCD is 1. Dividing by it changes nothing, so the result will be the same as copying the source data.
if unlikely(gcd_divider == 0)
{
memcpy(dest, source, source_size);
return;
}
if constexpr (sizeof(T) <= 8)
{
/// libdivide supports only UInt32 and UInt64.
@ -132,10 +143,24 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
const T gcd_multiplier = unalignedLoad<T>(source);
source += sizeof(T);
/// Again two cases:
/// 1) GCD is 0. This is a "special" flag which signals that the remaining data
/// was left unchanged because the codec was not applicable.
/// 2) GCD is 1. Multiplying by 1 would not change the values, so the payload can be copied as is.
if unlikely(gcd_multiplier == 0 || gcd_multiplier == 1)
{
    /// Subtraction is safe, because we checked that source_size >= sizeof(T)
    const auto payload_size = source_size - sizeof(T);
    if unlikely(payload_size != output_size)
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress");
    /// `source` has already been advanced past the sizeof(T) header, so only the
    /// payload may be copied. Copying source_size bytes would read sizeof(T) bytes
    /// past the end of the input and write sizeof(T) bytes past the validated output size.
    memcpy(dest, source, payload_size);
    return;
}
while (source < source_end)
{
if (dest + sizeof(T) > dest_end) [[unlikely]]
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress");
unalignedStore<T>(dest, unalignedLoad<T>(source) * gcd_multiplier);
source += sizeof(T);

View File

@ -0,0 +1,7 @@
-- Writes 100 zero values into a Nullable(Int64) column stored with the
-- GCD + LZ4 codec chain, then reads them back and discards the output.
-- NOTE(review): presumably covers the all-zero input case of the GCD codec - confirm.
DROP TABLE IF EXISTS test;

CREATE TABLE test
(
    a Nullable(Int64) CODEC(GCD, LZ4)
)
ENGINE = MergeTree
ORDER BY ();

INSERT INTO test
SELECT 0
FROM numbers(1e2);

-- FORMAT Null: the query only has to complete; no rows are printed.
SELECT *
FROM test
FORMAT Null;

DROP TABLE IF EXISTS test;

View File

@ -0,0 +1,13 @@
-- Builds a wide table in which most unsigned-integer columns (scalar and array)
-- are stored with the GCD + LZ4 codec chain, fills it from `hits`, and reads
-- everything back while discarding the output.
-- NOTE(review): the column list presumably mirrors the `hits` table schema
-- (INSERT ... SELECT * depends on matching column order) - confirm.
DROP TABLE IF EXISTS hits_gcd;

CREATE TABLE hits_gcd
(
    `WatchID` UInt64 CODEC(GCD, LZ4),
    `JavaEnable` UInt8 CODEC(GCD, LZ4),
    `Title` String,
    `GoodEvent` Int16,
    `EventTime` DateTime,
    `EventDate` Date,
    `CounterID` UInt32 CODEC(GCD, LZ4),
    `ClientIP` UInt32 CODEC(GCD, LZ4),
    `ClientIP6` FixedString(16),
    `RegionID` UInt32 CODEC(GCD, LZ4),
    `UserID` UInt64 CODEC(GCD, LZ4),
    `CounterClass` Int8,
    `OS` UInt8 CODEC(GCD, LZ4),
    `UserAgent` UInt8 CODEC(GCD, LZ4),
    `URL` String,
    `Referer` String,
    `URLDomain` String,
    `RefererDomain` String,
    `Refresh` UInt8 CODEC(GCD, LZ4),
    `IsRobot` UInt8 CODEC(GCD, LZ4),
    `RefererCategories` Array(UInt16) CODEC(GCD, LZ4),
    `URLCategories` Array(UInt16) CODEC(GCD, LZ4),
    `URLRegions` Array(UInt32) CODEC(GCD, LZ4),
    `RefererRegions` Array(UInt32) CODEC(GCD, LZ4),
    `ResolutionWidth` UInt16 CODEC(GCD, LZ4),
    `ResolutionHeight` UInt16 CODEC(GCD, LZ4),
    `ResolutionDepth` UInt8 CODEC(GCD, LZ4),
    `FlashMajor` UInt8 CODEC(GCD, LZ4),
    `FlashMinor` UInt8 CODEC(GCD, LZ4),
    `FlashMinor2` String,
    `NetMajor` UInt8 CODEC(GCD, LZ4),
    `NetMinor` UInt8 CODEC(GCD, LZ4),
    `UserAgentMajor` UInt16 CODEC(GCD, LZ4),
    `UserAgentMinor` FixedString(2),
    `CookieEnable` UInt8 CODEC(GCD, LZ4),
    `JavascriptEnable` UInt8 CODEC(GCD, LZ4),
    `IsMobile` UInt8 CODEC(GCD, LZ4),
    `MobilePhone` UInt8 CODEC(GCD, LZ4),
    `MobilePhoneModel` String,
    `Params` String,
    `IPNetworkID` UInt32 CODEC(GCD, LZ4),
    `TraficSourceID` Int8,
    `SearchEngineID` UInt16 CODEC(GCD, LZ4),
    `SearchPhrase` String,
    `AdvEngineID` UInt8 CODEC(GCD, LZ4),
    `IsArtifical` UInt8 CODEC(GCD, LZ4),
    `WindowClientWidth` UInt16 CODEC(GCD, LZ4),
    `WindowClientHeight` UInt16 CODEC(GCD, LZ4),
    `ClientTimeZone` Int16,
    `ClientEventTime` DateTime,
    `SilverlightVersion1` UInt8 CODEC(GCD, LZ4),
    `SilverlightVersion2` UInt8 CODEC(GCD, LZ4),
    `SilverlightVersion3` UInt32 CODEC(GCD, LZ4),
    `SilverlightVersion4` UInt16 CODEC(GCD, LZ4),
    `PageCharset` String,
    `CodeVersion` UInt32 CODEC(GCD, LZ4),
    `IsLink` UInt8 CODEC(GCD, LZ4),
    `IsDownload` UInt8 CODEC(GCD, LZ4),
    `IsNotBounce` UInt8 CODEC(GCD, LZ4),
    `FUniqID` UInt64 CODEC(GCD, LZ4),
    `HID` UInt32 CODEC(GCD, LZ4),
    `IsOldCounter` UInt8 CODEC(GCD, LZ4),
    `IsEvent` UInt8 CODEC(GCD, LZ4),
    `IsParameter` UInt8 CODEC(GCD, LZ4),
    `DontCountHits` UInt8 CODEC(GCD, LZ4),
    `WithHash` UInt8 CODEC(GCD, LZ4),
    `HitColor` FixedString(1),
    `UTCEventTime` DateTime,
    `Age` UInt8 CODEC(GCD, LZ4),
    `Sex` UInt8 CODEC(GCD, LZ4),
    `Income` UInt8 CODEC(GCD, LZ4),
    `Interests` UInt16 CODEC(GCD, LZ4),
    `Robotness` UInt8 CODEC(GCD, LZ4),
    `GeneralInterests` Array(UInt16) CODEC(GCD, LZ4),
    `RemoteIP` UInt32 CODEC(GCD, LZ4),
    `RemoteIP6` FixedString(16),
    `WindowName` Int32,
    `OpenerName` Int32,
    `HistoryLength` Int16,
    `BrowserLanguage` FixedString(2),
    `BrowserCountry` FixedString(2),
    `SocialNetwork` String,
    `SocialAction` String,
    `HTTPError` UInt16 CODEC(GCD, LZ4),
    `SendTiming` Int32,
    `DNSTiming` Int32,
    `ConnectTiming` Int32,
    `ResponseStartTiming` Int32,
    `ResponseEndTiming` Int32,
    `FetchTiming` Int32,
    `RedirectTiming` Int32,
    `DOMInteractiveTiming` Int32,
    `DOMContentLoadedTiming` Int32,
    `DOMCompleteTiming` Int32,
    `LoadEventStartTiming` Int32,
    `LoadEventEndTiming` Int32,
    `NSToDOMContentLoadedTiming` Int32,
    `FirstPaintTiming` Int32,
    `RedirectCount` Int8,
    `SocialSourceNetworkID` UInt8 CODEC(GCD, LZ4),
    `SocialSourcePage` String,
    `ParamPrice` Int64,
    `ParamOrderID` String,
    `ParamCurrency` FixedString(3),
    `ParamCurrencyID` UInt16 CODEC(GCD, LZ4),
    `GoalsReached` Array(UInt32) CODEC(GCD, LZ4),
    `OpenstatServiceName` String,
    `OpenstatCampaignID` String,
    `OpenstatAdID` String,
    `OpenstatSourceID` String,
    `UTMSource` String,
    `UTMMedium` String,
    `UTMCampaign` String,
    `UTMContent` String,
    `UTMTerm` String,
    `FromTag` String,
    `HasGCLID` UInt8 CODEC(GCD, LZ4),
    `RefererHash` UInt64 CODEC(GCD, LZ4),
    `URLHash` UInt64 CODEC(GCD, LZ4),
    `CLID` UInt32 CODEC(GCD, LZ4),
    `YCLID` UInt64 CODEC(GCD, LZ4),
    `ShareService` String,
    `ShareURL` String,
    `ShareTitle` String,
    `ParsedParams.Key1` Array(String),
    `ParsedParams.Key2` Array(String),
    `ParsedParams.Key3` Array(String),
    `ParsedParams.Key4` Array(String),
    `ParsedParams.Key5` Array(String),
    `ParsedParams.ValueDouble` Array(Float64),
    `IslandID` FixedString(16),
    `RequestNum` UInt32 CODEC(GCD, LZ4),
    `RequestTry` UInt8
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);

-- Every row written here passes through the column codecs on insert.
INSERT INTO hits_gcd
SELECT *
FROM hits;

-- Full read-back; FORMAT Null means the query only has to complete.
SELECT *
FROM hits_gcd
FORMAT Null;

DROP TABLE IF EXISTS hits_gcd;