mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge pull request #14409 from dimarub2000/obfuscator_uuid
Added UUID to Obfuscator
This commit is contained in:
commit
ff2d9aec1a
@ -13,6 +13,7 @@
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <DataStreams/LimitBlockInputStream.h>
|
||||
@ -363,6 +364,17 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI
|
||||
}
|
||||
}
|
||||
|
||||
static void transformUUID(const UInt128 & src, UInt128 & dst, UInt64 seed)
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(seed);
|
||||
hash.update(reinterpret_cast<const char *>(&src), sizeof(UInt128));
|
||||
|
||||
/// Saving version and variant from an old UUID
|
||||
hash.get128(reinterpret_cast<char *>(&dst));
|
||||
dst.high = (dst.high & 0x1fffffffffffffffull) | (src.high & 0xe000000000000000ull);
|
||||
dst.low = (dst.low & 0xffffffffffff0fffull) | (src.low & 0x000000000000f000ull);
|
||||
}
|
||||
|
||||
class FixedStringModel : public IModel
|
||||
{
|
||||
@ -400,6 +412,38 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class UUIDModel : public IModel
|
||||
{
|
||||
private:
|
||||
UInt64 seed;
|
||||
|
||||
public:
|
||||
explicit UUIDModel(UInt64 seed_) : seed(seed_) {}
|
||||
|
||||
void train(const IColumn &) override {}
|
||||
void finalize() override {}
|
||||
|
||||
ColumnPtr generate(const IColumn & column) override
|
||||
{
|
||||
const ColumnUInt128 & src_column = assert_cast<const ColumnUInt128 &>(column);
|
||||
const auto & src_data = src_column.getData();
|
||||
|
||||
auto res_column = ColumnUInt128::create();
|
||||
auto & res_data = res_column->getData();
|
||||
|
||||
res_data.resize(src_data.size());
|
||||
for (size_t i = 0; i < src_column.size(); ++i)
|
||||
transformUUID(src_data[i], res_data[i], seed);
|
||||
|
||||
return res_column;
|
||||
}
|
||||
|
||||
void updateSeed() override
|
||||
{
|
||||
seed = hash(seed);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Leave date part as is and apply pseudorandom permutation to time difference with previous value within the same log2 class.
|
||||
class DateTimeModel : public IModel
|
||||
@ -935,6 +979,9 @@ public:
|
||||
if (typeid_cast<const DataTypeFixedString *>(&data_type))
|
||||
return std::make_unique<FixedStringModel>(seed);
|
||||
|
||||
if (typeid_cast<const DataTypeUUID *>(&data_type))
|
||||
return std::make_unique<UUIDModel>(seed);
|
||||
|
||||
if (const auto * type = typeid_cast<const DataTypeArray *>(&data_type))
|
||||
return std::make_unique<ArrayModel>(get(*type->getNestedType(), seed, markov_model_params));
|
||||
|
||||
|
38
tests/queries/0_stateless/01472_obfuscator_uuid.reference
Normal file
38
tests/queries/0_stateless/01472_obfuscator_uuid.reference
Normal file
@ -0,0 +1,38 @@
|
||||
FROM RAW DATA
|
||||
701e8006-fc9f-4496-80ba-efa6817b917b
|
||||
45bb7333-965b-4526-870e-4f941edb025b
|
||||
4bd62524-e33c-43e5-882d-f1d96cf5561e
|
||||
3f5ffba3-19ff-4f3d-8861-60ae6e1fc1aa
|
||||
7a8b45d2-c18b-4e8c-89eb-abf5bee88931
|
||||
a4e72d0e-f9fa-465e-8d9d-151b9ced94df
|
||||
e0936acf-6e8f-42aa-8f56-d1363476eece
|
||||
239bb790-5293-40df-92ae-472294b6e178
|
||||
508d0e80-729f-4e3b-9336-4c5c8792f6be
|
||||
cb5818ab-83b5-48a8-94b0-5177e30176d9
|
||||
41e3a274-eea9-41d8-a128-de5a6658fcfd
|
||||
7e54dcae-0bb4-4c4f-a636-54a705fb8b40
|
||||
d1d258c2-a35f-4c00-abfa-8addbcbc5471
|
||||
40634f4f-37bf-44e4-ac7c-6f024ad19990
|
||||
94abef70-f2d6-4f7b-ad60-3889409f1dac
|
||||
7c74fbd8-bf79-46ee-adfe-96271040a4f7
|
||||
a72dc048-f72f-470e-b0f9-60cfad6e1157
|
||||
b6f1ec08-8473-4fa2-b134-73db040b0d82
|
||||
TRANSFORMED TO
|
||||
f2d98bb7-5670-4ffb-80b5-023c58a5535b
|
||||
0bb323a5-7854-4fc2-9d4b-695a0d44d617
|
||||
b420b532-fac3-4e06-8ea1-5f2e19cecc4e
|
||||
6c35e54e-a349-48d6-9870-bf078c937982
|
||||
6865f581-be24-48d2-88d4-a9c342677af4
|
||||
e1829ada-49eb-438a-8d32-18e974e675c8
|
||||
db39ffa9-f68a-4c18-88ae-e8c51687bd4d
|
||||
7ebce2cf-19de-44f4-8d08-91624cfa452c
|
||||
892aa897-36df-4d5c-93c1-71da33e77cbd
|
||||
d557d975-00cf-494e-970d-383ee60bd3f7
|
||||
cd8cd1bc-2162-424d-ba2d-0b7b885fbf4e
|
||||
5f142c4d-1281-4c5d-aaba-d2e4f650ea34
|
||||
f9453641-e8db-47e6-a0a0-d7d1fb2c1593
|
||||
c1d89bfe-1552-4f62-bbc3-064fd5e0a489
|
||||
ea2b663d-ccaa-4153-a8d4-9a51f6d8bf9f
|
||||
bd2e9d53-7bdb-4293-a55b-7dee17f02bd4
|
||||
ba8f10d3-f16a-45f8-bcbd-00f7ea2658b2
|
||||
eb5d1629-10c6-4c96-baa0-7bf1698d1e46
|
14
tests/queries/0_stateless/01472_obfuscator_uuid.sh
Executable file
14
tests/queries/0_stateless/01472_obfuscator_uuid.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
# Checks clickhouse-obfuscator on a UUID column: prints the values stored in a table,
# then the obfuscator output for the same values with a fixed seed.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh

# Remove a table left over from an earlier interrupted run; otherwise CREATE TABLE below would fail.
# The statement prints nothing, so the expected output is unaffected.
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS t_uuid"

$CLICKHOUSE_CLIENT --query="CREATE TABLE t_uuid(Id UUID) ENGINE=MergeTree ORDER BY (Id)"
$CLICKHOUSE_CLIENT --query="INSERT INTO t_uuid VALUES ('3f5ffba3-19ff-4f3d-8861-60ae6e1fc1aa'),('4bd62524-e33c-43e5-882d-f1d96cf5561e'),('7a8b45d2-c18b-4e8c-89eb-abf5bee88931'),('45bb7333-965b-4526-870e-4f941edb025b'),('a4e72d0e-f9fa-465e-8d9d-151b9ced94df'),('cb5818ab-83b5-48a8-94b0-5177e30176d9'),('701e8006-fc9f-4496-80ba-efa6817b917b'),('e0936acf-6e8f-42aa-8f56-d1363476eece'),('239bb790-5293-40df-92ae-472294b6e178'),('508d0e80-729f-4e3b-9336-4c5c8792f6be'),('94abef70-f2d6-4f7b-ad60-3889409f1dac'),('b6f1ec08-8473-4fa2-b134-73db040b0d82'),('7e54dcae-0bb4-4c4f-a636-54a705fb8b40'),('d1d258c2-a35f-4c00-abfa-8addbcbc5471'),('7c74fbd8-bf79-46ee-adfe-96271040a4f7'),('41e3a274-eea9-41d8-a128-de5a6658fcfd'),('a72dc048-f72f-470e-b0f9-60cfad6e1157'),('40634f4f-37bf-44e4-ac7c-6f024ad19990')"

# Dump the column to a TSV file that is both shown as-is and fed to the obfuscator.
$CLICKHOUSE_CLIENT --query="SELECT Id FROM t_uuid FORMAT TSV" > "${CLICKHOUSE_TMP}"/data.tsv

echo FROM RAW DATA && cat "${CLICKHOUSE_TMP}"/data.tsv
# stderr of the obfuscator is discarded so that only the transformed rows reach the test output.
echo TRANSFORMED TO && $CLICKHOUSE_OBFUSCATOR --structure "Id UUID" --input-format TSV --output-format TSV --seed dsrub < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null

$CLICKHOUSE_CLIENT --query="DROP TABLE t_uuid"
rm "${CLICKHOUSE_TMP}"/data.tsv
|
Loading…
Reference in New Issue
Block a user