Merge pull request #14409 from dimarub2000/obfuscator_uuid

Added UUID to Obfuscator
This commit is contained in:
alexey-milovidov 2020-09-07 06:58:41 +03:00 committed by GitHub
commit ff2d9aec1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 99 additions and 0 deletions

View File

@ -13,6 +13,7 @@
#include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeFactory.h> #include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeUUID.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <DataStreams/IBlockOutputStream.h> #include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/LimitBlockInputStream.h> #include <DataStreams/LimitBlockInputStream.h>
@ -363,6 +364,17 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI
} }
} }
static void transformUUID(const UInt128 & src, UInt128 & dst, UInt64 seed)
{
SipHash hash;
hash.update(seed);
hash.update(reinterpret_cast<const char *>(&src), sizeof(UInt128));
/// Saving version and variant from an old UUID
hash.get128(reinterpret_cast<char *>(&dst));
dst.high = (dst.high & 0x1fffffffffffffffull) | (src.high & 0xe000000000000000ull);
dst.low = (dst.low & 0xffffffffffff0fffull) | (src.low & 0x000000000000f000ull);
}
class FixedStringModel : public IModel class FixedStringModel : public IModel
{ {
@ -400,6 +412,38 @@ public:
} }
}; };
/// Model for UUID columns. It learns nothing from the data: every value is
/// passed through transformUUID together with the current seed.
class UUIDModel : public IModel
{
public:
    explicit UUIDModel(UInt64 seed_) : seed(seed_) {}

    /// No training state is kept, so both steps are no-ops.
    void train(const IColumn &) override {}
    void finalize() override {}

    /// Builds a new column of the same length with each source value transformed.
    ColumnPtr generate(const IColumn & column) override
    {
        const auto & input = assert_cast<const ColumnUInt128 &>(column).getData();
        const size_t rows = input.size();

        auto result = ColumnUInt128::create();
        auto & output = result->getData();
        output.resize(rows);

        for (size_t row = 0; row < rows; ++row)
            transformUUID(input[row], output[row], seed);

        return result;
    }

    /// Replaces the seed with its hash.
    void updateSeed() override
    {
        seed = hash(seed);
    }

private:
    UInt64 seed;
};
/// Leave date part as is and apply pseudorandom permutation to time difference with previous value within the same log2 class. /// Leave date part as is and apply pseudorandom permutation to time difference with previous value within the same log2 class.
class DateTimeModel : public IModel class DateTimeModel : public IModel
@ -935,6 +979,9 @@ public:
if (typeid_cast<const DataTypeFixedString *>(&data_type)) if (typeid_cast<const DataTypeFixedString *>(&data_type))
return std::make_unique<FixedStringModel>(seed); return std::make_unique<FixedStringModel>(seed);
if (typeid_cast<const DataTypeUUID *>(&data_type))
return std::make_unique<UUIDModel>(seed);
if (const auto * type = typeid_cast<const DataTypeArray *>(&data_type)) if (const auto * type = typeid_cast<const DataTypeArray *>(&data_type))
return std::make_unique<ArrayModel>(get(*type->getNestedType(), seed, markov_model_params)); return std::make_unique<ArrayModel>(get(*type->getNestedType(), seed, markov_model_params));

View File

@ -0,0 +1,38 @@
FROM RAW DATA
701e8006-fc9f-4496-80ba-efa6817b917b
45bb7333-965b-4526-870e-4f941edb025b
4bd62524-e33c-43e5-882d-f1d96cf5561e
3f5ffba3-19ff-4f3d-8861-60ae6e1fc1aa
7a8b45d2-c18b-4e8c-89eb-abf5bee88931
a4e72d0e-f9fa-465e-8d9d-151b9ced94df
e0936acf-6e8f-42aa-8f56-d1363476eece
239bb790-5293-40df-92ae-472294b6e178
508d0e80-729f-4e3b-9336-4c5c8792f6be
cb5818ab-83b5-48a8-94b0-5177e30176d9
41e3a274-eea9-41d8-a128-de5a6658fcfd
7e54dcae-0bb4-4c4f-a636-54a705fb8b40
d1d258c2-a35f-4c00-abfa-8addbcbc5471
40634f4f-37bf-44e4-ac7c-6f024ad19990
94abef70-f2d6-4f7b-ad60-3889409f1dac
7c74fbd8-bf79-46ee-adfe-96271040a4f7
a72dc048-f72f-470e-b0f9-60cfad6e1157
b6f1ec08-8473-4fa2-b134-73db040b0d82
TRANSFORMED TO
f2d98bb7-5670-4ffb-80b5-023c58a5535b
0bb323a5-7854-4fc2-9d4b-695a0d44d617
b420b532-fac3-4e06-8ea1-5f2e19cecc4e
6c35e54e-a349-48d6-9870-bf078c937982
6865f581-be24-48d2-88d4-a9c342677af4
e1829ada-49eb-438a-8d32-18e974e675c8
db39ffa9-f68a-4c18-88ae-e8c51687bd4d
7ebce2cf-19de-44f4-8d08-91624cfa452c
892aa897-36df-4d5c-93c1-71da33e77cbd
d557d975-00cf-494e-970d-383ee60bd3f7
cd8cd1bc-2162-424d-ba2d-0b7b885fbf4e
5f142c4d-1281-4c5d-aaba-d2e4f650ea34
f9453641-e8db-47e6-a0a0-d7d1fb2c1593
c1d89bfe-1552-4f62-bbc3-064fd5e0a489
ea2b663d-ccaa-4153-a8d4-9a51f6d8bf9f
bd2e9d53-7bdb-4293-a55b-7dee17f02bd4
ba8f10d3-f16a-45f8-bcbd-00f7ea2658b2
eb5d1629-10c6-4c96-baa0-7bf1698d1e46

View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash

# Prints a set of UUIDs read from a table, then the same rows after they have
# been passed through clickhouse-obfuscator with a fixed seed.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh

# Remove a table left behind by an interrupted earlier run so CREATE TABLE cannot fail.
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS t_uuid"
$CLICKHOUSE_CLIENT --query="CREATE TABLE t_uuid(Id UUID) ENGINE=MergeTree ORDER BY (Id)"
$CLICKHOUSE_CLIENT --query="INSERT INTO t_uuid VALUES ('3f5ffba3-19ff-4f3d-8861-60ae6e1fc1aa'),('4bd62524-e33c-43e5-882d-f1d96cf5561e'),('7a8b45d2-c18b-4e8c-89eb-abf5bee88931'),('45bb7333-965b-4526-870e-4f941edb025b'),('a4e72d0e-f9fa-465e-8d9d-151b9ced94df'),('cb5818ab-83b5-48a8-94b0-5177e30176d9'),('701e8006-fc9f-4496-80ba-efa6817b917b'),('e0936acf-6e8f-42aa-8f56-d1363476eece'),('239bb790-5293-40df-92ae-472294b6e178'),('508d0e80-729f-4e3b-9336-4c5c8792f6be'),('94abef70-f2d6-4f7b-ad60-3889409f1dac'),('b6f1ec08-8473-4fa2-b134-73db040b0d82'),('7e54dcae-0bb4-4c4f-a636-54a705fb8b40'),('d1d258c2-a35f-4c00-abfa-8addbcbc5471'),('7c74fbd8-bf79-46ee-adfe-96271040a4f7'),('41e3a274-eea9-41d8-a128-de5a6658fcfd'),('a72dc048-f72f-470e-b0f9-60cfad6e1157'),('40634f4f-37bf-44e4-ac7c-6f024ad19990')"

# Dump the table to a TSV file that serves both as the "raw" output and as obfuscator input.
$CLICKHOUSE_CLIENT --query="SELECT Id FROM t_uuid FORMAT TSV" > "${CLICKHOUSE_TMP}"/data.tsv
echo FROM RAW DATA && cat "${CLICKHOUSE_TMP}"/data.tsv

# Obfuscator stderr is discarded so that only the transformed rows appear in the test output.
echo TRANSFORMED TO && $CLICKHOUSE_OBFUSCATOR --structure "Id UUID" --input-format TSV --output-format TSV --seed dsrub < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null

# Clean up the table and the temporary file.
$CLICKHOUSE_CLIENT --query="DROP TABLE t_uuid"
rm "${CLICKHOUSE_TMP}"/data.tsv