mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 01:22:04 +00:00
Obfuscator: added heuristic #2518
This commit is contained in:
parent
394b020869
commit
9845814701
@ -87,6 +87,7 @@ namespace DB
|
|||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int LOGICAL_ERROR;
|
extern const int LOGICAL_ERROR;
|
||||||
|
extern const int NOT_IMPLEMENTED;
|
||||||
extern const int CANNOT_SEEK_THROUGH_FILE;
|
extern const int CANNOT_SEEK_THROUGH_FILE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -682,7 +683,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (table.end() == it)
|
if (table.end() == it)
|
||||||
throw Exception("Logical error in markov model");
|
throw Exception("Logical error in markov model", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
size_t offset_from_begin_of_string = pos - data;
|
size_t offset_from_begin_of_string = pos - data;
|
||||||
size_t determinator_sliding_window_size = params.determinator_sliding_window_size;
|
size_t determinator_sliding_window_size = params.determinator_sliding_window_size;
|
||||||
@ -703,7 +704,8 @@ public:
|
|||||||
/// If string is greater than desired_size, increase probability of end.
|
/// If string is greater than desired_size, increase probability of end.
|
||||||
double end_probability_multiplier = 0;
|
double end_probability_multiplier = 0;
|
||||||
Int64 num_bytes_after_desired_size = (pos - data) - desired_size;
|
Int64 num_bytes_after_desired_size = (pos - data) - desired_size;
|
||||||
if (num_bytes_after_desired_size)
|
|
||||||
|
if (num_bytes_after_desired_size > 0)
|
||||||
end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);
|
end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);
|
||||||
|
|
||||||
CodePoint code = it->second.sample(determinator, end_probability_multiplier);
|
CodePoint code = it->second.sample(determinator, end_probability_multiplier);
|
||||||
@ -711,6 +713,14 @@ public:
|
|||||||
if (code == END)
|
if (code == END)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
if (num_bytes_after_desired_size > 0)
|
||||||
|
{
|
||||||
|
/// Heuristic: break at ASCII non-alnum code point.
|
||||||
|
/// This keeps the result close to desired_size without breaking natural-looking words.
|
||||||
|
if (code < 128 && !isAlphaNumericASCII(code))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (!writeCodePoint(code, pos, end))
|
if (!writeCodePoint(code, pos, end))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -884,7 +894,7 @@ public:
|
|||||||
if (auto type = typeid_cast<const DataTypeNullable *>(&data_type))
|
if (auto type = typeid_cast<const DataTypeNullable *>(&data_type))
|
||||||
return std::make_unique<NullableModel>(get(*type->getNestedType(), seed, markov_model_params));
|
return std::make_unique<NullableModel>(get(*type->getNestedType(), seed, markov_model_params));
|
||||||
|
|
||||||
throw Exception("Unsupported data type");
|
throw Exception("Unsupported data type", ErrorCodes::NOT_IMPLEMENTED);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user