Obfuscator: added heuristic #2518

This commit is contained in:
Alexey Milovidov 2018-06-30 22:03:26 +03:00
parent 394b020869
commit 9845814701

View File

@ -87,6 +87,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_SEEK_THROUGH_FILE; extern const int CANNOT_SEEK_THROUGH_FILE;
} }
@ -682,7 +683,7 @@ public:
} }
if (table.end() == it) if (table.end() == it)
throw Exception("Logical error in markov model"); throw Exception("Logical error in markov model", ErrorCodes::LOGICAL_ERROR);
size_t offset_from_begin_of_string = pos - data; size_t offset_from_begin_of_string = pos - data;
size_t determinator_sliding_window_size = params.determinator_sliding_window_size; size_t determinator_sliding_window_size = params.determinator_sliding_window_size;
@ -703,7 +704,8 @@ public:
/// If string is greater than desired_size, increase probability of end. /// If string is greater than desired_size, increase probability of end.
double end_probability_multiplier = 0; double end_probability_multiplier = 0;
Int64 num_bytes_after_desired_size = (pos - data) - desired_size; Int64 num_bytes_after_desired_size = (pos - data) - desired_size;
if (num_bytes_after_desired_size)
if (num_bytes_after_desired_size > 0)
end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size); end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);
CodePoint code = it->second.sample(determinator, end_probability_multiplier); CodePoint code = it->second.sample(determinator, end_probability_multiplier);
@ -711,6 +713,14 @@ public:
if (code == END) if (code == END)
break; break;
if (num_bytes_after_desired_size > 0)
{
/// Heuristic: break at an ASCII non-alphanumeric code point.
/// This keeps the result close to desired_size without breaking natural-looking words.
if (code < 128 && !isAlphaNumericASCII(code))
break;
}
if (!writeCodePoint(code, pos, end)) if (!writeCodePoint(code, pos, end))
break; break;
@ -884,7 +894,7 @@ public:
if (auto type = typeid_cast<const DataTypeNullable *>(&data_type)) if (auto type = typeid_cast<const DataTypeNullable *>(&data_type))
return std::make_unique<NullableModel>(get(*type->getNestedType(), seed, markov_model_params)); return std::make_unique<NullableModel>(get(*type->getNestedType(), seed, markov_model_params));
throw Exception("Unsupported data type"); throw Exception("Unsupported data type", ErrorCodes::NOT_IMPLEMENTED);
} }
}; };