fix issues

This commit is contained in:
Andrei Nekrashevich 2020-05-29 05:06:21 +03:00
parent 94c5d87ebc
commit af469c0da5
5 changed files with 94 additions and 21 deletions

View File

@ -60,4 +60,43 @@ Result:
└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ └────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
``` ```
# Random functions for working with strings {#random-functions-for-working-with-strings}
## randomString {#random-string}
## randomFixedString {#random-fixed-string}
## randomPrintableASCII {#random-printable-ascii}
## randomStringUTF8 {#random-string-utf8}
## fuzzBits {#fuzzbits}
**Syntax**
``` sql
fuzzBits(s, prob)
```
Inverts bits of `s`, each with probability `prob`.
**Parameters**
- `s` - `String` or `FixedString`
- `prob` - constant `Float32/64`
**Returned value**
Fuzzed string with the same type as `s`.
**Example**
``` sql
SELECT fuzzBits(materialize('abacaba'), 0.1)
FROM numbers(3)
```
``` text
┌─fuzzBits(materialize('abacaba'), 0.1)─┐
│ abaaaja │
│ a*cjab+ │
│ aeca2A │
└───────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/random_functions/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/query_language/functions/random_functions/) <!--hide-->

View File

@ -18,7 +18,6 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_LARGE_STRING_SIZE;
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int DECIMAL_OVERFLOW; extern const int DECIMAL_OVERFLOW;
} }
@ -65,13 +64,15 @@ public:
size_t getNumberOfArguments() const override { return 2; } size_t getNumberOfArguments() const override { return 2; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } // indexing from 0
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{ {
if (!isStringOrFixedString(arguments[0].type)) if (!isStringOrFixedString(arguments[0].type))
throw Exception( throw Exception(
"First argument of function " + getName() + " must be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); "First argument of function " + getName() + " must be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!arguments[1].column || !isFloat(arguments[1].type) || !isColumnConst(*arguments[1].column)) if (!arguments[1].column || !isFloat(arguments[1].type))
throw Exception("Second argument of function " + getName() + " must be constant float", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); throw Exception("Second argument of function " + getName() + " must be constant float", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return arguments[0].type; return arguments[0].type;
@ -82,9 +83,13 @@ public:
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
{ {
const auto col_in_untyped = block.getByPosition(arguments[0]).column; auto col_in_untyped = block.getByPosition(arguments[0]).column;
const double inverse_probability = assert_cast<const ColumnConst &>(*block.getByPosition(arguments[1]).column).getValue<double>(); const double inverse_probability = assert_cast<const ColumnConst &>(*block.getByPosition(arguments[1]).column).getValue<double>();
if (const ColumnConst * col_in_untyped_const = checkAndGetColumnConstStringOrFixedString(col_in_untyped.get()))
{
col_in_untyped = col_in_untyped_const->getDataColumnPtr();
}
if (const ColumnString * col_in = checkAndGetColumn<ColumnString>(col_in_untyped.get())) if (const ColumnString * col_in = checkAndGetColumn<ColumnString>(col_in_untyped.get()))
{ {
@ -108,9 +113,9 @@ public:
block.getByPosition(result).column = std::move(col_to); block.getByPosition(result).column = std::move(col_to);
} }
else if (const ColumnFixedString * col_fixed_in = checkAndGetColumn<ColumnFixedString>(col_in_untyped.get())) else if (const ColumnFixedString * col_in_fixed = checkAndGetColumn<ColumnFixedString>(col_in_untyped.get()))
{ {
const auto n = col_fixed_in->getN(); const auto n = col_in_fixed->getN();
auto col_to = ColumnFixedString::create(n); auto col_to = ColumnFixedString::create(n);
ColumnFixedString::Chars & chars_to = col_to->getChars(); ColumnFixedString::Chars & chars_to = col_to->getChars();
@ -120,9 +125,11 @@ public:
chars_to.resize(total_size); chars_to.resize(total_size);
const auto * ptr_in = col_fixed_in->getChars().data(); const auto * ptr_in = col_in_fixed->getChars().data();
auto * ptr_to = chars_to.data(); auto * ptr_to = chars_to.data();
fuzzBits(ptr_in, ptr_to, chars_to.size(), inverse_probability); fuzzBits(ptr_in, ptr_to, chars_to.size(), inverse_probability);
block.getByPosition(result).column = std::move(col_to);
} }
else else
{ {

View File

@ -3,20 +3,12 @@
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomString(10), 0.1))</query> <query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomString(10), 0.1))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomString(100), 0.1))</query> <query>SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomString(100), 0.5))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomString(1000), 0.1))</query> <query>SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomFixedString(1000), 0.1))</query>
<query>SELECT count() FROM zeros(1000) WHERE NOT ignore(fuzzBits(randomString(10000), 0.1))</query> <query>SELECT count() FROM zeros(1000) WHERE NOT ignore(fuzzBits(randomFixedString(10000), 0.5))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomString(rand() % 10), 0.1))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomString(rand() % 100), 0.1))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomString(rand() % 1000), 0.1))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomString(rand() % 10), 0.7))</query> <query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomString(rand() % 10), 0.7))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomString(rand() % 100), 0.7))</query> <query>SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomString(rand() % 100), 0.7))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomString(rand() % 1000), 0.7))</query> <query>SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomString(rand() % 1000), 0.7))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomFixedString(10), 0.5))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomFixedString(100), 0.5))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomFixedString(1000), 0.5))</query>
<query>SELECT count() FROM zeros(1000) WHERE NOT ignore(fuzzBits(randomFixedString(10000), 0.5))</query>
</test> </test>

View File

@ -1,2 +1,5 @@
100 100
String String
FixedString(10)
1

View File

@ -1,4 +1,36 @@
SELECT fuzzBits(toString('string'), 1); -- { serverError 43 } SELECT
SELECT fuzzBits('', 0.3); -- { serverError 44 } fuzzBits(toString('string'), 1);
SELECT length(fuzzBits(randomString(100), 0.5)); -- { serverError 43 }
SELECT toTypeName(fuzzBits(randomString(100), 0.5)); SELECT
fuzzBits('', 0.3);
SELECT
length(fuzzBits(randomString(100), 0.5));
SELECT
toTypeName(fuzzBits(randomString(100), 0.5));
SELECT
toTypeName(fuzzBits(toFixedString('abacaba', 10), 0.9));
SELECT
(
(0.3 * 0.99) * 8 * 10000 < sum
AND sum < (0.3 * 1.01) * 8 * 10000
) AS res
FROM
(
SELECT
arraySum(
id -> bitCount(
reinterpretAsUInt8(
substring(
fuzzBits(
arrayStringConcat(arrayMap(x -> toString('\0'), range(10000))),
0.3
),
id + 1,
1
)
)
),
range(10000)
) as sum
)