diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 788ece6371b..05fb982138c 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -60,4 +60,44 @@ Result: └────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ ``` +# Random functions for working with strings {#random-functions-for-working-with-strings} + +## randomString {#random-string} + +## randomFixedString {#random-fixed-string} + +## randomPrintableASCII {#random-printable-ascii} + +## randomStringUTF8 {#random-string-utf8} + +## fuzzBits {#fuzzbits} + +**Syntax** + +``` sql +fuzzBits([s], [prob]) +``` +Inverts bits of `s`, each with probability `prob`. + +**Parameters** +- `s` - `String` or `FixedString` +- `prob` - constant `Float32/64` + +**Returned value** +Fuzzed string of the same type as `s`. + +**Example** + +``` sql +SELECT fuzzBits(materialize('abacaba'), 0.1) +FROM numbers(3) +``` +``` text +┌─fuzzBits(materialize('abacaba'), 0.1)─┐ +│ abaaaja │ +│ a*cjab+ │ +│ aeca2A │ +└───────────────────────────────────────┘ +``` + [Original article](https://clickhouse.tech/docs/en/query_language/functions/random_functions/) diff --git a/src/Functions/fuzzBits.cpp b/src/Functions/fuzzBits.cpp index 99726ef9331..d2180c0bbba 100644 --- a/src/Functions/fuzzBits.cpp +++ b/src/Functions/fuzzBits.cpp @@ -18,7 +18,6 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int TOO_LARGE_STRING_SIZE; extern const int ILLEGAL_COLUMN; extern const int DECIMAL_OVERFLOW; } @@ -65,13 +64,15 @@ public: size_t getNumberOfArguments() const override { return 2; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } // indexing from 0 + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!isStringOrFixedString(arguments[0].type)) throw 
Exception( "First argument of function " + getName() + " must be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (!arguments[1].column || !isFloat(arguments[1].type) || !isColumnConst(*arguments[1].column)) + if (!arguments[1].column || !isFloat(arguments[1].type)) throw Exception("Second argument of function " + getName() + " must be constant float", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return arguments[0].type; @@ -82,9 +83,13 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - const auto col_in_untyped = block.getByPosition(arguments[0]).column; + auto col_in_untyped = block.getByPosition(arguments[0]).column; const double inverse_probability = assert_cast(*block.getByPosition(arguments[1]).column).getValue(); + if (const ColumnConst * col_in_untyped_const = checkAndGetColumnConstStringOrFixedString(col_in_untyped.get())) + { + col_in_untyped = col_in_untyped_const->getDataColumnPtr(); + } if (const ColumnString * col_in = checkAndGetColumn(col_in_untyped.get())) { @@ -108,9 +113,9 @@ public: block.getByPosition(result).column = std::move(col_to); } - else if (const ColumnFixedString * col_fixed_in = checkAndGetColumn(col_in_untyped.get())) + else if (const ColumnFixedString * col_in_fixed = checkAndGetColumn(col_in_untyped.get())) { - const auto n = col_fixed_in->getN(); + const auto n = col_in_fixed->getN(); auto col_to = ColumnFixedString::create(n); ColumnFixedString::Chars & chars_to = col_to->getChars(); @@ -120,9 +125,11 @@ public: chars_to.resize(total_size); - const auto * ptr_in = col_fixed_in->getChars().data(); + const auto * ptr_in = col_in_fixed->getChars().data(); auto * ptr_to = chars_to.data(); fuzzBits(ptr_in, ptr_to, chars_to.size(), inverse_probability); + + block.getByPosition(result).column = std::move(col_to); } else { diff --git a/tests/performance/fuzz_bits.xml b/tests/performance/fuzz_bits.xml index 72f8058601b..2679977cb1d 100644 --- 
a/tests/performance/fuzz_bits.xml +++ b/tests/performance/fuzz_bits.xml @@ -3,20 +3,12 @@ SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomString(10), 0.1)) - SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomString(100), 0.1)) - SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomString(1000), 0.1)) - SELECT count() FROM zeros(1000) WHERE NOT ignore(fuzzBits(randomString(10000), 0.1)) - SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomString(rand() % 10), 0.1)) - SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomString(rand() % 100), 0.1)) - SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomString(rand() % 1000), 0.1)) + SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomString(100), 0.5)) + SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomFixedString(1000), 0.1)) + SELECT count() FROM zeros(1000) WHERE NOT ignore(fuzzBits(randomFixedString(10000), 0.5)) SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomString(rand() % 10), 0.7)) SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomString(rand() % 100), 0.7)) SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomString(rand() % 1000), 0.7)) - SELECT count() FROM zeros(1000000) WHERE NOT ignore(fuzzBits(randomFixedString(10), 0.5)) - SELECT count() FROM zeros(100000) WHERE NOT ignore(fuzzBits(randomFixedString(100), 0.5)) - SELECT count() FROM zeros(10000) WHERE NOT ignore(fuzzBits(randomFixedString(1000), 0.5)) - SELECT count() FROM zeros(1000) WHERE NOT ignore(fuzzBits(randomFixedString(10000), 0.5)) - diff --git a/tests/queries/0_stateless/01284_fuzz_bits.reference b/tests/queries/0_stateless/01284_fuzz_bits.reference index a6a5b5df5d0..d35f590ed3d 100644 --- a/tests/queries/0_stateless/01284_fuzz_bits.reference +++ b/tests/queries/0_stateless/01284_fuzz_bits.reference @@ -1,2 +1,5 @@ + 100 String +FixedString(10) +1 diff --git 
a/tests/queries/0_stateless/01284_fuzz_bits.sql b/tests/queries/0_stateless/01284_fuzz_bits.sql index 1e76db250eb..44a86c9279d 100644 --- a/tests/queries/0_stateless/01284_fuzz_bits.sql +++ b/tests/queries/0_stateless/01284_fuzz_bits.sql @@ -1,4 +1,36 @@ -SELECT fuzzBits(toString('string'), 1); -- { serverError 43 } -SELECT fuzzBits('', 0.3); -- { serverError 44 } -SELECT length(fuzzBits(randomString(100), 0.5)); -SELECT toTypeName(fuzzBits(randomString(100), 0.5)); +SELECT + fuzzBits(toString('string'), 1); +-- { serverError 43 } +SELECT + fuzzBits('', 0.3); +SELECT + length(fuzzBits(randomString(100), 0.5)); +SELECT + toTypeName(fuzzBits(randomString(100), 0.5)); +SELECT + toTypeName(fuzzBits(toFixedString('abacaba', 10), 0.9)); + +SELECT + ( + (0.3 * 0.99) * 8 * 10000 < sum + AND sum < (0.3 * 1.01) * 8 * 10000 + ) AS res +FROM + ( + SELECT + arraySum( + id -> bitCount( + reinterpretAsUInt8( + substring( + fuzzBits( + arrayStringConcat(arrayMap(x -> toString('\0'), range(10000))), + 0.3 + ), + id + 1, + 1 + ) + ) + ), + range(10000) + ) as sum + )