diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index c86c76ca3e9..f65fd8be37e 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2121,20 +2121,20 @@ Result: ## arrayRandomSample -Returns a random sample from an input array. The number of elements in the sample is determined by the `sampleSize` argument. The function ensures that each element in the input array has an equal chance of being included in the sample. +The `arrayRandomSample` function randomly selects elements from the input array, ensuring that the specified number of elements is included in the sample. If `samples` exceeds the size of the input array, the sample size is automatically limited to the size of the array. In such cases, all elements of the array may be returned, but the order is not guaranteed. The function can handle both flat arrays and nested arrays. **Syntax** ```sql -arrayRandomSample(arr, sampleSize) +arrayRandomSample(arr, samples) ``` **Arguments** -- `arr` — The input array from which to sample elements, including nested arrays. -- `sampleSize` — The number of elements to include in the random sample. +- `arr` — The input array from which to sample elements. This can include both flat arrays (e.g., integers, strings) and nested arrays. +- `samples` — An unsigned integer specifying the number of elements to include in the random sample. -**Returned value** +**Returned Value** - An array containing a random sample of elements from the input array. 
@@ -2142,23 +2142,6 @@ arrayRandomSample(arr, sampleSize) Query: -```sql -SELECT arrayRandomSample([1, 2, 3, 4, 5, 6], 3) as res; -``` - -Result: -``` -┌─res─────┐ -│ [2,3,1] │ -└─────────┘ -or -┌─res─────┐ -│ [5,2,6] │ -└─────────┘ -``` - -Query: - ```sql SELECT arrayRandomSample(['apple', 'banana', 'cherry', 'date'], 2) as res; ``` @@ -2166,16 +2149,8 @@ SELECT arrayRandomSample(['apple', 'banana', 'cherry', 'date'], 2) as res; Result: ``` ┌─res────────────────┐ -│ ['apple','banana'] │ -└────────────────────┘ -or -┌─res────────────────┐ │ ['banana','apple'] │ └────────────────────┘ -or -┌─res───────────────┐ -│ ['cherry','date'] │ -└───────────────────┘ ``` Query: @@ -2187,10 +2162,6 @@ SELECT arrayRandomSample([[1, 2], [3, 4], [5, 6]], 2) as res; Result: ``` ┌─res───────────┐ -│ [[1,2],[5,6]] │ -└───────────────┘ -or -┌─res───────────┐ │ [[3,4],[5,6]] │ └───────────────┘ ``` @@ -2217,14 +2188,8 @@ Result: ┌─res─────┐ │ [3,1,2] │ └─────────┘ -or -┌─res─────┐ -│ [3,2,1] │ -└─────────┘ ``` -The `arrayRandomSample` function randomly selects elements from the input array, ensuring that the specified number of elements is included in the sample. If `sampleSize` exceeds the size of the input array, it will be limited to the size of the array. It can handle both flat arrays and arrays containing nested arrays, providing flexibility in sampling from complex data structures. - ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). 
diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index e3beaeac1ba..0821101f2af 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -45,8 +45,8 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override { - const ColumnArray * input_data = checkAndGetColumn(arguments[0].column.get()); - if (!input_data) + const ColumnArray * column_array = checkAndGetColumn(arguments[0].column.get()); + if (!column_array) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument must be an array"); const IColumn * col_num = arguments[1].column.get(); @@ -74,7 +74,7 @@ public: Poco::Logger::get("FunctionRandomSampleFromArray").debug("The number of samples K = " + std::to_string(K)); - const auto & offsets = input_data->getOffsets(); + const auto & offsets = column_array->getOffsets(); size_t num_elements = offsets[0]; Poco::Logger::get("FunctionRandomSampleFromArray").debug("The number of elements in the array = " + std::to_string(num_elements)); @@ -82,7 +82,7 @@ public: if (num_elements == 0 || K == 0) { // Handle edge cases where input array is empty or K is 0 - return input_data->cloneEmpty(); + return column_array->cloneEmpty(); } std::random_device rd; @@ -92,8 +92,8 @@ public: { K = static_cast(num_elements); } - // Create an empty ColumnArray with the same structure as input_data - auto nested_column = input_data->getDataPtr()->cloneEmpty(); + // Create an empty ColumnArray with the same structure as column_array + auto nested_column = column_array->getDataPtr()->cloneEmpty(); auto offsets_column = ColumnUInt64::create(); // Create an empty offsets column auto res_data = ColumnArray::create(std::move(nested_column), std::move(offsets_column)); @@ -107,7 +107,7 @@ public: size_t source_index = indices[i]; // Insert the corresponding element from the source array - 
res_data->getData().insertFrom(input_data->getData(), source_index); + res_data->getData().insertFrom(column_array->getData(), source_index); } // Update offsets manually for the single row