This commit is contained in:
Emmanuel 2024-11-20 15:25:07 -08:00 committed by GitHub
commit c79c82fd94
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 44 additions and 34 deletions

View File

@ -1211,6 +1211,7 @@ arrayReverse
arrayReverseFill arrayReverseFill
arrayReverseSort arrayReverseSort
arrayReverseSplit arrayReverseSplit
arrayRocAUC
arrayRotateLeft arrayRotateLeft
arrayRotateRight arrayRotateRight
arrayShiftLeft arrayShiftLeft

View File

@ -2121,16 +2121,18 @@ Result:
``` ```
## arrayAUC ## arrayRocAUC
Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>). Calculate AUC (Area Under the Curve) for the Receiver Operating Characteristic (ROC) curve. The AUC is a concept in machine learning, see more details: <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>.
**Syntax** **Syntax**
``` sql ``` sql
arrayAUC(arr_scores, arr_labels[, scale]) arrayRocAUC(arr_scores, arr_labels[, scale])
``` ```
Alias: `arrayAUC`
**Arguments** **Arguments**
- `arr_scores` — scores prediction model gives. - `arr_scores` — scores prediction model gives.
@ -2139,22 +2141,22 @@ arrayAUC(arr_scores, arr_labels[, scale])
**Returned value** **Returned value**
Returns AUC value with type Float64. Returns ROC AUC value with type Float64.
**Example** **Example**
Query: Query:
``` sql ``` sql
select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); select arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
``` ```
Result: Result:
``` text ``` text
┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ ┌─arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
│ 0.75 │ │ 0.75 │
└───────────────────────────────────────────────┘ └──────────────────────────────────────────────────┘
``` ```
## arrayMap(func, arr1, ...) ## arrayMap(func, arr1, ...)

View File

@ -1617,16 +1617,18 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res
└──────────────┘ └──────────────┘
``` ```
## arrayAUC {#arrayauc} ## arrayRocAUC {#arrayrocauc}
Вычисляет площадь под кривой. Вычисляет площадь под кривой.
**Синтаксис** **Синтаксис**
``` sql ``` sql
arrayAUC(arr_scores, arr_labels) arrayRocAUC(arr_scores, arr_labels)
``` ```
Синоним: `arrayAUC`.
**Аргументы** **Аргументы**
- `arr_scores` — оценка, которую дает модель предсказания. - `arr_scores` — оценка, которую дает модель предсказания.
@ -1643,15 +1645,15 @@ arrayAUC(arr_scores, arr_labels)
Запрос: Запрос:
``` sql ``` sql
SELECT arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); SELECT arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
``` ```
Результат: Результат:
``` text ``` text
┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ ┌─arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
│ 0.75 │ │ 0.75 │
└────────────────────────────────────────---──┘ └──────────────────────────────────────────────────┘
``` ```
## arrayProduct {#arrayproduct} ## arrayProduct {#arrayproduct}

View File

@ -1186,16 +1186,18 @@ SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]);
└──────────────────────────────────────┘ └──────────────────────────────────────┘
``` ```
## arrayAUC {#arrayauc} ## arrayRocAUC {#arrayrocauc}
计算AUC (ROC曲线下的面积，这是机器学习中的一个概念，更多细节请查看：https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve)。 计算AUC (ROC曲线下的面积，这是机器学习中的一个概念，更多细节请查看：https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve)。
**语法** **语法**
``` sql ``` sql
arrayAUC(arr_scores, arr_labels) arrayRocAUC(arr_scores, arr_labels)
``` ```
别名: `arrayAUC`.
**参数** **参数**
- `arr_scores` — 分数预测模型给出。 - `arr_scores` — 分数预测模型给出。
@ -1210,15 +1212,15 @@ arrayAUC(arr_scores, arr_labels)
查询语句: 查询语句:
``` sql ``` sql
select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); select arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
``` ```
结果: 结果:
``` text ``` text
┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ ┌─arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
│ 0.75 │ │ 0.75 │
└───────────────────────────────────────────────┘ └──────────────────────────────────────────────────┘
``` ```
## arrayMap(func, arr1, ...) {#array-map} ## arrayMap(func, arr1, ...) {#array-map}

View File

@ -58,7 +58,7 @@ namespace ErrorCodes
* TPR_raw = countIf(score > score_i, label = positive) = count positive labels above certain score * TPR_raw = countIf(score > score_i, label = positive) = count positive labels above certain score
* *
* Let's look at the example: * Let's look at the example:
* arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); * arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
* *
* 1. We have pairs: (-, 0.1), (-, 0.4), (+, 0.35), (+, 0.8) * 1. We have pairs: (-, 0.1), (-, 0.4), (+, 0.35), (+, 0.8)
* *
@ -75,11 +75,11 @@ namespace ErrorCodes
* The "curve" will be represented by a line that moves one step either towards right or top on each threshold change. * The "curve" will be represented by a line that moves one step either towards right or top on each threshold change.
*/ */
class FunctionArrayAUC : public IFunction class FunctionArrayRocAUC : public IFunction
{ {
public: public:
static constexpr auto name = "arrayAUC"; static constexpr auto name = "arrayRocAUC";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayAUC>(); } static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayRocAUC>(); }
private: private:
static Float64 apply( static Float64 apply(
@ -245,9 +245,11 @@ public:
}; };
REGISTER_FUNCTION(ArrayAUC) REGISTER_FUNCTION(ArrayRocAUC)
{ {
factory.registerFunction<FunctionArrayAUC>(); factory.registerFunction<FunctionArrayRocAUC>();
/// Backward compatibility, also the ROC AUC is the more commonly used AUC
factory.registerAlias("arrayAUC", "arrayRocAUC");
} }
} }

View File

@ -1215,7 +1215,6 @@
"argMinSimpleState" "argMinSimpleState"
"argMinState" "argMinState"
"array" "array"
"arrayAUC"
"arrayAll" "arrayAll"
"arrayAvg" "arrayAvg"
"arrayCompact" "arrayCompact"
@ -1265,6 +1264,7 @@
"arrayReverseFill" "arrayReverseFill"
"arrayReverseSort" "arrayReverseSort"
"arrayReverseSplit" "arrayReverseSplit"
"arrayRocAUC"
"arrayRotateLeft" "arrayRotateLeft"
"arrayRotateRight" "arrayRotateRight"
"arrayShiftLeft" "arrayShiftLeft"

View File

@ -528,7 +528,6 @@
"argMinSimpleState" "argMinSimpleState"
"argMinState" "argMinState"
"array" "array"
"arrayAUC"
"arrayAll" "arrayAll"
"arrayAvg" "arrayAvg"
"arrayCompact" "arrayCompact"
@ -578,6 +577,7 @@
"arrayReverseFill" "arrayReverseFill"
"arrayReverseSort" "arrayReverseSort"
"arrayReverseSplit" "arrayReverseSplit"
"arrayRocAUC"
"arrayRotateLeft" "arrayRotateLeft"
"arrayRotateRight" "arrayRotateRight"
"arrayShiftLeft" "arrayShiftLeft"

View File

@ -18,7 +18,6 @@
"array" "array"
"Array" "Array"
"arrayAll" "arrayAll"
"arrayAUC"
"arrayCompact" "arrayCompact"
"arrayConcat" "arrayConcat"
"arrayCount" "arrayCount"
@ -46,6 +45,7 @@
"arrayPopFront" "arrayPopFront"
"arrayPushBack" "arrayPushBack"
"arrayPushFront" "arrayPushFront"
"arrayRocAUC"
"arrayReduce" "arrayReduce"
"arrayReduceInRanges" "arrayReduceInRanges"
"arrayResize" "arrayResize"

View File

@ -1,4 +0,0 @@
<test>
<query>SELECT avg(ifNotFinite(arrayAUC(arrayMap(x -> rand(x) / 0x100000000, range(2 + rand() % 100)), arrayMap(x -> rand(x) % 2, range(2 + rand() % 100))), 0)) FROM numbers(100000)</query>
</test>

View File

@ -0,0 +1,4 @@
<test>
<query>SELECT avg(ifNotFinite(arrayRocAUC(arrayMap(x -> rand(x) / 0x100000000, range(2 + rand() % 100)), arrayMap(x -> rand(x) % 2, range(2 + rand() % 100))), 0)) FROM numbers(100000)</query>
</test>

View File

@ -90,7 +90,6 @@ alphaTokens
and and
appendTrailingCharIfAbsent appendTrailingCharIfAbsent
array array
arrayAUC
arrayAll arrayAll
arrayAvg arrayAvg
arrayCompact arrayCompact
@ -135,6 +134,7 @@ arrayReverse
arrayReverseFill arrayReverseFill
arrayReverseSort arrayReverseSort
arrayReverseSplit arrayReverseSplit
arrayRocAUC
arraySlice arraySlice
arraySort arraySort
arraySplit arraySplit

View File

@ -1283,6 +1283,7 @@ arrayReverse
arrayReverseFill arrayReverseFill
arrayReverseSort arrayReverseSort
arrayReverseSplit arrayReverseSplit
arrayRocAUC
arrayRotateLeft arrayRotateLeft
arrayRotateRight arrayRotateRight
arrayShiftLeft arrayShiftLeft