This commit is contained in:
Emmanuel 2024-11-20 15:25:07 -08:00 committed by GitHub
commit c79c82fd94
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 44 additions and 34 deletions

View File

@ -1211,6 +1211,7 @@ arrayReverse
arrayReverseFill
arrayReverseSort
arrayReverseSplit
arrayRocAUC
arrayRotateLeft
arrayRotateRight
arrayShiftLeft

View File

@ -2121,16 +2121,18 @@ Result:
```
## arrayAUC
## arrayRocAUC
Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>).
Calculates the AUC (Area Under the Curve) of the Receiver Operating Characteristic (ROC) curve. AUC is a concept in machine learning; for more details, see <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>.
**Syntax**
``` sql
arrayAUC(arr_scores, arr_labels[, scale])
arrayRocAUC(arr_scores, arr_labels[, scale])
```
Alias: `arrayAUC`
**Arguments**
- `arr_scores` — scores given by the prediction model.
@ -2139,22 +2141,22 @@ arrayAUC(arr_scores, arr_labels[, scale])
**Returned value**
Returns AUC value with type Float64.
Returns ROC AUC value with type Float64.
**Example**
Query:
``` sql
select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
select arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
```
Result:
``` text
┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
┌─arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
│ 0.75 │
└───────────────────────────────────────────────┘
└──────────────────────────────────────────────────┘
```
## arrayMap(func, arr1, ...)

View File

@ -1617,16 +1617,18 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res
└──────────────┘
```
## arrayAUC {#arrayauc}
## arrayRocAUC {#arrayrocauc}
Вычисляет площадь под ROC-кривой (AUC, Area Under the Curve).
**Синтаксис**
``` sql
arrayAUC(arr_scores, arr_labels)
arrayRocAUC(arr_scores, arr_labels)
```
Синоним: `arrayAUC`.
**Аргументы**
- `arr_scores` — оценка, которую дает модель предсказания.
@ -1643,15 +1645,15 @@ arrayAUC(arr_scores, arr_labels)
Запрос:
``` sql
SELECT arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
SELECT arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
```
Результат:
``` text
┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
┌─arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
│ 0.75 │
└────────────────────────────────────────---──┘
└──────────────────────────────────────────────────┘
```
## arrayProduct {#arrayproduct}

View File

@ -1186,16 +1186,18 @@ SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]);
└──────────────────────────────────────┘
```
## arrayAUC {#arrayauc}
## arrayRocAUC {#arrayrocauc}
计算AUC (ROC曲线下的面积，这是机器学习中的一个概念，更多细节请查看：https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve)。
**语法**
``` sql
arrayAUC(arr_scores, arr_labels)
arrayRocAUC(arr_scores, arr_labels)
```
别名: `arrayAUC`.
**参数**
- `arr_scores` — 预测模型给出的分数。
@ -1210,15 +1212,15 @@ arrayAUC(arr_scores, arr_labels)
查询语句:
``` sql
select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
select arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
```
结果:
``` text
┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
┌─arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
│ 0.75 │
└───────────────────────────────────────────────┘
└──────────────────────────────────────────────────┘
```
## arrayMap(func, arr1, ...) {#array-map}

View File

@ -58,7 +58,7 @@ namespace ErrorCodes
* TPR_raw = countIf(score > score_i, label = positive) = count positive labels above certain score
*
* Let's look at the example:
* arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
* arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
*
* 1. We have pairs: (-, 0.1), (-, 0.4), (+, 0.35), (+, 0.8)
*
@ -75,11 +75,11 @@ namespace ErrorCodes
* The "curve" is represented by a line that moves one step either to the right or upwards on each threshold change.
*/
class FunctionArrayAUC : public IFunction
class FunctionArrayRocAUC : public IFunction
{
public:
static constexpr auto name = "arrayAUC";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayAUC>(); }
static constexpr auto name = "arrayRocAUC";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayRocAUC>(); }
private:
static Float64 apply(
@ -245,9 +245,11 @@ public:
};
REGISTER_FUNCTION(ArrayAUC)
REGISTER_FUNCTION(ArrayRocAUC)
{
factory.registerFunction<FunctionArrayAUC>();
factory.registerFunction<FunctionArrayRocAUC>();
/// Backward compatibility, also the ROC AUC is the more commonly used AUC
factory.registerAlias("arrayAUC", "arrayRocAUC");
}
}

View File

@ -1215,7 +1215,6 @@
"argMinSimpleState"
"argMinState"
"array"
"arrayAUC"
"arrayAll"
"arrayAvg"
"arrayCompact"
@ -1265,6 +1264,7 @@
"arrayReverseFill"
"arrayReverseSort"
"arrayReverseSplit"
"arrayRocAUC"
"arrayRotateLeft"
"arrayRotateRight"
"arrayShiftLeft"

View File

@ -528,7 +528,6 @@
"argMinSimpleState"
"argMinState"
"array"
"arrayAUC"
"arrayAll"
"arrayAvg"
"arrayCompact"
@ -578,6 +577,7 @@
"arrayReverseFill"
"arrayReverseSort"
"arrayReverseSplit"
"arrayRocAUC"
"arrayRotateLeft"
"arrayRotateRight"
"arrayShiftLeft"

View File

@ -18,7 +18,6 @@
"array"
"Array"
"arrayAll"
"arrayAUC"
"arrayCompact"
"arrayConcat"
"arrayCount"
@ -46,6 +45,7 @@
"arrayPopFront"
"arrayPushBack"
"arrayPushFront"
"arrayRocAUC"
"arrayReduce"
"arrayReduceInRanges"
"arrayResize"

View File

@ -1,4 +0,0 @@
<test>
<query>SELECT avg(ifNotFinite(arrayAUC(arrayMap(x -> rand(x) / 0x100000000, range(2 + rand() % 100)), arrayMap(x -> rand(x) % 2, range(2 + rand() % 100))), 0)) FROM numbers(100000)</query>
</test>

View File

@ -0,0 +1,4 @@
<test>
<query>SELECT avg(ifNotFinite(arrayRocAUC(arrayMap(x -> rand(x) / 0x100000000, range(2 + rand() % 100)), arrayMap(x -> rand(x) % 2, range(2 + rand() % 100))), 0)) FROM numbers(100000)</query>
</test>

View File

@ -90,7 +90,6 @@ alphaTokens
and
appendTrailingCharIfAbsent
array
arrayAUC
arrayAll
arrayAvg
arrayCompact
@ -135,6 +134,7 @@ arrayReverse
arrayReverseFill
arrayReverseSort
arrayReverseSplit
arrayRocAUC
arraySlice
arraySort
arraySplit

View File

@ -1283,6 +1283,7 @@ arrayReverse
arrayReverseFill
arrayReverseSort
arrayReverseSplit
arrayRocAUC
arrayRotateLeft
arrayRotateRight
arrayShiftLeft