From 12e6a39d489468dd2f0db06195f281268d6740c5 Mon Sep 17 00:00:00 2001 From: Emmanuel Dias Date: Thu, 14 Nov 2024 07:27:34 -0300 Subject: [PATCH 1/2] rename arrayAUC to arrayRocAUC and add alias --- .../aspell-ignore/en/aspell-dict.txt | 2 +- .../sql-reference/functions/array-functions.md | 18 ++++++++++-------- .../sql-reference/functions/array-functions.md | 14 ++++++++------ .../sql-reference/functions/array-functions.md | 14 ++++++++------ .../array/{arrayAUC.cpp => arrayRocAUC.cpp} | 14 ++++++++------ tests/fuzz/all.dict | 2 +- tests/fuzz/dictionaries/functions.dict | 2 +- tests/fuzz/dictionaries/old.dict | 2 +- tests/performance/array_auc.xml | 4 ---- tests/performance/array_roc_auc.xml | 4 ++++ ..._new_functions_must_be_documented.reference | 2 +- .../aspell-ignore/en/aspell-dict.txt | 2 +- 12 files changed, 44 insertions(+), 36 deletions(-) rename src/Functions/array/{arrayAUC.cpp => arrayRocAUC.cpp} (96%) delete mode 100644 tests/performance/array_auc.xml create mode 100644 tests/performance/array_roc_auc.xml diff --git a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt index e2966898be2..63cbc5d28b4 100644 --- a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt +++ b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt @@ -1160,7 +1160,6 @@ argMax argMin argmax argmin -arrayAUC arrayAll arrayAvg arrayCompact @@ -1211,6 +1210,7 @@ arrayReverse arrayReverseFill arrayReverseSort arrayReverseSplit +arrayRocAUC arrayRotateLeft arrayRotateRight arrayShiftLeft diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 5957b45a881..e00e95e85bd 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2121,16 +2121,18 @@ Result: ``` -## arrayAUC +## arrayRocAUC -Calculate AUC (Area Under the Curve, which is a concept in machine learning, see 
more details: <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>). +Calculate AUC (Area Under the Curve) for the Receiver Operating Characteristic (ROC) curve. The AUC is a concept in machine learning, see more details: <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>. **Syntax** ``` sql -arrayAUC(arr_scores, arr_labels[, scale]) +arrayRocAUC(arr_scores, arr_labels[, scale]) ``` +Alias: `arrayAUC` + **Arguments** - `arr_scores` — scores prediction model gives. @@ -2139,22 +2141,22 @@ arrayAUC(arr_scores, arr_labels[, scale]) **Returned value** -Returns AUC value with type Float64. +Returns ROC AUC value with type Float64. **Example** Query: ``` sql -select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); +select arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); ``` Result: ``` text -┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ -│ 0.75 │ -└───────────────────────────────────────────────┘ +┌─arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ +│ 0.75 │ +└──────────────────────────────────────────────────┘ ``` ## arrayMap(func, arr1, ...) diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 825e3f06be2..a74f88f965f 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -1617,16 +1617,18 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res └──────────────┘ ``` -## arrayAUC {#arrayauc} +## arrayRocAUC {#arrayrocauc} Вычисляет площадь под кривой. **Синтаксис** ``` sql -arrayAUC(arr_scores, arr_labels) +arrayRocAUC(arr_scores, arr_labels) ``` +Синоним: `arrayAUC`. + **Аргументы** - `arr_scores` — оценка, которую дает модель предсказания. 
@@ -1643,15 +1645,15 @@ arrayAUC(arr_scores, arr_labels) Запрос: ``` sql -SELECT arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); +SELECT arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); ``` Результат: ``` text -┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ -│ 0.75 │ -└────────────────────────────────────────---──┘ +┌─arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ +│ 0.75 │ +└──────────────────────────────────────────────────┘ ``` ## arrayProduct {#arrayproduct} diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index 69db34e4a36..df2319db222 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -1186,16 +1186,18 @@ SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]); └──────────────────────────────────────┘ ``` -## arrayAUC {#arrayauc} +## arrayRocAUC {#arrayrocauc} 计算AUC (ROC曲线下的面积,这是机器学习中的一个概念,更多细节请查看:https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve)。 **语法** ``` sql -arrayAUC(arr_scores, arr_labels) +arrayRocAUC(arr_scores, arr_labels) ``` +别名: `arrayAUC`. + **参数** - `arr_scores` — 分数预测模型给出。 @@ -1210,15 +1212,15 @@ arrayAUC(arr_scores, arr_labels) 查询语句: ``` sql -select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); +select arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); ``` 结果: ``` text -┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ -│ 0.75 │ -└───────────────────────────────────────────────┘ +┌─arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ +│ 0.75 │ +└──────────────────────────────────────────────────┘ ``` ## arrayMap(func, arr1, ...) 
{#array-map} diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayRocAUC.cpp similarity index 96% rename from src/Functions/array/arrayAUC.cpp rename to src/Functions/array/arrayRocAUC.cpp index 94d0c1ed721..b39c7c95539 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayRocAUC.cpp @@ -58,7 +58,7 @@ namespace ErrorCodes * TPR_raw = countIf(score > score_i, label = positive) = count positive labels above certain score * * Let's look at the example: - * arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); + * arrayRocAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); * * 1. We have pairs: (-, 0.1), (-, 0.4), (+, 0.35), (+, 0.8) * @@ -75,11 +75,11 @@ namespace ErrorCodes * The "curve" will be present by a line that moves one step either towards right or top on each threshold change. */ -class FunctionArrayAUC : public IFunction +class FunctionArrayRocAUC : public IFunction { public: - static constexpr auto name = "arrayAUC"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = "arrayRocAUC"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } private: static Float64 apply( @@ -245,9 +245,11 @@ public: }; -REGISTER_FUNCTION(ArrayAUC) +REGISTER_FUNCTION(ArrayRocAUC) { - factory.registerFunction(); + factory.registerFunction(); + /// Backward compatibility, also the ROC AUC is the more commonly used AUC + factory.registerAlias("arrayAUC", "arrayRocAUC"); } } diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index 30af3746fca..8895312818a 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -1215,7 +1215,6 @@ "argMinSimpleState" "argMinState" "array" -"arrayAUC" "arrayAll" "arrayAvg" "arrayCompact" @@ -1265,6 +1264,7 @@ "arrayReverseFill" "arrayReverseSort" "arrayReverseSplit" +"arrayRocAUC" "arrayRotateLeft" "arrayRotateRight" "arrayShiftLeft" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index 
e562595fb67..4d5b91c7330 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -528,7 +528,6 @@ "argMinSimpleState" "argMinState" "array" -"arrayAUC" "arrayAll" "arrayAvg" "arrayCompact" @@ -578,6 +577,7 @@ "arrayReverseFill" "arrayReverseSort" "arrayReverseSplit" +"arrayRocAUC" "arrayRotateLeft" "arrayRotateRight" "arrayShiftLeft" diff --git a/tests/fuzz/dictionaries/old.dict b/tests/fuzz/dictionaries/old.dict index 61914c3b283..09e28c12ec8 100644 --- a/tests/fuzz/dictionaries/old.dict +++ b/tests/fuzz/dictionaries/old.dict @@ -18,7 +18,6 @@ "array" "Array" "arrayAll" -"arrayAUC" "arrayCompact" "arrayConcat" "arrayCount" @@ -46,6 +45,7 @@ "arrayPopFront" "arrayPushBack" "arrayPushFront" +"arrayRocAUC" "arrayReduce" "arrayReduceInRanges" "arrayResize" diff --git a/tests/performance/array_auc.xml b/tests/performance/array_auc.xml deleted file mode 100644 index 59d321b3c62..00000000000 --- a/tests/performance/array_auc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - SELECT avg(ifNotFinite(arrayAUC(arrayMap(x -> rand(x) / 0x100000000, range(2 + rand() % 100)), arrayMap(x -> rand(x) % 2, range(2 + rand() % 100))), 0)) FROM numbers(100000) - diff --git a/tests/performance/array_roc_auc.xml b/tests/performance/array_roc_auc.xml new file mode 100644 index 00000000000..444ce4b5fb0 --- /dev/null +++ b/tests/performance/array_roc_auc.xml @@ -0,0 +1,4 @@ + + + SELECT avg(ifNotFinite(arrayRocAUC(arrayMap(x -> rand(x) / 0x100000000, range(2 + rand() % 100)), arrayMap(x -> rand(x) % 2, range(2 + rand() % 100))), 0)) FROM numbers(100000) + diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index dea41174c65..16e0ce5bc3a 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -90,7 +90,6 @@ alphaTokens 
and appendTrailingCharIfAbsent array -arrayAUC arrayAll arrayAvg arrayCompact @@ -135,6 +134,7 @@ arrayReverse arrayReverseFill arrayReverseSort arrayReverseSplit +arrayRocAUC arraySlice arraySort arraySplit diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index a0d4d1d349e..061b6013b1f 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1224,7 +1224,6 @@ argMax argMin argmax argmin -arrayAUC arrayAll arrayAvg arrayCompact @@ -1275,6 +1274,7 @@ arrayReverse arrayReverseFill arrayReverseSort arrayReverseSplit +arrayRocAUC arrayRotateLeft arrayRotateRight arrayShiftLeft From 19788e571ef2948acaf4c2b407eaacff65ae3ae4 Mon Sep 17 00:00:00 2001 From: Emmanuel Dias Date: Mon, 18 Nov 2024 18:56:12 -0300 Subject: [PATCH 2/2] keep alias on aspell-dict --- ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt | 1 + utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt index 63cbc5d28b4..075ffa1d501 100644 --- a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt +++ b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt @@ -1160,6 +1160,7 @@ argMax argMin argmax argmin +arrayAUC arrayAll arrayAvg arrayCompact diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 061b6013b1f..61d468d1134 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1224,6 +1224,7 @@ argMax argMin argmax argmin +arrayAUC arrayAll arrayAvg arrayCompact