Compare commits

...

72 Commits

Author SHA1 Message Date
李扬
6b3dbfedcc
Merge b724f49deb into 2c29b3d98c 2024-09-19 19:15:59 +08:00
Mikhail f. Shiryaev
2c29b3d98c
Merge pull request #69750 from ClickHouse/light-env
Make env_helper importable from any module
2024-09-19 10:50:11 +00:00
Nikita Taranov
61524aabb6
Merge pull request #69744 from ClickHouse/fix_pr_protocol
Fix parallel replicas protocol after #68424
2024-09-19 09:21:45 +00:00
Kseniia Sumarokova
39c95f6c73
Merge pull request #62730 from takakawa/bugfix/publication_name_error
[bugfix] MaterializedPostgreSQL: cannot attach table when pg dbname contains "-", needs double quoting
2024-09-19 08:54:45 +00:00
Robert Schulze
dfdc25acc9
Merge pull request #69741 from rschu1ze/bump-pg
Bump libpq to v16.4
2024-09-19 08:48:32 +00:00
Nikolay Degterinsky
3efe136635
Merge pull request #69736 from evillique/fix-ttl
Fix METADATA_MISMATCH due to TTL with WHERE
2024-09-19 08:38:55 +00:00
Nikolay Degterinsky
efc0cec707
Merge pull request #69751 from evillique/keepermap-parameters
Save CREATE QUERY with KeeperMap engine with evaluated parameters
2024-09-19 08:38:10 +00:00
Robert Schulze
396abf7636
Merge pull request #69717 from gabrielmcg44/add-array-unscaled
Allow `arrayAUC` without scaling
2024-09-19 08:37:18 +00:00
Kseniia Sumarokova
f8fb4fb120
Merge pull request #69742 from ClickHouse/fix-s3-queue-ttl-sec
s3queue: fix tracked_files_ttl_sec
2024-09-19 08:35:08 +00:00
vdimir
1c6165f6ee
Merge pull request #69203 from Avogar/json-dynamic-hash
Fix uniq and GROUP BY for JSON/Dynamic types
2024-09-19 08:24:03 +00:00
Robert Schulze
71dd3d5cf6
Merge pull request #69746 from rschu1ze/sparse-pg
CI: Include PostgreSQL in sparse checkout script
2024-09-19 08:23:41 +00:00
Mikhail f. Shiryaev
cb503ec2ec
Make env_helper importable from any module 2024-09-19 10:22:05 +02:00
Yakov Olkhovskiy
c0c83236b6
Merge pull request #69570 from alexkats/fix-azure
Mask azure connection string sensitive info
2024-09-19 05:40:47 +00:00
Nikolay Degterinsky
14823f789b Save CREATE QUERY with KeeperMap engine with evaluated parameters 2024-09-19 00:56:59 +00:00
Robert Schulze
bb6db8926e
Some fixups 2024-09-18 20:48:36 +00:00
Nikolay Degterinsky
19353a74db Merge remote-tracking branch 'upstream/master' into fix-ttl 2024-09-18 20:01:49 +00:00
Kruglov Pavel
228ac44a92
Fix asan issue 2024-09-18 21:27:38 +02:00
Robert Schulze
d2de15871c
Include postgres in sparse checkout script 2024-09-18 19:15:13 +00:00
robot-clickhouse
0fdd04254d Automatic style fix 2024-09-18 18:36:34 +00:00
Alex Katsman
b88cd79959 Mask azure connection string sensitive info 2024-09-18 18:32:22 +00:00
Nikita Taranov
818aac02c6 fix 2024-09-18 19:29:00 +01:00
Gabriel Mendes
7f0b7a9158
add tests to cover all possible flows 2024-09-18 15:17:54 -03:00
Gabriel Mendes
006d14445e
remove stdout files 2024-09-18 14:59:58 -03:00
Kseniia Sumarokova
b5de8e622d
Merge branch 'master' into bugfix/publication_name_error 2024-09-18 19:58:20 +02:00
kssenii
373927d6a5 Fix tracked_files_ttl_sec 2024-09-18 19:25:18 +02:00
Gabriel Mendes
02fcd90a66
address some pr comments 2024-09-18 14:13:22 -03:00
Robert Schulze
e818b65dc0
Bump libpq to v16.4 2024-09-18 17:11:40 +00:00
Nikolay Degterinsky
3315e87e1a Fix METADATA_MISMATCH due to TTL with WHERE 2024-09-18 15:15:52 +00:00
Gabriel Mendes
e0fc95c894
remove trailing spaces 2024-09-18 11:12:30 -03:00
Gabriel Mendes
b940171252
fix tests 2024-09-18 11:04:46 -03:00
Gabriel Mendes
e3b207d217
fmt 2024-09-18 09:03:29 -03:00
Gabriel Mendes
4be8a0feba
fmt 2024-09-18 08:58:14 -03:00
Gabriel Mendes
4c72fb0e32
remove unnecessary file 2024-09-18 08:56:13 -03:00
Gabriel Mendes
8f350a7ec9
remove separate function 2024-09-18 08:52:58 -03:00
Gabriel Mendes
2218ebebbf
initial commit, tested function 2024-09-18 05:15:57 -03:00
Kseniia Sumarokova
d11abd634a
Update max_replication_slots 2024-09-17 16:37:08 +02:00
kssenii
88b22094c8 Update test 2024-09-17 14:11:17 +02:00
kssenii
3cb8160240 Merge remote-tracking branch 'origin' into bugfix/publication_name_error 2024-09-17 14:05:06 +02:00
taiyang-li
b724f49deb fix failed uts 2024-09-16 11:24:15 +08:00
taiyang-li
9f637cb767 fix failed build 2024-09-13 11:22:05 +08:00
taiyang-li
4cff924fbf Merge branch 'master' into short_circut_func 2024-09-13 11:18:58 +08:00
taiyang-li
303d16c759 Merge branch 'short_circut_func' of https://github.com/bigo-sg/ClickHouse into short_circut_func 2024-09-12 17:34:01 +08:00
taiyang-li
de9fa4d27b fix style 2024-09-12 17:33:33 +08:00
李扬
11dda21a19
Merge branch 'master' into short_circut_func 2024-09-12 17:01:41 +08:00
taiyang-li
386f7045a6 fix building 2024-09-12 11:19:32 +08:00
taiyang-li
b50b93ee86 finish dev 2024-09-12 10:48:43 +08:00
taiyang-li
eecc09394e Merge branch 'master' into short_circut_func 2024-09-12 10:40:11 +08:00
avogar
4ece895b41 Merge branch 'master' of github.com:ClickHouse/ClickHouse into json-dynamic-hash 2024-09-09 10:54:18 +00:00
avogar
f495a4f431 Merge branch 'master' of github.com:ClickHouse/ClickHouse into json-dynamic-hash 2024-09-04 11:21:21 +00:00
avogar
a44b3d0268 Fix sorted typed paths 2024-09-03 17:31:07 +00:00
avogar
f1377b0b4a Fix uniq and GROUP BY for JSON/Dynamic types 2024-09-03 14:10:28 +00:00
taiyang-li
647e956c95 Merge remote-tracking branch 'origin/master' into short_circut_func 2024-08-09 11:10:40 +08:00
taiyang-li
80633baab2 Merge remote-tracking branch 'origin/master' into short_circut_func 2024-08-07 12:06:13 +08:00
taiyang-li
18622685fa Merge branch 'short_circut_func' of https://github.com/bigo-sg/ClickHouse into short_circut_func 2024-08-07 11:51:56 +08:00
taiyang-li
f1dd46e940 Merge branch 'master' into short_circut_func 2024-08-07 11:51:36 +08:00
李扬
1d78d8b3ee
Merge branch 'master' into short_circut_func 2024-07-15 15:47:09 +08:00
taiyang-li
b3483d78b6 fix conflicts 2024-07-05 11:00:18 +08:00
taiyang-li
83d998edf3 fix building 2024-06-25 10:25:02 +08:00
taiyang-li
fc2f7001ca merge master and solve conflicts 2024-06-24 11:08:38 +08:00
taiyang-li
3cef295711 merge master and solve conflicts 2024-06-24 11:04:43 +08:00
kssenii
5ffa2c9ca1 Add a test 2024-04-25 13:37:24 +02:00
taiyang-li
b1e7853e38 change as request 2024-04-25 14:24:22 +08:00
gao chuan
5a6fe87b7c [bugfix] alter postgresql subscription error 2024-04-17 23:43:36 +08:00
taiyang-li
2e4d31270e fix conflicts 2024-04-12 15:22:28 +08:00
taiyang-li
89004bd04d change as requested 2024-04-07 11:06:34 +08:00
taiyang-li
edbfb2dccc Merge remote-tracking branch 'origin/master' into short_circut_func 2024-04-07 11:03:16 +08:00
taiyang-li
eb208333e0 short_circut_func 2024-04-02 14:51:41 +08:00
taiyang-li
e498e766b1 Merge remote-tracking branch 'origin/master' into short_circut_func 2024-04-02 12:20:21 +08:00
taiyang-li
ef1e64a530 add settings allow_short_circuit_default_implementations_for_null 2024-02-28 15:16:35 +08:00
taiyang-li
2ffbb7cf23 fix wrong uts 2024-02-28 13:07:42 +08:00
taiyang-li
9715292cf6 Merge branch 'master' into short_circut_func 2024-02-27 20:00:49 +08:00
taiyang-li
2febfb4173 short circuit for defaultImplementationForNulls 2024-02-19 18:39:26 +08:00
56 changed files with 1675 additions and 566 deletions

contrib/postgres vendored

@@ -1 +1 @@
Subproject commit cfd77000af28469fcb650485bad65a35e7649e41
Subproject commit 2e51f82e27f4be389cc239d1b8784bbf2f01d33a

@@ -38,12 +38,14 @@ set(SRCS
"${POSTGRES_SOURCE_DIR}/src/common/fe_memutils.c"
"${POSTGRES_SOURCE_DIR}/src/common/string.c"
"${POSTGRES_SOURCE_DIR}/src/common/pg_get_line.c"
"${POSTGRES_SOURCE_DIR}/src/common/pg_prng.c"
"${POSTGRES_SOURCE_DIR}/src/common/stringinfo.c"
"${POSTGRES_SOURCE_DIR}/src/common/psprintf.c"
"${POSTGRES_SOURCE_DIR}/src/common/encnames.c"
"${POSTGRES_SOURCE_DIR}/src/common/logging.c"
"${POSTGRES_SOURCE_DIR}/src/port/snprintf.c"
"${POSTGRES_SOURCE_DIR}/src/port/strlcat.c"
"${POSTGRES_SOURCE_DIR}/src/port/strlcpy.c"
"${POSTGRES_SOURCE_DIR}/src/port/strerror.c"
"${POSTGRES_SOURCE_DIR}/src/port/inet_net_ntop.c"
@@ -52,6 +54,7 @@ set(SRCS
"${POSTGRES_SOURCE_DIR}/src/port/noblock.c"
"${POSTGRES_SOURCE_DIR}/src/port/pg_strong_random.c"
"${POSTGRES_SOURCE_DIR}/src/port/pgstrcasecmp.c"
"${POSTGRES_SOURCE_DIR}/src/port/pg_bitutils.c"
"${POSTGRES_SOURCE_DIR}/src/port/thread.c"
"${POSTGRES_SOURCE_DIR}/src/port/path.c"
)

@@ -0,0 +1,471 @@
/*-------------------------------------------------------------------------
*
* nodetags.h
* Generated node infrastructure code
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* NOTES
* ******************************
* *** DO NOT EDIT THIS FILE! ***
* ******************************
*
* It has been GENERATED by src/backend/nodes/gen_node_support.pl
*
*-------------------------------------------------------------------------
*/
T_List = 1,
T_Alias = 2,
T_RangeVar = 3,
T_TableFunc = 4,
T_IntoClause = 5,
T_Var = 6,
T_Const = 7,
T_Param = 8,
T_Aggref = 9,
T_GroupingFunc = 10,
T_WindowFunc = 11,
T_SubscriptingRef = 12,
T_FuncExpr = 13,
T_NamedArgExpr = 14,
T_OpExpr = 15,
T_DistinctExpr = 16,
T_NullIfExpr = 17,
T_ScalarArrayOpExpr = 18,
T_BoolExpr = 19,
T_SubLink = 20,
T_SubPlan = 21,
T_AlternativeSubPlan = 22,
T_FieldSelect = 23,
T_FieldStore = 24,
T_RelabelType = 25,
T_CoerceViaIO = 26,
T_ArrayCoerceExpr = 27,
T_ConvertRowtypeExpr = 28,
T_CollateExpr = 29,
T_CaseExpr = 30,
T_CaseWhen = 31,
T_CaseTestExpr = 32,
T_ArrayExpr = 33,
T_RowExpr = 34,
T_RowCompareExpr = 35,
T_CoalesceExpr = 36,
T_MinMaxExpr = 37,
T_SQLValueFunction = 38,
T_XmlExpr = 39,
T_JsonFormat = 40,
T_JsonReturning = 41,
T_JsonValueExpr = 42,
T_JsonConstructorExpr = 43,
T_JsonIsPredicate = 44,
T_NullTest = 45,
T_BooleanTest = 46,
T_CoerceToDomain = 47,
T_CoerceToDomainValue = 48,
T_SetToDefault = 49,
T_CurrentOfExpr = 50,
T_NextValueExpr = 51,
T_InferenceElem = 52,
T_TargetEntry = 53,
T_RangeTblRef = 54,
T_JoinExpr = 55,
T_FromExpr = 56,
T_OnConflictExpr = 57,
T_Query = 58,
T_TypeName = 59,
T_ColumnRef = 60,
T_ParamRef = 61,
T_A_Expr = 62,
T_A_Const = 63,
T_TypeCast = 64,
T_CollateClause = 65,
T_RoleSpec = 66,
T_FuncCall = 67,
T_A_Star = 68,
T_A_Indices = 69,
T_A_Indirection = 70,
T_A_ArrayExpr = 71,
T_ResTarget = 72,
T_MultiAssignRef = 73,
T_SortBy = 74,
T_WindowDef = 75,
T_RangeSubselect = 76,
T_RangeFunction = 77,
T_RangeTableFunc = 78,
T_RangeTableFuncCol = 79,
T_RangeTableSample = 80,
T_ColumnDef = 81,
T_TableLikeClause = 82,
T_IndexElem = 83,
T_DefElem = 84,
T_LockingClause = 85,
T_XmlSerialize = 86,
T_PartitionElem = 87,
T_PartitionSpec = 88,
T_PartitionBoundSpec = 89,
T_PartitionRangeDatum = 90,
T_PartitionCmd = 91,
T_RangeTblEntry = 92,
T_RTEPermissionInfo = 93,
T_RangeTblFunction = 94,
T_TableSampleClause = 95,
T_WithCheckOption = 96,
T_SortGroupClause = 97,
T_GroupingSet = 98,
T_WindowClause = 99,
T_RowMarkClause = 100,
T_WithClause = 101,
T_InferClause = 102,
T_OnConflictClause = 103,
T_CTESearchClause = 104,
T_CTECycleClause = 105,
T_CommonTableExpr = 106,
T_MergeWhenClause = 107,
T_MergeAction = 108,
T_TriggerTransition = 109,
T_JsonOutput = 110,
T_JsonKeyValue = 111,
T_JsonObjectConstructor = 112,
T_JsonArrayConstructor = 113,
T_JsonArrayQueryConstructor = 114,
T_JsonAggConstructor = 115,
T_JsonObjectAgg = 116,
T_JsonArrayAgg = 117,
T_RawStmt = 118,
T_InsertStmt = 119,
T_DeleteStmt = 120,
T_UpdateStmt = 121,
T_MergeStmt = 122,
T_SelectStmt = 123,
T_SetOperationStmt = 124,
T_ReturnStmt = 125,
T_PLAssignStmt = 126,
T_CreateSchemaStmt = 127,
T_AlterTableStmt = 128,
T_ReplicaIdentityStmt = 129,
T_AlterTableCmd = 130,
T_AlterCollationStmt = 131,
T_AlterDomainStmt = 132,
T_GrantStmt = 133,
T_ObjectWithArgs = 134,
T_AccessPriv = 135,
T_GrantRoleStmt = 136,
T_AlterDefaultPrivilegesStmt = 137,
T_CopyStmt = 138,
T_VariableSetStmt = 139,
T_VariableShowStmt = 140,
T_CreateStmt = 141,
T_Constraint = 142,
T_CreateTableSpaceStmt = 143,
T_DropTableSpaceStmt = 144,
T_AlterTableSpaceOptionsStmt = 145,
T_AlterTableMoveAllStmt = 146,
T_CreateExtensionStmt = 147,
T_AlterExtensionStmt = 148,
T_AlterExtensionContentsStmt = 149,
T_CreateFdwStmt = 150,
T_AlterFdwStmt = 151,
T_CreateForeignServerStmt = 152,
T_AlterForeignServerStmt = 153,
T_CreateForeignTableStmt = 154,
T_CreateUserMappingStmt = 155,
T_AlterUserMappingStmt = 156,
T_DropUserMappingStmt = 157,
T_ImportForeignSchemaStmt = 158,
T_CreatePolicyStmt = 159,
T_AlterPolicyStmt = 160,
T_CreateAmStmt = 161,
T_CreateTrigStmt = 162,
T_CreateEventTrigStmt = 163,
T_AlterEventTrigStmt = 164,
T_CreatePLangStmt = 165,
T_CreateRoleStmt = 166,
T_AlterRoleStmt = 167,
T_AlterRoleSetStmt = 168,
T_DropRoleStmt = 169,
T_CreateSeqStmt = 170,
T_AlterSeqStmt = 171,
T_DefineStmt = 172,
T_CreateDomainStmt = 173,
T_CreateOpClassStmt = 174,
T_CreateOpClassItem = 175,
T_CreateOpFamilyStmt = 176,
T_AlterOpFamilyStmt = 177,
T_DropStmt = 178,
T_TruncateStmt = 179,
T_CommentStmt = 180,
T_SecLabelStmt = 181,
T_DeclareCursorStmt = 182,
T_ClosePortalStmt = 183,
T_FetchStmt = 184,
T_IndexStmt = 185,
T_CreateStatsStmt = 186,
T_StatsElem = 187,
T_AlterStatsStmt = 188,
T_CreateFunctionStmt = 189,
T_FunctionParameter = 190,
T_AlterFunctionStmt = 191,
T_DoStmt = 192,
T_InlineCodeBlock = 193,
T_CallStmt = 194,
T_CallContext = 195,
T_RenameStmt = 196,
T_AlterObjectDependsStmt = 197,
T_AlterObjectSchemaStmt = 198,
T_AlterOwnerStmt = 199,
T_AlterOperatorStmt = 200,
T_AlterTypeStmt = 201,
T_RuleStmt = 202,
T_NotifyStmt = 203,
T_ListenStmt = 204,
T_UnlistenStmt = 205,
T_TransactionStmt = 206,
T_CompositeTypeStmt = 207,
T_CreateEnumStmt = 208,
T_CreateRangeStmt = 209,
T_AlterEnumStmt = 210,
T_ViewStmt = 211,
T_LoadStmt = 212,
T_CreatedbStmt = 213,
T_AlterDatabaseStmt = 214,
T_AlterDatabaseRefreshCollStmt = 215,
T_AlterDatabaseSetStmt = 216,
T_DropdbStmt = 217,
T_AlterSystemStmt = 218,
T_ClusterStmt = 219,
T_VacuumStmt = 220,
T_VacuumRelation = 221,
T_ExplainStmt = 222,
T_CreateTableAsStmt = 223,
T_RefreshMatViewStmt = 224,
T_CheckPointStmt = 225,
T_DiscardStmt = 226,
T_LockStmt = 227,
T_ConstraintsSetStmt = 228,
T_ReindexStmt = 229,
T_CreateConversionStmt = 230,
T_CreateCastStmt = 231,
T_CreateTransformStmt = 232,
T_PrepareStmt = 233,
T_ExecuteStmt = 234,
T_DeallocateStmt = 235,
T_DropOwnedStmt = 236,
T_ReassignOwnedStmt = 237,
T_AlterTSDictionaryStmt = 238,
T_AlterTSConfigurationStmt = 239,
T_PublicationTable = 240,
T_PublicationObjSpec = 241,
T_CreatePublicationStmt = 242,
T_AlterPublicationStmt = 243,
T_CreateSubscriptionStmt = 244,
T_AlterSubscriptionStmt = 245,
T_DropSubscriptionStmt = 246,
T_PlannerGlobal = 247,
T_PlannerInfo = 248,
T_RelOptInfo = 249,
T_IndexOptInfo = 250,
T_ForeignKeyOptInfo = 251,
T_StatisticExtInfo = 252,
T_JoinDomain = 253,
T_EquivalenceClass = 254,
T_EquivalenceMember = 255,
T_PathKey = 256,
T_PathTarget = 257,
T_ParamPathInfo = 258,
T_Path = 259,
T_IndexPath = 260,
T_IndexClause = 261,
T_BitmapHeapPath = 262,
T_BitmapAndPath = 263,
T_BitmapOrPath = 264,
T_TidPath = 265,
T_TidRangePath = 266,
T_SubqueryScanPath = 267,
T_ForeignPath = 268,
T_CustomPath = 269,
T_AppendPath = 270,
T_MergeAppendPath = 271,
T_GroupResultPath = 272,
T_MaterialPath = 273,
T_MemoizePath = 274,
T_UniquePath = 275,
T_GatherPath = 276,
T_GatherMergePath = 277,
T_NestPath = 278,
T_MergePath = 279,
T_HashPath = 280,
T_ProjectionPath = 281,
T_ProjectSetPath = 282,
T_SortPath = 283,
T_IncrementalSortPath = 284,
T_GroupPath = 285,
T_UpperUniquePath = 286,
T_AggPath = 287,
T_GroupingSetData = 288,
T_RollupData = 289,
T_GroupingSetsPath = 290,
T_MinMaxAggPath = 291,
T_WindowAggPath = 292,
T_SetOpPath = 293,
T_RecursiveUnionPath = 294,
T_LockRowsPath = 295,
T_ModifyTablePath = 296,
T_LimitPath = 297,
T_RestrictInfo = 298,
T_PlaceHolderVar = 299,
T_SpecialJoinInfo = 300,
T_OuterJoinClauseInfo = 301,
T_AppendRelInfo = 302,
T_RowIdentityVarInfo = 303,
T_PlaceHolderInfo = 304,
T_MinMaxAggInfo = 305,
T_PlannerParamItem = 306,
T_AggInfo = 307,
T_AggTransInfo = 308,
T_PlannedStmt = 309,
T_Result = 310,
T_ProjectSet = 311,
T_ModifyTable = 312,
T_Append = 313,
T_MergeAppend = 314,
T_RecursiveUnion = 315,
T_BitmapAnd = 316,
T_BitmapOr = 317,
T_SeqScan = 318,
T_SampleScan = 319,
T_IndexScan = 320,
T_IndexOnlyScan = 321,
T_BitmapIndexScan = 322,
T_BitmapHeapScan = 323,
T_TidScan = 324,
T_TidRangeScan = 325,
T_SubqueryScan = 326,
T_FunctionScan = 327,
T_ValuesScan = 328,
T_TableFuncScan = 329,
T_CteScan = 330,
T_NamedTuplestoreScan = 331,
T_WorkTableScan = 332,
T_ForeignScan = 333,
T_CustomScan = 334,
T_NestLoop = 335,
T_NestLoopParam = 336,
T_MergeJoin = 337,
T_HashJoin = 338,
T_Material = 339,
T_Memoize = 340,
T_Sort = 341,
T_IncrementalSort = 342,
T_Group = 343,
T_Agg = 344,
T_WindowAgg = 345,
T_Unique = 346,
T_Gather = 347,
T_GatherMerge = 348,
T_Hash = 349,
T_SetOp = 350,
T_LockRows = 351,
T_Limit = 352,
T_PlanRowMark = 353,
T_PartitionPruneInfo = 354,
T_PartitionedRelPruneInfo = 355,
T_PartitionPruneStepOp = 356,
T_PartitionPruneStepCombine = 357,
T_PlanInvalItem = 358,
T_ExprState = 359,
T_IndexInfo = 360,
T_ExprContext = 361,
T_ReturnSetInfo = 362,
T_ProjectionInfo = 363,
T_JunkFilter = 364,
T_OnConflictSetState = 365,
T_MergeActionState = 366,
T_ResultRelInfo = 367,
T_EState = 368,
T_WindowFuncExprState = 369,
T_SetExprState = 370,
T_SubPlanState = 371,
T_DomainConstraintState = 372,
T_ResultState = 373,
T_ProjectSetState = 374,
T_ModifyTableState = 375,
T_AppendState = 376,
T_MergeAppendState = 377,
T_RecursiveUnionState = 378,
T_BitmapAndState = 379,
T_BitmapOrState = 380,
T_ScanState = 381,
T_SeqScanState = 382,
T_SampleScanState = 383,
T_IndexScanState = 384,
T_IndexOnlyScanState = 385,
T_BitmapIndexScanState = 386,
T_BitmapHeapScanState = 387,
T_TidScanState = 388,
T_TidRangeScanState = 389,
T_SubqueryScanState = 390,
T_FunctionScanState = 391,
T_ValuesScanState = 392,
T_TableFuncScanState = 393,
T_CteScanState = 394,
T_NamedTuplestoreScanState = 395,
T_WorkTableScanState = 396,
T_ForeignScanState = 397,
T_CustomScanState = 398,
T_JoinState = 399,
T_NestLoopState = 400,
T_MergeJoinState = 401,
T_HashJoinState = 402,
T_MaterialState = 403,
T_MemoizeState = 404,
T_SortState = 405,
T_IncrementalSortState = 406,
T_GroupState = 407,
T_AggState = 408,
T_WindowAggState = 409,
T_UniqueState = 410,
T_GatherState = 411,
T_GatherMergeState = 412,
T_HashState = 413,
T_SetOpState = 414,
T_LockRowsState = 415,
T_LimitState = 416,
T_IndexAmRoutine = 417,
T_TableAmRoutine = 418,
T_TsmRoutine = 419,
T_EventTriggerData = 420,
T_TriggerData = 421,
T_TupleTableSlot = 422,
T_FdwRoutine = 423,
T_Bitmapset = 424,
T_ExtensibleNode = 425,
T_ErrorSaveContext = 426,
T_IdentifySystemCmd = 427,
T_BaseBackupCmd = 428,
T_CreateReplicationSlotCmd = 429,
T_DropReplicationSlotCmd = 430,
T_StartReplicationCmd = 431,
T_ReadReplicationSlotCmd = 432,
T_TimeLineHistoryCmd = 433,
T_SupportRequestSimplify = 434,
T_SupportRequestSelectivity = 435,
T_SupportRequestCost = 436,
T_SupportRequestRows = 437,
T_SupportRequestIndexCondition = 438,
T_SupportRequestWFuncMonotonic = 439,
T_SupportRequestOptimizeWindowClause = 440,
T_Integer = 441,
T_Float = 442,
T_Boolean = 443,
T_String = 444,
T_BitString = 445,
T_ForeignKeyCacheInfo = 446,
T_IntList = 447,
T_OidList = 448,
T_XidList = 449,
T_AllocSetContext = 450,
T_GenerationContext = 451,
T_SlabContext = 452,
T_TIDBitmap = 453,
T_WindowObjectData = 454,

@@ -66,13 +66,6 @@
reference if 'false' */
#define FLOAT8PASSBYVAL false
/* Define to 1 if gettimeofday() takes only 1 argument. */
/* #undef GETTIMEOFDAY_1ARG */
#ifdef GETTIMEOFDAY_1ARG
# define gettimeofday(a,b) gettimeofday(a)
#endif
/* Define to 1 if you have the `append_history' function. */
/* #undef HAVE_APPEND_HISTORY */
@@ -113,9 +106,6 @@
don't. */
#define HAVE_DECL_SNPRINTF 1
/* Define to 1 if you have the declaration of `sigwait', and to 0 if you don't. */
#define HAVE_DECL_SIGWAIT 1
/* Define to 1 if you have the declaration of `strlcat', and to 0 if you
don't. */
#if OS_DARWIN
@@ -139,21 +129,12 @@
/* Define to 1 if you have the <dld.h> header file. */
/* #undef HAVE_DLD_H */
/* Define to 1 if you have the `dlopen' function. */
#define HAVE_DLOPEN 1
/* Define to 1 if you have the <editline/history.h> header file. */
/* #undef HAVE_EDITLINE_HISTORY_H */
/* Define to 1 if you have the <editline/readline.h> header file. */
#define HAVE_EDITLINE_READLINE_H 1
/* Define to 1 if you have the `fdatasync' function. */
#define HAVE_FDATASYNC 1
/* Define to 1 if you have the `fls' function. */
/* #undef HAVE_FLS */
/* Define to 1 if you have the `fpclass' function. */
/* #undef HAVE_FPCLASS */
@@ -169,12 +150,6 @@
/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */
#define HAVE_FSEEKO 1
/* Define to 1 if your compiler understands __func__. */
#define HAVE_FUNCNAME__FUNC 1
/* Define to 1 if your compiler understands __FUNCTION__. */
/* #undef HAVE_FUNCNAME__FUNCTION */
/* Define to 1 if you have __atomic_compare_exchange_n(int *, int *, int). */
/* #undef HAVE_GCC__ATOMIC_INT32_CAS */
@@ -194,12 +169,6 @@
/* Define to 1 if you have __sync_compare_and_swap(int64 *, int64, int64). */
/* #undef HAVE_GCC__SYNC_INT64_CAS */
/* Define to 1 if you have the `getaddrinfo' function. */
#define HAVE_GETADDRINFO 1
/* Define to 1 if you have the `gethostbyname_r' function. */
#define HAVE_GETHOSTBYNAME_R 1
/* Define to 1 if you have the `getifaddrs' function. */
#define HAVE_GETIFADDRS 1
@@ -218,17 +187,11 @@
/* Define to 1 if you have the `getpeerucred' function. */
/* #undef HAVE_GETPEERUCRED */
/* Define to 1 if you have the `getpwuid_r' function. */
#define HAVE_GETPWUID_R 1
/* Define to 1 if you have the <gssapi_ext.h> header file. */
/* #undef HAVE_GSSAPI_EXT_H */
/* Define to 1 if you have the `getrlimit' function. */
#define HAVE_GETRLIMIT 1
/* Define to 1 if you have the `getrusage' function. */
#define HAVE_GETRUSAGE 1
/* Define to 1 if you have the `gettimeofday' function. */
/* #undef HAVE_GETTIMEOFDAY */
/* Define to 1 if you have the <gssapi/gssapi_ext.h> header file. */
/* #undef HAVE_GSSAPI_GSSAPI_EXT_H */
/* Define to 1 if you have the <gssapi/gssapi.h> header file. */
//#define HAVE_GSSAPI_GSSAPI_H 0
@@ -275,18 +238,12 @@
/* Define to 1 if you have the global variable 'int timezone'. */
#define HAVE_INT_TIMEZONE 1
/* Define to 1 if you have support for IPv6. */
#define HAVE_IPV6 1
/* Define to 1 if you have isinf(). */
#define HAVE_ISINF 1
/* Define to 1 if you have the <langinfo.h> header file. */
#define HAVE_LANGINFO_H 1
/* Define to 1 if you have the <ldap.h> header file. */
//#define HAVE_LDAP_H 0
/* Define to 1 if you have the `crypto' library (-lcrypto). */
#define HAVE_LIBCRYPTO 1
@@ -351,18 +308,9 @@
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if the system has the type `MINIDUMP_TYPE'. */
/* #undef HAVE_MINIDUMP_TYPE */
/* Define to 1 if you have the `mkdtemp' function. */
#define HAVE_MKDTEMP 1
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H 1
/* Define to 1 if you have the <netinet/tcp.h> header file. */
#define HAVE_NETINET_TCP_H 1
/* Define to 1 if you have the <net/if.h> header file. */
#define HAVE_NET_IF_H 1
@@ -372,15 +320,6 @@
/* Define to 1 if you have the <pam/pam_appl.h> header file. */
/* #undef HAVE_PAM_PAM_APPL_H */
/* Define to 1 if you have the `poll' function. */
#define HAVE_POLL 1
/* Define to 1 if you have the <poll.h> header file. */
#define HAVE_POLL_H 1
/* Define to 1 if you have a POSIX-conforming sigwait declaration. */
/* #undef HAVE_POSIX_DECL_SIGWAIT */
/* Define to 1 if you have the `posix_fadvise' function. */
#define HAVE_POSIX_FADVISE 1
@@ -399,12 +338,6 @@
/* Define to 1 if the assembler supports PPC's LWARX mutex hint bit. */
/* #undef HAVE_PPC_LWARX_MUTEX_HINT */
/* Define to 1 if you have the `pstat' function. */
/* #undef HAVE_PSTAT */
/* Define to 1 if the PS_STRINGS thing exists. */
/* #undef HAVE_PS_STRINGS */
/* Define to 1 if you have the `pthread_is_threaded_np' function. */
/* #undef HAVE_PTHREAD_IS_THREADED_NP */
@@ -420,9 +353,6 @@
/* Define to 1 if you have the <readline/readline.h> header file. */
/* #undef HAVE_READLINE_READLINE_H */
/* Define to 1 if you have the `readlink' function. */
#define HAVE_READLINK 1
/* Define to 1 if you have the `rint' function. */
#define HAVE_RINT 1
@@ -444,12 +374,6 @@
/* Define to 1 if you have the `setproctitle' function. */
/* #undef HAVE_SETPROCTITLE */
/* Define to 1 if you have the `setsid' function. */
#define HAVE_SETSID 1
/* Define to 1 if you have the `shm_open' function. */
#define HAVE_SHM_OPEN 1
/* Define to 1 if the system has the type `socklen_t'. */
#define HAVE_SOCKLEN_T 1
@@ -468,6 +392,9 @@
/* Define to 1 if you have spinlocks. */
#define HAVE_SPINLOCKS 1
/* Define to 1 if you have the `SSL_CTX_set_cert_cb' function. */
#define HAVE_SSL_CTX_SET_CERT_CB 1
/* Define to 1 if you have the `SSL_CTX_set_num_tickets' function. */
/* #define HAVE_SSL_CTX_SET_NUM_TICKETS */
@@ -498,55 +425,19 @@
/* Define to 1 if you have the `strlcpy' function. */
/* #undef HAVE_STRLCPY */
/* Define to 1 if you have the `strtoll' function. */
#define HAVE_STRTOLL 1
#if (!OS_DARWIN)
#define HAVE_STRCHRNUL 1
#endif
/* Define to 1 if you have the `strtoq' function. */
/* #undef HAVE_STRTOQ */
/* Define to 1 if you have the `strtoull' function. */
#define HAVE_STRTOULL 1
/* Define to 1 if you have the `strtouq' function. */
/* #undef HAVE_STRTOUQ */
/* Define to 1 if the system has the type `struct addrinfo'. */
#define HAVE_STRUCT_ADDRINFO 1
/* Define to 1 if the system has the type `struct cmsgcred'. */
/* #undef HAVE_STRUCT_CMSGCRED */
/* Define to 1 if the system has the type `struct option'. */
#define HAVE_STRUCT_OPTION 1
/* Define to 1 if `sa_len' is a member of `struct sockaddr'. */
/* #undef HAVE_STRUCT_SOCKADDR_SA_LEN */
/* Define to 1 if the system has the type `struct sockaddr_storage'. */
#define HAVE_STRUCT_SOCKADDR_STORAGE 1
/* Define to 1 if `ss_family' is a member of `struct sockaddr_storage'. */
#define HAVE_STRUCT_SOCKADDR_STORAGE_SS_FAMILY 1
/* Define to 1 if `ss_len' is a member of `struct sockaddr_storage'. */
/* #undef HAVE_STRUCT_SOCKADDR_STORAGE_SS_LEN */
/* Define to 1 if `__ss_family' is a member of `struct sockaddr_storage'. */
/* #undef HAVE_STRUCT_SOCKADDR_STORAGE___SS_FAMILY */
/* Define to 1 if `__ss_len' is a member of `struct sockaddr_storage'. */
/* #undef HAVE_STRUCT_SOCKADDR_STORAGE___SS_LEN */
/* Define to 1 if `tm_zone' is a member of `struct tm'. */
#define HAVE_STRUCT_TM_TM_ZONE 1
/* Define to 1 if you have the `symlink' function. */
#define HAVE_SYMLINK 1
/* Define to 1 if you have the `sync_file_range' function. */
/* #undef HAVE_SYNC_FILE_RANGE */
@@ -556,45 +447,21 @@
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H 1
/* Define to 1 if you have the <sys/ipc.h> header file. */
#define HAVE_SYS_IPC_H 1
/* Define to 1 if you have the <sys/personality.h> header file. */
/* #undef HAVE_SYS_PERSONALITY_H */
/* Define to 1 if you have the <sys/poll.h> header file. */
#define HAVE_SYS_POLL_H 1
/* Define to 1 if you have the <sys/pstat.h> header file. */
/* #undef HAVE_SYS_PSTAT_H */
/* Define to 1 if you have the <sys/resource.h> header file. */
#define HAVE_SYS_RESOURCE_H 1
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H 1
/* Define to 1 if you have the <sys/sem.h> header file. */
#define HAVE_SYS_SEM_H 1
/* Define to 1 if you have the <sys/shm.h> header file. */
#define HAVE_SYS_SHM_H 1
/* Define to 1 if you have the <sys/signalfd.h> header file. */
/* #undef HAVE_SYS_SIGNALFD_H */
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H 1
/* Define to 1 if you have the <sys/sockio.h> header file. */
/* #undef HAVE_SYS_SOCKIO_H */
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/tas.h> header file. */
/* #undef HAVE_SYS_TAS_H */
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H 1
@@ -607,7 +474,6 @@
#endif
/* Define to 1 if you have the <sys/un.h> header file. */
#define HAVE_SYS_UN_H 1
#define _GNU_SOURCE 1 /* Needed for glibc struct ucred */
/* Define to 1 if you have the <termios.h> header file. */
@@ -644,9 +510,6 @@
/* Define to 1 if you have unix sockets. */
#define HAVE_UNIX_SOCKETS 1
/* Define to 1 if you have the `unsetenv' function. */
#define HAVE_UNSETENV 1
/* Define to 1 if the system has the type `unsigned long long int'. */
#define HAVE_UNSIGNED_LONG_LONG_INT 1
@@ -674,6 +537,9 @@
/* Define to 1 if you have the <uuid/uuid.h> header file. */
/* #undef HAVE_UUID_UUID_H */
/* Define to 1 if your compiler knows the visibility("hidden") attribute. */
/* #undef HAVE_VISIBILITY_ATTRIBUTE */
/* Define to 1 if you have the `vsnprintf' function. */
#define HAVE_VSNPRINTF 1
@@ -686,12 +552,6 @@
/* Define to 1 if you have the `wcstombs_l' function. */
/* #undef HAVE_WCSTOMBS_L */
/* Define to 1 if you have the <wctype.h> header file. */
#define HAVE_WCTYPE_H 1
/* Define to 1 if you have the <winldap.h> header file. */
/* #undef HAVE_WINLDAP_H */
/* Define to 1 if your compiler understands __builtin_bswap32. */
/* #undef HAVE__BUILTIN_BSWAP32 */

@@ -14,5 +14,6 @@ git config submodule."contrib/icu".update '!../sparse-checkout/update-icu.sh'
git config submodule."contrib/boost".update '!../sparse-checkout/update-boost.sh'
git config submodule."contrib/aws-s2n-tls".update '!../sparse-checkout/update-aws-s2n-tls.sh'
git config submodule."contrib/protobuf".update '!../sparse-checkout/update-protobuf.sh'
git config submodule."contrib/postgres".update '!../sparse-checkout/update-postgres.sh'
git config submodule."contrib/libxml2".update '!../sparse-checkout/update-libxml2.sh'
git config submodule."contrib/brotli".update '!../sparse-checkout/update-brotli.sh'

@@ -0,0 +1,16 @@
#!/bin/sh
echo "Using sparse checkout for postgres"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '!/*' > $FILES_TO_CHECKOUT
echo '/src/interfaces/libpq/*' >> $FILES_TO_CHECKOUT
echo '!/src/interfaces/libpq/*/*' >> $FILES_TO_CHECKOUT
echo '/src/common/*' >> $FILES_TO_CHECKOUT
echo '!/src/port/*/*' >> $FILES_TO_CHECKOUT
echo '/src/port/*' >> $FILES_TO_CHECKOUT
echo '/src/include/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD

@@ -2088,13 +2088,14 @@ Calculate AUC (Area Under the Curve, which is a concept in machine learning, see
**Syntax**
``` sql
arrayAUC(arr_scores, arr_labels)
arrayAUC(arr_scores, arr_labels[, scale])
```
**Arguments**
- `arr_scores` — scores the prediction model gives.
- `arr_labels` — labels of samples, usually 1 for a positive sample and 0 for a negative sample.
- `scale` — Optional. Whether to return the normalized area. Default value: true. [Bool] (See the example below.)
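A hedged illustration of the optional argument described above (the first result matches the existing docs example for this function; the unscaled value assumes `scale = false` simply skips normalization, i.e. returns the normalized AUC times the number of positive-negative label pairs):

``` sql
-- Normalized AUC (default, scale = true).
SELECT arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
-- 0.75

-- Unscaled area: with 2 positive and 2 negative labels, presumably 0.75 * 2 * 2 = 3.
SELECT arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1], false);
```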
**Returned value**

@@ -816,6 +816,22 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const
return;
}
/// If it's not null we update hash with the type name and the actual value.
/// If value in this row is in shared variant, deserialize type and value and
/// update hash with it.
if (discr == getSharedVariantDiscriminator())
{
auto value = getSharedVariant().getDataAt(variant_col.offsetAt(n));
ReadBufferFromMemory buf(value.data, value.size);
auto type = decodeDataType(buf);
hash.update(type->getName());
auto tmp_column = type->createColumn();
type->getDefaultSerialization()->deserializeBinary(*tmp_column, buf, getFormatSettings());
tmp_column->updateHashWithValue(0, hash);
return;
}
hash.update(variant_info.variant_names[discr]);
variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash);
}
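For context, this change is part of #69203 ("Fix uniq and GROUP BY for JSON/Dynamic types"): a Dynamic value must hash identically whether it lives in a dedicated variant or in the shared variant, otherwise aggregation can split one logical value into several groups. A minimal sketch of the intended behavior, assuming the experimental Dynamic type and its usual setting name:

``` sql
SET allow_experimental_dynamic_type = 1;

SELECT d, count()
FROM (SELECT (number % 3)::Dynamic AS d FROM numbers(9))
GROUP BY d;
-- expected: exactly 3 groups with count() = 3 each, regardless of the
-- internal layout of the values
```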

@@ -47,15 +47,21 @@ ColumnObject::ColumnObject(
, statistics(statistics_)
{
typed_paths.reserve(typed_paths_.size());
sorted_typed_paths.reserve(typed_paths_.size());
for (auto & [path, column] : typed_paths_)
typed_paths[path] = std::move(column);
{
auto it = typed_paths.emplace(path, std::move(column)).first;
sorted_typed_paths.push_back(it->first);
}
std::sort(sorted_typed_paths.begin(), sorted_typed_paths.end());
dynamic_paths.reserve(dynamic_paths_.size());
dynamic_paths_ptrs.reserve(dynamic_paths_.size());
for (auto & [path, column] : dynamic_paths_)
{
dynamic_paths[path] = std::move(column);
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(dynamic_paths[path].get());
auto it = dynamic_paths.emplace(path, std::move(column)).first;
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(it->second.get());
sorted_dynamic_paths.insert(it->first);
}
}
@@ -64,13 +70,17 @@ ColumnObject::ColumnObject(
: max_dynamic_paths(max_dynamic_paths_), global_max_dynamic_paths(max_dynamic_paths_), max_dynamic_types(max_dynamic_types_)
{
typed_paths.reserve(typed_paths_.size());
sorted_typed_paths.reserve(typed_paths_.size());
for (auto & [path, column] : typed_paths_)
{
if (!column->empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected non-empty typed path column in ColumnObject constructor");
typed_paths[path] = std::move(column);
auto it = typed_paths.emplace(path, std::move(column)).first;
sorted_typed_paths.push_back(it->first);
}
std::sort(sorted_typed_paths.begin(), sorted_typed_paths.end());
MutableColumns paths_and_values;
paths_and_values.emplace_back(ColumnString::create());
paths_and_values.emplace_back(ColumnString::create());
@@ -129,13 +139,8 @@ std::string ColumnObject::getName() const
ss << "Object(";
ss << "max_dynamic_paths=" << global_max_dynamic_paths;
ss << ", max_dynamic_types=" << max_dynamic_types;
std::vector<String> sorted_typed_paths;
sorted_typed_paths.reserve(typed_paths.size());
for (const auto & [path, column] : typed_paths)
sorted_typed_paths.push_back(path);
std::sort(sorted_typed_paths.begin(), sorted_typed_paths.end());
for (const auto & path : sorted_typed_paths)
ss << ", " << path << " " << typed_paths.at(path)->getName();
ss << ", " << path << " " << typed_paths.find(path)->second->getName();
ss << ")";
return ss.str();
}
@@ -260,6 +265,7 @@ ColumnDynamic * ColumnObject::tryToAddNewDynamicPath(std::string_view path)
new_dynamic_column->insertManyDefaults(size());
auto it = dynamic_paths.emplace(path, std::move(new_dynamic_column)).first;
auto it_ptr = dynamic_paths_ptrs.emplace(path, assert_cast<ColumnDynamic *>(it->second.get())).first;
sorted_dynamic_paths.insert(it->first);
return it_ptr->second;
}
@@ -288,8 +294,9 @@ void ColumnObject::setDynamicPaths(const std::vector<String> & paths)
auto new_dynamic_column = ColumnDynamic::create(max_dynamic_types);
if (size)
new_dynamic_column->insertManyDefaults(size);
dynamic_paths[path] = std::move(new_dynamic_column);
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(dynamic_paths[path].get());
auto it = dynamic_paths.emplace(path, std::move(new_dynamic_column)).first;
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(it->second.get());
sorted_dynamic_paths.insert(it->first);
}
}
@@ -658,39 +665,61 @@ void ColumnObject::popBack(size_t n)
StringRef ColumnObject::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const
{
StringRef res(begin, 0);
// Serialize all paths and values in binary format.
/// First, serialize values from typed paths in sorted order. The set of typed paths is the same for all instances of this column.
for (auto path : sorted_typed_paths)
{
auto data_ref = typed_paths.find(path)->second->serializeValueIntoArena(n, arena, begin);
res.data = data_ref.data - res.size;
res.size += data_ref.size;
}
/// Second, serialize paths and values in binary format from dynamic paths and shared data, in order sorted by path.
/// Calculate total number of paths to serialize and write it.
const auto & shared_data_offsets = getSharedDataOffsets();
size_t offset = shared_data_offsets[static_cast<ssize_t>(n) - 1];
size_t end = shared_data_offsets[static_cast<ssize_t>(n)];
size_t num_paths = typed_paths.size() + dynamic_paths.size() + (end - offset);
size_t num_paths = (end - offset);
/// Don't serialize Nulls from dynamic paths.
for (const auto & [_, column] : dynamic_paths)
num_paths += !column->isNullAt(n);
char * pos = arena.allocContinue(sizeof(size_t), begin);
memcpy(pos, &num_paths, sizeof(size_t));
res.data = pos - res.size;
res.size += sizeof(size_t);
/// Serialize paths and values from typed paths.
for (const auto & [path, column] : typed_paths)
{
size_t path_size = path.size();
pos = arena.allocContinue(sizeof(size_t) + path_size, begin);
memcpy(pos, &path_size, sizeof(size_t));
memcpy(pos + sizeof(size_t), path.data(), path_size);
auto data_ref = column->serializeValueIntoArena(n, arena, begin);
res.data = data_ref.data - res.size - sizeof(size_t) - path_size;
res.size += data_ref.size + sizeof(size_t) + path_size;
}
/// Serialize paths and values from dynamic paths.
for (const auto & [path, column] : dynamic_paths)
{
WriteBufferFromOwnString buf;
getDynamicSerialization()->serializeBinary(*column, n, buf, getFormatSettings());
serializePathAndValueIntoArena(arena, begin, path, buf.str(), res);
}
/// Serialize paths and values from shared data.
auto dynamic_paths_it = sorted_dynamic_paths.begin();
auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues();
for (size_t i = offset; i != end; ++i)
serializePathAndValueIntoArena(arena, begin, shared_data_paths->getDataAt(i), shared_data_values->getDataAt(i), res);
{
auto path = shared_data_paths->getDataAt(i).toView();
/// Paths in shared data are sorted. Serialize all paths from dynamic paths that go before this path in sorted order.
while (dynamic_paths_it != sorted_dynamic_paths.end() && *dynamic_paths_it < path)
{
const auto * dynamic_column = dynamic_paths_ptrs.find(*dynamic_paths_it)->second;
/// Don't serialize Nulls.
if (!dynamic_column->isNullAt(n))
{
WriteBufferFromOwnString buf;
getDynamicSerialization()->serializeBinary(*dynamic_column, n, buf, getFormatSettings());
serializePathAndValueIntoArena(arena, begin, StringRef(*dynamic_paths_it), buf.str(), res);
}
++dynamic_paths_it;
}
serializePathAndValueIntoArena(arena, begin, StringRef(path), shared_data_values->getDataAt(i), res);
}
/// Serialize all remaining paths in dynamic paths.
for (; dynamic_paths_it != sorted_dynamic_paths.end(); ++dynamic_paths_it)
{
const auto * dynamic_column = dynamic_paths_ptrs.find(*dynamic_paths_it)->second;
if (!dynamic_column->isNullAt(n))
{
WriteBufferFromOwnString buf;
getDynamicSerialization()->serializeBinary(*dynamic_column, n, buf, getFormatSettings());
serializePathAndValueIntoArena(arena, begin, StringRef(*dynamic_paths_it), buf.str(), res);
}
}
return res;
}
@@ -711,70 +740,49 @@ void ColumnObject::serializePathAndValueIntoArena(DB::Arena & arena, const char
const char * ColumnObject::deserializeAndInsertFromArena(const char * pos)
{
size_t current_size = size();
/// Deserialize paths and values and insert them into typed paths, dynamic paths or shared data.
/// Serialized paths could be unsorted, so we will have to sort all paths that will be inserted into shared data.
std::vector<std::pair<std::string_view, std::string_view>> paths_and_values_for_shared_data;
/// First deserialize typed paths. They come first.
for (auto path : sorted_typed_paths)
pos = typed_paths.find(path)->second->deserializeAndInsertFromArena(pos);
/// Second deserialize all other paths and values and insert them into dynamic paths or shared data.
auto num_paths = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
const auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues();
for (size_t i = 0; i != num_paths; ++i)
{
auto path_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
std::string_view path(pos, path_size);
pos += path_size;
/// Check if it's a typed path. In this case we should use
/// deserializeAndInsertFromArena of corresponding column.
if (auto typed_it = typed_paths.find(path); typed_it != typed_paths.end())
/// Deserialize binary value and try to insert it to dynamic paths or shared data.
auto value_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
std::string_view value(pos, value_size);
pos += value_size;
/// Check if we have this path in dynamic paths.
if (auto dynamic_it = dynamic_paths.find(path); dynamic_it != dynamic_paths.end())
{
pos = typed_it->second->deserializeAndInsertFromArena(pos);
ReadBufferFromMemory buf(value.data(), value.size());
getDynamicSerialization()->deserializeBinary(*dynamic_it->second, buf, getFormatSettings());
}
/// If it's not a typed path, deserialize binary value and try to insert it
/// to dynamic paths or shared data.
/// Try to add a new dynamic path.
else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path))
{
ReadBufferFromMemory buf(value.data(), value.size());
getDynamicSerialization()->deserializeBinary(*dynamic_path_column, buf, getFormatSettings());
}
/// Limit on dynamic paths is reached, add this path to shared data.
/// Serialized paths are sorted, so we can insert right away.
else
{
auto value_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
std::string_view value(pos, value_size);
pos += value_size;
/// Check if we have this path in dynamic paths.
if (auto dynamic_it = dynamic_paths.find(path); dynamic_it != dynamic_paths.end())
{
ReadBufferFromMemory buf(value.data(), value.size());
getDynamicSerialization()->deserializeBinary(*dynamic_it->second, buf, getFormatSettings());
}
/// Try to add a new dynamic path.
else if (auto * dynamic_path_column = tryToAddNewDynamicPath(path))
{
ReadBufferFromMemory buf(value.data(), value.size());
getDynamicSerialization()->deserializeBinary(*dynamic_path_column, buf, getFormatSettings());
}
/// Limit on dynamic paths is reached, add this path to shared data later.
else
{
paths_and_values_for_shared_data.emplace_back(path, value);
}
shared_data_paths->insertData(path.data(), path.size());
shared_data_values->insertData(value.data(), value.size());
}
}
/// Sort and insert all paths from paths_and_values_for_shared_data into shared data.
std::sort(paths_and_values_for_shared_data.begin(), paths_and_values_for_shared_data.end());
const auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues();
for (const auto & [path, value] : paths_and_values_for_shared_data)
{
shared_data_paths->insertData(path.data(), path.size());
shared_data_values->insertData(value.data(), value.size());
}
getSharedDataOffsets().push_back(shared_data_paths->size());
/// Insert default value in all remaining typed and dynamic paths.
for (auto & [_, column] : typed_paths)
{
if (column->size() == current_size)
column->insertDefault();
}
/// Insert default value in all remaining dynamic paths.
for (auto & [_, column] : dynamic_paths_ptrs)
{
if (column->size() == current_size)
@@ -786,6 +794,11 @@ const char * ColumnObject::deserializeAndInsertFromArena(const char * pos)
const char * ColumnObject::skipSerializedInArena(const char * pos) const
{
/// First, skip all values of typed paths;
for (auto path : sorted_typed_paths)
pos = typed_paths.find(path)->second->skipSerializedInArena(pos);
/// Second, skip all other paths and values.
auto num_paths = unalignedLoad<size_t>(pos);
pos += sizeof(size_t);
for (size_t i = 0; i != num_paths; ++i)
@@ -794,15 +807,8 @@ const char * ColumnObject::skipSerializedInArena(const char * pos) const
pos += sizeof(size_t);
std::string_view path(pos, path_size);
pos += path_size;
if (auto typed_it = typed_paths.find(path); typed_it != typed_paths.end())
{
pos = typed_it->second->skipSerializedInArena(pos);
}
else
{
auto value_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t) + value_size;
}
auto value_size = unalignedLoad<size_t>(pos);
pos += sizeof(size_t) + value_size;
}
return pos;
@@ -810,11 +816,51 @@ const char * ColumnObject::skipSerializedInArena(const char * pos) const
void ColumnObject::updateHashWithValue(size_t n, SipHash & hash) const
{
for (const auto & [_, column] : typed_paths)
column->updateHashWithValue(n, hash);
for (const auto & [_, column] : dynamic_paths_ptrs)
column->updateHashWithValue(n, hash);
shared_data->updateHashWithValue(n, hash);
for (auto path : sorted_typed_paths)
typed_paths.find(path)->second->updateHashWithValue(n, hash);
/// The hash of the object in a row should not depend on the way we store paths (in dynamic paths or in shared data)
/// and should be the same for equal objects. To support this, we update the hash with each path and its value (if not null)
/// from both dynamic paths and shared data, in order sorted by path.
const auto [shared_data_paths, shared_data_values] = getSharedDataPathsAndValues();
const auto & shared_data_offsets = getSharedDataOffsets();
size_t start = shared_data_offsets[static_cast<ssize_t>(n) - 1];
size_t end = shared_data_offsets[static_cast<ssize_t>(n)];
auto dynamic_paths_it = sorted_dynamic_paths.begin();
for (size_t i = start; i != end; ++i)
{
auto path = shared_data_paths->getDataAt(i).toView();
/// Paths in shared data are sorted. Update hash with all paths from dynamic paths that go before this path in sorted order.
while (dynamic_paths_it != sorted_dynamic_paths.end() && *dynamic_paths_it < path)
{
const auto * dynamic_column = dynamic_paths_ptrs.find(*dynamic_paths_it)->second;
if (!dynamic_column->isNullAt(n))
{
hash.update(*dynamic_paths_it);
dynamic_column->updateHashWithValue(n, hash);
}
++dynamic_paths_it;
}
/// Deserialize value in temporary column to get its hash.
auto value = shared_data_values->getDataAt(i);
ReadBufferFromMemory buf(value.data, value.size);
auto tmp_column = ColumnDynamic::create();
getDynamicSerialization()->deserializeBinary(*tmp_column, buf, getFormatSettings());
hash.update(path);
tmp_column->updateHashWithValue(0, hash);
}
/// Iterate over all remaining paths in dynamic paths.
for (; dynamic_paths_it != sorted_dynamic_paths.end(); ++dynamic_paths_it)
{
const auto * dynamic_column = dynamic_paths_ptrs.find(*dynamic_paths_it)->second;
if (!dynamic_column->isNullAt(n))
{
hash.update(*dynamic_paths_it);
dynamic_column->updateHashWithValue(n, hash);
}
}
}
WeakHash32 ColumnObject::getWeakHash32() const
@@ -1310,6 +1356,7 @@ void ColumnObject::takeDynamicStructureFromSourceColumns(const DB::Columns & sou
/// Reset current state.
dynamic_paths.clear();
dynamic_paths_ptrs.clear();
sorted_dynamic_paths.clear();
max_dynamic_paths = global_max_dynamic_paths;
Statistics new_statistics(Statistics::Source::MERGE);
@@ -1328,8 +1375,9 @@ void ColumnObject::takeDynamicStructureFromSourceColumns(const DB::Columns & sou
{
if (dynamic_paths.size() < max_dynamic_paths)
{
dynamic_paths.emplace(path, ColumnDynamic::create(max_dynamic_types));
dynamic_paths_ptrs.emplace(path, assert_cast<ColumnDynamic *>(dynamic_paths.find(path)->second.get()));
auto it = dynamic_paths.emplace(path, ColumnDynamic::create(max_dynamic_types)).first;
dynamic_paths_ptrs.emplace(path, assert_cast<ColumnDynamic *>(it->second.get()));
sorted_dynamic_paths.insert(it->first);
}
/// Add all remaining paths into shared data statistics until we reach its max size;
else if (new_statistics.shared_data_paths_statistics.size() < Statistics::MAX_SHARED_DATA_STATISTICS_SIZE)
@@ -1343,8 +1391,9 @@ void ColumnObject::takeDynamicStructureFromSourceColumns(const DB::Columns & sou
{
for (const auto & [path, _] : path_to_total_number_of_non_null_values)
{
dynamic_paths[path] = ColumnDynamic::create(max_dynamic_types);
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(dynamic_paths[path].get());
auto it = dynamic_paths.emplace(path, ColumnDynamic::create(max_dynamic_types)).first;
dynamic_paths_ptrs[path] = assert_cast<ColumnDynamic *>(it->second.get());
sorted_dynamic_paths.insert(it->first);
}
}
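Taken together, the sorted-path serialization and hashing above aim to make equal JSON objects compare and hash as equal even when one instance stores a path as a dynamic subcolumn while another keeps it in shared data. A minimal sketch of the user-visible goal, assuming the experimental JSON type and its usual setting name:

``` sql
SET allow_experimental_json_type = 1;

SELECT uniq(j)
FROM
(
    SELECT '{"a": 1, "b": 2}'::JSON AS j
    UNION ALL
    SELECT '{"b": 2, "a": 1}'::JSON
);
-- expected: 1 (one distinct object, independent of internal path storage)
```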

@@ -238,10 +238,15 @@ private:
/// Map path -> column for paths with explicitly specified types.
/// This set of paths is constant and cannot be changed.
PathToColumnMap typed_paths;
/// Sorted list of typed paths. Used to avoid sorting paths every time in some methods.
std::vector<std::string_view> sorted_typed_paths;
/// Map path -> column for dynamically added paths. All columns
/// here are Dynamic columns. This set of paths can be extended
/// during inserts into the column.
PathToColumnMap dynamic_paths;
/// Sorted list of dynamic paths. Used to avoid sorting paths every time in some methods.
std::set<std::string_view> sorted_dynamic_paths;
/// Store and use pointers to ColumnDynamic to avoid virtual calls.
/// With hundreds of dynamic paths these virtual calls are noticeable.
PathToDynamicColumnPtrMap dynamic_paths_ptrs;

@@ -24,7 +24,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
ssize_t from = data.size() - 1;
ssize_t index = mask.size() - 1;
data.resize(mask.size());
data.resize_exact(mask.size());
while (index >= 0)
{
if (!!mask[index] ^ inverted)

@@ -34,6 +34,7 @@ static constexpr auto DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING = 54
static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = 1;
static constexpr auto DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3;
static constexpr auto DBMS_PARALLEL_REPLICAS_MIN_VERSION_WITH_MARK_SEGMENT_SIZE_FIELD = 4;
static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 4;
static constexpr auto DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453;

@@ -893,6 +893,7 @@ class IColumn;
M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.", 0) \
M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \
M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \
M(Bool, short_circuit_default_implementation_for_nulls, true, "Enable short-circuiting of the default implementation for NULLs in functions with useDefaultImplementationForNulls() = true. If true, the function is not actually evaluated on rows where at least one argument is NULL.", 0) \
M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \
M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
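A hedged sketch of what the new setting controls: query results should not change, the setting only lets functions with useDefaultImplementationForNulls() = true skip evaluation on rows where some argument is NULL:

``` sql
SET short_circuit_default_implementation_for_nulls = 1;

-- plus() uses the default implementation for NULLs: the row where nullIf()
-- yields NULL produces NULL directly, without evaluating the addition.
SELECT number + nullIf(number, 2) FROM numbers(4);
-- 0, 2, NULL, 6
```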

@@ -77,6 +77,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
{"create_if_not_exists", false, false, "New setting."},
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
{"allow_short_circuit_default_implementation_for_nulls", true, true, "Setting for short-circuit default implementations for null in function with useDefaultImplementationForNulls() = true. If true, function will not actually evaluate for rows in which there are at least one argument with null value."},
{"output_format_always_quote_identifiers", false, false, "New setting."},
{"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
{"parallel_replicas_mark_segment_size", 128, 0, "Value for this setting now determined automatically"},

@@ -1,10 +1,10 @@
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Common/assert_cast.h>
@@ -13,11 +13,11 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column)
@@ -27,8 +27,7 @@ const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * co
const ColumnConst * res = assert_cast<const ColumnConst *>(column);
if (checkColumn<ColumnString>(&res->getDataColumn())
|| checkColumn<ColumnFixedString>(&res->getDataColumn()))
if (checkColumn<ColumnString>(&res->getDataColumn()) || checkColumn<ColumnFixedString>(&res->getDataColumn()))
return res;
return {};
@@ -78,7 +77,7 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col)
{
nullable_res = makeNullable(col.column);
}
return ColumnWithTypeAndName{ nullable_res, nested_type, col.name };
return ColumnWithTypeAndName{nullable_res, nested_type, col.name};
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} for DataTypeNullable", col.dumpStructure());
@@ -102,18 +101,22 @@ String withOrdinalEnding(size_t i)
{
switch (i)
{
case 0: return "1st";
case 1: return "2nd";
case 2: return "3rd";
default: return std::to_string(i) + "th";
case 0:
return "1st";
case 1:
return "2nd";
case 2:
return "3rd";
default:
return std::to_string(i) + "th";
}
}
void validateArgumentsImpl(const IFunction & func,
const ColumnsWithTypeAndName & arguments,
size_t argument_offset,
const FunctionArgumentDescriptors & descriptors)
void validateArgumentsImpl(
const IFunction & func,
const ColumnsWithTypeAndName & arguments,
size_t argument_offset,
const FunctionArgumentDescriptors & descriptors)
{
for (size_t i = 0; i < descriptors.size(); ++i)
{
@@ -124,13 +127,14 @@ void validateArgumentsImpl(const IFunction & func,
const auto & arg = arguments[i + argument_offset];
const auto & descriptor = descriptors[i];
if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0)
throw Exception(error_code,
"A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}",
withOrdinalEnding(argument_offset + i),
descriptor.name,
func.getName(),
descriptor.type_name,
arg.type ? arg.type->getName() : "<?>");
throw Exception(
error_code,
"A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}",
withOrdinalEnding(argument_offset + i),
descriptor.name,
func.getName(),
descriptor.type_name,
arg.type ? arg.type->getName() : "<?>");
}
}
@@ -150,26 +154,35 @@ int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const Col
return 0;
}
void validateFunctionArguments(const IFunction & func,
const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args)
void validateFunctionArguments(
const IFunction & func,
const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args)
{
if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size())
{
auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; };
auto argument_singular_or_plural
= [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; };
String expected_args_string;
if (!mandatory_args.empty() && !optional_args.empty())
expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args));
expected_args_string = fmt::format(
"{} mandatory {} and {} optional {}",
mandatory_args.size(),
argument_singular_or_plural(mandatory_args),
optional_args.size(),
argument_singular_or_plural(optional_args));
else if (!mandatory_args.empty() && optional_args.empty())
expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments"
expected_args_string = fmt::format(
"{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments"
else if (mandatory_args.empty() && !optional_args.empty())
expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args));
else
expected_args_string = "0 arguments";
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"An incorrect number of arguments was specified for function '{}'. Expected {}, got {}",
func.getName(),
expected_args_string,
@ -205,7 +218,8 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
return {nested_columns, offsets->data()};
}
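A minimal sketch of how the validation helpers reformatted above are typically wired into a function's type checking. The descriptor field order {name, type validator, column validator, type name} and the TypeValidator cast follow the usual ClickHouse pattern, but they are assumptions here, not part of this diff:

/// Hypothetical getReturnTypeImpl of some IFunction subclass (sketch only).
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
    FunctionArgumentDescriptors mandatory_args{
        {"haystack", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
    };
    FunctionArgumentDescriptors optional_args{
        {"start_pos", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeNumber), nullptr, "Number"},
    };
    /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH with the message built above,
    /// or a per-descriptor error using the withOrdinalEnding() wording ("1st", "2nd", ...).
    validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
    return std::make_shared<DataTypeString>();
}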
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count)
ColumnPtr
wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count)
{
ColumnPtr result_null_map_column;
@ -263,6 +277,39 @@ ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & a
return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column);
}
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnPtr & null_map)
{
if (src->onlyNull())
return src;
ColumnPtr result_null_map_column;
ColumnPtr src_not_nullable = src;
if (const auto * nullable = checkAndGetColumn<ColumnNullable>(src.get()))
{
src_not_nullable = nullable->getNestedColumnPtr();
result_null_map_column = nullable->getNullMapColumnPtr();
MutableColumnPtr mutable_result_null_map_column = IColumn::mutate(std::move(result_null_map_column));
NullMap & result_null_map = assert_cast<ColumnUInt8 &>(*mutable_result_null_map_column).getData();
const NullMap & null_map_data = assert_cast<const ColumnUInt8 &>(*null_map).getData();
for (size_t i = 0; i < result_null_map.size(); ++i)
result_null_map[i] |= null_map_data[i];
result_null_map_column = std::move(mutable_result_null_map_column);
return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column);
}
else if (const auto * const_src = checkAndGetColumn<ColumnConst>(src.get()))
{
const NullMap & null_map_data = assert_cast<const ColumnUInt8 &>(*null_map).getData();
ColumnPtr result_null_map = ColumnUInt8::create(1, null_map_data[0] || const_src->isNullAt(0));
const auto * nullable_data = checkAndGetColumn<ColumnNullable>(&const_src->getDataColumn());
auto data_not_nullable = nullable_data ? nullable_data->getNestedColumnPtr() : const_src->getDataColumnPtr();
return ColumnConst::create(ColumnNullable::create(data_not_nullable, result_null_map), const_src->size());
}
else
return ColumnNullable::create(src->convertToFullColumnIfConst(), null_map);
}
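A short usage sketch of the new wrapInNullable overload defined above; the function name nullOutOddRows is invented for illustration, and the behavior relies on the OR-merge and ColumnConst branches visible in the definition.

/// Sketch: mark odd rows of an arbitrary column as NULL.
ColumnPtr nullOutOddRows(const ColumnPtr & src)
{
    auto null_map = ColumnUInt8::create(src->size(), 0);
    auto & data = null_map->getData();
    for (size_t i = 1; i < data.size(); i += 2)
        data[i] = 1;
    /// If src is already Nullable, its null map is OR-ed with ours;
    /// if src is a ColumnConst, a constant Nullable column is produced.
    return wrapInNullable(src, std::move(null_map));
}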
NullPresence getNullPresense(const ColumnsWithTypeAndName & args)
{
NullPresence res;

View File

@ -169,6 +169,11 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count);
/** Return ColumnNullable of src, with the given null map.
* Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
*/
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnPtr & null_map);
struct NullPresence
{
bool has_nullable = false;

View File

@ -1,23 +1,28 @@
#include <Functions/IFunctionAdaptors.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Common/SipHash.h>
#include <Core/Block.h>
#include <Core/TypeId.h>
#include <cstdlib>
#include <memory>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnNothing.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/MaskOperations.h>
#include <Core/Block.h>
#include <Core/Settings.h>
#include <Core/TypeId.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/Native.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Functions/FunctionHelpers.h>
#include <cstdlib>
#include <memory>
#include <Interpreters/Context.h>
#include <Common/CurrentThread.h>
#include <Common/SipHash.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
#include "config.h"
@ -31,9 +36,9 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
}
namespace
@ -65,9 +70,7 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible type for LowCardinality column: {}",
column.type->getName());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", column.type->getName());
if (can_be_executed_on_default_arguments)
{
@ -120,10 +123,7 @@ ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
/// Check that these arguments are really constant.
for (auto arg_num : arguments_to_remain_constants)
if (arg_num < args.size() && !isColumnConst(*args[arg_num].column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Argument at index {} for function {} must be constant",
arg_num,
getName());
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument at index {} for function {} must be constant", arg_num, getName());
if (args.empty() || !useDefaultImplementationForConstants() || !allArgumentsAreConstants(args))
return nullptr;
@ -137,14 +137,16 @@ ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
{
const ColumnWithTypeAndName & column = args[arg_num];
if (arguments_to_remain_constants.end() != std::find(arguments_to_remain_constants.begin(), arguments_to_remain_constants.end(), arg_num))
if (arguments_to_remain_constants.end()
!= std::find(arguments_to_remain_constants.begin(), arguments_to_remain_constants.end(), arg_num))
{
temporary_columns.emplace_back(ColumnWithTypeAndName{column.column->cloneResized(1), column.type, column.name});
}
else
{
have_converted_columns = true;
temporary_columns.emplace_back(ColumnWithTypeAndName{ assert_cast<const ColumnConst *>(column.column.get())->getDataColumnPtr(), column.type, column.name });
temporary_columns.emplace_back(
ColumnWithTypeAndName{assert_cast<const ColumnConst *>(column.column.get())->getDataColumnPtr(), column.type, column.name});
}
}
@ -152,7 +154,8 @@ ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
* not in "arguments_to_remain_constants" set. Otherwise we get infinite recursion.
*/
if (!have_converted_columns)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: the function requires more arguments",
getName());
@ -175,7 +178,7 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
NullPresence null_presence = getNullPresense(args);
if (null_presence.has_null_constant)
if (null_presence.has_null_constant || null_presence.has_nullable)
{
// Default implementation for nulls returns null result for null arguments,
// so the result type must be nullable.
@ -186,17 +189,87 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
"is expected to return Nullable result, got {}",
getName(),
result_type->getName());
return result_type->createColumnConstWithDefaultValue(input_rows_count);
}
if (null_presence.has_null_constant)
return result_type->createColumnConstWithDefaultValue(input_rows_count);
if (null_presence.has_nullable)
{
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
/// Usually happens during query analysis. We should return a non-const column to avoid wrong constant folding.
if (input_rows_count == 0)
return result_type->createColumn();
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run);
return wrapInNullable(res, args, result_type, input_rows_count);
IColumn::Filter mask(input_rows_count, 1);
MaskInfo mask_info = {.has_ones = true, .has_zeros = false};
for (const auto & arg : args)
{
if (arg.type->isNullable())
{
if (isColumnConst(*arg.column))
{
if (arg.column->isNullAt(0))
{
mask_info.has_ones = false;
mask_info.has_zeros = true;
}
}
else
{
const auto & null_map = assert_cast<const ColumnNullable &>(*arg.column).getNullMapColumnPtr();
mask_info = extractInvertedMask(mask, null_map);
}
}
/// Exit the loop early if every row already contains a null value
if (!mask_info.has_ones)
break;
}
if (!mask_info.has_ones)
{
/// No need to evaluate the function if every row contains at least one null value.
return result_type->createColumnConstWithDefaultValue(input_rows_count);
}
else if (!mask_info.has_zeros || !short_circuit_default_implementation_for_nulls)
{
/// Every row must be evaluated if there are no nulls or if short-circuiting is disabled.
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run);
/// Invert mask as null map
inverseMask(mask, mask_info);
auto null_map = ColumnUInt8::create();
null_map->getData() = std::move(mask);
auto new_res = wrapInNullable(res, std::move(null_map));
return new_res;
}
else
{
/// If short-circuiting is enabled, we execute the function only on rows where all arguments are non-null
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
/// Filter every column by mask
size_t size_hint = countBytesInFilter(mask.data(), 0, mask.size());
for (auto & col : temporary_columns)
col.column = col.column->filter(mask, size_hint);
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, size_hint, dry_run);
auto mutable_res = IColumn::mutate(std::move(res));
mutable_res->expand(mask, false);
/// Invert mask as null map
inverseMask(mask, mask_info);
auto null_map = ColumnUInt8::create();
null_map->getData() = std::move(mask);
auto new_res = wrapInNullable(std::move(mutable_res), std::move(null_map));
return new_res;
}
}
return nullptr;
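The short-circuit branch above hinges on a filter-then-expand round trip: rows with a NULL argument are filtered out, the function runs on the survivors, the result is expanded back to full size with default values in the holes, and the inverted mask becomes the null map. A self-contained toy of the same round trip on plain vectors, independent of the IColumn API:

#include <cstdint>
#include <vector>

/// Toy model of the IColumn::filter / IColumn::expand pair used above.
std::vector<int64_t> filterThenExpand(const std::vector<int64_t> & input, const std::vector<uint8_t> & mask /* 1 = keep */)
{
    std::vector<int64_t> filtered;
    for (size_t i = 0; i < input.size(); ++i)
        if (mask[i])
            filtered.push_back(input[i]);

    /// ... here the function would be evaluated on `filtered` only ...

    std::vector<int64_t> expanded(input.size(), 0); /// defaults fill the masked-out rows
    size_t j = 0;
    for (size_t i = 0; i < input.size(); ++i)
        if (mask[i])
            expanded[i] = filtered[j++];
    return expanded; /// masked-out positions are then declared NULL via the inverted mask
}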
@ -258,7 +331,18 @@ static void convertSparseColumnsToFull(ColumnsWithTypeAndName & args)
column.column = recursiveRemoveSparse(column.column);
}
ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
IExecutableFunction::IExecutableFunction()
{
if (CurrentThread::isInitialized())
{
auto query_context = CurrentThread::get().getQueryContext();
if (query_context && query_context->getSettingsRef().short_circuit_default_implementation_for_nulls)
short_circuit_default_implementation_for_nulls = true;
}
}
ColumnPtr IExecutableFunction::executeWithoutSparseColumns(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
ColumnPtr result;
if (useDefaultImplementationForLowCardinalityColumns())
@ -271,19 +355,16 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
size_t new_input_rows_count = columns_without_low_cardinality.empty()
? input_rows_count
: columns_without_low_cardinality.front().column->size();
size_t new_input_rows_count
= columns_without_low_cardinality.empty() ? input_rows_count : columns_without_low_cardinality.front().column->size();
checkFunctionArgumentSizes(columns_without_low_cardinality, new_input_rows_count);
auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run);
bool res_is_constant = isColumnConst(*res);
auto keys = res_is_constant
? res->cloneResized(1)->convertToFullColumnIfConst()
: res;
auto keys = res_is_constant ? res->cloneResized(1)->convertToFullColumnIfConst() : res;
auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType());
ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size());
@ -309,7 +390,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
return result;
}
ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
ColumnPtr IExecutableFunction::execute(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
checkFunctionArgumentSizes(arguments, input_rows_count);
@ -370,7 +452,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments,
if (!result_type->canBeInsideSparseColumns() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1)
{
const auto & offsets_data = assert_cast<const ColumnVector<UInt64> &>(*sparse_offsets).getData();
return res->createWithOffsets(offsets_data, *createColumnConst(res, 0), input_rows_count, /*shift=*/ 1);
return res->createWithOffsets(offsets_data, *createColumnConst(res, 0), input_rows_count, /*shift=*/1);
}
return ColumnSparse::create(res, sparse_offsets, input_rows_count);
@ -397,7 +479,8 @@ void IFunctionOverloadResolver::checkNumberOfArguments(size_t number_of_argument
size_t expected_number_of_arguments = getNumberOfArguments();
if (number_of_arguments != expected_number_of_arguments)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be {}",
getName(),
number_of_arguments,
@ -436,9 +519,8 @@ DataTypePtr IFunctionOverloadResolver::getReturnType(const ColumnsWithTypeAndNam
auto type_without_low_cardinality = getReturnTypeWithoutLowCardinality(args_without_low_cardinality);
if (canBeExecutedOnLowCardinalityDictionary() && has_low_cardinality
&& num_full_low_cardinality_columns <= 1 && num_full_ordinary_columns == 0
&& type_without_low_cardinality->canBeInsideLowCardinality())
if (canBeExecutedOnLowCardinalityDictionary() && has_low_cardinality && num_full_low_cardinality_columns <= 1
&& num_full_ordinary_columns == 0 && type_without_low_cardinality->canBeInsideLowCardinality())
return std::make_shared<DataTypeLowCardinality>(type_without_low_cardinality);
else
return type_without_low_cardinality;
@ -545,7 +627,7 @@ llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const ValuesWith
ValuesWithType unwrapped_arguments;
unwrapped_arguments.reserve(arguments.size());
std::vector<llvm::Value*> is_null_values;
std::vector<llvm::Value *> is_null_values;
for (size_t i = 0; i < arguments.size(); ++i)
{

View File

@ -45,6 +45,7 @@ using OptionalFieldInterval = std::optional<FieldInterval>;
class IExecutableFunction
{
public:
IExecutableFunction();
virtual ~IExecutableFunction() = default;
@ -120,6 +121,8 @@ private:
ColumnPtr executeWithoutSparseColumns(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const;
bool short_circuit_default_implementation_for_nulls = false;
};
using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>;

View File

@ -14,6 +14,7 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -85,7 +86,8 @@ private:
const IColumn & scores,
const IColumn & labels,
ColumnArray::Offset current_offset,
ColumnArray::Offset next_offset)
ColumnArray::Offset next_offset,
bool scale)
{
struct ScoreLabel
{
@ -114,10 +116,10 @@ private:
size_t curr_fp = 0, curr_tp = 0;
for (size_t i = 0; i < size; ++i)
{
// Only increment the area when the score changes
/// Only increment the area when the score changes
if (sorted_labels[i].score != prev_score)
{
area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; // Trapezoidal area under curve (might degenerate to zero or to a rectangle)
area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; /// Trapezoidal area under curve (might degenerate to zero or to a rectangle)
prev_fp = curr_fp;
prev_tp = curr_tp;
prev_score = sorted_labels[i].score;
@ -131,12 +133,15 @@ private:
area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0;
/// Then normalize it dividing by the area to the area of rectangle.
/// Then, if scale is true, normalize it by dividing by the area of the enclosing rectangle.
if (curr_tp == 0 || curr_tp == size)
return std::numeric_limits<Float64>::quiet_NaN();
return area / curr_tp / (size - curr_tp);
if (scale)
{
if (curr_tp == 0 || curr_tp == size)
return std::numeric_limits<Float64>::quiet_NaN();
return area / curr_tp / (size - curr_tp);
}
return area;
}
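In formula form, what apply() computes (read off the code above; P = curr_tp and N = size - curr_tp are the positive and negative label counts at the end of the loop):

\text{area} = \sum_k \frac{(FP_k - FP_{k-1}) \, (TP_k + TP_{k-1})}{2},
\qquad
\text{arrayAUC} =
\begin{cases}
\text{area} / (P \cdot N) & \text{if } scale \\
\text{area} & \text{otherwise}
\end{cases}

with NaN returned in the scaled case when P = 0 or N = 0, i.e. when the rectangle degenerates.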
static void vector(
@ -144,7 +149,8 @@ private:
const IColumn & labels,
const ColumnArray::Offsets & offsets,
PaddedPODArray<Float64> & result,
size_t input_rows_count)
size_t input_rows_count,
bool scale)
{
result.resize(input_rows_count);
@ -152,28 +158,43 @@ private:
for (size_t i = 0; i < input_rows_count; ++i)
{
auto next_offset = offsets[i];
result[i] = apply(scores, labels, current_offset, next_offset);
result[i] = apply(scores, labels, current_offset, next_offset, scale);
current_offset = next_offset;
}
}
public:
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
for (size_t i = 0; i < getNumberOfArguments(); ++i)
size_t number_of_arguments = arguments.size();
if (number_of_arguments < 2 || number_of_arguments > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 2 or 3",
getName(), number_of_arguments);
for (size_t i = 0; i < 2; ++i)
{
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].get());
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].type.get());
if (!array_type)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "All arguments for function {} must be an array.", getName());
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The two first arguments for function {} must be of type Array.", getName());
const auto & nested_type = array_type->getNestedType();
if (!isNativeNumber(nested_type) && !isEnum(nested_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot process values of type {}",
getName(), nested_type->getName());
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot process values of type {}", getName(), nested_type->getName());
}
if (number_of_arguments == 3)
{
if (!isBool(arguments[2].type) || arguments[2].column.get() == nullptr || !isColumnConst(*arguments[2].column))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument (scale) for function {} must be of type const Bool.", getName());
}
return std::make_shared<DataTypeFloat64>();
@ -181,6 +202,8 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
size_t number_of_arguments = arguments.size();
ColumnPtr col1 = arguments[0].column->convertToFullColumnIfConst();
ColumnPtr col2 = arguments[1].column->convertToFullColumnIfConst();
@ -197,6 +220,11 @@ public:
if (!col_array1->hasEqualOffsets(*col_array2))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName());
/// Handle third argument for scale (if passed, otherwise default to true)
bool scale = true;
if (number_of_arguments == 3 && input_rows_count > 0)
scale = arguments[2].column->getBool(0);
auto col_res = ColumnVector<Float64>::create();
vector(
@ -204,7 +232,8 @@ public:
col_array2->getData(),
col_array1->getOffsets(),
col_res->getData(),
input_rows_count);
input_rows_count,
scale);
return col_res;
}

View File

@ -722,7 +722,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
assert_cast<const ASTFunction *>(argument.get())->arguments->children[0]->formatImpl(settings, state, nested_dont_need_parens);
settings.ostr << (settings.hilite ? hilite_operator : "") << " = " << (settings.hilite ? hilite_none : "");
}
settings.ostr << "'[HIDDEN]'";
if (!secret_arguments.replacement.empty())
{
settings.ostr << "'" << secret_arguments.replacement << "'";
}
else
{
settings.ostr << "'[HIDDEN]'";
}
if (size <= secret_arguments.start + secret_arguments.count && !secret_arguments.are_named)
break; /// All other arguments should also be hidden.
continue;

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/KnownObjectNames.h>
#include <Common/re2.h>
#include <Core/QualifiedTableName.h>
#include <base/defines.h>
#include <boost/algorithm/string/predicate.hpp>
@ -49,6 +50,11 @@ public:
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
std::vector<std::string> nested_maps;
/// Full replacement of an argument. Only supported when count is 1; otherwise every masked argument would be replaced with this same string.
/// It's needed when we don't want to hide the entire parameter but only part of it, e.g. the "connection_string" in
/// `azureBlobStorage('DefaultEndpointsProtocol=https;AccountKey=secretkey;...', ...)` should be replaced with
/// `azureBlobStorage('DefaultEndpointsProtocol=https;AccountKey=[HIDDEN];...', ...)`.
std::string replacement;
bool hasSecrets() const
{
@ -74,6 +80,7 @@ protected:
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
chassert(result.replacement.empty()); /// We shouldn't use replacement with masking other arguments
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
@ -199,32 +206,39 @@ protected:
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
/// azureBlobStorageCluster('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
if (maskAzureConnectionString(-1, true, 1))
return;
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
if (maskAzureConnectionString(-1, true, 2))
return;
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
if (maskAzureConnectionString(url_arg_idx))
return;
/// We should check other arguments first because we don't need to do any replacement in case of
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, format, [account_name, account_key, ...])
size_t count = function->arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
String fourth_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &fourth_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
if (fourth_arg == "auto" || KnownFormatNames::instance().exists(fourth_arg))
return; /// The argument at url_arg_idx + 3 is a format: azureBlobStorage('url', 'container', 'blobpath', 'format', ...)
}
}
@ -234,6 +248,40 @@ protected:
markSecretArgument(url_arg_idx + 4);
}
bool maskAzureConnectionString(ssize_t url_arg_idx, bool argument_is_named = false, size_t start = 0)
{
String url_arg;
if (argument_is_named)
{
url_arg_idx = findNamedArgument(&url_arg, "connection_string", start);
if (url_arg_idx == -1 || url_arg.empty())
url_arg_idx = findNamedArgument(&url_arg, "storage_account_url", start);
if (url_arg_idx == -1 || url_arg.empty())
return false;
}
else
{
if (!tryGetStringFromArgument(url_arg_idx, &url_arg))
return false;
}
if (!url_arg.starts_with("http"))
{
static re2::RE2 account_key_pattern = "AccountKey=.*?(;|$)";
if (RE2::Replace(&url_arg, account_key_pattern, "AccountKey=[HIDDEN]\\1"))
{
chassert(result.count == 0); /// We shouldn't use replacement with masking other arguments
result.start = url_arg_idx;
result.are_named = argument_is_named;
result.count = 1;
result.replacement = url_arg;
return true;
}
}
return false;
}
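A self-contained toy showing the RE2 call used above; the connection string is a made-up example in the Azure format, and the pattern and rewrite are copied from the hunk:

#include <re2/re2.h>
#include <iostream>
#include <string>

int main()
{
    std::string conn = "DefaultEndpointsProtocol=https;AccountName=dev;AccountKey=secretkey;EndpointSuffix=core.windows.net";
    static const re2::RE2 account_key_pattern = "AccountKey=.*?(;|$)";
    /// RE2::Replace rewrites the first match in place; \1 restores the trailing ';' (or end of string).
    if (RE2::Replace(&conn, account_key_pattern, "AccountKey=[HIDDEN]\\1"))
        std::cout << conn << '\n';
    /// Prints: DefaultEndpointsProtocol=https;AccountName=dev;AccountKey=[HIDDEN];EndpointSuffix=core.windows.net
}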
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
@ -513,8 +561,9 @@ protected:
return function->arguments->at(arg_idx)->isIdentifier();
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
/// Looks for an argument with the specified name among arguments of the form `key=value`.
/// Returns -1 if no such argument was found.
ssize_t findNamedArgument(String * res, const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < function->arguments->size(); ++i)
{
@ -531,8 +580,22 @@ protected:
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
{
tryGetStringFromArgument(*equals_func->arguments->at(1), res);
return i;
}
}
return -1;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
/// If the argument is found, it is marked as a secret.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
ssize_t arg_idx = findNamedArgument(nullptr, key, start);
if (arg_idx >= 0)
markSecretArgument(arg_idx, /* argument_is_named= */ true);
}
};

View File

@ -136,7 +136,7 @@ void InitialAllRangesAnnouncement::serialize(WriteBuffer & out, UInt64 initiator
writeIntBinary(mode, out);
description.serialize(out);
writeIntBinary(replica_num, out);
if (initiator_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
if (initiator_protocol_version >= DBMS_PARALLEL_REPLICAS_MIN_VERSION_WITH_MARK_SEGMENT_SIZE_FIELD)
writeIntBinary(mark_segment_size, out);
}
@ -168,7 +168,7 @@ InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffe
readIntBinary(replica_num, in);
size_t mark_segment_size = 128;
if (replica_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
if (replica_protocol_version >= DBMS_PARALLEL_REPLICAS_MIN_VERSION_WITH_MARK_SEGMENT_SIZE_FIELD)
readIntBinary(mark_segment_size, in);
return InitialAllRangesAnnouncement{mode, description, replica_num, mark_segment_size};
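The rename above matters because an optional trailing field must be gated by the same version constant on both the write and the read path; if the two sides check different constants, they disagree on the message layout. A toy illustration (the constant value and the stream shape are invented):

#include <cstdint>
#include <vector>

constexpr uint64_t MIN_VERSION_WITH_SEGMENT_SIZE = 100; /// stand-in, not the real revision number

void serializeToy(std::vector<uint64_t> & out, uint64_t peer_version, uint64_t mark_segment_size)
{
    out.push_back(/* mode */ 0);
    if (peer_version >= MIN_VERSION_WITH_SEGMENT_SIZE) /// must match the check in deserializeToy
        out.push_back(mark_segment_size);
}

uint64_t deserializeToy(const std::vector<uint64_t> & in, uint64_t peer_version)
{
    uint64_t mark_segment_size = 128; /// default, as in the hunk above
    if (peer_version >= MIN_VERSION_WITH_SEGMENT_SIZE)
        mark_segment_size = in[1];
    return mark_segment_size;
}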

View File

@ -223,7 +223,7 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context,
{
account_name = fourth_arg;
account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/structure");
if (is_format_arg(sixth_arg))
{
format = sixth_arg;
@ -257,10 +257,10 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context,
}
else if (with_structure && engine_args.size() == 8)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "account_name");
account_name = fourth_arg;
account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format");
if (!is_format_arg(sixth_arg))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg);
format = sixth_arg;

View File

@ -122,7 +122,7 @@ ObjectStorageQueueMetadata::ObjectStorageQueueMetadata(
, local_file_statuses(std::make_shared<LocalFileStatuses>())
{
if (mode == ObjectStorageQueueMode::UNORDERED
&& (table_metadata.tracked_files_limit || table_metadata.tracked_file_ttl_sec))
&& (table_metadata.tracked_files_limit || table_metadata.tracked_files_ttl_sec))
{
task = Context::getGlobalContextInstance()->getSchedulePool().createTask(
"ObjectStorageQueueCleanupFunc",
@ -366,9 +366,9 @@ void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
return;
}
chassert(table_metadata.tracked_files_limit || table_metadata.tracked_file_ttl_sec);
chassert(table_metadata.tracked_files_limit || table_metadata.tracked_files_ttl_sec);
const bool check_nodes_limit = table_metadata.tracked_files_limit > 0;
const bool check_nodes_ttl = table_metadata.tracked_file_ttl_sec > 0;
const bool check_nodes_ttl = table_metadata.tracked_files_ttl_sec > 0;
const bool nodes_limit_exceeded = nodes_num > table_metadata.tracked_files_limit;
if ((!nodes_limit_exceeded || !check_nodes_limit) && !check_nodes_ttl)
@ -443,7 +443,9 @@ void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
wb << fmt::format("Node: {}, path: {}, timestamp: {};\n", node, metadata.file_path, metadata.last_processed_timestamp);
return wb.str();
};
LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", table_metadata.tracked_files_limit, table_metadata.tracked_file_ttl_sec, get_nodes_str());
LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}",
table_metadata.tracked_files_limit, table_metadata.tracked_files_ttl_sec, get_nodes_str());
size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - table_metadata.tracked_files_limit : 0;
for (const auto & node : sorted_nodes)
@ -464,7 +466,7 @@ void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
else if (check_nodes_ttl)
{
UInt64 node_age = getCurrentTime() - node.metadata.last_processed_timestamp;
if (node_age >= table_metadata.tracked_file_ttl_sec)
if (node_age >= table_metadata.tracked_files_ttl_sec)
{
LOG_TRACE(log, "Removing node at path {} ({}) because file ttl is reached",
node.metadata.file_path, node.zk_path);

View File

@ -45,7 +45,7 @@ ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata(
, after_processing(engine_settings.after_processing.toString())
, mode(engine_settings.mode.toString())
, tracked_files_limit(engine_settings.tracked_files_limit)
, tracked_file_ttl_sec(engine_settings.tracked_file_ttl_sec)
, tracked_files_ttl_sec(engine_settings.tracked_file_ttl_sec)
, buckets(engine_settings.buckets)
, processing_threads_num(engine_settings.processing_threads_num)
, last_processed_path(engine_settings.last_processed_path)
@ -59,7 +59,7 @@ String ObjectStorageQueueTableMetadata::toString() const
json.set("after_processing", after_processing);
json.set("mode", mode);
json.set("tracked_files_limit", tracked_files_limit);
json.set("tracked_file_ttl_sec", tracked_file_ttl_sec);
json.set("tracked_files_ttl_sec", tracked_files_ttl_sec);
json.set("processing_threads_num", processing_threads_num);
json.set("buckets", buckets);
json.set("format_name", format_name);
@ -100,7 +100,7 @@ ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata(const Poco::JSO
, after_processing(json->getValue<String>("after_processing"))
, mode(json->getValue<String>("mode"))
, tracked_files_limit(getOrDefault(json, "tracked_files_limit", "s3queue_", 0))
, tracked_file_ttl_sec(getOrDefault(json, "tracked_files_ttl_sec", "s3queue_", 0))
, tracked_files_ttl_sec(getOrDefault(json, "tracked_files_ttl_sec", "", getOrDefault(json, "tracked_file_ttl_sec", "s3queue_", 0)))
, buckets(getOrDefault(json, "buckets", "", 0))
, processing_threads_num(getOrDefault(json, "processing_threads_num", "s3queue_", 1))
, last_processed_path(getOrDefault<String>(json, "last_processed_file", "s3queue_", ""))
@ -142,18 +142,18 @@ void ObjectStorageQueueTableMetadata::checkImmutableFieldsEquals(const ObjectSto
if (tracked_files_limit != from_zk.tracked_files_limit)
throw Exception(
ErrorCodes::METADATA_MISMATCH,
"Existing table metadata in ZooKeeper differs in max set size. "
"Existing table metadata in ZooKeeper differs in `tracked_files_limit`. "
"Stored in ZooKeeper: {}, local: {}",
from_zk.tracked_files_limit,
tracked_files_limit);
if (tracked_file_ttl_sec != from_zk.tracked_file_ttl_sec)
if (tracked_files_ttl_sec != from_zk.tracked_files_ttl_sec)
throw Exception(
ErrorCodes::METADATA_MISMATCH,
"Existing table metadata in ZooKeeper differs in max set age. "
"Existing table metadata in ZooKeeper differs in `tracked_files_ttl_sec`. "
"Stored in ZooKeeper: {}, local: {}",
from_zk.tracked_file_ttl_sec,
tracked_file_ttl_sec);
from_zk.tracked_files_ttl_sec,
tracked_files_ttl_sec);
if (format_name != from_zk.format_name)
throw Exception(

View File

@ -23,7 +23,7 @@ struct ObjectStorageQueueTableMetadata
const String after_processing;
const String mode;
const UInt64 tracked_files_limit;
const UInt64 tracked_file_ttl_sec;
const UInt64 tracked_files_ttl_sec;
const UInt64 buckets;
const UInt64 processing_threads_num;
const String last_processed_path;

View File

@ -157,7 +157,7 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
checkReplicationSlot(replication_slot);
LOG_INFO(log, "Using replication slot {} and publication {}", replication_slot, publication_name);
LOG_INFO(log, "Using replication slot {} and publication {}", replication_slot, doubleQuoteString(publication_name));
startup_task = getContext()->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ checkConnectionAndStart(); });
consumer_task = getContext()->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ consumerFunc(); });
@ -559,7 +559,7 @@ void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::nontransactio
/// This is a case for single Materialized storage. In case of database engine this check is done in advance.
LOG_WARNING(log,
"Publication {} already exists, but it is a CREATE query, not ATTACH. Publication will be dropped",
publication_name);
doubleQuoteString(publication_name));
dropPublication(tx);
}
@ -589,7 +589,7 @@ void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::nontransactio
try
{
tx.exec(query_str);
LOG_DEBUG(log, "Created publication {} with tables list: {}", publication_name, tables_list);
LOG_DEBUG(log, "Created publication {} with tables list: {}", doubleQuoteString(publication_name), tables_list);
}
catch (Exception & e)
{
@ -599,7 +599,7 @@ void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::nontransactio
}
else
{
LOG_DEBUG(log, "Using existing publication ({}) version", publication_name);
LOG_DEBUG(log, "Using existing publication ({}) version", doubleQuoteString(publication_name));
}
}
@ -677,7 +677,7 @@ void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx)
{
std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", doubleQuoteString(publication_name));
tx.exec(query_str);
LOG_DEBUG(log, "Dropped publication: {}", publication_name);
LOG_DEBUG(log, "Dropped publication: {}", doubleQuoteString(publication_name));
}
@ -693,7 +693,7 @@ void PostgreSQLReplicationHandler::removeTableFromPublication(pqxx::nontransacti
{
try
{
std::string query_str = fmt::format("ALTER PUBLICATION {} DROP TABLE ONLY {}", publication_name, doubleQuoteWithSchema(table_name));
std::string query_str = fmt::format("ALTER PUBLICATION {} DROP TABLE ONLY {}", doubleQuoteString(publication_name), doubleQuoteWithSchema(table_name));
ntx.exec(query_str);
LOG_TRACE(log, "Removed table `{}` from publication `{}`", doubleQuoteWithSchema(table_name), publication_name);
}
@ -827,7 +827,7 @@ std::set<String> PostgreSQLReplicationHandler::fetchRequiredTables()
{
LOG_WARNING(log,
"Publication {} already exists, but it is a CREATE query, not ATTACH. Publication will be dropped",
publication_name);
doubleQuoteString(publication_name));
connection.execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); });
}
@ -837,7 +837,7 @@ std::set<String> PostgreSQLReplicationHandler::fetchRequiredTables()
{
LOG_WARNING(log,
"Publication {} already exists and tables list is empty. Assuming publication is correct.",
publication_name);
doubleQuoteString(publication_name));
{
pqxx::nontransaction tx(connection.getRef());
@ -888,7 +888,7 @@ std::set<String> PostgreSQLReplicationHandler::fetchRequiredTables()
"To avoid redundant work, you can try ALTER PUBLICATION query to remove redundant tables. "
"Or you can you ALTER SETTING. "
"\nPublication tables: {}.\nTables list: {}",
publication_name, diff_tables, publication_tables, listed_tables);
doubleQuoteString(publication_name), diff_tables, publication_tables, listed_tables);
return std::set(expected_tables.begin(), expected_tables.end());
}

View File

@ -1473,8 +1473,8 @@ StoragePtr create(const StorageFactory::Arguments & args)
"zk_root_path: path in the Keeper where the values will be stored (required)\n"
"keys_limit: number of keys allowed to be stored, 0 is no limit (default: 0)");
const auto zk_root_path_node = evaluateConstantExpressionAsLiteral(engine_args[0], args.getLocalContext());
auto zk_root_path = checkAndGetLiteralArgument<std::string>(zk_root_path_node, "zk_root_path");
engine_args[0] = evaluateConstantExpressionAsLiteral(engine_args[0], args.getLocalContext());
auto zk_root_path = checkAndGetLiteralArgument<std::string>(engine_args[0], "zk_root_path");
UInt64 keys_limit = 0;
if (engine_args.size() > 1)

View File

@ -248,7 +248,9 @@ TTLDescription TTLDescription::getTTLFromAST(
if (ASTPtr where_expr_ast = ttl_element->where())
{
result.where_expression_ast = where_expr_ast->clone();
where_expression = buildExpressionAndSets(where_expr_ast, columns.getAllPhysical(), context).expression;
ASTPtr ast = where_expr_ast->clone();
where_expression = buildExpressionAndSets(ast, columns.getAllPhysical(), context).expression;
result.where_expression_columns = where_expression->getRequiredColumnsWithTypes();
result.where_result_column = where_expression->getSampleBlock().safeGetByPosition(0).name;
}
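Why the hunk clones the WHERE AST before building the expression (inferred from the commit message "Fix METADATA_MISMATCH due to TTL with WHERE"): buildExpressionAndSets takes the AST by reference and may rewrite it, so passing the stored result.where_expression_ast directly would change what later gets serialized and compared against ZooKeeper metadata. A toy of the hazard:

#include <memory>
#include <string>

struct Node { std::string text; };
using NodePtr = std::shared_ptr<Node>;

/// Stand-in for a builder that rewrites its input in place.
void build(NodePtr & ast) { ast->text += " [rewritten]"; }

int main()
{
    NodePtr stored = std::make_shared<Node>(Node{"x > 1"});
    NodePtr scratch = std::make_shared<Node>(*stored); /// clone, as the fix does
    build(scratch); /// the mutation hits the clone only
    /// stored->text is still "x > 1", so the serialized metadata stays stable
}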

View File

@ -7,28 +7,28 @@ import sys
from pathlib import Path
from typing import List
from ci_config import CI
from env_helper import (
GITHUB_JOB_URL,
CI_CONFIG_PATH,
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
IS_CI,
REPORT_PATH,
TEMP_PATH,
CI_CONFIG_PATH,
IS_CI,
)
from pr_info import PRInfo
from report import (
ERROR,
FAILURE,
GITHUB_JOB_URL,
PENDING,
SUCCESS,
BuildResult,
JobReport,
create_build_html_report,
get_worst_status,
FAILURE,
)
from stopwatch import Stopwatch
from ci_config import CI
# Old way to read the needs_data
NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "")

View File

@ -14,9 +14,12 @@ from typing import Any, Dict, List, Optional
import docker_images_helper
import upload_result_helper
from build_check import get_release_or_pr
from ci_buddy import CIBuddy
from ci_cache import CiCache
from ci_config import CI
from ci_metadata import CiMetadata
from ci_utils import GH, Utils, Envs
from ci_settings import CiSettings
from ci_utils import GH, Envs, Utils
from clickhouse_helper import (
CiLogsCredentials,
ClickHouseHelper,
@ -30,19 +33,12 @@ from commit_status_helper import (
RerunHelper,
format_description,
get_commit,
get_commit_filtered_statuses,
post_commit_status,
set_status_comment,
get_commit_filtered_statuses,
)
from digest_helper import DockerDigester
from env_helper import (
IS_CI,
GITHUB_JOB_API_URL,
GITHUB_REPOSITORY,
GITHUB_RUN_ID,
REPO_COPY,
TEMP_PATH,
)
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_ID, IS_CI, REPO_COPY, TEMP_PATH
from get_robot_token import get_best_robot_token
from git_helper import GIT_PREFIX, Git
from git_helper import Runner as GitRunner
@ -50,22 +46,20 @@ from github_helper import GitHub
from pr_info import PRInfo
from report import (
ERROR,
FAIL,
GITHUB_JOB_API_URL,
JOB_FINISHED_TEST_NAME,
JOB_STARTED_TEST_NAME,
OK,
PENDING,
SUCCESS,
BuildResult,
JobReport,
TestResult,
OK,
JOB_STARTED_TEST_NAME,
JOB_FINISHED_TEST_NAME,
FAIL,
)
from s3_helper import S3Helper
from tee_popen import TeePopen
from ci_cache import CiCache
from ci_settings import CiSettings
from ci_buddy import CIBuddy
from stopwatch import Stopwatch
from tee_popen import TeePopen
from version_helper import get_version_from_repo
# pylint: disable=too-many-lines

View File

@ -11,6 +11,8 @@ from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
import requests
from env_helper import IS_CI
logger = logging.getLogger(__name__)
@ -42,7 +44,7 @@ def cd(path: Union[Path, str]) -> Iterator[None]:
def kill_ci_runner(message: str) -> None:
"""The function to kill the current process with all parents when it's possible.
Works only when run with the set `CI` environment"""
if not os.getenv("CI", ""): # cycle import env_helper
if not IS_CI:
logger.info("Running outside the CI, won't kill the runner")
return
print(f"::error::{message}")

View File

@ -7,7 +7,7 @@ import time
from collections import defaultdict
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, List, Optional, Union, Callable
from typing import Callable, Dict, List, Optional, Union
from github import Github
from github.Commit import Commit

View File

@ -1,11 +1,7 @@
#!/usr/bin/env python
import logging
import os
from os import path as p
from typing import Tuple
from build_download_helper import APIException, get_gh_api
module_dir = p.abspath(p.dirname(__file__))
git_root = p.abspath(p.join(module_dir, "..", ".."))
@ -41,102 +37,3 @@ S3_ARTIFACT_DOWNLOAD_TEMPLATE = (
"{pr_or_release}/{commit}/{build_name}/{artifact}"
)
CI_CONFIG_PATH = f"{TEMP_PATH}/ci_config.json"
# These parameters are set only on demand, and only once
_GITHUB_JOB_ID = ""
_GITHUB_JOB_URL = ""
_GITHUB_JOB_API_URL = ""
def GITHUB_JOB_ID(safe: bool = True) -> str:
global _GITHUB_JOB_ID
global _GITHUB_JOB_URL
global _GITHUB_JOB_API_URL
if _GITHUB_JOB_ID:
return _GITHUB_JOB_ID
try:
_GITHUB_JOB_ID, _GITHUB_JOB_URL, _GITHUB_JOB_API_URL = get_job_id_url(
GITHUB_JOB
)
except APIException as e:
logging.warning("Unable to retrieve the job info from GH API: %s", e)
if not safe:
raise e
return _GITHUB_JOB_ID
def GITHUB_JOB_URL(safe: bool = True) -> str:
try:
GITHUB_JOB_ID()
except APIException:
if safe:
logging.warning("Using run URL as a fallback to not fail the job")
return GITHUB_RUN_URL
raise
return _GITHUB_JOB_URL
def GITHUB_JOB_API_URL(safe: bool = True) -> str:
GITHUB_JOB_ID(safe)
return _GITHUB_JOB_API_URL
def get_job_id_url(job_name: str) -> Tuple[str, str, str]:
job_id = ""
job_url = ""
job_api_url = ""
if GITHUB_RUN_ID == "0":
job_id = "0"
if job_id:
return job_id, job_url, job_api_url
jobs = []
page = 1
while not job_id:
response = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"actions/runs/{GITHUB_RUN_ID}/jobs?per_page=100&page={page}"
)
page += 1
data = response.json()
jobs.extend(data["jobs"])
for job in data["jobs"]:
if job["name"] != job_name:
continue
job_id = job["id"]
job_url = job["html_url"]
job_api_url = job["url"]
return job_id, job_url, job_api_url
if (
len(jobs) >= data["total_count"] # just in case of inconsistency
or len(data["jobs"]) == 0 # if we exceeded the pages
):
job_id = "0"
if not job_url:
# This is a terrible workaround for the case of another broken part of
# GitHub actions. For nested workflows it doesn't provide a proper job_name
# value, but only the final one. So, for `OriginalJob / NestedJob / FinalJob`
# full name, job_name contains only FinalJob
matched_jobs = []
for job in jobs:
nested_parts = job["name"].split(" / ")
if len(nested_parts) <= 1:
continue
if nested_parts[-1] == job_name:
matched_jobs.append(job)
if len(matched_jobs) == 1:
# The best case scenario
job_id = matched_jobs[0]["id"]
job_url = matched_jobs[0]["html_url"]
job_api_url = matched_jobs[0]["url"]
return job_id, job_url, job_api_url
if matched_jobs:
logging.error(
"We could not get the ID and URL for the current job name %s, there "
"are more than one jobs match it for the nested workflows. Please, "
"refer to https://github.com/actions/runner/issues/2577",
job_name,
)
return job_id, job_url, job_api_url

View File

@ -20,9 +20,16 @@ from typing import (
Union,
)
from build_download_helper import get_gh_api
from build_download_helper import APIException, get_gh_api
from ci_config import CI
from env_helper import REPORT_PATH, GITHUB_WORKSPACE
from env_helper import (
GITHUB_JOB,
GITHUB_REPOSITORY,
GITHUB_RUN_ID,
GITHUB_RUN_URL,
GITHUB_WORKSPACE,
REPORT_PATH,
)
logger = logging.getLogger(__name__)
@ -38,6 +45,105 @@ SKIPPED: Final = "SKIPPED"
StatusType = Literal["error", "failure", "pending", "success"]
STATUSES = [ERROR, FAILURE, PENDING, SUCCESS] # type: List[StatusType]
# These parameters are set only on demand, and only once
_GITHUB_JOB_ID = ""
_GITHUB_JOB_URL = ""
_GITHUB_JOB_API_URL = ""
def GITHUB_JOB_ID(safe: bool = True) -> str:
global _GITHUB_JOB_ID
global _GITHUB_JOB_URL
global _GITHUB_JOB_API_URL
if _GITHUB_JOB_ID:
return _GITHUB_JOB_ID
try:
_GITHUB_JOB_ID, _GITHUB_JOB_URL, _GITHUB_JOB_API_URL = get_job_id_url(
GITHUB_JOB
)
except APIException as e:
logging.warning("Unable to retrieve the job info from GH API: %s", e)
if not safe:
raise e
return _GITHUB_JOB_ID
def GITHUB_JOB_URL(safe: bool = True) -> str:
try:
GITHUB_JOB_ID()
except APIException:
if safe:
logging.warning("Using run URL as a fallback to not fail the job")
return GITHUB_RUN_URL
raise
return _GITHUB_JOB_URL
def GITHUB_JOB_API_URL(safe: bool = True) -> str:
GITHUB_JOB_ID(safe)
return _GITHUB_JOB_API_URL
def get_job_id_url(job_name: str) -> Tuple[str, str, str]:
job_id = ""
job_url = ""
job_api_url = ""
if GITHUB_RUN_ID == "0":
job_id = "0"
if job_id:
return job_id, job_url, job_api_url
jobs = []
page = 1
while not job_id:
response = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"actions/runs/{GITHUB_RUN_ID}/jobs?per_page=100&page={page}"
)
page += 1
data = response.json()
jobs.extend(data["jobs"])
for job in data["jobs"]:
if job["name"] != job_name:
continue
job_id = job["id"]
job_url = job["html_url"]
job_api_url = job["url"]
return job_id, job_url, job_api_url
if (
len(jobs) >= data["total_count"] # just in case of inconsistency
or len(data["jobs"]) == 0 # if we exceeded the pages
):
job_id = "0"
if not job_url:
# This is a terrible workaround for the case of another broken part of
# GitHub actions. For nested workflows it doesn't provide a proper job_name
# value, but only the final one. So, for `OriginalJob / NestedJob / FinalJob`
# full name, job_name contains only FinalJob
matched_jobs = []
for job in jobs:
nested_parts = job["name"].split(" / ")
if len(nested_parts) <= 1:
continue
if nested_parts[-1] == job_name:
matched_jobs.append(job)
if len(matched_jobs) == 1:
# The best case scenario
job_id = matched_jobs[0]["id"]
job_url = matched_jobs[0]["html_url"]
job_api_url = matched_jobs[0]["url"]
return job_id, job_url, job_api_url
if matched_jobs:
logging.error(
"We could not get the ID and URL for the current job name %s, there "
"are more than one jobs match it for the nested workflows. Please, "
"refer to https://github.com/actions/runner/issues/2577",
job_name,
)
return job_id, job_url, job_api_url
# The order of statuses from the worst to the best
def _state_rank(status: str) -> int:

View File

@ -1,15 +1,10 @@
import logging
import os
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Union
import os
import logging
from env_helper import (
GITHUB_JOB_URL,
GITHUB_REPOSITORY,
GITHUB_RUN_URL,
GITHUB_SERVER_URL,
)
from report import TestResults, create_test_html_report
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL, GITHUB_SERVER_URL
from report import GITHUB_JOB_URL, TestResults, create_test_html_report
from s3_helper import S3Helper

View File

@ -2,7 +2,7 @@ version: '2.3'
services:
postgres1:
image: postgres
command: ["postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=2", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_filename=postgresql.log", "-c", "log_statement=all", "-c", "max_connections=200"]
command: ["postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=4", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_filename=postgresql.log", "-c", "log_statement=all", "-c", "max_connections=200"]
restart: always
expose:
- ${POSTGRES_PORT:-5432}

View File

@ -82,24 +82,24 @@ def drop_postgres_schema(cursor, schema_name):
def create_postgres_table(
cursor,
table_name,
database_name="",
replica_identity_full=False,
template=postgres_table_template,
):
if database_name == "":
name = table_name
else:
name = f"{database_name}.{table_name}"
drop_postgres_table(cursor, name)
query = template.format(name)
cursor.execute(query)
drop_postgres_table(cursor, table_name)
query = template.format(table_name)
print(f"Query: {query}")
cursor.execute(query)
if replica_identity_full:
cursor.execute(f"ALTER TABLE {name} REPLICA IDENTITY FULL;")
cursor.execute(f"""ALTER TABLE "{table_name}" REPLICA IDENTITY FULL;""")
def drop_postgres_table(cursor, name):
cursor.execute(f"""DROP TABLE IF EXISTS "{name}" """)
def drop_postgres_table(cursor, name, database_name=""):
if database_name != "":
cursor.execute(f"""DROP TABLE IF EXISTS "{database_name}"."{name}" """)
else:
cursor.execute(f"""DROP TABLE IF EXISTS "{name}" """)
def create_postgres_table_with_schema(cursor, schema_name, table_name):
@ -269,15 +269,28 @@ class PostgresManager:
def create_postgres_table(
self, table_name, database_name="", template=postgres_table_template
):
create_postgres_table(
self.cursor, table_name, database_name=database_name, template=template
)
database_name = self.database_or_default(database_name)
cursor = self.cursor
if database_name != self.get_default_database():
try:
self.create_postgres_db(database_name)
except Exception:
# PostgreSQL does not support CREATE DATABASE IF NOT EXISTS, so ignore the failure
pass
conn = get_postgres_conn(
ip=self.ip,
port=self.port,
database=True,
database_name=database_name,
)
cursor = conn.cursor()
create_postgres_table(cursor, table_name, template=template)
def create_and_fill_postgres_table(self, table_name, database_name=""):
create_postgres_table(self.cursor, table_name, database_name)
database_name = self.database_or_default(database_name)
self.create_postgres_table(table_name, database_name)
self.instance.query(
f"INSERT INTO {database_name}.{table_name} SELECT number, number from numbers(50)"
f"INSERT INTO `{database_name}`.`{table_name}` SELECT number, number from numbers(50)"
)
def create_and_fill_postgres_tables(
@ -289,11 +302,11 @@ class PostgresManager:
):
for i in range(tables_num):
table_name = f"{table_name_base}_{i}"
create_postgres_table(self.cursor, table_name, database_name)
self.create_postgres_table(table_name, database_name)
if numbers > 0:
db = self.database_or_default(database_name)
self.instance.query(
f"INSERT INTO {db}.{table_name} SELECT number, number from numbers({numbers})"
f"INSERT INTO `{db}`.{table_name} SELECT number, number from numbers({numbers})"
)
@ -408,4 +421,9 @@ def check_several_tables_are_synchronized(
schema_name="",
):
for i in range(tables_num):
check_tables_are_synchronized(instance, f"postgresql_replica_{i}")
check_tables_are_synchronized(
instance,
f"postgresql_replica_{i}",
postgres_database=postgres_database,
materialized_database=materialized_database,
)

View File

@ -1,5 +1,6 @@
import pytest
import random, string
import re
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
@ -336,6 +337,10 @@ def test_create_database():
def test_table_functions():
password = new_password()
azure_conn_string = cluster.env_variables["AZURITE_CONNECTION_STRING"]
account_key_pattern = re.compile("AccountKey=.*?(;|$)")
masked_azure_conn_string = re.sub(
account_key_pattern, "AccountKey=[HIDDEN]\\1", azure_conn_string
)
azure_storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
azure_account_name = "devstoreaccount1"
azure_account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
@ -467,23 +472,23 @@ def test_table_functions():
"CREATE TABLE tablefunc30 (x int) AS s3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV')",
"CREATE TABLE tablefunc31 (`x` int) AS s3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '[HIDDEN]')",
"CREATE TABLE tablefunc32 (`x` int) AS deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
f"CREATE TABLE tablefunc33 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')",
f"CREATE TABLE tablefunc34 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc35 (x int) AS azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc33 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')",
f"CREATE TABLE tablefunc34 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc35 (`x` int) AS azureBlobStorage('{masked_azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc36 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_3.csv', '{azure_account_name}', '[HIDDEN]')",
f"CREATE TABLE tablefunc37 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_4.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')",
f"CREATE TABLE tablefunc38 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_5.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')",
f"CREATE TABLE tablefunc39 (`x` int) AS azureBlobStorage('{azure_storage_account_url}', 'cont', 'test_simple_6.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc40 (x int) AS azureBlobStorage(named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_7.csv', format = 'CSV')",
f"CREATE TABLE tablefunc40 (`x` int) AS azureBlobStorage(named_collection_2, connection_string = '{masked_azure_conn_string}', container = 'cont', blob_path = 'test_simple_7.csv', format = 'CSV')",
f"CREATE TABLE tablefunc41 (`x` int) AS azureBlobStorage(named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_8.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')",
f"CREATE TABLE tablefunc42 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV')",
f"CREATE TABLE tablefunc43 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_10.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc44 (x int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_conn_string}', 'cont', 'test_simple_11.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc42 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_9.csv', 'CSV')",
f"CREATE TABLE tablefunc43 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_10.csv', 'CSV', 'none')",
f"CREATE TABLE tablefunc44 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{masked_azure_conn_string}', 'cont', 'test_simple_11.csv', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc45 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_12.csv', '{azure_account_name}', '[HIDDEN]')",
f"CREATE TABLE tablefunc46 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_13.csv', '{azure_account_name}', '[HIDDEN]', 'CSV')",
f"CREATE TABLE tablefunc47 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_14.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none')",
f"CREATE TABLE tablefunc48 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')",
f"CREATE TABLE tablefunc49 (x int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
f"CREATE TABLE tablefunc49 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{masked_azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
f"CREATE TABLE tablefunc50 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')",
"CREATE TABLE tablefunc51 (`x` int) AS iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
],
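
The masking asserted above can be reproduced in isolation. A minimal sketch (plain Python, independent of the test harness; the sample connection string is illustrative but follows the standard Azurite format) showing that the account_key_pattern substitution hides only the AccountKey segment:

import re

account_key_pattern = re.compile("AccountKey=.*?(;|$)")
sample = (
    "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;"
    "AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
    "BlobEndpoint=http://azurite1:10000/devstoreaccount1;"
)
masked = re.sub(account_key_pattern, "AccountKey=[HIDDEN]\\1", sample)
# Only the key is replaced; the delimiter captured by group 1 is kept.
assert "AccountKey=[HIDDEN];" in masked
assert "Eby8vdM02" not in masked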

View File

@@ -0,0 +1,21 @@
<clickhouse>
<remote_servers>
<parallel_replicas>
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>node0</host>
<port>9000</port>
</replica>
<replica>
<host>node1</host>
<port>9000</port>
</replica>
<replica>
<host>node2</host>
<port>9000</port>
</replica>
</shard>
</parallel_replicas>
</remote_servers>
</clickhouse>

View File

@@ -0,0 +1,74 @@
import pytest
import re
from helpers.cluster import ClickHouseCluster
from random import randint
cluster = ClickHouseCluster(__file__)
nodes = [
cluster.add_instance(
f"node{num}",
main_configs=["configs/remote_servers.xml"],
with_zookeeper=True,
macros={"replica": f"node{num}", "shard": "shard"},
)
for num in range(3)
]
@pytest.fixture(scope="module", autouse=True)
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def _create_tables(table_name):
nodes[0].query(
f"DROP TABLE IF EXISTS {table_name} ON CLUSTER 'parallel_replicas'",
settings={"database_atomic_wait_for_drop_and_detach_synchronously": True},
)
# A big number of granules combined with a low total size in bytes means super tiny granules, hence a big min_marks_per_task
# => a big mark_segment_size will be chosen (here: 1000 rows with index_granularity = 1 give 1000 one-row granules).
# It is not required to be big, just not equal to the default.
nodes[0].query(
f"""
CREATE TABLE {table_name} ON CLUSTER 'parallel_replicas' (value Int64)
Engine=ReplicatedMergeTree('/test_parallel_replicas/shard/{table_name}', '{{replica}}')
ORDER BY ()
SETTINGS index_granularity = 1
"""
)
nodes[0].query(f"INSERT INTO {table_name} SELECT 42 FROM numbers(1000)")
nodes[0].query(f"SYSTEM SYNC REPLICA ON CLUSTER 'parallel_replicas' {table_name}")
# mark_segment_size is now part of the protocol and is communicated to the initiator.
# Let's check that the correct value is actually used by the coordinator.
def test_mark_segment_size_communicated_correctly(start_cluster):
table_name = "t"
_create_tables(table_name)
for local_plan in [0, 1]:
query_id = f"query_id_{randint(0, 1000000)}"
nodes[0].query(
f"SELECT sum(value) FROM {table_name}",
settings={
"allow_experimental_parallel_reading_from_replicas": 2,
"max_parallel_replicas": 100,
"cluster_for_parallel_replicas": "parallel_replicas",
"parallel_replicas_mark_segment_size": 0,
"parallel_replicas_local_plan": local_plan,
"query_id": query_id,
},
)
nodes[0].query("SYSTEM FLUSH LOGS")
log_line = nodes[0].grep_in_log(
f"{query_id}.*Reading state is fully initialized"
)
assert re.search(r"mark_segment_size: (\d+)", log_line).group(1) == "16384"

View File

@@ -1252,6 +1252,110 @@ def test_partial_and_full_table(started_cluster):
)
def test_quoting_publication(started_cluster):
postgres_database = "postgres-postgres"
pg_manager3 = PostgresManager()
pg_manager3.init(
instance,
cluster.postgres_ip,
cluster.postgres_port,
default_database=postgres_database,
)
NUM_TABLES = 5
materialized_database = "test-database"
pg_manager3.create_and_fill_postgres_tables(NUM_TABLES, 10000)
check_table_name_1 = "postgresql-replica-5"
pg_manager3.create_and_fill_postgres_table(check_table_name_1)
pg_manager3.create_materialized_db(
ip=started_cluster.postgres_ip,
port=started_cluster.postgres_port,
materialized_database=materialized_database,
)
check_several_tables_are_synchronized(
instance,
NUM_TABLES,
materialized_database=materialized_database,
postgres_database=postgres_database,
)
result = instance.query(f"SHOW TABLES FROM `{materialized_database}`")
assert (
result
== "postgresql-replica-5\npostgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n"
)
check_tables_are_synchronized(
instance,
check_table_name_1,
materialized_database=materialized_database,
postgres_database=postgres_database,
)
instance.query(
f"INSERT INTO `{postgres_database}`.`{check_table_name_1}` SELECT number, number from numbers(10000, 10000)"
)
check_tables_are_synchronized(
instance,
check_table_name_1,
materialized_database=materialized_database,
postgres_database=postgres_database,
)
check_table_name_2 = "postgresql-replica-6"
pg_manager3.create_and_fill_postgres_table(check_table_name_2)
instance.query(f"ATTACH TABLE `{materialized_database}`.`{check_table_name_2}`")
result = instance.query(f"SHOW TABLES FROM `{materialized_database}`")
assert (
result
== "postgresql-replica-5\npostgresql-replica-6\npostgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n"
)
check_tables_are_synchronized(
instance,
check_table_name_2,
materialized_database=materialized_database,
postgres_database=postgres_database,
)
instance.query(
f"INSERT INTO `{postgres_database}`.`{check_table_name_2}` SELECT number, number from numbers(10000, 10000)"
)
check_tables_are_synchronized(
instance,
check_table_name_2,
materialized_database=materialized_database,
postgres_database=postgres_database,
)
instance.restart_clickhouse()
check_tables_are_synchronized(
instance,
check_table_name_1,
materialized_database=materialized_database,
postgres_database=postgres_database,
)
check_tables_are_synchronized(
instance,
check_table_name_2,
materialized_database=materialized_database,
postgres_database=postgres_database,
)
instance.query(
f"DETACH TABLE `{materialized_database}`.`{check_table_name_2}` PERMANENTLY"
)
time.sleep(5)
result = instance.query(f"SHOW TABLES FROM `{materialized_database}`")
assert (
result
== "postgresql-replica-5\npostgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n"
)
if __name__ == "__main__":
cluster.start()
input("Cluster created, press any key to destroy...")

View File

@@ -976,6 +976,14 @@ def test_max_set_age(started_cluster):
)
)
node.restart_clickhouse()
expected_rows *= 2
wait_for_condition(lambda: get_count() == expected_rows)
assert files_to_generate == int(
node.query(f"SELECT uniq(_path) from {dst_table_name}")
)
def test_max_set_size(started_cluster):
node = started_cluster.instances["instance"]

View File

@@ -14,3 +14,35 @@
0.25
0.125
0.25
0.75
0.75
0.75
0.75
0.75
0.75
0.75
0.75
0.75
0.25
0.25
0.25
0.25
0.25
0.125
0.25
3
3
3
3
3
3
3
3
3
1
1
1
1
1
1
1

View File

@@ -13,4 +13,44 @@ select arrayAUC(cast([-10, -40, -35, -80] as Array(Int32)), [0, 0, 1, 1]);
select arrayAUC(cast([-10, -40, -35, -80] as Array(Int64)), [0, 0, 1, 1]);
select arrayAUC(cast([-0.1, -0.4, -0.35, -0.8] as Array(Float32)) , [0, 0, 1, 1]);
select arrayAUC([0, 3, 5, 6, 7.5, 8], [1, 0, 1, 0, 0, 0]);
select arrayAUC([0.1, 0.35, 0.4, 0.8], [1, 0, 1, 0]);
select arrayAUC([0.1, 0.35, 0.4, 0.8], [1, 0, 1, 0]);
select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1], true);
select arrayAUC([0.1, 0.4, 0.35, 0.8], cast([0, 0, 1, 1] as Array(Int8)), true);
select arrayAUC([0.1, 0.4, 0.35, 0.8], cast([-1, -1, 1, 1] as Array(Int8)), true);
select arrayAUC([0.1, 0.4, 0.35, 0.8], cast(['false', 'false', 'true', 'true'] as Array(Enum8('false' = 0, 'true' = 1))), true);
select arrayAUC([0.1, 0.4, 0.35, 0.8], cast(['false', 'false', 'true', 'true'] as Array(Enum8('false' = -1, 'true' = 1))), true);
select arrayAUC(cast([10, 40, 35, 80] as Array(UInt8)), [0, 0, 1, 1], true);
select arrayAUC(cast([10, 40, 35, 80] as Array(UInt16)), [0, 0, 1, 1], true);
select arrayAUC(cast([10, 40, 35, 80] as Array(UInt32)), [0, 0, 1, 1], true);
select arrayAUC(cast([10, 40, 35, 80] as Array(UInt64)), [0, 0, 1, 1], true);
select arrayAUC(cast([-10, -40, -35, -80] as Array(Int8)), [0, 0, 1, 1], true);
select arrayAUC(cast([-10, -40, -35, -80] as Array(Int16)), [0, 0, 1, 1], true);
select arrayAUC(cast([-10, -40, -35, -80] as Array(Int32)), [0, 0, 1, 1], true);
select arrayAUC(cast([-10, -40, -35, -80] as Array(Int64)), [0, 0, 1, 1], true);
select arrayAUC(cast([-0.1, -0.4, -0.35, -0.8] as Array(Float32)) , [0, 0, 1, 1], true);
select arrayAUC([0, 3, 5, 6, 7.5, 8], [1, 0, 1, 0, 0, 0], true);
select arrayAUC([0.1, 0.35, 0.4, 0.8], [1, 0, 1, 0], true);
select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1], false);
select arrayAUC([0.1, 0.4, 0.35, 0.8], cast([0, 0, 1, 1] as Array(Int8)), false);
select arrayAUC([0.1, 0.4, 0.35, 0.8], cast([-1, -1, 1, 1] as Array(Int8)), false);
select arrayAUC([0.1, 0.4, 0.35, 0.8], cast(['false', 'false', 'true', 'true'] as Array(Enum8('false' = 0, 'true' = 1))), false);
select arrayAUC([0.1, 0.4, 0.35, 0.8], cast(['false', 'false', 'true', 'true'] as Array(Enum8('false' = -1, 'true' = 1))), false);
select arrayAUC(cast([10, 40, 35, 80] as Array(UInt8)), [0, 0, 1, 1], false);
select arrayAUC(cast([10, 40, 35, 80] as Array(UInt16)), [0, 0, 1, 1], false);
select arrayAUC(cast([10, 40, 35, 80] as Array(UInt32)), [0, 0, 1, 1], false);
select arrayAUC(cast([10, 40, 35, 80] as Array(UInt64)), [0, 0, 1, 1], false);
select arrayAUC(cast([-10, -40, -35, -80] as Array(Int8)), [0, 0, 1, 1], false);
select arrayAUC(cast([-10, -40, -35, -80] as Array(Int16)), [0, 0, 1, 1], false);
select arrayAUC(cast([-10, -40, -35, -80] as Array(Int32)), [0, 0, 1, 1], false);
select arrayAUC(cast([-10, -40, -35, -80] as Array(Int64)), [0, 0, 1, 1], false);
select arrayAUC(cast([-0.1, -0.4, -0.35, -0.8] as Array(Float32)) , [0, 0, 1, 1], false);
select arrayAUC([0, 3, 5, 6, 7.5, 8], [1, 0, 1, 0, 0, 0], false);
select arrayAUC([0.1, 0.35, 0.4, 0.8], [1, 0, 1, 0], false);
-- negative tests
select arrayAUC([0, 0, 1, 1]); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select arrayAUC([0.1, 0.35], [0, 0, 1, 1]); -- { serverError BAD_ARGUMENTS }
select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1], materialize(true)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1], true, true); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
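
Comparing the two reference files suggests what the new third argument controls: with scaling (the default) the familiar normalized AUC is returned, while with false the raw statistic is returned without dividing by the number of (positive, negative) pairs. A pure-Python sketch of that relationship, not ClickHouse's implementation, assuming ties contribute 0.5 (ties are not exercised by the values below):

def array_auc(scores, labels, scale=True):
    pos = [s for s, l in zip(scores, labels) if l]
    neg = [s for s, l in zip(scores, labels) if not l]
    # Count correctly ordered (negative, positive) pairs; ties assumed to count 0.5.
    raw = sum(1.0 if n < p else 0.5 if n == p else 0.0 for p in pos for n in neg)
    return raw / (len(pos) * len(neg)) if scale else raw

assert array_auc([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]) == 0.75            # scaled reference value
assert array_auc([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1], scale=False) == 3  # unscaled reference value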

View File

@@ -7,3 +7,21 @@ nan
0.75
1
0.75
nan
nan
nan
0.5
1
0
0.75
1
0.75
0
0
0
0.5
1
0
1.5
2
1.5

View File

@@ -12,3 +12,35 @@ SELECT arrayAUC([1, 0], [0, 1]);
SELECT arrayAUC([0, 0, 1], [0, 1, 1]);
SELECT arrayAUC([0, 1, 1], [0, 1, 1]);
SELECT arrayAUC([0, 1, 1], [0, 0, 1]);
SELECT arrayAUC([], [], true); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT arrayAUC([1], [1], true);
SELECT arrayAUC([1], [], true); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT arrayAUC([], [1], true); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT arrayAUC([1, 2], [3], true); -- { serverError BAD_ARGUMENTS }
SELECT arrayAUC([1], [2, 3], true); -- { serverError BAD_ARGUMENTS }
SELECT arrayAUC([1, 1], [1, 1], true);
SELECT arrayAUC([1, 1], [0, 0], true);
SELECT arrayAUC([1, 1], [0, 1], true);
SELECT arrayAUC([0, 1], [0, 1], true);
SELECT arrayAUC([1, 0], [0, 1], true);
SELECT arrayAUC([0, 0, 1], [0, 1, 1], true);
SELECT arrayAUC([0, 1, 1], [0, 1, 1], true);
SELECT arrayAUC([0, 1, 1], [0, 0, 1], true);
SELECT arrayAUC([], [], false); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT arrayAUC([1], [1], false);
SELECT arrayAUC([1], [], false); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT arrayAUC([], [1], false); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT arrayAUC([1, 2], [3], false); -- { serverError BAD_ARGUMENTS }
SELECT arrayAUC([1], [2, 3], false); -- { serverError BAD_ARGUMENTS }
SELECT arrayAUC([1, 1], [1, 1], false);
SELECT arrayAUC([1, 1], [0, 0], false);
SELECT arrayAUC([1, 1], [0, 1], false);
SELECT arrayAUC([0, 1], [0, 1], false);
SELECT arrayAUC([1, 0], [0, 1], false);
SELECT arrayAUC([0, 0, 1], [0, 1, 1], false);
SELECT arrayAUC([0, 1, 1], [0, 1, 1], false);
SELECT arrayAUC([0, 1, 1], [0, 0, 1], false);
SELECT arrayAUC([0, 1, 1], [0, 0, 1], false, true); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT arrayAUC([0, 1, 1]); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT arrayAUC([0, 1, 1], [0, 0, 1], 'false'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT arrayAUC([0, 1, 1], [0, 0, 1], 4); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

View File

@@ -0,0 +1,5 @@
4
5 [1,2,3]
5 2020-01-01
5 42
5 Hello

View File

@@ -0,0 +1,15 @@
set allow_experimental_dynamic_type = 1;
drop table if exists test;
create table test (d Dynamic(max_types=2)) engine=Memory;
insert into test values (42), ('Hello'), ([1,2,3]), ('2020-01-01');
insert into test values ('Hello'), ([1,2,3]), ('2020-01-01'), (42);
insert into test values ([1,2,3]), ('2020-01-01'), (42), ('Hello');
insert into test values ('2020-01-01'), (42), ('Hello'), ([1,2,3]);
insert into test values (42);
insert into test values ('Hello');
insert into test values ([1,2,3]);
insert into test values ('2020-01-01');
select uniqExact(d) from test;
select count(), d from test group by d order by d;
drop table test;
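
The reference output above follows from simple counting: each of the four distinct values appears once in each of the four rotated multi-row INSERTs and once more in its own single-row INSERT, so every group has count() = 5 and uniqExact(d) = 4. A trivial sanity check of that arithmetic:

# Each value: 4 rotated batch inserts + 1 standalone insert = 5 rows per group.
values = [42, "Hello", [1, 2, 3], "2020-01-01"]
rows_per_value = 4 + 1
assert len(values) == 4     # uniqExact(d)
assert rows_per_value == 5  # count() per group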

View File

@@ -0,0 +1,12 @@
11
6 {"a":0,"b":"Hello"}
6 {"a":0,"b":[{"f":"42"}]}
6 {"a":0,"c":"Hello"}
6 {"a":0,"c":["1","2","3"]}
6 {"a":0,"d":"2020-01-01"}
6 {"a":0,"d":["1","2","3"]}
6 {"a":0,"e":"2020-01-01"}
6 {"a":0,"e":[{"f":"42"}]}
5 {"a":42,"b":"Hello","c":["1","2","3"],"d":"2020-01-01","e":[{"f":"42"}]}
5 {"a":42,"b":[{"f":"42"}],"c":"Hello","d":["1","2","3"],"e":"2020-01-01"}
12 {"a":42}

View File

@@ -0,0 +1,39 @@
set allow_experimental_json_type = 1;
drop table if exists test;
create table test (json JSON(a UInt32, max_dynamic_paths=2)) engine=Memory;
insert into test values ('{"a" : 42, "b" : "Hello", "c" : [1, 2, 3], "d" : "2020-01-01", "e" : [{"f" : 42}]}');
insert into test values ('{"b" : "Hello", "c" : [1, 2, 3], "d" : "2020-01-01", "e" : [{"f" : 42}], "a" : 42}');
insert into test values ('{"c" : [1, 2, 3], "d" : "2020-01-01", "e" : [{"f" : 42}], "a" : 42, "b" : "Hello"}');
insert into test values ('{"d" : "2020-01-01", "e" : [{"f" : 42}], "a" : 42, "b" : "Hello", "c" : [1, 2, 3]}');
insert into test values ('{"e" : [{"f" : 42}], "a" : 42, "b" : "Hello", "c" : [1, 2, 3], "d" : "2020-01-01"}');
insert into test values ('{"a" : 42}'), ('{"b" : "Hello"}'), ('{"c" : [1, 2, 3]}'), ('{"d" : "2020-01-01"}'), ('{"e" : [{"f" : 42}]}');
insert into test values ('{"b" : "Hello"}'), ('{"c" : [1, 2, 3]}'), ('{"d" : "2020-01-01"}'), ('{"e" : [{"f" : 42}]}'), ('{"a" : 42}');
insert into test values ('{"c" : [1, 2, 3]}'), ('{"d" : "2020-01-01"}'), ('{"e" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"b" : "Hello"}');
insert into test values ('{"d" : "2020-01-01"}'), ('{"e" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"b" : "Hello"}'), ('{"c" : [1, 2, 3]}');
insert into test values ('{"e" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"b" : "Hello"}'), ('{"c" : [1, 2, 3]}'), ('{"d" : "2020-01-01"}');
insert into test values ('{"a" : 42}');
insert into test values ('{"b" : "Hello"}');
insert into test values ('{"c" : [1, 2, 3]}');
insert into test values ('{"d" : "2020-01-01"}');
insert into test values ('{"e" : [{"f" : 42}]}');
insert into test values ('{"a" : 42, "c" : "Hello", "d" : [1, 2, 3], "e" : "2020-01-01", "b" : [{"f" : 42}]}');
insert into test values ('{"c" : "Hello", "d" : [1, 2, 3], "e" : "2020-01-01", "b" : [{"f" : 42}], "a" : 42}');
insert into test values ('{"d" : [1, 2, 3], "e" : "2020-01-01", "b" : [{"f" : 42}], "a" : 42, "c" : "Hello"}');
insert into test values ('{"e" : "2020-01-01", "b" : [{"f" : 42}], "a" : 42, "c" : "Hello", "d" : [1, 2, 3]}');
insert into test values ('{"b" : [{"f" : 42}], "a" : 42, "c" : "Hello", "d" : [1, 2, 3], "e" : "2020-01-01"}');
insert into test values ('{"a" : 42}'), ('{"c" : "Hello"}'), ('{"d" : [1, 2, 3]}'), ('{"e" : "2020-01-01"}'), ('{"b" : [{"f" : 42}]}');
insert into test values ('{"c" : "Hello"}'), ('{"d" : [1, 2, 3]}'), ('{"e" : "2020-01-01"}'), ('{"b" : [{"f" : 42}]}'), ('{"a" : 42}');
insert into test values ('{"d" : [1, 2, 3]}'), ('{"e" : "2020-01-01"}'), ('{"b" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"c" : "Hello"}');
insert into test values ('{"e" : "2020-01-01"}'), ('{"b" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"c" : "Hello"}'), ('{"d" : [1, 2, 3]}');
insert into test values ('{"b" : [{"f" : 42}]}'), ('{"a" : 42}'), ('{"c" : "Hello"}'), ('{"d" : [1, 2, 3]}'), ('{"e" : "2020-01-01"}');
insert into test values ('{"a" : 42}');
insert into test values ('{"c" : "Hello"}');
insert into test values ('{"d" : [1, 2, 3]}');
insert into test values ('{"e" : "2020-01-01"}');
insert into test values ('{"b" : [{"f" : 42}]}');
select uniqExact(json) from test;
select count(), json from test group by json order by toString(json);
drop table test;

View File

@@ -0,0 +1 @@
CREATE TABLE default.ttl\n(\n `a` UInt32,\n `timestamp` DateTime\n)\nENGINE = MergeTree\nORDER BY a\nTTL timestamp + toIntervalSecond(2) WHERE a IN (\n SELECT number\n FROM system.numbers\n LIMIT 100000\n)\nSETTINGS index_granularity = 8192

View File

@@ -0,0 +1,17 @@
DROP TABLE IF EXISTS ttl;
CREATE TABLE ttl
(
`a` UInt32,
`timestamp` DateTime
)
ENGINE = MergeTree
ORDER BY a
TTL timestamp + toIntervalSecond(2) WHERE a IN (
SELECT number
FROM system.numbers
LIMIT 100000
);
SHOW CREATE ttl;
DROP TABLE ttl;

View File

@@ -0,0 +1 @@
CREATE TABLE default.`03236_keeper_map_engine_parameters`\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = KeeperMap(\'/default/test2417\')\nPRIMARY KEY key

View File

@@ -0,0 +1,8 @@
-- Tags: no-ordinary-database, no-fasttest
DROP TABLE IF EXISTS 03236_keeper_map_engine_parameters;
CREATE TABLE 03236_keeper_map_engine_parameters (key UInt64, value UInt64) Engine=KeeperMap('/' || currentDatabase() || '/test2417') PRIMARY KEY(key);
SHOW CREATE 03236_keeper_map_engine_parameters;
DROP TABLE 03236_keeper_map_engine_parameters;
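
The reference output above shows the engine argument stored already evaluated. A hedged sketch of checking the same thing programmatically (assumes the clickhouse-driver package and a local server with Keeper configured whose current database is default; the table name is illustrative):

from clickhouse_driver import Client

client = Client("localhost")
client.execute(
    "CREATE TABLE t2417 (key UInt64, value UInt64) "
    "ENGINE = KeeperMap('/' || currentDatabase() || '/test2417') PRIMARY KEY key"
)
ddl = client.execute("SHOW CREATE TABLE t2417")[0][0]
assert "/default/test2417" in ddl  # the expression was evaluated before being saved
client.execute("DROP TABLE t2417")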