Fix several issues regarding PostgreSQL array_ndims usage.

1. Properly quote column identifiers.
2. Handle empty tables that have array columns.
3. Throw a more user friendly error when column value = NULL
   when calling array_dims(column)
4. Handle column value being the empty array {}
This commit is contained in:
Ryan Jacobs 2023-11-30 23:46:42 -08:00
parent e664e66a9a
commit a90458eb65
No known key found for this signature in database
GPG Key ID: 8A0D26F7F370AE73

View File

@ -158,6 +158,17 @@ static DataTypePtr convertPostgreSQLDataType(String & type, Fn<void()> auto && r
return res; return res;
} }
/// Check if PostgreSQL relation is empty.
/// postgres_table must be already quoted + schema-qualified.
template<typename T>
bool isTableEmpty(T &tx, const String & postgres_table) {
auto query = fmt::format(
"SELECT NOT EXISTS (SELECT * FROM {} LIMIT 1);",
postgres_table
);
pqxx::result result{tx.exec(query)};
return result[0][0].as<bool>();
}
template<typename T> template<typename T>
PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
@ -213,10 +224,39 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
{ {
const auto & name_and_type = columns[i]; const auto & name_and_type = columns[i];
/// All rows must contain the same number of dimensions, so limit 1 is ok. If number of dimensions in all rows is not the same - /// NOTE: If the relation is empty, then array_ndims returns NULL.
/// such arrays are not able to be used as ClickHouse Array at all. /// If this is the case, then assume dimensions=1. This covers most
pqxx::result result{tx.exec(fmt::format("SELECT array_ndims({}) FROM {} LIMIT 1", name_and_type.name, postgres_table))}; /// use cases, but will be incorrect for empty tables with
auto dimensions = result[0][0].as<int>(); /// multi-dimension arrays. The other solutions would be to drop
/// support for empty tables OR attempt fallback to a discovered
/// array_ndims CHECK constraint.
int dimensions;
if (isTableEmpty(tx, postgres_table)) {
dimensions = 1;
} else {
/// All rows must contain the same number of dimensions.
/// 1 is ok. If number of dimensions in all rows is not the same -
/// such arrays are not able to be used as ClickHouse Array at all.
///
/// Assume dimensions=1 for empty arrays.
auto postgres_column = doubleQuoteString(name_and_type.name);
pqxx::result result{tx.exec(fmt::format(
"SELECT {} IS NULL, COALESCE(array_ndims({}), 1) "
"FROM {} LIMIT 1;",
postgres_column, postgres_column, postgres_table
))};
/// Nullable(Array) is not supported.
auto is_null = result[0][0].as<bool>();
if (is_null) {
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"PostgreSQL array cannot be NULL. Column: {}", postgres_column
);
}
dimensions = result[0][1].as<int>();
}
/// It is always 1d array if it is in recheck. /// It is always 1d array if it is in recheck.
DataTypePtr type = assert_cast<const DataTypeArray *>(name_and_type.type.get())->getNestedType(); DataTypePtr type = assert_cast<const DataTypeArray *>(name_and_type.type.get())->getNestedType();