Fix handling of missing columns: honor allow_missing_columns in ArrowFormatUtil for the ORC and Parquet readers

This commit is contained in:
lgbo-ustc 2022-12-25 11:14:54 +08:00
parent 4cf6beee27
commit f6850d96cb
4 changed files with 21 additions and 7 deletions

View File

@@ -111,7 +111,7 @@ std::vector<int> ArrowFormatUtil::findRequiredIndices(const Block & header,
std::string col_name = named_col.name;
if (ignore_case)
boost::to_lower(col_name);
if (import_nested)
if (import_nested && !fields_indices.contains(col_name))
{
if (!schema.GetFieldByName(col_name))
{
@@ -124,8 +124,10 @@ std::vector<int> ArrowFormatUtil::findRequiredIndices(const Block & header,
auto it = fields_indices.find(col_name);
if (it == fields_indices.end())
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found field({}) in arrow schema:{}",
named_col.name, schema.ToString());
if (!allow_missing_columns)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found field({}) in arrow schema:{}", named_col.name, schema.ToString());
else
continue;
}
for (int j = 0; j < it->second.second; ++j)
{

View File

@@ -15,10 +15,13 @@ namespace DB
class ArrowFormatUtil
{
public:
explicit ArrowFormatUtil(bool ignore_case_, bool import_nested_, bool nested_type_has_index_)
explicit ArrowFormatUtil(bool ignore_case_, bool import_nested_, bool nested_type_has_index_, bool allow_missing_columns_)
: ignore_case(ignore_case_)
, import_nested(import_nested_)
, nested_type_has_index(nested_type_has_index_){}
, nested_type_has_index(nested_type_has_index_)
, allow_missing_columns(allow_missing_columns_)
{
}
~ArrowFormatUtil() = default;
std::map<std::string, std::pair<int, int>>
@@ -32,6 +35,7 @@ private:
bool ignore_case;
bool import_nested;
bool nested_type_has_index;
bool allow_missing_columns;
void calculateFieldIndices(const arrow::Field & field,
int & current_start_index,
std::map<std::string, std::pair<int, int>> & result, const std::string & name_prefix = "");

View File

@@ -134,7 +134,11 @@ void ORCBlockInputFormat::prepareReader()
format_settings.orc.case_insensitive_column_matching);
missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);
ArrowFormatUtil format_util(format_settings.orc.case_insensitive_column_matching, format_settings.orc.import_nested, true);
ArrowFormatUtil format_util(
format_settings.orc.case_insensitive_column_matching,
format_settings.orc.import_nested,
true,
format_settings.orc.allow_missing_columns);
include_indices = format_util.findRequiredIndices(getPort().getHeader(), *schema);
}

View File

@@ -128,7 +128,11 @@ void ParquetBlockInputFormat::prepareReader()
format_settings.parquet.case_insensitive_column_matching);
missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);
ArrowFormatUtil format_util(format_settings.parquet.case_insensitive_column_matching, format_settings.parquet.import_nested, false);
ArrowFormatUtil format_util(
format_settings.parquet.case_insensitive_column_matching,
format_settings.parquet.import_nested,
false,
format_settings.parquet.allow_missing_columns);
column_indices = format_util.findRequiredIndices(getPort().getHeader(), *schema);
}