ClickHouse/programs/format/Format.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

261 lines
9.8 KiB
C++
Raw Normal View History

#include <functional>
#include <iostream>
#include <string_view>
#include <boost/program_options.hpp>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFileDescriptor.h>
2020-11-09 16:05:40 +00:00
#include <IO/WriteBufferFromOStream.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/obfuscateQueries.h>
#include <Parsers/parseQuery.h>
#include <Common/ErrorCodes.h>
2019-08-23 15:47:27 +00:00
#include <Common/TerminalSize.h>
#include <Interpreters/Context.h>
#include <Functions/FunctionFactory.h>
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/registerTableFunctions.h>
#include <Storages/StorageFactory.h>
#include <Storages/registerStorages.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <DataTypes/DataTypeFactory.h>
2021-10-03 02:56:32 +00:00
#include <Formats/FormatFactory.h>
#include <Formats/registerFormats.h>
2019-12-15 06:34:43 +00:00
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
extern const char * auto_time_zones[];
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_FORMAT_INSERT_QUERY_WITH_DATA;
}
}
int mainEntryClickHouseFormat(int argc, char ** argv)
{
using namespace DB;
try
{
boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("query", po::value<std::string>(), "query to format")
("help,h", "produce help message")
("hilite", "add syntax highlight with ANSI terminal escape sequences")
("oneline", "format in single line")
("quiet,q", "just check syntax, no output on success")
("multiquery,n", "allow multiple queries in the same file")
("obfuscate", "obfuscate instead of formatting")
("backslash", "add a backslash at the end of each line of the formatted query")
("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe")
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
;
2022-02-06 03:22:05 +00:00
Settings cmd_settings;
for (const auto & field : cmd_settings.all())
{
std::string_view name = field.getName();
if (name == "max_parser_depth" || name == "max_query_size")
cmd_settings.addProgramOption(desc, name, field);
}
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
po::notify(options);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl;
std::cout << desc << std::endl;
return 1;
}
bool hilite = options.count("hilite");
bool oneline = options.count("oneline");
bool quiet = options.count("quiet");
bool multiple = options.count("multiquery");
bool obfuscate = options.count("obfuscate");
bool backslash = options.count("backslash");
bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert");
if (quiet && (hilite || oneline || obfuscate))
{
std::cerr << "Options 'hilite' or 'oneline' or 'obfuscate' have no sense in 'quiet' mode." << std::endl;
return 2;
}
if (obfuscate && (hilite || oneline || quiet))
{
std::cerr << "Options 'hilite' or 'oneline' or 'quiet' have no sense in 'obfuscate' mode." << std::endl;
return 2;
}
String query;
if (options.count("query"))
{
query = options["query"].as<std::string>();
}
else
{
ReadBufferFromFileDescriptor in(STDIN_FILENO);
readStringUntilEOF(query, in);
}
if (obfuscate)
{
WordMap obfuscated_words_map;
WordSet used_nouns;
SipHash hash_func;
if (options.count("seed"))
{
std::string seed;
hash_func.update(options["seed"].as<std::string>());
}
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerStorages();
2021-10-03 02:56:32 +00:00
registerFormats();
std::unordered_set<std::string> additional_names;
auto all_known_storage_names = StorageFactory::instance().getAllRegisteredNames();
auto all_known_data_type_names = DataTypeFactory::instance().getAllRegisteredNames();
auto all_known_settings = Settings().getAllRegisteredNames();
auto all_known_merge_tree_settings = MergeTreeSettings().getAllRegisteredNames();
additional_names.insert(all_known_storage_names.begin(), all_known_storage_names.end());
additional_names.insert(all_known_data_type_names.begin(), all_known_data_type_names.end());
additional_names.insert(all_known_settings.begin(), all_known_settings.end());
additional_names.insert(all_known_merge_tree_settings.begin(), all_known_merge_tree_settings.end());
for (auto * it = auto_time_zones; *it; ++it)
{
String time_zone_name = *it;
/// Example: Europe/Amsterdam
Strings split;
boost::split(split, time_zone_name, [](char c){ return c == '/'; });
for (const auto & word : split)
if (!word.empty())
additional_names.insert(word);
}
KnownIdentifierFunc is_known_identifier = [&](std::string_view name)
{
std::string what(name);
return FunctionFactory::instance().has(what)
|| AggregateFunctionFactory::instance().isAggregateFunctionName(what)
|| TableFunctionFactory::instance().isTableFunctionName(what)
2021-10-03 02:56:32 +00:00
|| FormatFactory::instance().isOutputFormat(what)
|| FormatFactory::instance().isInputFormat(what)
|| additional_names.contains(what);
};
WriteBufferFromFileDescriptor out(STDOUT_FILENO);
obfuscateQueries(query, out, obfuscated_words_map, used_nouns, hash_func, is_known_identifier);
2023-06-28 08:51:15 +00:00
out.finalize();
}
else
{
const char * pos = query.data();
const char * end = pos + query.size();
ParserQuery parser(end, allow_settings_after_format_in_insert);
do
{
2022-02-06 03:22:05 +00:00
ASTPtr res = parseQueryAndMovePosition(
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
2023-08-04 23:43:54 +00:00
/// For insert query with data(INSERT INTO ... VALUES ...), that will lead to the formatting failure,
/// we should throw an exception early, and make exception message more readable.
if (const auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
{
2023-01-23 13:16:14 +00:00
throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA,
"Can't format ASTInsertQuery with data, since data will be lost");
}
2023-08-04 23:43:54 +00:00
if (!quiet)
{
if (!backslash)
{
WriteBufferFromOStream res_buf(std::cout, 4096);
formatAST(*res, res_buf, hilite, oneline);
2023-06-28 08:51:15 +00:00
res_buf.finalize();
if (multiple)
std::cout << "\n;\n";
std::cout << std::endl;
}
/// add additional '\' at the end of each line;
else
{
WriteBufferFromOwnString str_buf;
formatAST(*res, str_buf, hilite, oneline);
auto res_string = str_buf.str();
WriteBufferFromOStream res_cout(std::cout, 4096);
const char * s_pos= res_string.data();
const char * s_end = s_pos + res_string.size();
while (s_pos != s_end)
{
if (*s_pos == '\n')
res_cout.write(" \\", 2);
res_cout.write(*s_pos++);
}
2023-06-28 08:51:15 +00:00
res_cout.finalize();
if (multiple)
std::cout << " \\\n;\n";
std::cout << std::endl;
}
}
2021-03-05 15:44:06 +00:00
do
{
/// skip spaces to avoid throw exception after last query
while (pos != end && std::isspace(*pos))
++pos;
/// for skip comment after the last query and to not throw exception
if (end - pos > 2 && *pos == '-' && *(pos + 1) == '-')
{
pos += 2;
/// skip until the end of the line
while (pos != end && *pos != '\n')
++pos;
}
/// need to parse next sql
else
break;
} while (pos != end);
} while (multiple && pos != end);
}
}
catch (...)
{
std::cerr << getCurrentExceptionMessage(true) << '\n';
return getCurrentExceptionCode();
}
return 0;
}