adding codegen fuzzer + code generation script. new contrib added: libprotobuf-mutator

This commit is contained in:
Pavel Cheremushkin 2021-10-12 03:21:31 +03:00
parent 365296eeae
commit 7b9358a597
15 changed files with 3727 additions and 0 deletions

3
.gitmodules vendored
View File

@ -249,3 +249,6 @@
[submodule "contrib/magic_enum"]
path = contrib/magic_enum
url = https://github.com/Neargye/magic_enum
[submodule "contrib/libprotobuf-mutator"]
path = contrib/libprotobuf-mutator
url = https://github.com/google/libprotobuf-mutator

View File

@ -562,6 +562,7 @@ include (cmake/find/cassandra.cmake)
include (cmake/find/sentry.cmake)
include (cmake/find/stats.cmake)
include (cmake/find/datasketches.cmake)
# Detection for the codegen fuzzer's dependency; the find module only
# activates when ENABLE_FUZZING is set (see cmake/find/libprotobuf-mutator.cmake).
include (cmake/find/libprotobuf-mutator.cmake)
set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "")
find_contrib_lib(cityhash)

View File

@ -0,0 +1,11 @@
# Controls whether the libprotobuf-mutator contrib is used.
# Defaults to the global fuzzing switch, so it is only pulled in for fuzzer builds.
option(USE_LIBPROTOBUF_MUTATOR "Enable libprotobuf-mutator" ${ENABLE_FUZZING})

if (NOT USE_LIBPROTOBUF_MUTATOR)
    return()
endif()

set(LibProtobufMutator_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator")

if (NOT EXISTS "${LibProtobufMutator_SOURCE_DIR}/README.md")
    # FATAL_ERROR aborts configuration. The previous `message (ERROR ...)` form is
    # not a valid message() mode: "ERROR" was printed as part of the text and
    # configuration continued with a missing submodule, failing later at build time.
    message (FATAL_ERROR "submodule contrib/libprotobuf-mutator is missing. to fix try run: \n git submodule update --init --recursive")
endif()

View File

@ -49,6 +49,10 @@ add_subdirectory (replxx-cmake)
add_subdirectory (unixodbc-cmake)
add_subdirectory (nanodbc-cmake)
# libprotobuf-mutator is only consumed by the codegen fuzzer
# (src/Parsers/fuzzers/codegen_fuzzer), so it is built for fuzzing builds only.
if (ENABLE_FUZZING)
add_subdirectory (libprotobuf-mutator-cmake)
endif()
if (USE_YAML_CPP)
add_subdirectory (yaml-cpp-cmake)
endif()

1
contrib/libprotobuf-mutator vendored Submodule

@ -0,0 +1 @@
Subproject commit ffd86a32874e5c08a143019aad1aaf0907294c9f

View File

@ -0,0 +1,17 @@
# Builds Google's libprotobuf-mutator as a helper library for the codegen fuzzer.
# The library itself must not carry fuzz-target instrumentation, so the fuzzer
# sanitizer and coverage instrumentation are explicitly disabled for it.
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator)

add_library(protobuf-mutator
    ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_macro.cc
    ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_mutator.cc
    ${LIBRARY_DIR}/src/binary_format.cc
    ${LIBRARY_DIR}/src/mutator.cc
    ${LIBRARY_DIR}/src/text_format.cc
    ${LIBRARY_DIR}/src/utf8_fix.cc)

target_include_directories(protobuf-mutator BEFORE PRIVATE "${LIBRARY_DIR}")

# PUBLIC preserves the propagation of the previous keyword-less (legacy) signature:
# the mutator's public headers include protobuf headers, so consumers need them too.
target_link_libraries(protobuf-mutator PUBLIC ${PROTOBUF_LIBRARIES})

# Target-scoped options instead of overwriting the COMPILE_FLAGS string property;
# this keeps the flags composable with any options added elsewhere.
target_compile_options(protobuf-mutator PRIVATE
    -fno-sanitize=fuzzer
    -fsanitize-coverage=0)

View File

@ -6,3 +6,8 @@ target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZ
add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS})
target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})
# -Werror is stripped only for the codegen_fuzzer subdirectory and restored
# afterwards — presumably because its protoc-generated sources do not compile
# warning-free (NOTE(review): confirm). Directory variables are inherited by
# add_subdirectory, so the temporary change applies only below this point.
string(REPLACE " -Werror" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
add_subdirectory(codegen_fuzzer)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")

View File

@ -0,0 +1,13 @@
# Protobuf is needed both to compile out.proto and to link the generated code.
find_package(Protobuf REQUIRED)
# Generates out.pb.cc/out.pb.h from out.proto into the current binary directory.
protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS "out.proto")
# out.cpp is generated offline by gen.py (see update.sh in this directory),
# not by the build system.
set(FUZZER_SRCS codegen_select_fuzzer.cpp out.cpp ${PROTO_SRCS} ${PROTO_HDRS})
# Lets `#include "out.pb.h"` resolve against the binary dir where
# protobuf_generate_cpp writes its output.
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
add_executable(codegen_select_fuzzer ${FUZZER_SRCS})
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIRS}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}/src")
target_link_libraries(codegen_select_fuzzer PRIVATE clickhouse_parsers protobuf-mutator ${Protobuf_LIBRARIES} ${LIB_FUZZING_ENGINE})

View File

@ -0,0 +1,121 @@
" ";
" ";
" ";
";";
"(" $1 ")";
"(" $1 ", " $2 ")";
"(" $1 ", " $2 ", " $3 ")";
$1 ", " $2 ;
$1 ", " $2 ", " $3 ;
$1 ", " $2 ", " $3 ", " $4 ;
$1 ", " $2 ", " $3 ", " $4 ", " $5 ;
"[" $1 ", " $2 "]";
"[" $1 ", " $2 ", " $3 "]";
"[" $1 ", " $2 ", " $3 ", " $4 "]";
"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]";
$0 "(" $1 ")";
$0 "(" $1 ", " $2 ")";
$0 "(" $1 ", " $2 ", " $3 ")";
$1 " as " $2 ;
// TODO: add more clickhouse specific stuff
"SELECT " $1 " FROM " $2 " WHERE " $3 ;
"SELECT " $1 " FROM " $2 " GROUP BY " $3 ;
"SELECT " $1 " FROM " $2 " SORT BY " $3 ;
"SELECT " $1 " FROM " $2 " LIMIT " $3 ;
"SELECT " $1 " FROM " $2 " JOIN " $3 ;
"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ;
"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ;
"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ;
"SELECT " $1 " INTO OUTFILE " $2 ;
"WITH " $1 " AS " $2 ;
"{" $1 ":" $2 "}";
"[" $1 "," $2 "]";
"[]";
" x ";
"x";
" `x` ";
"`x`";
" \"value\" ";
"\"value\"";
" 0 ";
"0";
"1";
"2";
"123123123123123123";
"182374019873401982734091873420923123123123123123";
"1e-1";
"1.1";
"\"\"";
" '../../../../../../../../../etc/passwd' ";
"/";
"=";
"==";
"!=";
"<>";
"<";
"<=";
">";
">=";
"<<";
"|<<";
"&";
"|";
"||";
"<|";
"|>";
"+";
"-";
"~";
"*";
"/";
"\\";
"%";
"";
".";
",";
",";
",";
",";
",";
",";
"(";
")";
"(";
")";
"(";
")";
"(";
")";
"(";
")";
"(";
")";
"?";
":";
"@";
"@@";
"$";
"\"";
"`";
"{";
"}";
"^";
"::";
"->";
"]";
"[";

View File

@ -0,0 +1,524 @@
" ";
" ";
" ";
";";
"(" $1 ")";
"(" $1 ", " $2 ")";
"(" $1 ", " $2 ", " $3 ")";
$1 ", " $2 ;
$1 ", " $2 ", " $3 ;
$1 ", " $2 ", " $3 ", " $4 ;
$1 ", " $2 ", " $3 ", " $4 ", " $5 ;
"[" $1 ", " $2 "]";
"[" $1 ", " $2 ", " $3 "]";
"[" $1 ", " $2 ", " $3 ", " $4 "]";
"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]";
$0 "(" $1 ")";
$0 "(" $1 ", " $2 ")";
$0 "(" $1 ", " $2 ", " $3 ")";
$1 " as " $2 ;
// TODO: add more clickhouse specific stuff
"SELECT " $1 " FROM " $2 " WHERE " $3 ;
"SELECT " $1 " FROM " $2 " GROUP BY " $3 ;
"SELECT " $1 " FROM " $2 " SORT BY " $3 ;
"SELECT " $1 " FROM " $2 " LIMIT " $3 ;
"SELECT " $1 " FROM " $2 " JOIN " $3 ;
"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ;
"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ;
"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ;
"SELECT " $1 " INTO OUTFILE " $2 ;
"WITH " $1 " AS " $2 ;
"{" $1 ":" $2 "}";
"[" $1 "," $2 "]";
"[]";
" x ";
"x";
" `x` ";
"`x`";
" \"value\" ";
"\"value\"";
" 0 ";
"0";
"1";
"2";
"123123123123123123";
"182374019873401982734091873420923123123123123123";
"1e-1";
"1.1";
"\"\"";
" '../../../../../../../../../etc/passwd' ";
"/";
"=";
"==";
"!=";
"<>";
"<";
"<=";
">";
">=";
"<<";
"|<<";
"&";
"|";
"||";
"<|";
"|>";
"+";
"-";
"~";
"*";
"/";
"\\";
"%";
"";
".";
",";
",";
",";
",";
",";
",";
"(";
")";
"(";
")";
"(";
")";
"(";
")";
"(";
")";
"(";
")";
"?";
":";
"@";
"@@";
"$";
"\"";
"`";
"{";
"}";
"^";
"::";
"->";
"]";
"[";
" ADD ";
" ADD COLUMN ";
" ADD CONSTRAINT ";
" ADD INDEX ";
" AFTER ";
" AggregateFunction ";
" aggThrow ";
" ALL ";
" ALTER LIVE VIEW ";
" ALTER TABLE ";
" and ";
" ANTI ";
" any ";
" anyHeavy ";
" anyLast ";
" argMax ";
" argMin ";
" array ";
" Array ";
" arrayAll ";
" arrayAUC ";
" arrayCompact ";
" arrayConcat ";
" arrayCount ";
" arrayCumSum ";
" arrayCumSumNonNegative ";
" arrayDifference ";
" arrayDistinct ";
" arrayElement ";
" arrayEnumerate ";
" arrayEnumerateDense ";
" arrayEnumerateDenseRanked ";
" arrayEnumerateUniq ";
" arrayEnumerateUniqRanked ";
" arrayExists ";
" arrayFill ";
" arrayFilter ";
" arrayFirst ";
" arrayFirstIndex ";
" arrayFlatten ";
" arrayIntersect ";
" arrayJoin ";
" ARRAY JOIN ";
" arrayMap ";
" arrayPopBack ";
" arrayPopFront ";
" arrayPushBack ";
" arrayPushFront ";
" arrayReduce ";
" arrayReduceInRanges ";
" arrayResize ";
" arrayReverse ";
" arrayReverseFill ";
" arrayReverseSort ";
" arrayReverseSplit ";
" arraySlice ";
" arraySort ";
" arraySplit ";
" arraySum ";
" arrayUniq ";
" arrayWithConstant ";
" arrayZip ";
" AS ";
" ASC ";
" ASCENDING ";
" ASOF ";
" AST ";
" ATTACH ";
" ATTACH PART ";
" ATTACH PARTITION ";
" avg ";
" avgWeighted ";
" basename ";
" BETWEEN ";
" BOTH ";
" boundingRatio ";
" BY ";
" CAST ";
" categoricalInformationValue ";
" CHECK ";
" CHECK TABLE ";
" CLEAR COLUMN ";
" CLEAR INDEX ";
" COLLATE ";
" COLUMNS ";
" COMMENT COLUMN ";
" CONSTRAINT ";
" corr ";
" corrStable ";
" count ";
" countEqual ";
" covarPop ";
" covarPopStable ";
" covarSamp ";
" covarSampStable ";
" CREATE ";
" CROSS ";
" CUBE ";
" cutFragment ";
" cutQueryString ";
" cutQueryStringAndFragment ";
" cutToFirstSignificantSubdomain ";
" cutURLParameter ";
" cutWWW ";
" D ";
" DATABASE ";
" DATABASES ";
" Date ";
" DATE ";
" DATE_ADD ";
" DATEADD ";
" DATE_DIFF ";
" DATEDIFF ";
" DATE_SUB ";
" DATESUB ";
" DateTime ";
" DateTime64 ";
" DAY ";
" DD ";
" Decimal ";
" Decimal128 ";
" Decimal32 ";
" Decimal64 ";
" decodeURLComponent ";
" DEDUPLICATE ";
" DELETE ";
" DELETE WHERE ";
" DESC ";
" DESCENDING ";
" DESCRIBE ";
" DETACH ";
" DETACH PARTITION ";
" DICTIONARIES ";
" DICTIONARY ";
" DISTINCT ";
" domain ";
" domainWithoutWWW ";
" DROP ";
" DROP COLUMN ";
" DROP CONSTRAINT ";
" DROP DETACHED PART ";
" DROP DETACHED PARTITION ";
" DROP INDEX ";
" DROP PARTITION ";
" emptyArrayToSingle ";
" ENGINE ";
" entropy ";
" Enum ";
" Enum16 ";
" Enum8 ";
" EVENTS ";
" EXCHANGE TABLES ";
" EXISTS ";
" EXTRACT ";
" extractURLParameter ";
" extractURLParameterNames ";
" extractURLParameters ";
" FETCH PARTITION ";
" FETCH PART ";
" FINAL ";
" FIRST ";
" firstSignificantSubdomain ";
" FixedString ";
" Float32 ";
" Float64 ";
" FOR ";
" ForEach ";
" FORMAT ";
" fragment ";
" FREEZE ";
" FROM ";
" FULL ";
" FUNCTION ";
" __getScalar ";
" GLOBAL ";
" GRANULARITY ";
" groupArray ";
" groupArrayInsertAt ";
" groupArrayMovingAvg ";
" groupArrayMovingSum ";
" groupArraySample ";
" groupBitAnd ";
" groupBitmap ";
" groupBitmapAnd ";
" groupBitmapOr ";
" groupBitmapXor ";
" groupBitOr ";
" groupBitXor ";
" GROUP BY ";
" groupUniqArray ";
" has ";
" hasAll ";
" hasAny ";
" HAVING ";
" HH ";
" histogram ";
" HOUR ";
" ID ";
" if ";
" IF EXISTS ";
" IF NOT EXISTS ";
" IN ";
" INDEX ";
" indexOf ";
" INNER ";
" IN PARTITION ";
" INSERT INTO ";
" Int16 ";
" Int32 ";
" Int64 ";
" Int8 ";
" INTERVAL ";
" IntervalDay ";
" IntervalHour ";
" IntervalMinute ";
" IntervalMonth ";
" IntervalQuarter ";
" IntervalSecond ";
" IntervalWeek ";
" IntervalYear ";
" INTO OUTFILE ";
" JOIN ";
" kurtPop ";
" kurtSamp ";
" LAST ";
" LAYOUT ";
" LEADING ";
" LEFT ";
" LEFT ARRAY JOIN ";
" length ";
" LIFETIME ";
" LIKE ";
" LIMIT ";
" LIVE ";
" LOCAL ";
" LowCardinality ";
" LTRIM ";
" M ";
" MATERIALIZED ";
" MATERIALIZE INDEX ";
" MATERIALIZE TTL ";
" max ";
" maxIntersections ";
" maxIntersectionsPosition ";
" Merge ";
" MI ";
" min ";
" MINUTE ";
" MM ";
" MODIFY ";
" MODIFY COLUMN ";
" MODIFY ORDER BY ";
" MODIFY QUERY ";
" MODIFY SETTING ";
" MODIFY TTL ";
" MONTH ";
" MOVE PART ";
" MOVE PARTITION ";
" movingXXX ";
" N ";
" NAME ";
" Nested ";
" NO DELAY ";
" NONE ";
" not ";
" nothing ";
" Nothing ";
" Null ";
" Nullable ";
" NULLS ";
" OFFSET ";
" ON ";
" ONLY ";
" OPTIMIZE TABLE ";
" ORDER BY ";
" OR REPLACE ";
" OUTER ";
" PARTITION ";
" PARTITION BY ";
" path ";
" pathFull ";
" POPULATE ";
" PREWHERE ";
" PRIMARY KEY ";
" protocol ";
" Q ";
" QQ ";
" QUARTER ";
" queryString ";
" queryStringAndFragment ";
" range ";
" REFRESH ";
" RENAME COLUMN ";
" RENAME TABLE ";
" REPLACE PARTITION ";
" Resample ";
" RESUME ";
" retention ";
" RIGHT ";
" ROLLUP ";
" RTRIM ";
" S ";
" SAMPLE ";
" SAMPLE BY ";
" SECOND ";
" SELECT ";
" SEMI ";
" sequenceCount ";
" sequenceMatch ";
" SET ";
" SETTINGS ";
" SHOW ";
" SHOW PROCESSLIST ";
" simpleLinearRegression ";
" skewPop ";
" skewSamp ";
" SOURCE ";
" SQL_TSI_DAY ";
" SQL_TSI_HOUR ";
" SQL_TSI_MINUTE ";
" SQL_TSI_MONTH ";
" SQL_TSI_QUARTER ";
" SQL_TSI_SECOND ";
" SQL_TSI_WEEK ";
" SQL_TSI_YEAR ";
" SS ";
" State ";
" stddevPop ";
" stddevPopStable ";
" stddevSamp ";
" stddevSampStable ";
" STEP ";
" stochasticLinearRegression ";
" stochasticLogisticRegression ";
" String ";
" SUBSTRING ";
" sum ";
" sumKahan ";
" sumMap ";
" sumMapFiltered ";
" sumMapFilteredWithOverflow ";
" sumMapWithOverflow ";
" sumWithOverflow ";
" SUSPEND ";
" TABLE ";
" TABLES ";
" TEMPORARY ";
" TIMESTAMP ";
" TIMESTAMP_ADD ";
" TIMESTAMPADD ";
" TIMESTAMP_DIFF ";
" TIMESTAMPDIFF ";
" TIMESTAMP_SUB ";
" TIMESTAMPSUB ";
" TO ";
" TO DISK ";
" TOP ";
" topK ";
" topKWeighted ";
" topLevelDomain ";
" TO TABLE ";
" TOTALS ";
" TO VOLUME ";
" TRAILING ";
" TRIM ";
" TRUNCATE ";
" TTL ";
" Tuple ";
" TYPE ";
" UInt16 ";
" UInt32 ";
" UInt64 ";
" UInt8 ";
" uniq ";
" uniqCombined ";
" uniqCombined64 ";
" uniqExact ";
" uniqHLL12 ";
" uniqUpTo ";
" UPDATE ";
" URLHierarchy ";
" URLPathHierarchy ";
" USE ";
" USING ";
" UUID ";
" VALUES ";
" varPop ";
" varPopStable ";
" varSamp ";
" varSampStable ";
" VIEW ";
" WATCH ";
" WEEK ";
" WHERE ";
" windowFunnel ";
" WITH ";
" WITH FILL ";
" WITH TIES ";
" WK ";
" WW ";
" YEAR ";
" YY ";
" YYYY ";

View File

@ -0,0 +1,40 @@
#include <iostream>
#include <string>

#include <IO/WriteBufferFromOStream.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>

#include <libfuzzer/libfuzzer_macro.h>

#include "out.pb.h"

/// Implemented in the generated out.cpp: renders a protobuf Sentence into SQL text.
void GenerateSentence(const Sentence &, std::string &, int);

DEFINE_BINARY_PROTO_FUZZER(const Sentence & main)
{
    /// Reused across fuzzer iterations to avoid reallocating on every input.
    static std::string input;
    input.reserve(4096);

    GenerateSentence(main, input, 0);

    if (!input.empty())
    {
        std::cout << input << std::endl;

        DB::ParserQueryWithOutput parser(input.data() + input.size());
        try
        {
            DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);

            DB::WriteBufferFromOStream out(std::cerr, 4096);
            DB::formatAST(*ast, out);
            std::cerr << std::endl;
        }
        catch (...)
        {
            /// Parse failures are expected on fuzzer-generated inputs and deliberately ignored.
        }

        input.clear();
    }
}

View File

@ -0,0 +1,249 @@
#!/usr/bin/env python3
import sys
import string
# Token kinds produced by Parser.get_next_token().
# TOKEN_TEXT / TOKEN_VAR are synthetic markers; the rest are the literal
# single-character tokens returned as-is by the tokenizer.
TOKEN_TEXT = 1
TOKEN_VAR = 2
TOKEN_COLON = ':'
TOKEN_SEMI = ';'
TOKEN_OR = '|'
TOKEN_QUESTIONMARK = '?'
TOKEN_ROUND_BRACKET_OPEN = '('
TOKEN_ROUND_BRACKET_CLOSE = ')'
TOKEN_ASTERISK = '*'
TOKEN_SLASH = '/'
class TextValue:
    """A literal text token from the grammar.

    Lazily computes and caches an identifier-safe "slug" of the text, where
    every character that is not an ASCII letter is replaced by an underscore.
    """

    def __init__(self, t):
        self.t = t
        self.slug = None  # cached slug; computed on first get_slug() call

    def get_slug(self):
        """Return the cached slug, computing it on first use."""
        if self.slug is None:
            self.slug = ''.join(
                ch if ch in string.ascii_letters else '_' for ch in self.t
            )
        return self.slug

    def get_name(self):
        """Return an identifier-like name derived from the slug."""
        return f"TextValue_{self.get_slug()}"

    def __repr__(self):
        return f"TextValue(\"{self.t}\")"
class Var:
    """A ``$N`` placeholder token; ``id_`` is the numeric index N."""

    def __init__(self, id_):
        self.id_ = id_

    def __repr__(self):
        return "Var({})".format(self.id_)
class Parser:
    """Parses the custom grammar (.g) format and emits C++ / proto sources.

    Each grammar statement is a ';'-terminated sequence of quoted text tokens
    and ``$N`` variable placeholders; lines starting with '/' are comments.
    Parsed statements accumulate in ``self.chains``; ``generate()`` turns them
    into a protobuf enum plus a C++ ``GenerateWord``/``GenerateSentence`` pair.
    """

    def __init__(self):
        self.chains = []      # list of statements; each is a list of TextValue/Var
        self.text = None      # remaining unparsed input
        self.col = 0          # current column, for error reporting
        self.line = 1         # current line, for error reporting
        self.t = None         # text of the most recent TOKEN_TEXT
        self.var_id = -1      # id of the most recent TOKEN_VAR
        self.cur_tok = None   # most recent token kind
        self.includes = []    # extra #include lines for the generated C++
        self.proto = ''       # generated proto source
        self.cpp = ''         # generated C++ source

    def parse_file(self, filename):
        """Read `filename` and parse every statement in it."""
        with open(filename) as f:
            self.text = f.read()

        while self.parse_statement() is not None:
            pass

    def add_include(self, filename):
        """Register an extra header to #include in the generated C++."""
        self.includes.append(filename)

    def get_next_token(self):
        """Advance to the next token; sets self.cur_tok and returns it (None at EOF)."""
        self.skip_ws()
        if not len(self.text):
            return None

        if self.text[0] == '"':
            return self.parse_txt_value()

        if self.text[0] == '$':
            return self.parse_var_value()

        # Any other character is returned as a literal one-character token.
        c, self.text = self.text[0], self.text[1:]
        self.cur_tok = c
        return c

    def parse_var_value(self):
        """Parse a ``$N`` placeholder (terminated by a space) into self.var_id."""
        i = self.text.find(' ')
        id_, self.text = self.text[1:i], self.text[i + 1:]
        self.var_id = int(id_)
        self.cur_tok = TOKEN_VAR
        return TOKEN_VAR

    def parse_txt_value(self):
        """Parse a double-quoted string (keeping \\x.., \\n, \\t, \\\\, \\" escapes verbatim) into self.t."""
        if self.text[0] != '"':
            raise Exception("parse_txt_value: expected quote at the start")

        self.t = ''
        self.text = self.text[1:]
        while self.text[0] != '"':
            if self.text[0] == '\\':
                if self.text[1] == 'x':
                    # \xNN escape: keep all four characters as-is.
                    self.t += self.text[:4]
                    self.text = self.text[4:]
                elif self.text[1] in 'nt\\"':
                    self.t += self.text[:2]
                    self.text = self.text[2:]
                else:
                    raise Exception(f"parse_txt_value: unknown symbol {self.text[0]}")
            else:
                c, self.text = self.text[0], self.text[1:]
                self.t += c

        self.text = self.text[1:]  # skip the closing quote
        self.cur_tok = TOKEN_TEXT
        return TOKEN_TEXT

    def skip_ws(self):
        """Skip whitespace, tracking line/col. Returns None at EOF, True otherwise."""
        while self.text and self.text[0] in string.whitespace:
            if self.text[0] == '\n':
                self.line += 1
                self.col = 0
            self.text = self.text[1:]
            self.col += 1
        if not self.text:
            return None
        return True

    def skip_line(self):
        """Discard the remainder of the current line (used for '/' comments)."""
        self.line += 1
        index = self.text.find('\n')
        # Fix: a trailing comment with no final newline previously left the
        # last character in the buffer (find() returns -1 -> text[-1:]).
        self.text = '' if index == -1 else self.text[index:]

    def parse_statement(self):
        """Parse one ';'-terminated statement into self.chains.

        Returns None at EOF, TOKEN_SLASH for a comment line, True otherwise.
        """
        if self.skip_ws() is None:
            return None

        self.get_next_token()
        if self.cur_tok == TOKEN_SLASH:
            self.skip_line()
            return TOKEN_SLASH

        chain = []
        while self.cur_tok != TOKEN_SEMI:
            if self.cur_tok == TOKEN_TEXT:
                chain.append(TextValue(self.t))
            elif self.cur_tok == TOKEN_VAR:
                chain.append(Var(self.var_id))
            else:
                # Fix: was `self.tok`, an attribute that does not exist, so the
                # error path itself raised AttributeError instead of reporting.
                self.fatal_parsing_error(f"unexpected token {self.cur_tok}")
            self.get_next_token()

        if not chain:
            self.fatal_parsing_error("empty chains are not allowed")
        self.chains.append(chain)
        return True

    def generate(self):
        """Render self.chains into (cpp_source, proto_source)."""
        self.proto = 'syntax = "proto3";\n\n'
        self.cpp = '#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h>\n\n'

        for incl_file in self.includes:
            self.cpp += f'#include "{incl_file}"\n'
        self.cpp += '\n'

        self.proto += 'message Word {\n'
        self.proto += '\tenum Value {\n'

        self.cpp += 'void GenerateWord(const Word&, std::string&, int);\n\n'

        self.cpp += 'void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n'
        self.cpp += '\tfor (int i = 0; i < stc.words_size(); i++ ) {\n'
        self.cpp += '\t\tGenerateWord(stc.words(i), s, ++depth);\n'
        self.cpp += '\t}\n'
        self.cpp += '}\n'

        self.cpp += 'void GenerateWord(const Word& word, std::string &s, int depth) {\n'
        # Depth cap bounds recursion when rendering nested sentences.
        self.cpp += '\tif (depth > 5) return;\n\n'
        self.cpp += '\tswitch (word.value()) {\n'
        for idx, chain in enumerate(self.chains):
            self.proto += f'\t\tvalue_{idx} = {idx};\n'

            self.cpp += f'\t\tcase {idx}: {{\n'
            num_var = 0
            for item in chain:
                if isinstance(item, TextValue):
                    self.cpp += f'\t\t\ts += "{item.t}";\n'
                elif isinstance(item, Var):
                    self.cpp += f'\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n'
                    num_var += 1
                else:
                    raise Exception("unknown token met during generation")
            self.cpp += '\t\t\tbreak;\n\t\t}\n'
        self.cpp += '\t\tdefault: break;\n'

        self.cpp += '\t}\n'

        self.proto += '\t}\n'
        self.proto += '\tValue value = 1;\n'
        self.proto += '\tSentence inner = 2;\n'
        self.proto += '}\nmessage Sentence {\n\trepeated Word words = 1;\n}'
        self.cpp += '}\n'
        return self.cpp, self.proto

    def fatal_parsing_error(self, msg):
        """Report the current position and abort parsing."""
        print(f"Line: {self.line}, Col: {self.col}")
        raise Exception(f"fatal error during parsing. {msg}")
def main(args):
    """Generate C++/proto sources from a grammar file.

    args: [input_file, outfile_cpp, outfile_proto] — exactly three paths.
    Raises if outfile_proto lacks the .proto extension, since the generated
    C++ includes the matching <name>.pb.h header.
    """
    input_file, outfile_cpp, outfile_proto = args

    if not outfile_proto.endswith('.proto'):
        raise Exception("outfile_proto (argv[3]) should end with `.proto`")
    include_filename = outfile_proto[:-6] + ".pb.h"

    p = Parser()
    p.add_include(include_filename)
    p.parse_file(input_file)

    cpp, proto = p.generate()
    # The generator emits tabs; normalize to 4-space indentation.
    proto = proto.replace('\t', ' ' * 4)
    cpp = cpp.replace('\t', ' ' * 4)

    with open(outfile_cpp, 'w') as f:
        f.write(cpp)
    with open(outfile_proto, 'w') as f:
        f.write(proto)


if __name__ == '__main__':
    # Fix: the old check (`len(sys.argv) < 3`) accepted two arguments, but
    # main() unpacks exactly three, crashing with ValueError instead of
    # printing the usage message. Three arguments means len(sys.argv) == 4.
    if len(sys.argv) != 4:
        print(f"Usage {sys.argv[0]} <input_file> <outfile.cpp> <outfile.proto>")
        sys.exit(1)

    main(sys.argv[1:])

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,519 @@
syntax = "proto3";
message Word {
enum Value {
value_0 = 0;
value_1 = 1;
value_2 = 2;
value_3 = 3;
value_4 = 4;
value_5 = 5;
value_6 = 6;
value_7 = 7;
value_8 = 8;
value_9 = 9;
value_10 = 10;
value_11 = 11;
value_12 = 12;
value_13 = 13;
value_14 = 14;
value_15 = 15;
value_16 = 16;
value_17 = 17;
value_18 = 18;
value_19 = 19;
value_20 = 20;
value_21 = 21;
value_22 = 22;
value_23 = 23;
value_24 = 24;
value_25 = 25;
value_26 = 26;
value_27 = 27;
value_28 = 28;
value_29 = 29;
value_30 = 30;
value_31 = 31;
value_32 = 32;
value_33 = 33;
value_34 = 34;
value_35 = 35;
value_36 = 36;
value_37 = 37;
value_38 = 38;
value_39 = 39;
value_40 = 40;
value_41 = 41;
value_42 = 42;
value_43 = 43;
value_44 = 44;
value_45 = 45;
value_46 = 46;
value_47 = 47;
value_48 = 48;
value_49 = 49;
value_50 = 50;
value_51 = 51;
value_52 = 52;
value_53 = 53;
value_54 = 54;
value_55 = 55;
value_56 = 56;
value_57 = 57;
value_58 = 58;
value_59 = 59;
value_60 = 60;
value_61 = 61;
value_62 = 62;
value_63 = 63;
value_64 = 64;
value_65 = 65;
value_66 = 66;
value_67 = 67;
value_68 = 68;
value_69 = 69;
value_70 = 70;
value_71 = 71;
value_72 = 72;
value_73 = 73;
value_74 = 74;
value_75 = 75;
value_76 = 76;
value_77 = 77;
value_78 = 78;
value_79 = 79;
value_80 = 80;
value_81 = 81;
value_82 = 82;
value_83 = 83;
value_84 = 84;
value_85 = 85;
value_86 = 86;
value_87 = 87;
value_88 = 88;
value_89 = 89;
value_90 = 90;
value_91 = 91;
value_92 = 92;
value_93 = 93;
value_94 = 94;
value_95 = 95;
value_96 = 96;
value_97 = 97;
value_98 = 98;
value_99 = 99;
value_100 = 100;
value_101 = 101;
value_102 = 102;
value_103 = 103;
value_104 = 104;
value_105 = 105;
value_106 = 106;
value_107 = 107;
value_108 = 108;
value_109 = 109;
value_110 = 110;
value_111 = 111;
value_112 = 112;
value_113 = 113;
value_114 = 114;
value_115 = 115;
value_116 = 116;
value_117 = 117;
value_118 = 118;
value_119 = 119;
value_120 = 120;
value_121 = 121;
value_122 = 122;
value_123 = 123;
value_124 = 124;
value_125 = 125;
value_126 = 126;
value_127 = 127;
value_128 = 128;
value_129 = 129;
value_130 = 130;
value_131 = 131;
value_132 = 132;
value_133 = 133;
value_134 = 134;
value_135 = 135;
value_136 = 136;
value_137 = 137;
value_138 = 138;
value_139 = 139;
value_140 = 140;
value_141 = 141;
value_142 = 142;
value_143 = 143;
value_144 = 144;
value_145 = 145;
value_146 = 146;
value_147 = 147;
value_148 = 148;
value_149 = 149;
value_150 = 150;
value_151 = 151;
value_152 = 152;
value_153 = 153;
value_154 = 154;
value_155 = 155;
value_156 = 156;
value_157 = 157;
value_158 = 158;
value_159 = 159;
value_160 = 160;
value_161 = 161;
value_162 = 162;
value_163 = 163;
value_164 = 164;
value_165 = 165;
value_166 = 166;
value_167 = 167;
value_168 = 168;
value_169 = 169;
value_170 = 170;
value_171 = 171;
value_172 = 172;
value_173 = 173;
value_174 = 174;
value_175 = 175;
value_176 = 176;
value_177 = 177;
value_178 = 178;
value_179 = 179;
value_180 = 180;
value_181 = 181;
value_182 = 182;
value_183 = 183;
value_184 = 184;
value_185 = 185;
value_186 = 186;
value_187 = 187;
value_188 = 188;
value_189 = 189;
value_190 = 190;
value_191 = 191;
value_192 = 192;
value_193 = 193;
value_194 = 194;
value_195 = 195;
value_196 = 196;
value_197 = 197;
value_198 = 198;
value_199 = 199;
value_200 = 200;
value_201 = 201;
value_202 = 202;
value_203 = 203;
value_204 = 204;
value_205 = 205;
value_206 = 206;
value_207 = 207;
value_208 = 208;
value_209 = 209;
value_210 = 210;
value_211 = 211;
value_212 = 212;
value_213 = 213;
value_214 = 214;
value_215 = 215;
value_216 = 216;
value_217 = 217;
value_218 = 218;
value_219 = 219;
value_220 = 220;
value_221 = 221;
value_222 = 222;
value_223 = 223;
value_224 = 224;
value_225 = 225;
value_226 = 226;
value_227 = 227;
value_228 = 228;
value_229 = 229;
value_230 = 230;
value_231 = 231;
value_232 = 232;
value_233 = 233;
value_234 = 234;
value_235 = 235;
value_236 = 236;
value_237 = 237;
value_238 = 238;
value_239 = 239;
value_240 = 240;
value_241 = 241;
value_242 = 242;
value_243 = 243;
value_244 = 244;
value_245 = 245;
value_246 = 246;
value_247 = 247;
value_248 = 248;
value_249 = 249;
value_250 = 250;
value_251 = 251;
value_252 = 252;
value_253 = 253;
value_254 = 254;
value_255 = 255;
value_256 = 256;
value_257 = 257;
value_258 = 258;
value_259 = 259;
value_260 = 260;
value_261 = 261;
value_262 = 262;
value_263 = 263;
value_264 = 264;
value_265 = 265;
value_266 = 266;
value_267 = 267;
value_268 = 268;
value_269 = 269;
value_270 = 270;
value_271 = 271;
value_272 = 272;
value_273 = 273;
value_274 = 274;
value_275 = 275;
value_276 = 276;
value_277 = 277;
value_278 = 278;
value_279 = 279;
value_280 = 280;
value_281 = 281;
value_282 = 282;
value_283 = 283;
value_284 = 284;
value_285 = 285;
value_286 = 286;
value_287 = 287;
value_288 = 288;
value_289 = 289;
value_290 = 290;
value_291 = 291;
value_292 = 292;
value_293 = 293;
value_294 = 294;
value_295 = 295;
value_296 = 296;
value_297 = 297;
value_298 = 298;
value_299 = 299;
value_300 = 300;
value_301 = 301;
value_302 = 302;
value_303 = 303;
value_304 = 304;
value_305 = 305;
value_306 = 306;
value_307 = 307;
value_308 = 308;
value_309 = 309;
value_310 = 310;
value_311 = 311;
value_312 = 312;
value_313 = 313;
value_314 = 314;
value_315 = 315;
value_316 = 316;
value_317 = 317;
value_318 = 318;
value_319 = 319;
value_320 = 320;
value_321 = 321;
value_322 = 322;
value_323 = 323;
value_324 = 324;
value_325 = 325;
value_326 = 326;
value_327 = 327;
value_328 = 328;
value_329 = 329;
value_330 = 330;
value_331 = 331;
value_332 = 332;
value_333 = 333;
value_334 = 334;
value_335 = 335;
value_336 = 336;
value_337 = 337;
value_338 = 338;
value_339 = 339;
value_340 = 340;
value_341 = 341;
value_342 = 342;
value_343 = 343;
value_344 = 344;
value_345 = 345;
value_346 = 346;
value_347 = 347;
value_348 = 348;
value_349 = 349;
value_350 = 350;
value_351 = 351;
value_352 = 352;
value_353 = 353;
value_354 = 354;
value_355 = 355;
value_356 = 356;
value_357 = 357;
value_358 = 358;
value_359 = 359;
value_360 = 360;
value_361 = 361;
value_362 = 362;
value_363 = 363;
value_364 = 364;
value_365 = 365;
value_366 = 366;
value_367 = 367;
value_368 = 368;
value_369 = 369;
value_370 = 370;
value_371 = 371;
value_372 = 372;
value_373 = 373;
value_374 = 374;
value_375 = 375;
value_376 = 376;
value_377 = 377;
value_378 = 378;
value_379 = 379;
value_380 = 380;
value_381 = 381;
value_382 = 382;
value_383 = 383;
value_384 = 384;
value_385 = 385;
value_386 = 386;
value_387 = 387;
value_388 = 388;
value_389 = 389;
value_390 = 390;
value_391 = 391;
value_392 = 392;
value_393 = 393;
value_394 = 394;
value_395 = 395;
value_396 = 396;
value_397 = 397;
value_398 = 398;
value_399 = 399;
value_400 = 400;
value_401 = 401;
value_402 = 402;
value_403 = 403;
value_404 = 404;
value_405 = 405;
value_406 = 406;
value_407 = 407;
value_408 = 408;
value_409 = 409;
value_410 = 410;
value_411 = 411;
value_412 = 412;
value_413 = 413;
value_414 = 414;
value_415 = 415;
value_416 = 416;
value_417 = 417;
value_418 = 418;
value_419 = 419;
value_420 = 420;
value_421 = 421;
value_422 = 422;
value_423 = 423;
value_424 = 424;
value_425 = 425;
value_426 = 426;
value_427 = 427;
value_428 = 428;
value_429 = 429;
value_430 = 430;
value_431 = 431;
value_432 = 432;
value_433 = 433;
value_434 = 434;
value_435 = 435;
value_436 = 436;
value_437 = 437;
value_438 = 438;
value_439 = 439;
value_440 = 440;
value_441 = 441;
value_442 = 442;
value_443 = 443;
value_444 = 444;
value_445 = 445;
value_446 = 446;
value_447 = 447;
value_448 = 448;
value_449 = 449;
value_450 = 450;
value_451 = 451;
value_452 = 452;
value_453 = 453;
value_454 = 454;
value_455 = 455;
value_456 = 456;
value_457 = 457;
value_458 = 458;
value_459 = 459;
value_460 = 460;
value_461 = 461;
value_462 = 462;
value_463 = 463;
value_464 = 464;
value_465 = 465;
value_466 = 466;
value_467 = 467;
value_468 = 468;
value_469 = 469;
value_470 = 470;
value_471 = 471;
value_472 = 472;
value_473 = 473;
value_474 = 474;
value_475 = 475;
value_476 = 476;
value_477 = 477;
value_478 = 478;
value_479 = 479;
value_480 = 480;
value_481 = 481;
value_482 = 482;
value_483 = 483;
value_484 = 484;
value_485 = 485;
value_486 = 486;
value_487 = 487;
value_488 = 488;
value_489 = 489;
value_490 = 490;
value_491 = 491;
value_492 = 492;
value_493 = 493;
value_494 = 494;
value_495 = 495;
value_496 = 496;
value_497 = 497;
value_498 = 498;
value_499 = 499;
value_500 = 500;
value_501 = 501;
value_502 = 502;
value_503 = 503;
value_504 = 504;
value_505 = 505;
value_506 = 506;
value_507 = 507;
}
Value value = 1;
Sentence inner = 2;
}
message Sentence {
repeated Word words = 1;
}

View File

@ -0,0 +1,30 @@
#!/bin/bash

# Rebuilds clickhouse.g from the hand-written template plus a fuzzing token
# dictionary: each dictionary entry "tok" becomes a grammar rule ` " tok "; `.
_main() {
    if [[ $# -ne 1 ]];
    then
        echo "Usage: $0 <dict_filename>";
        exit 1;
    fi
    local dict_filename="${1}"

    if [[ ! -f "$dict_filename" ]];
    then
        echo "File $dict_filename doesn't exist";
        exit 1
    fi

    cat clickhouse-template.g > clickhouse.g

    # IFS= read -r keeps leading whitespace and — crucially — backslash
    # escapes in dictionary tokens intact; `read` without -r mangles them.
    while IFS= read -r line;
    do
        [[ -z "$line" ]] && continue

        # printf with a quoted argument avoids the word splitting and glob
        # expansion that an unquoted `echo $line` performs on tokens like "*".
        printf '%s\n' "$line" | sed -e 's/"\(.*\)"/" \1 ";/g'
    done < "$dict_filename" >> clickhouse.g
}

_main "$@"

# Sample run: ./update.sh ../../../../tests/fuzz/ast.dict
# then run `python ./gen.py clickhouse.g out.cpp out.proto` to generate new files with tokens. Rebuild fuzzer