Added the ability to generate data at build time

This commit is contained in:
Nikita Mikhaylov 2021-10-15 14:35:50 +00:00
parent 57a623aba0
commit e9bf496d42
5 changed files with 70 additions and 20 deletions

View File

@ -136,6 +136,21 @@ if (ENABLE_FUZZING)
message (STATUS "Fuzzing instrumentation enabled")
set (FUZZER "libfuzzer")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -nostdlib++")
set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF)
set (ENABLE_LIBRARIES 0)
set (ENABLE_SSL 1)
set (USE_INTERNAL_SSL_LIBRARY 1)
set (USE_UNWIND ON)
set (ENABLE_EMBEDDED_COMPILER 0)
set (ENABLE_EXAMPLES 0)
set (ENABLE_UTILS 0)
set (ENABLE_THINLTO 0)
set (ENABLE_TCMALLOC 0)
set (ENABLE_JEMALLOC 0)
set (ENABLE_CHECK_HEAVY_BUILDS 1)
set (GLIBC_COMPATIBILITY OFF)
set (ENABLE_PROTOBUF ON)
set (USE_INTERNAL_PROTOBUF_LIBRARY ON)
endif()
# Global libraries

View File

@ -1,5 +1,4 @@
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator)
set(NO_FUZZING_FLAGS "-fno-sanitize=fuzzer -fsanitize-coverage=0")
add_library(protobuf-mutator
${LIBRARY_DIR}/src/libfuzzer/libfuzzer_macro.cc
@ -10,8 +9,6 @@ add_library(protobuf-mutator
${LIBRARY_DIR}/src/utf8_fix.cc)
target_include_directories(protobuf-mutator BEFORE PRIVATE "${LIBRARY_DIR}")
# target_include_directories(protobuf-mutator BEFORE PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src")
target_include_directories(protobuf-mutator BEFORE PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src")
target_link_libraries(protobuf-mutator ${PROTOBUF_LIBRARIES})
set_target_properties(protobuf-mutator PROPERTIES
COMPILE_FLAGS "${NO_FUZZING_FLAGS}")
target_link_libraries(protobuf-mutator ${Protobuf_LIBRARY})

View File

@ -1,7 +1,47 @@
find_package(Protobuf REQUIRED)
protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS "out.proto")
set(FUZZER_SRCS codegen_select_fuzzer.cpp out.cpp ${PROTO_SRCS} ${PROTO_HDRS})
# Locations of the codegen fuzzer in the source tree and in the build tree.
set (CURRENT_DIR_IN_SOURCES "${ClickHouse_SOURCE_DIR}/src/Parsers/fuzzers/codegen_fuzzer")
set (CURRENT_DIR_IN_BINARY "${ClickHouse_BINARY_DIR}/src/Parsers/fuzzers/codegen_fuzzer")

# The generator scripts are copied unchanged into the build directory so that
# the .proto and .cpp files can be produced there.
foreach (codegen_script IN ITEMS gen.py update.sh)
    configure_file(
        "${CURRENT_DIR_IN_SOURCES}/${codegen_script}"
        "${CURRENT_DIR_IN_BINARY}/${codegen_script}"
        COPYONLY)
endforeach ()
# Copy clickhouse.g from the source tree into the build directory unchanged (COPYONLY).
# Delete this call and uncomment the next block if you want to generate clickhouse.g automatically.
configure_file(
"${CURRENT_DIR_IN_SOURCES}/clickhouse.g"
"${CURRENT_DIR_IN_BINARY}/clickhouse.g"
COPYONLY)
# # Uncomment to generate clickhouse.g automatically
# configure_file(
# "${CURRENT_DIR_IN_SOURCES}/clickhouse-template.g"
# "${CURRENT_DIR_IN_BINARY}/clickhouse-template.g"
# COPYONLY)
# # Note that it depends on all.dict file!
# add_custom_command(
# OUTPUT
# "${CURRENT_DIR_IN_BINARY}/clickhouse.g"
# COMMAND ./update.sh "${ClickHouse_SOURCE_DIR}/tests/fuzz/all.dict"
# )
# Build rule that runs gen.py on clickhouse.g to produce out.cpp and out.proto
# inside the build directory.
#
# DEPENDS lists both inputs so the outputs are regenerated whenever the script
# or the grammar changes; without it the rule only runs while the outputs are
# missing, which leaves stale generated sources after an edit.
# WORKING_DIRECTORY is spelled out because the command uses bare file names
# that only resolve inside the build directory.
add_custom_command(
    OUTPUT
        "${CURRENT_DIR_IN_BINARY}/out.cpp"
        "${CURRENT_DIR_IN_BINARY}/out.proto"
    COMMAND python3 gen.py clickhouse.g out.cpp out.proto
    DEPENDS
        "${CURRENT_DIR_IN_BINARY}/gen.py"
        "${CURRENT_DIR_IN_BINARY}/clickhouse.g"
    WORKING_DIRECTORY "${CURRENT_DIR_IN_BINARY}"
    VERBATIM
)
# Turn the generated out.proto into C++ sources (PROTO_SRCS) and headers (PROTO_HDRS).
protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS "${CURRENT_DIR_IN_BINARY}/out.proto")

# Source list of the fuzzer: the hand-written driver, the generated out.cpp
# and the protobuf-generated files.
set(FUZZER_SRCS
    codegen_select_fuzzer.cpp
    "${CURRENT_DIR_IN_BINARY}/out.cpp"
    ${PROTO_SRCS}
    ${PROTO_HDRS})

# Put the current source and binary directories on the include path.
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
@ -9,7 +49,7 @@ add_executable(codegen_select_fuzzer ${FUZZER_SRCS})
set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIRS}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}")
target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}/src")
target_link_libraries(codegen_select_fuzzer PRIVATE clickhouse_parsers protobuf-mutator ${Protobuf_LIBRARIES} ${LIB_FUZZING_ENGINE})
target_link_libraries(codegen_select_fuzzer PRIVATE protobuf-mutator dbms ${LIB_FUZZING_ENGINE})

View File

@ -11,7 +11,6 @@
#include "out.pb.h"
void GenerateSentence(const Sentence&, std::string &, int);

View File

@ -58,14 +58,14 @@ class Parser:
self.var_id = -1
self.cur_tok = None
self.includes = []
self.proto = ''
self.cpp = ''
def parse_file(self, filename):
with open(filename) as f:
self.text = f.read()
while self.parse_statement() is not None:
pass
@ -90,12 +90,12 @@ class Parser:
# Consume a variable reference from the front of self.text.
# Stores the numeric id in self.var_id, sets self.cur_tok to TOKEN_VAR and
# returns TOKEN_VAR.
def parse_var_value(self):
# Position of the first space in the remaining input.
# NOTE(review): find() returns -1 when there is no space; the slices below would
# then drop the last character from the id and leave self.text unchanged --
# confirm that callers always supply a space after the variable.
i = self.text.find(' ')
# The id is everything between index 1 and the space; the character at index 0
# is skipped (presumably a variable sigil -- TODO confirm). The text after the
# space becomes the new remaining input.
id_, self.text = self.text[1:i], self.text[i+1:]
# int() raises ValueError if the id is not a valid integer literal.
self.var_id = int(id_)
self.cur_tok = TOKEN_VAR
return TOKEN_VAR
def parse_txt_value(self):
if self.text[0] != '"':
raise Exception("parse_txt_value: expected quote at the start")
@ -116,7 +116,7 @@ class Parser:
else:
c, self.text = self.text[0], self.text[1:]
self.t += c
self.text = self.text[1:]
self.cur_tok = TOKEN_TEXT
return TOKEN_TEXT
@ -137,7 +137,7 @@ class Parser:
index = self.text.find('\n')
self.text = self.text[index:]
def parse_statement(self):
if self.skip_ws() is None:
return None
@ -146,7 +146,7 @@ class Parser:
if self.cur_tok == TOKEN_SLASH:
self.skip_line()
return TOKEN_SLASH
chain = []
while self.cur_tok != TOKEN_SEMI:
if self.cur_tok == TOKEN_TEXT:
@ -164,7 +164,7 @@ class Parser:
def generate(self):
self.proto = 'syntax = "proto3";\n\n'
self.cpp = '#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h>\n\n'
self.cpp = '#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n'
for incl_file in self.includes:
self.cpp += f'#include "{incl_file}"\n'
@ -228,7 +228,7 @@ def main(args):
p = Parser()
p.add_include(include_filename)
p.parse_file(input_file)
cpp, proto = p.generate()
proto = proto.replace('\t', ' ' * 4)
@ -246,4 +246,3 @@ if __name__ == '__main__':
print(f"Usage {sys.argv[0]} <input_file> <outfile.cpp> <outfile.proto>")
sys.exit(1)
main(sys.argv[1:])