Do not require gperf on build (#5701)

* Build fixes

* fix

* USE_GPERF

* wip

* wip

* wip

* test -j

* Simpler

* fix

* FunctionsURL.h -> URL/FunctionsURL.h

* clean

* clean

* Always use gperf-generated file

* Generator for tldLookup.generated.cpp

* clang-format
This commit is contained in:
proller 2019-06-21 02:50:53 +03:00 committed by alexey-milovidov
parent 606b074a4b
commit f111dbd45f
12 changed files with 95987 additions and 42 deletions

View File

@ -344,7 +344,6 @@ find_contrib_lib(metrohash)
find_contrib_lib(btrie)
find_contrib_lib(double-conversion)
include (cmake/find_parquet.cmake)
include (cmake/find_gperf.cmake)
if (ENABLE_TESTS)
include (cmake/find_gtest.cmake)

View File

@ -1,19 +1,8 @@
configure_file(config_functions_url.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/config_functions_url.h)
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
add_headers_and_sources(clickhouse_functions_url .)
if (USE_GPERF)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tldLookup.cpp
COMMAND ${GPERF} ${CMAKE_CURRENT_SOURCE_DIR}/tldLookup.gperf --output-file=${CMAKE_CURRENT_BINARY_DIR}/tldLookup.cpp
)
list(APPEND clickhouse_functions_url_sources ${CMAKE_CURRENT_BINARY_DIR}/tldLookup.cpp)
endif ()
add_library(clickhouse_functions_url ${clickhouse_functions_url_sources} ${clickhouse_functions_url_headers})
target_link_libraries(clickhouse_functions_url PRIVATE clickhouse_common_io)
target_include_directories(clickhouse_functions_url PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/../include)
target_include_directories(clickhouse_functions_url PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/../include) # ${CMAKE_CURRENT_BINARY_DIR}/include
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
@ -25,3 +14,23 @@ if(USE_HYPERSCAN)
target_link_libraries(clickhouse_functions_url PRIVATE ${HYPERSCAN_LIBRARY})
target_include_directories(clickhouse_functions_url SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR})
endif()
# Developer-only targets that regenerate the TLD lookup sources kept in the
# source directory. Neither target is declared with ALL, so they run only when
# requested explicitly (directly or through the `generate-source` umbrella
# target). USE_GPERF and GPERF are presumably set by find_gperf.cmake -- confirm.
include(${ClickHouse_SOURCE_DIR}/cmake/find_gperf.cmake)
if (USE_GPERF)
    # Step 1: rebuild tldLookup.gperf by running tldLookup.sh in the source directory.
    add_custom_target(generate-tldlookup-gperf
        COMMAND ./tldLookup.sh
        SOURCES tldLookup.sh
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
        BYPRODUCTS ${CMAKE_CURRENT_SOURCE_DIR}/tldLookup.gperf
        VERBATIM
    )

    # Step 2: run gperf on tldLookup.gperf, then clang-format the output in place.
    # Two COMMAND clauses are used instead of passing a literal `&&` argument:
    # CMake runs the commands in order and stops at the first failure, and the
    # result no longer depends on how the generator/shell treats `&&`.
    # tldLookup.generated.cpp is not declared under BYPRODUCTS; the author left
    # that declaration commented out (reason not visible here).
    add_custom_target(generate-tldlookup
        COMMAND ${GPERF} tldLookup.gperf --output-file=tldLookup.generated.cpp
        COMMAND clang-format -i tldLookup.generated.cpp
        SOURCES tldLookup.gperf
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
        VERBATIM
    )
    # The .gperf input must be refreshed before gperf consumes it.
    add_dependencies(generate-tldlookup generate-tldlookup-gperf)

    # `generate-source` is an umbrella target shared with other directories;
    # create it here only if nobody has defined it yet.
    if(NOT TARGET generate-source)
        add_custom_target(generate-source)
    endif()
    add_dependencies(generate-source generate-tldlookup)
endif ()

View File

@ -1,5 +1,3 @@
#pragma once
// .h autogenerated by cmake!
#cmakedefine01 USE_GPERF

View File

@ -64,7 +64,6 @@ struct ExtractFirstSignificantSubdomain
end_of_level_domain = end;
}
#if USE_GPERF
if (tldLookup::is_valid(last_3_periods[1] + 1, end_of_level_domain - last_3_periods[1] - 1) != nullptr)
{
res_data += last_3_periods[2] + 1 - begin;
@ -75,7 +74,6 @@ struct ExtractFirstSignificantSubdomain
res_data += last_3_periods[1] + 1 - begin;
res_size = last_3_periods[0] - last_3_periods[1] - 1;
}
#endif
}
};

File diff suppressed because it is too large Load Diff

View File

@ -430,6 +430,7 @@ slz.br
sorocaba.br
srv.br
taxi.br
tc.br
teo.br
the.br
tmp.br
@ -3320,15 +3321,13 @@ gov.ru
int.ru
mil.ru
test.ru
gov.rw
net.rw
edu.rw
ac.rw
com.rw
co.rw
int.rw
coop.rw
gov.rw
mil.rw
gouv.rw
net.rw
org.rw
com.sa
net.sa
org.sa
@ -3504,26 +3503,27 @@ net.to
org.to
edu.to
mil.to
com.tr
info.tr
biz.tr
net.tr
org.tr
web.tr
gen.tr
tv.tr
av.tr
dr.tr
bbs.tr
name.tr
tel.tr
gov.tr
bel.tr
pol.tr
biz.tr
com.tr
dr.tr
edu.tr
gen.tr
gov.tr
info.tr
mil.tr
k12.tr
edu.tr
kep.tr
name.tr
net.tr
org.tr
pol.tr
tel.tr
tsk.tr
tv.tr
web.tr
nc.tr
co.tt
com.tt
@ -3836,6 +3836,7 @@ cc.ua
inf.ua
ltd.ua
beep.pl
barsy.ca
alwaysdata.net
cloudfront.net
elasticbeanstalk.com
@ -3851,7 +3852,9 @@ go-vip.co
go-vip.net
wpcomstaging.com
myfritz.net
b-data.io
backplaneapp.io
balena-devices.com
betainabox.com
bnr.la
blackbaudcdn.net
@ -3865,6 +3868,9 @@ bplaced.net
square7.net
browsersafetymark.io
mycd.eu
carrd.co
crd.co
uwu.ai
ae.org
ar.com
br.com
@ -3905,6 +3911,7 @@ virtueeldomein.nl
cleverapps.io
c66.me
cloud66.ws
cloud66.zone
jdevcloud.com
wpdevcloud.com
cloudaccess.host
@ -3912,7 +3919,9 @@ freesite.host
cloudaccess.net
cloudcontrolled.com
cloudcontrolapp.com
cloudera.site
workers.dev
wnext.app
co.ca
co.cz
cdn77-ssl.net
@ -4273,6 +4282,7 @@ blogsite.xyz
dynv6.net
e4.cz
mytuleap.com
onred.one
enonic.io
eu.org
twmail.cc
@ -4379,14 +4389,18 @@ freeboxos.fr
freedesktop.org
futurehosting.at
futuremailing.at
usercontent.jp
lab.ms
github.io
githubusercontent.com
gitlab.io
glitch.me
cloudapps.digital
ro.im
shop.ro
goip.de
run.app
web.app
appspot.com
blogspot.ae
blogspot.al
@ -4451,6 +4465,11 @@ pagespeedmobilizer.com
publishproxy.com
withgoogle.com
withyoutube.com
fin.ci
free.hr
caa.li
ua.rs
conf.se
hashbang.sh
hasura.app
hasura-app.io
@ -4462,6 +4481,16 @@ ravendb.community
ravendb.me
development.run
ravendb.run
bpl.biz
orx.biz
ng.city
ng.ink
biz.gl
col.ng
gen.ng
ltd.ng
sch.so
häkkinen.fi
moonscale.net
iki.fi
dyn-berlin.de
@ -4481,9 +4510,14 @@ pixolino.com
ipifony.net
mein-iserv.de
test-iserv.de
iobb.net
myjino.ru
js.org
kaas.gg
khplay.nl
keymachine.de
kinghost.net
uni5.net
knightpoint.systems
co.krd
edu.krd
@ -4504,6 +4538,11 @@ linkitools.space
linkyard.cloud
linkyard-cloud.ch
we.bs
loginline.app
loginline.dev
loginline.io
loginline.services
loginline.site
krasnik.pl
leczna.pl
lubartow.pl
@ -4547,6 +4586,20 @@ bmoattachments.org
net.ru
org.ru
pp.ru
pony.club
of.fashion
on.fashion
of.football
in.london
of.london
for.men
and.mom
for.mom
for.one
for.sale
of.work
to.work
nctu.me
bitballoon.com
netlify.com
4u.com
@ -4736,7 +4789,10 @@ pantheonsite.io
gotpantheon.com
mypep.link
on-web.fr
dyn53.io
co.bn
priv.at
prvcy.page
protonet.io
chirurgiens-dentistes-en-france.fr
byen.site
@ -4752,6 +4808,9 @@ rackmaze.com
rackmaze.net
readthedocs.io
rhcloud.com
onrender.com
repl.co
repl.run
resindevice.io
hzc.io
wellbeingzone.eu
@ -4776,6 +4835,7 @@ co.ua
pp.ua
shiftedit.io
myshopblocks.com
mo-siemens.io
1kapp.com
appchizi.com
applinzi.com
@ -4783,6 +4843,7 @@ sinaapp.com
vipsinaapp.com
siteleaf.net
bounty-full.com
stackhero-network.com
static.land
spacekit.io
storj.farm
@ -4811,8 +4872,12 @@ gdansk.pl
gdynia.pl
med.pl
sopot.pl
edugit.org
telebit.app
telebit.io
thingdustdata.com
arvo.network
azimuth.network
bloxcms.com
townnews-staging.com
12hp.at
@ -4858,6 +4923,8 @@ virtual-user.de
2038.io
router.management
v-info.info
voorloper.cloud
wafflecell.com
wedeploy.io
wedeploy.me
wedeploy.sh
@ -4883,5 +4950,8 @@ za.net
za.org
now.sh
bss.design
basicserver.io
virtualserver.io
enterprisecloud.nu
zone.id
%%

View File

@ -1,7 +1,5 @@
#pragma once
#include "config_functions_url.h"
#if USE_GPERF
// Definition of the class generated by gperf, present on gperf/tldLookup.gperf
class tldLookupHash
{
@ -15,4 +13,3 @@ namespace DB
{
using tldLookup = tldLookupHash;
}
#endif

View File

@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Regenerates tldLookup.gperf (the gperf input for the tldLookupHash class)
# from the Public Suffix List. Run from the directory containing this script;
# all paths are relative to the current working directory.

# Abort on the first failing command, on unset variables, and when any stage
# of a pipeline fails, so a broken download never produces a bogus .gperf file.
set -euo pipefail

suffix_list=public_suffix_list.dat
gperf_file=tldLookup.gperf

# Reuse a previously downloaded list if it is present and non-empty.
# The download goes to a temporary name first: `wget -O` creates the output
# file even when the transfer fails, and such an empty/truncated file would
# otherwise be treated as a valid cache by every later run.
if [ ! -s "$suffix_list" ]; then
    wget -O "$suffix_list.tmp" https://publicsuffix.org/list/public_suffix_list.dat
    mv "$suffix_list.tmp" "$suffix_list"
fi

# Assemble the output in a temporary file and move it into place only when
# every step succeeded, so a failure leaves the existing .gperf file intact.
{
    # gperf declarations section.
    echo '%language=C++
%define lookup-function-name is_valid
%define class-name tldLookupHash
%readonly-tables
%includes
%compare-strncmp
%{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
#pragma GCC diagnostic ignored "-Wunused-macros"
%}
# List generated using https://publicsuffix.org/list/public_suffix_list.dat
%%'
    # Keywords section: drop comment lines (`//`) and empty lines, keep only
    # entries containing a dot, then drop entries containing two or more dots.
    grep -v "//" "$suffix_list" | grep . | grep "\." | grep -ve "\..*\..*"
    # Terminator of the keywords section.
    echo "%%"
} > "$gperf_file.tmp"
mv "$gperf_file.tmp" "$gperf_file"

View File

@ -13,3 +13,11 @@ add_headers_and_sources(storages_system .)
list (APPEND storages_system_sources ${CONFIG_BUILD})
add_library(clickhouse_storages_system ${storages_system_headers} ${storages_system_sources})
target_link_libraries(clickhouse_storages_system PRIVATE dbms common string_utils clickhouse_common_zookeeper)
# `generate-source` is an umbrella target that may already have been created
# by another directory; define it here only when it does not exist yet.
if(NOT TARGET generate-source)
    add_custom_target(generate-source)
endif()

# On-demand target that runs StorageSystemContributors.sh from this source
# directory. The script's output file is not declared as a byproduct:
# BYPRODUCTS StorageSystemContributors.generated.cpp
add_custom_target(generate-contributors
    COMMAND ./StorageSystemContributors.sh
    SOURCES StorageSystemContributors.sh
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)

# Building `generate-source` also refreshes the contributors list.
add_dependencies(generate-source generate-contributors)

View File

@ -61,7 +61,6 @@ const char * auto_config_build[]
"USE_SSL", "@USE_SSL@",
"USE_HYPERSCAN", "@USE_HYPERSCAN@",
"USE_SIMDJSON", "@USE_SIMDJSON@",
"USE_GPERF", "@USE_GPERF@",
nullptr, nullptr
};

View File

@ -11,8 +11,10 @@ const char * auto_contributors[] {
"Alexander Ermolaev",
"Alexander GQ Gerasiov",
"Alexander Kazakov",
"Alexander Kozhikhov",
"Alexander Krasheninnikov",
"Alexander Kuranoff",
"Alexander Kuzmenkov",
"Alexander Lukin",
"Alexander Makarov",
"Alexander Marshalov",
@ -49,6 +51,7 @@ const char * auto_contributors[] {
"Anton Popov",
"Anton Tihonov",
"Anton Tikhonov",
"Anton Yuzhaninov",
"Anton Zhabolenko",
"Arsen Hakobyan",
"Artem Andreenko",
@ -72,6 +75,7 @@ const char * auto_contributors[] {
"Bulat Gaifullin",
"Chen Yufei",
"Ciprian Hacman",
"Clément Rodriguez",
"Constantin S. Pan",
"CurtizJ",
"Daniel Bershatsky",
@ -104,6 +108,7 @@ const char * auto_contributors[] {
"George G",
"George3d6",
"Gleb Kanterov",
"Gleb Novikov",
"Guillaume Tassery",
"Hamoon",
"Hasnat",
@ -122,8 +127,12 @@ const char * auto_contributors[] {
"Ivan Babrou",
"Ivan Blinkov",
"Ivan He",
"Ivan Kush",
"Ivan Kushnarenko",
"Ivan Lezhankin",
"Ivan Remen",
"Ivan Zhukov",
"JaosnHsieh",
"Jason",
"Jean Baptiste Favre",
"Jonatas Freitas",
@ -144,6 +153,7 @@ const char * auto_contributors[] {
"LiuCong",
"LiuYangkuan",
"Lopatin Konstantin",
"Loud_Scream",
"Luis Bosque",
"Léo Ercolanelli",
"Maks Skorokhod",
@ -154,6 +164,7 @@ const char * auto_contributors[] {
"Marsel Arduanov",
"Marti Raudsepp",
"Martijn Bakker",
"Masha",
"Max",
"Max Akhmedov",
"Max Vetrov",
@ -161,6 +172,7 @@ const char * auto_contributors[] {
"Maxim Fedotov",
"Maxim Fridental",
"Maxim Khrisanfov",
"Maxim Kuznetsov",
"Maxim Nikulin",
"MaximAL",
"MeiK",
@ -193,6 +205,7 @@ const char * auto_contributors[] {
"Nikolay Volosatov",
"Odin Hultgren Van Der Horst",
"Okada Haruki",
"Oleg Favstov",
"Oleg Komarov",
"Oleg Obleukhov",
"Olga Khvostikova",
@ -206,6 +219,7 @@ const char * auto_contributors[] {
"Pavlo Bashynskiy",
"Pawel Rog",
"Persiyanov Dmitriy Andreevich",
"Quid37",
"Ravengg",
"Reto Kromer",
"Roman Lipovsky",
@ -225,6 +239,7 @@ const char * auto_contributors[] {
"Sergey Magidovich",
"Sergey V. Galtsev",
"Sergey Zaikin",
"Sergi Vladykin",
"SevaCode",
"Silviu Caragea",
"Simon Podlipsky",
@ -269,15 +284,19 @@ const char * auto_contributors[] {
"Vyacheslav Alipov",
"William Shallum",
"Winter Zhang",
"Yangkuan Liu",
"Yegor Andreenko",
"Yuri Dyachenko",
"Yurii Vlasenko",
"Yuriy",
"Yuriy Baranov",
"Yury Karpovich",
"Yury Stankevich",
"Zhichang Yu",
"abdrakhmanov",
"abyss7",
"achulkov2",
"akuzm",
"alesapin",
"alexander kozhikhov",
"alexey-milovidov",
@ -295,8 +314,11 @@ const char * auto_contributors[] {
"chenxing-xc",
"chenxing.xc",
"chertus",
"coraxster",
"daoready",
"decaseal",
"dmitry kuzmin",
"eejoin",
"egatov",
"elBroom",
"ezhaka",
@ -305,12 +327,14 @@ const char * auto_contributors[] {
"fessmage",
"filimonov",
"flow",
"foxxmary",
"ggerogery",
"glockbender",
"hcz",
"hotid",
"igor",
"igor.lapko",
"ivan-kush",
"ivanzhukov",
"javi",
"javi santana",
@ -318,13 +342,16 @@ const char * auto_contributors[] {
"ks1322",
"kshvakov",
"leozhang",
"levushkin aleksej",
"levysh",
"liangqian",
"linceyou",
"liuyangkuan",
"liuyimin",
"lomberts",
"luc1ph3r",
"maiha",
"maxkuzn",
"mf5137",
"mfridental",
"miha-g",
@ -341,6 +368,7 @@ const char * auto_contributors[] {
"proller",
"pyos",
"qianlixiang",
"quid",
"robot-clickhouse",
"robot-metrika-test",
"root",
@ -353,6 +381,7 @@ const char * auto_contributors[] {
"stavrolia",
"sundy-li",
"sundyli",
"svladykin",
"tai",
"topvisor",
"urgordeadbeef",
@ -366,6 +395,7 @@ const char * auto_contributors[] {
"Павел Литвиненко",
"Смитюх Вячеслав",
"Сундуков Алексей",
"吴健",
"小路",
"张健",
"张风啸",

View File

@ -11,7 +11,7 @@ git shortlog --summary | perl -lnE 's/^\s+\d+\s+(.+)/ "$1",/; next unless $1;
# If git history is not available, don't create the target file
if [ ! -s $CONTRIBUTORS_FILE.tmp ]; then
echo Empty result of git shortlog
git status
git status -uno
exit
fi