2021-05-21 18:56:22 +00:00
# if !defined(ARCADIA_BUILD)
# include <Common/config.h>
# endif
2018-01-15 19:07:47 +00:00
# include "ConfigProcessor.h"
2021-05-21 18:56:22 +00:00
# include "YAMLParser.h"
2018-01-15 19:07:47 +00:00
2015-10-05 01:26:43 +00:00
# include <sys/utsname.h>
# include <cerrno>
2018-07-27 17:12:01 +00:00
# include <cstdlib>
2015-10-05 01:26:43 +00:00
# include <cstring>
2018-02-20 08:50:11 +00:00
# include <algorithm>
2017-03-17 00:44:00 +00:00
# include <functional>
2020-08-08 03:42:42 +00:00
# include <filesystem>
2021-05-30 12:28:41 +00:00
# include <boost/algorithm/string.hpp>
2015-10-05 01:26:43 +00:00
# include <Poco/DOM/Text.h>
# include <Poco/DOM/Attr.h>
# include <Poco/DOM/Comment.h>
# include <Poco/Util/XMLConfiguration.h>
2017-06-19 20:06:35 +00:00
# include <Common/ZooKeeper/ZooKeeperNodeCache.h>
2018-04-03 17:35:48 +00:00
# include <Common/ZooKeeper/KeeperException.h>
2018-01-15 19:07:47 +00:00
# include <Common/StringUtils/StringUtils.h>
2020-08-08 03:42:42 +00:00
# include <Common/Exception.h>
# include <common/getResource.h>
2020-12-17 18:08:42 +00:00
# include <common/errnoToString.h>
2020-11-10 18:22:26 +00:00
# include <IO/WriteBufferFromString.h>
# include <IO/Operators.h>
2017-03-17 00:44:00 +00:00
2017-12-13 20:21:03 +00:00
# define PREPROCESSED_SUFFIX "-preprocessed"
2020-09-07 19:36:10 +00:00
namespace fs = std : : filesystem ;
2015-10-05 01:26:43 +00:00
using namespace Poco : : XML ;
2018-11-27 16:11:46 +00:00
namespace DB
{
2020-08-08 03:42:42 +00:00
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST ;
2021-05-30 11:33:45 +00:00
extern const int CANNOT_LOAD_CONFIG ;
2020-08-08 03:42:42 +00:00
}
2019-01-22 19:56:53 +00:00
/// For cutting preprocessed path to this base
2018-12-19 13:11:06 +00:00
static std : : string main_config_path ;
2015-10-05 01:26:43 +00:00
2017-03-16 22:39:52 +00:00
/// Extracts from a string the first encountered number consisting of at least two digits.
2015-10-05 01:26:43 +00:00
static std : : string numberFromHost ( const std : : string & s )
{
2017-04-01 07:20:54 +00:00
for ( size_t i = 0 ; i < s . size ( ) ; + + i )
{
std : : string res ;
size_t j = i ;
2017-07-11 18:22:02 +00:00
while ( j < s . size ( ) & & isNumericASCII ( s [ j ] ) )
2017-04-01 07:20:54 +00:00
res + = s [ j + + ] ;
if ( res . size ( ) > = 2 )
{
while ( res [ 0 ] = = ' 0 ' )
res . erase ( res . begin ( ) ) ;
return res ;
}
}
return " " ;
2015-10-05 01:26:43 +00:00
}
2017-12-13 20:21:03 +00:00
bool ConfigProcessor : : isPreprocessedFile ( const std : : string & path )
{
2021-05-09 11:59:49 +00:00
return endsWith ( fs : : path ( path ) . stem ( ) , PREPROCESSED_SUFFIX ) ;
2017-12-13 20:21:03 +00:00
}
2017-11-21 16:54:25 +00:00
/// Stores the processing options, creates the XML parser and selects the logger.
/// When `log_to_console` is set and no "ConfigProcessor" logger is registered yet,
/// a dedicated console logger is created (and remembered through channel_ptr);
/// otherwise the "ConfigProcessor" logger is obtained with Poco::Logger::get.
ConfigProcessor::ConfigProcessor(
    const std::string & path_,
    bool throw_on_bad_incl_,
    bool log_to_console,
    const Substitutions & substitutions_)
    : path(path_)
    , throw_on_bad_incl(throw_on_bad_incl_)
    , substitutions(substitutions_)
    /// A larger name pool is needed to support a vast amount of users in users.xml files.
    /// The size is a prime number because Poco::XML::NamePool uses a low quality
    /// hash function internally, and its default size was prime as well.
    , name_pool(new Poco::XML::NamePool(65521))
    , dom_parser(name_pool)
{
    const bool create_console_logger = log_to_console && !Poco::Logger::has("ConfigProcessor");

    if (!create_console_logger)
    {
        log = &Poco::Logger::get("ConfigProcessor");
        return;
    }

    channel_ptr = new Poco::ConsoleChannel;
    log = &Poco::Logger::create("ConfigProcessor", channel_ptr.get(), Poco::Message::PRIO_TRACE);
}
2017-03-17 00:44:00 +00:00
/// Destroys the "ConfigProcessor" logger, but only if this object created it.
ConfigProcessor::~ConfigProcessor()
{
    /// channel_ptr is set exclusively by the constructor branch that creates a new console logger.
    const bool created_console_logger = channel_ptr.get() != nullptr;
    if (created_console_logger)
        Poco::Logger::destroy("ConfigProcessor");
}
2017-03-16 22:39:52 +00:00
/// Vector containing the name of the element and a sorted list of attribute names and values
/// (except "remove" and "replace" attributes).
/// Serves as a unique identifier of the element contents for comparison.
2016-05-28 10:35:44 +00:00
using ElementIdentifier = std : : vector < std : : string > ;
2015-10-05 01:26:43 +00:00
2016-05-28 10:35:44 +00:00
using NamedNodeMapPtr = Poco : : AutoPtr < Poco : : XML : : NamedNodeMap > ;
2017-03-16 22:39:52 +00:00
/// NOTE getting rid of iterating over the result of Node.childNodes() call is a good idea
/// because accessing the i-th element of this list takes O(i) time.
2016-05-28 10:35:44 +00:00
using NodeListPtr = Poco : : AutoPtr < Poco : : XML : : NodeList > ;
2015-10-05 01:26:43 +00:00
/// Builds the identifier of `element`: its node name followed by the name and value
/// of every attribute, sorted by (name, value).
/// The "replace" and "remove" attributes and all attributes listed in
/// ConfigProcessor::SUBSTITUTION_ATTRS are left out of the identifier.
static ElementIdentifier getElementIdentifier(Node * element)
{
    using AttributePair = std::pair<std::string, std::string>;

    const NamedNodeMapPtr attributes = element->attributes();
    const size_t attribute_count = attributes->length();

    std::vector<AttributePair> identifying_attributes;
    for (size_t idx = 0; idx < attribute_count; ++idx)
    {
        const Node * attribute = attributes->item(idx);
        std::string attribute_name = attribute->nodeName();

        const bool is_merge_directive = attribute_name == "replace" || attribute_name == "remove";
        const bool is_substitution
            = std::find(ConfigProcessor::SUBSTITUTION_ATTRS.begin(), ConfigProcessor::SUBSTITUTION_ATTRS.end(), attribute_name)
            != ConfigProcessor::SUBSTITUTION_ATTRS.end();

        if (is_merge_directive || is_substitution)
            continue;

        identifying_attributes.emplace_back(std::move(attribute_name), attribute->nodeValue());
    }

    std::sort(identifying_attributes.begin(), identifying_attributes.end());

    ElementIdentifier identifier;
    identifier.reserve(1 + 2 * identifying_attributes.size());
    identifier.push_back(element->nodeName());
    for (const auto & [attribute_name, attribute_value] : identifying_attributes)
    {
        identifier.push_back(attribute_name);
        identifier.push_back(attribute_value);
    }

    return identifier;
}
/// Returns the first top-level child of `document` that is an element node.
/// Other top-level nodes (for example comments) are skipped.
/// Throws Poco::Exception if the document has no element child.
static Node * getRootNode(Document * document)
{
    const NodeListPtr top_level_nodes = document->childNodes();
    const size_t node_count = top_level_nodes->length();

    for (size_t pos = 0; pos != node_count; ++pos)
    {
        Node * candidate = top_level_nodes->item(pos);
        if (candidate->nodeType() != Node::ELEMENT_NODE)
            continue;

        return candidate;
    }

    throw Poco::Exception("No root node in document");
}
/// Returns true if `s` is empty or contains nothing but spaces, tabs,
/// line feeds and carriage returns.
static bool allWhitespace(const std::string & s)
{
    for (const char c : s)
    {
        switch (c)
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                break;
            default:
                return false;
        }
    }

    return true;
}
2018-03-03 01:01:14 +00:00
void ConfigProcessor : : mergeRecursive ( XMLDocumentPtr config , Node * config_root , const Node * with_root )
2015-10-05 01:26:43 +00:00
{
2018-03-03 01:01:14 +00:00
const NodeListPtr with_nodes = with_root - > childNodes ( ) ;
2017-04-01 07:20:54 +00:00
using ElementsByIdentifier = std : : multimap < ElementIdentifier , Node * > ;
ElementsByIdentifier config_element_by_id ;
for ( Node * node = config_root - > firstChild ( ) ; node ; )
{
Node * next_node = node - > nextSibling ( ) ;
/// Remove text from the original config node.
if ( node - > nodeType ( ) = = Node : : TEXT_NODE & & ! allWhitespace ( node - > getNodeValue ( ) ) )
{
config_root - > removeChild ( node ) ;
}
else if ( node - > nodeType ( ) = = Node : : ELEMENT_NODE )
{
config_element_by_id . insert ( ElementsByIdentifier : : value_type ( getElementIdentifier ( node ) , node ) ) ;
}
node = next_node ;
}
2018-03-03 00:33:25 +00:00
for ( size_t i = 0 , size = with_nodes - > length ( ) ; i < size ; + + i )
2017-04-01 07:20:54 +00:00
{
Node * with_node = with_nodes - > item ( i ) ;
bool merged = false ;
bool remove = false ;
if ( with_node - > nodeType ( ) = = Node : : ELEMENT_NODE )
{
2018-06-04 19:22:27 +00:00
Element & with_element = dynamic_cast < Element & > ( * with_node ) ;
remove = with_element . hasAttribute ( " remove " ) ;
bool replace = with_element . hasAttribute ( " replace " ) ;
2017-04-01 07:20:54 +00:00
if ( remove & & replace )
throw Poco : : Exception ( " both remove and replace attributes set for element < " + with_node - > nodeName ( ) + " > " ) ;
ElementsByIdentifier : : iterator it = config_element_by_id . find ( getElementIdentifier ( with_node ) ) ;
if ( it ! = config_element_by_id . end ( ) )
{
Node * config_node = it - > second ;
config_element_by_id . erase ( it ) ;
if ( remove )
{
config_root - > removeChild ( config_node ) ;
}
else if ( replace )
{
2018-06-04 19:22:27 +00:00
with_element . removeAttribute ( " replace " ) ;
2017-04-01 07:20:54 +00:00
NodePtr new_node = config - > importNode ( with_node , true ) ;
config_root - > replaceChild ( new_node , config_node ) ;
}
else
{
mergeRecursive ( config , config_node , with_node ) ;
}
merged = true ;
}
}
if ( ! merged & & ! remove )
{
NodePtr new_node = config - > importNode ( with_node , true ) ;
config_root - > appendChild ( new_node ) ;
}
}
2015-10-05 01:26:43 +00:00
}
2017-03-17 00:44:00 +00:00
void ConfigProcessor : : merge ( XMLDocumentPtr config , XMLDocumentPtr with )
2015-10-05 01:26:43 +00:00
{
2019-07-23 14:41:41 +00:00
Node * config_root = getRootNode ( config . get ( ) ) ;
Node * with_root = getRootNode ( with . get ( ) ) ;
2019-07-22 14:00:44 +00:00
if ( config_root - > nodeName ( ) ! = with_root - > nodeName ( ) )
throw Poco : : Exception ( " Root element doesn't have the corresponding root element as the config file. It must be < " + config_root - > nodeName ( ) + " > " ) ;
mergeRecursive ( config , config_root , with_root ) ;
2015-10-05 01:26:43 +00:00
}
2020-03-18 00:57:00 +00:00
static std : : string layerFromHost ( )
2015-10-05 01:26:43 +00:00
{
2021-04-28 23:32:41 +00:00
struct utsname buf ;
2017-04-01 07:20:54 +00:00
if ( uname ( & buf ) )
2020-12-17 18:08:42 +00:00
throw Poco : : Exception ( std : : string ( " uname failed: " ) + errnoToString ( errno ) ) ;
2015-10-05 01:26:43 +00:00
2017-04-01 07:20:54 +00:00
std : : string layer = numberFromHost ( buf . nodename ) ;
if ( layer . empty ( ) )
throw Poco : : Exception ( std : : string ( " no layer in host name: " ) + buf . nodename ) ;
2015-10-05 01:26:43 +00:00
2017-04-01 07:20:54 +00:00
return layer ;
2015-10-05 01:26:43 +00:00
}
2017-03-17 00:44:00 +00:00
void ConfigProcessor : : doIncludesRecursive (
2017-04-01 07:20:54 +00:00
XMLDocumentPtr config ,
XMLDocumentPtr include_from ,
Node * node ,
zkutil : : ZooKeeperNodeCache * zk_node_cache ,
2018-10-17 17:23:10 +00:00
const zkutil : : EventPtr & zk_changed_event ,
2017-04-01 07:20:54 +00:00
std : : unordered_set < std : : string > & contributing_zk_paths )
2015-10-05 01:26:43 +00:00
{
2017-04-01 07:20:54 +00:00
if ( node - > nodeType ( ) = = Node : : TEXT_NODE )
{
for ( auto & substitution : substitutions )
{
std : : string value = node - > nodeValue ( ) ;
bool replace_occured = false ;
size_t pos ;
while ( ( pos = value . find ( substitution . first ) ) ! = std : : string : : npos )
{
value . replace ( pos , substitution . first . length ( ) , substitution . second ) ;
replace_occured = true ;
}
if ( replace_occured )
node - > setNodeValue ( value ) ;
}
}
if ( node - > nodeType ( ) ! = Node : : ELEMENT_NODE )
return ;
/// Substitute <layer> for the number extracted from the hostname only if there is an
/// empty <layer> tag without attributes in the original file.
2018-11-24 01:48:06 +00:00
if ( node - > nodeName ( ) = = " layer "
& & ! node - > hasAttributes ( )
& & ! node - > hasChildNodes ( )
& & node - > nodeValue ( ) . empty ( ) )
2017-04-01 07:20:54 +00:00
{
NodePtr new_node = config - > createTextNode ( layerFromHost ( ) ) ;
node - > appendChild ( new_node ) ;
return ;
}
2018-07-27 17:12:01 +00:00
std : : map < std : : string , const Node * > attr_nodes ;
2017-04-01 07:20:54 +00:00
NamedNodeMapPtr attributes = node - > attributes ( ) ;
2018-07-27 17:12:01 +00:00
size_t substs_count = 0 ;
2018-07-27 15:36:21 +00:00
for ( const auto & attr_name : SUBSTITUTION_ATTRS )
2018-07-27 17:12:01 +00:00
{
2020-04-22 05:39:31 +00:00
const auto * subst = attributes - > getNamedItem ( attr_name ) ;
2018-07-27 17:12:01 +00:00
attr_nodes [ attr_name ] = subst ;
2021-05-24 13:07:21 +00:00
substs_count + = static_cast < size_t > ( subst ! = nullptr ) ;
2018-07-27 17:12:01 +00:00
}
2017-04-01 07:20:54 +00:00
2021-05-24 13:07:21 +00:00
if ( substs_count > 1 ) /// only one substitution is allowed
2018-07-27 15:36:21 +00:00
throw Poco : : Exception ( " several substitutions attributes set for element < " + node - > nodeName ( ) + " > " ) ;
2017-04-01 07:20:54 +00:00
2021-05-24 13:07:38 +00:00
if ( node - > nodeName ( ) = = " include " )
{
if ( node - > hasChildNodes ( ) )
throw Poco : : Exception ( " <include> element must have no children " ) ;
if ( substs_count = = 0 )
throw Poco : : Exception ( " no substitution attributes set for element <include>, must have one " ) ;
}
2017-04-01 07:20:54 +00:00
/// Replace the original contents, not add to it.
bool replace = attributes - > getNamedItem ( " replace " ) ;
2018-03-30 13:33:43 +00:00
bool included_something = false ;
2018-03-03 01:01:14 +00:00
auto process_include = [ & ] ( const Node * include_attr , const std : : function < const Node * ( const std : : string & ) > & get_node , const char * error_msg )
2017-04-01 07:20:54 +00:00
{
2020-03-18 03:27:32 +00:00
const std : : string & name = include_attr - > getNodeValue ( ) ;
2018-03-03 01:01:14 +00:00
const Node * node_to_include = get_node ( name ) ;
2017-04-01 07:20:54 +00:00
if ( ! node_to_include )
{
if ( attributes - > getNamedItem ( " optional " ) )
node - > parentNode ( ) - > removeChild ( node ) ;
else if ( throw_on_bad_incl )
throw Poco : : Exception ( error_msg + name ) ;
else
2021-05-24 12:34:39 +00:00
{
if ( node - > nodeName ( ) = = " include " )
node - > parentNode ( ) - > removeChild ( node ) ;
2020-05-23 22:24:01 +00:00
LOG_WARNING ( log , " {}{} " , error_msg , name ) ;
2021-05-24 12:34:39 +00:00
}
2017-04-01 07:20:54 +00:00
}
else
{
2021-05-21 14:51:24 +00:00
/// Replace the whole node not just contents.
if ( node - > nodeName ( ) = = " include " )
2017-04-01 07:20:54 +00:00
{
2021-05-21 14:51:24 +00:00
const NodeListPtr children = node_to_include - > childNodes ( ) ;
for ( size_t i = 0 , size = children - > length ( ) ; i < size ; + + i )
{
NodePtr new_node = config - > importNode ( children - > item ( i ) , true ) ;
node - > parentNode ( ) - > insertBefore ( new_node , node ) ;
}
2017-04-01 07:20:54 +00:00
2021-05-21 14:51:24 +00:00
node - > parentNode ( ) - > removeChild ( node ) ;
2017-04-01 07:20:54 +00:00
}
2021-05-21 14:51:24 +00:00
else
2017-04-01 07:20:54 +00:00
{
2021-05-21 14:51:24 +00:00
Element & element = dynamic_cast < Element & > ( * node ) ;
2017-04-01 07:20:54 +00:00
2021-05-21 14:51:24 +00:00
for ( const auto & attr_name : SUBSTITUTION_ATTRS )
element . removeAttribute ( attr_name ) ;
2018-03-30 13:33:43 +00:00
2021-05-21 14:51:24 +00:00
if ( replace )
{
while ( Node * child = node - > firstChild ( ) )
node - > removeChild ( child ) ;
2018-03-30 13:33:43 +00:00
2021-05-21 14:51:24 +00:00
element . removeAttribute ( " replace " ) ;
}
const NodeListPtr children = node_to_include - > childNodes ( ) ;
for ( size_t i = 0 , size = children - > length ( ) ; i < size ; + + i )
{
NodePtr new_node = config - > importNode ( children - > item ( i ) , true ) ;
node - > appendChild ( new_node ) ;
}
const NamedNodeMapPtr from_attrs = node_to_include - > attributes ( ) ;
for ( size_t i = 0 , size = from_attrs - > length ( ) ; i < size ; + + i )
{
element . setAttributeNode ( dynamic_cast < Attr * > ( config - > importNode ( from_attrs - > item ( i ) , true ) ) ) ;
}
included_something = true ;
}
2017-04-01 07:20:54 +00:00
}
} ;
2018-07-27 17:12:01 +00:00
if ( attr_nodes [ " incl " ] ) // we have include subst
2017-04-01 07:20:54 +00:00
{
2018-07-27 15:36:21 +00:00
auto get_incl_node = [ & ] ( const std : : string & name )
{
return include_from ? include_from - > getNodeByPath ( " yandex/ " + name ) : nullptr ;
} ;
2018-07-27 17:12:01 +00:00
process_include ( attr_nodes [ " incl " ] , get_incl_node , " Include not found: " ) ;
2018-07-27 15:36:21 +00:00
}
2017-04-01 07:20:54 +00:00
2018-07-27 17:12:01 +00:00
if ( attr_nodes [ " from_zk " ] ) /// we have zookeeper subst
2017-04-01 07:20:54 +00:00
{
2018-07-27 17:12:01 +00:00
contributing_zk_paths . insert ( attr_nodes [ " from_zk " ] - > getNodeValue ( ) ) ;
2017-04-01 07:20:54 +00:00
if ( zk_node_cache )
{
XMLDocumentPtr zk_document ;
2018-03-03 01:01:14 +00:00
auto get_zk_node = [ & ] ( const std : : string & name ) - > const Node *
2017-04-01 07:20:54 +00:00
{
2018-11-28 11:40:59 +00:00
zkutil : : ZooKeeperNodeCache : : ZNode znode = zk_node_cache - > get ( name , zk_changed_event ) ;
if ( ! znode . exists )
2017-04-01 07:20:54 +00:00
return nullptr ;
/// Enclose contents into a fake <from_zk> tag to allow pure text substitutions.
2018-11-28 11:40:59 +00:00
zk_document = dom_parser . parseString ( " <from_zk> " + znode . contents + " </from_zk> " ) ;
2017-04-01 07:20:54 +00:00
return getRootNode ( zk_document . get ( ) ) ;
} ;
2018-07-27 17:12:01 +00:00
process_include ( attr_nodes [ " from_zk " ] , get_zk_node , " Could not get ZooKeeper node: " ) ;
2017-04-01 07:20:54 +00:00
}
}
2018-07-27 17:12:01 +00:00
if ( attr_nodes [ " from_env " ] ) /// we have env subst
2018-07-27 15:36:21 +00:00
{
XMLDocumentPtr env_document ;
auto get_env_node = [ & ] ( const std : : string & name ) - > const Node *
{
2018-07-27 17:12:01 +00:00
const char * env_val = std : : getenv ( name . c_str ( ) ) ;
if ( env_val = = nullptr )
2018-07-27 15:36:21 +00:00
return nullptr ;
2018-07-27 17:12:01 +00:00
env_document = dom_parser . parseString ( " <from_env> " + std : : string { env_val } + " </from_env> " ) ;
2018-07-27 15:36:21 +00:00
return getRootNode ( env_document . get ( ) ) ;
} ;
2018-07-27 17:12:01 +00:00
process_include ( attr_nodes [ " from_env " ] , get_env_node , " Env variable is not set: " ) ;
2018-07-27 15:36:21 +00:00
}
2018-03-30 13:33:43 +00:00
if ( included_something )
2018-10-17 17:23:10 +00:00
doIncludesRecursive ( config , include_from , node , zk_node_cache , zk_changed_event , contributing_zk_paths ) ;
2018-03-30 13:33:43 +00:00
else
2017-04-01 07:20:54 +00:00
{
2018-03-30 13:33:43 +00:00
NodeListPtr children = node - > childNodes ( ) ;
Node * child = nullptr ;
for ( size_t i = 0 ; ( child = children - > item ( i ) ) ; + + i )
2018-10-17 17:23:10 +00:00
doIncludesRecursive ( config , include_from , child , zk_node_cache , zk_changed_event , contributing_zk_paths ) ;
2017-04-01 07:20:54 +00:00
}
2015-10-05 01:26:43 +00:00
}
2016-10-10 08:44:52 +00:00
/// Collects the files to be merged into the config at `config_path`, sorted.
///
/// Two directories are scanned if they exist: the config path with its extension
/// replaced by "d", and "conf.d" next to the config file.
/// A directory entry is taken if it is a regular file, its lower-cased extension is
/// ".xml", ".conf", ".yaml" or ".yml", and its stem does not start with a dot.
ConfigProcessor::Files ConfigProcessor::getConfigMergeFiles(const std::string & config_path)
{
    /// path_to_config/config_name.d
    fs::path own_merge_dir(config_path);
    own_merge_dir.replace_extension("d");

    /// path_to_config/conf.d
    fs::path shared_merge_dir = own_merge_dir;
    shared_merge_dir.replace_filename("conf.d");

    /// A set: the two directories coincide when the config stem is "conf".
    const std::set<std::string> merge_dirs{own_merge_dir.string(), shared_merge_dir.string()};

    Files files;
    for (const std::string & merge_dir_name : merge_dirs)
    {
        if (!fs::exists(merge_dir_name) || !fs::is_directory(merge_dir_name))
            continue;

        for (const fs::directory_entry & entry : fs::directory_iterator(merge_dir_name))
        {
            const fs::path candidate(entry.path());
            std::string extension = candidate.extension().string();
            const std::string base_name = candidate.stem().string();

            boost::algorithm::to_lower(extension);

            /// Skip non-config and temporary files.
            if (!fs::is_regular_file(candidate))
                continue;

            const bool is_config_extension
                = extension == ".xml" || extension == ".conf" || extension == ".yaml" || extension == ".yml";
            if (!is_config_extension || startsWith(base_name, "."))
                continue;

            files.push_back(candidate.string());
        }
    }

    std::sort(files.begin(), files.end());

    return files;
}
2017-03-17 00:44:00 +00:00
/// Builds the preprocessed configuration document for `path`.
///
/// Steps:
///  1. Parse `path` (YAML for .yaml/.yml, XML for .xml/.conf/no extension). If the file does
///     not exist, "config.xml" and "keeper_config.xml" fall back to a resource embedded
///     in the binary.
///  2. Merge every file returned by getConfigMergeFiles(path).
///  3. Resolve includes and substitutions, using the file named by <include_from>
///     or /etc/metrika.xml if it exists.
///  4. Prepend a comment listing the files and ZooKeeper nodes that contributed.
///
/// @param has_zk_includes   If not null, set to whether any "from_zk" attribute was seen.
///                          It is written only when processing succeeds.
/// @param zk_node_cache     Passed to doIncludesRecursive(); may be null.
/// @param zk_changed_event  Passed to doIncludesRecursive().
/// @return The preprocessed document.
///
/// Throws Exception (FILE_DOESNT_EXIST, CANNOT_LOAD_CONFIG) for a missing or unrecognized
/// main config; failures of merging or preprocessing are rethrown with added context.
XMLDocumentPtr ConfigProcessor::processConfig(
    bool * has_zk_includes,
    zkutil::ZooKeeperNodeCache * zk_node_cache,
    const zkutil::EventPtr & zk_changed_event)
{
    LOG_DEBUG(log, "Processing configuration file '{}'.", path);

    /// Lower-cased extension of a file path, including the leading dot.
    const auto lowercase_extension = [](const std::string & file_path)
    {
        std::string extension = fs::path(file_path).extension().string();
        boost::algorithm::to_lower(extension);
        return extension;
    };

    const auto is_yaml_extension = [](const std::string & extension)
    {
        return extension == ".yaml" || extension == ".yml";
    };

    XMLDocumentPtr config;

    if (fs::exists(path))
    {
        const std::string extension = lowercase_extension(path);
        if (is_yaml_extension(extension))
        {
            config = YAMLParser::parse(path);
        }
        else if (extension == ".xml" || extension == ".conf" || extension.empty())
        {
            config = dom_parser.parse(path);
        }
        else
        {
            throw Exception(ErrorCodes::CANNOT_LOAD_CONFIG, "Unknown format of '{}' config", path);
        }
    }
    else
    {
        /// These embedded files are added during the build with some cmake magic.
        /// Look at the end of programs/server/CMakeLists.txt.
        std::string embedded_name;
        if (path == "config.xml")
            embedded_name = "embedded.xml";

        if (path == "keeper_config.xml")
            embedded_name = "keeper_embedded.xml";

        /// When we can use config embedded in binary.
        if (!embedded_name.empty())
        {
            auto resource = getResource(embedded_name);
            if (resource.empty())
                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
            LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
            config = dom_parser.parseMemory(resource.data(), resource.size());
        }
        else
            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path);
    }

    std::vector<std::string> contributing_files;
    contributing_files.push_back(path);

    for (auto & merge_file : getConfigMergeFiles(path))
    {
        try
        {
            LOG_DEBUG(log, "Merging configuration file '{}'.", merge_file);

            /// Merge files are either YAML or, for any other extension, parsed as XML.
            XMLDocumentPtr with;
            if (is_yaml_extension(lowercase_extension(merge_file)))
            {
                with = YAMLParser::parse(merge_file);
            }
            else
            {
                with = dom_parser.parse(merge_file);
            }

            merge(config, with);

            contributing_files.push_back(merge_file);
        }
        catch (Exception & e)
        {
            e.addMessage("while merging config '" + path + "' with '" + merge_file + "'");
            throw;
        }
        catch (const Poco::Exception & e)
        {
            /// Keep the original exception as nested, the same way the preprocessing step below does.
            throw Poco::Exception("Failed to merge config with '" + merge_file + "': " + e.displayText(), e);
        }
    }

    std::unordered_set<std::string> contributing_zk_paths;
    try
    {
        Node * node = config->getNodeByPath("yandex/include_from");

        XMLDocumentPtr include_from;
        std::string include_from_path;
        if (node)
        {
            /// if we include_from env or zk.
            doIncludesRecursive(config, nullptr, node, zk_node_cache, zk_changed_event, contributing_zk_paths);
            include_from_path = node->innerText();
        }
        else
        {
            std::string default_path = "/etc/metrika.xml";
            if (fs::exists(default_path))
                include_from_path = default_path;
        }

        if (!include_from_path.empty())
        {
            LOG_DEBUG(log, "Including configuration file '{}'.", include_from_path);

            contributing_files.push_back(include_from_path);
            include_from = dom_parser.parse(include_from_path);
        }

        doIncludesRecursive(config, include_from, getRootNode(config.get()), zk_node_cache, zk_changed_event, contributing_zk_paths);
    }
    catch (Exception & e)
    {
        e.addMessage("while preprocessing config '" + path + "'");
        throw;
    }
    catch (const Poco::Exception & e)
    {
        /// The nested exception is inspected by loadConfigWithZooKeeperIncludes().
        throw Poco::Exception("Failed to preprocess config '" + path + "': " + e.displayText(), e);
    }

    if (has_zk_includes)
        *has_zk_includes = !contributing_zk_paths.empty();

    /// Header comment placed at the very beginning of the resulting document.
    WriteBufferFromOwnString comment;
    comment << " This file was generated automatically.\n";
    comment << "     Do not edit it: it is likely to be discarded and generated again before it's read next time.\n";
    comment << "     Files used to generate this file:";
    for (const std::string & contributing_file : contributing_files)
    {
        comment << "\n       " << contributing_file;
    }
    if (zk_node_cache && !contributing_zk_paths.empty())
    {
        comment << "\n     ZooKeeper nodes used to generate this file:";
        for (const std::string & contributing_zk_path : contributing_zk_paths)
            comment << "\n       " << contributing_zk_path;
    }

    comment << "      ";

    /// Each node is inserted before the current first child, so the comment
    /// ends up first and the blank lines follow it.
    NodePtr new_node = config->createTextNode("\n\n");
    config->insertBefore(new_node, config->firstChild());
    new_node = config->createComment(comment.str());
    config->insertBefore(new_node, config->firstChild());

    return config;
}
2017-11-21 16:54:25 +00:00
/// Preprocesses the config without a ZooKeeper node cache and wraps the result
/// into a LoadedConfig (never marked as loaded from the preprocessed file).
/// Throws Poco::Exception if the config has "from_zk" includes while
/// `allow_zk_includes` is false.
ConfigProcessor::LoadedConfig ConfigProcessor::loadConfig(bool allow_zk_includes)
{
    bool uses_zk_includes = false;
    XMLDocumentPtr preprocessed_xml = processConfig(&uses_zk_includes);

    const bool zk_includes_forbidden = !allow_zk_includes;
    if (uses_zk_includes && zk_includes_forbidden)
        throw Poco::Exception("Error while loading config '" + path + "': from_zk includes are not allowed!");

    ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(preprocessed_xml));

    return LoadedConfig{configuration, uses_zk_includes, /* loaded_from_preprocessed = */ false, preprocessed_xml, path};
}
/// Preprocesses the config, resolving "from_zk" includes through `zk_node_cache`.
///
/// If processing fails with a Poco::Exception whose nested exception is a
/// Coordination::Exception and `fallback_to_preprocessed` is set, the previously saved
/// preprocessed file (preprocessed_path) is parsed instead and the result is marked
/// as loaded from the preprocessed file. Any other failure is rethrown.
///
/// @param zk_node_cache            Cache used to read ZooKeeper nodes.
/// @param zk_changed_event         Passed through to processConfig().
/// @param fallback_to_preprocessed Allow the fallback described above.
ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
    zkutil::ZooKeeperNodeCache & zk_node_cache,
    const zkutil::EventPtr & zk_changed_event,
    bool fallback_to_preprocessed)
{
    XMLDocumentPtr config_xml;
    /// Must be initialized: processConfig() writes it only when it succeeds,
    /// while the fallback path below still returns it.
    bool has_zk_includes = false;
    bool processed_successfully = false;
    try
    {
        config_xml = processConfig(&has_zk_includes, &zk_node_cache, zk_changed_event);
        processed_successfully = true;
    }
    catch (const Poco::Exception & ex)
    {
        if (!fallback_to_preprocessed)
            throw;

        const auto * zk_exception = dynamic_cast<const Coordination::Exception *>(ex.nested());
        if (!zk_exception)
            throw;

        LOG_WARNING(log, "Error while processing from_zk config includes: {}. Config will be loaded from preprocessed file: {}", zk_exception->message(), preprocessed_path);

        /// The failure happened while processing from_zk includes, so the config has them.
        has_zk_includes = true;

        config_xml = dom_parser.parse(preprocessed_path);
    }

    ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml));

    return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path};
}
2018-11-27 16:11:46 +00:00
void ConfigProcessor : : savePreprocessedConfig ( const LoadedConfig & loaded_config , std : : string preprocessed_dir )
2017-03-17 00:44:00 +00:00
{
2019-05-15 09:16:09 +00:00
try
2018-11-27 16:11:46 +00:00
{
2019-05-15 09:16:09 +00:00
if ( preprocessed_path . empty ( ) )
2018-11-27 16:11:46 +00:00
{
2020-09-07 19:36:10 +00:00
fs : : path preprocessed_configs_path ( " preprocessed_configs/ " ) ;
2019-05-15 09:16:09 +00:00
auto new_path = loaded_config . config_path ;
2021-06-17 19:35:22 +00:00
if ( new_path . starts_with ( main_config_path ) )
new_path . erase ( 0 , main_config_path . size ( ) ) ;
2019-05-15 09:16:09 +00:00
std : : replace ( new_path . begin ( ) , new_path . end ( ) , ' / ' , ' _ ' ) ;
if ( preprocessed_dir . empty ( ) )
2018-11-27 16:11:46 +00:00
{
2019-05-15 09:16:09 +00:00
if ( ! loaded_config . configuration - > has ( " path " ) )
{
// Will use current directory
2021-05-13 09:32:52 +00:00
fs : : path parent_path = fs : : path ( loaded_config . config_path ) . parent_path ( ) ;
preprocessed_dir = parent_path . string ( ) ;
fs : : path fs_new_path ( new_path ) ;
2021-05-21 21:12:46 +00:00
fs_new_path . replace_filename ( fs_new_path . stem ( ) . string ( ) + PREPROCESSED_SUFFIX + fs_new_path . extension ( ) . string ( ) ) ;
2021-05-13 09:32:52 +00:00
new_path = fs_new_path . string ( ) ;
2019-05-15 09:16:09 +00:00
}
else
{
2020-09-07 19:36:10 +00:00
fs : : path loaded_config_path ( loaded_config . configuration - > getString ( " path " ) ) ;
preprocessed_dir = loaded_config_path / preprocessed_configs_path ;
2019-05-15 09:16:09 +00:00
}
2018-11-27 16:11:46 +00:00
}
else
{
2020-09-07 19:36:10 +00:00
fs : : path preprocessed_dir_path ( preprocessed_dir ) ;
preprocessed_dir = ( preprocessed_dir_path / preprocessed_configs_path ) . string ( ) ;
2018-11-27 16:11:46 +00:00
}
2020-09-07 19:36:10 +00:00
preprocessed_path = ( fs : : path ( preprocessed_dir ) / fs : : path ( new_path ) ) . string ( ) ;
2021-05-12 06:19:11 +00:00
auto preprocessed_path_parent = fs : : path ( preprocessed_path ) . parent_path ( ) ;
2021-05-21 21:12:46 +00:00
if ( ! preprocessed_path_parent . empty ( ) )
2021-05-12 06:19:11 +00:00
fs : : create_directories ( preprocessed_path_parent ) ;
2019-05-15 09:16:09 +00:00
}
2017-11-21 16:54:25 +00:00
DOMWriter ( ) . writeNode ( preprocessed_path , loaded_config . preprocessed_xml ) ;
2020-05-23 22:24:01 +00:00
LOG_DEBUG ( log , " Saved preprocessed configuration to '{}'. " , preprocessed_path ) ;
2017-04-01 07:20:54 +00:00
}
catch ( Poco : : Exception & e )
{
2020-05-23 22:24:01 +00:00
LOG_WARNING ( log , " Couldn't save preprocessed config to {}: {} " , preprocessed_path , e . displayText ( ) ) ;
2017-04-01 07:20:54 +00:00
}
2015-10-05 01:26:43 +00:00
}
2018-11-27 16:11:46 +00:00
void ConfigProcessor : : setConfigPath ( const std : : string & config_path )
{
main_config_path = config_path ;
2021-06-17 19:35:22 +00:00
if ( ! main_config_path . ends_with ( ' / ' ) )
main_config_path + = ' / ' ;
2018-11-27 16:11:46 +00:00
}
}