ClickHouse/src/Common/SymbolIndex.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

99 lines
2.7 KiB
C++
Raw Normal View History

2019-07-29 01:08:52 +00:00
#pragma once
#if defined(__ELF__) && !defined(OS_FREEBSD)
2019-08-21 00:48:34 +00:00
2019-07-29 01:08:52 +00:00
#include <vector>
#include <string>
#include <unordered_map>
2019-07-29 22:26:44 +00:00
#include <Common/Elf.h>
#include <boost/noncopyable.hpp>
2019-07-29 01:08:52 +00:00
2020-11-30 14:30:55 +00:00
#include <Common/MultiVersion.h>
2019-07-29 01:08:52 +00:00
namespace DB
{
/** Allows quickly finding a symbol name by its address.
* Used as a replacement for the "dladdr" function, which is extremely slow.
2019-07-29 22:26:44 +00:00
* It works better than "dladdr" because it also allows searching private symbols that do not participate in shared linking.
2019-07-29 01:08:52 +00:00
*/
class SymbolIndex : private boost::noncopyable
2019-07-29 01:08:52 +00:00
{
2019-07-29 22:26:44 +00:00
protected:
/// Fills the index immediately on construction by calling update().
/// NOTE(review): presumably protected so that instances are only obtained through instance() — confirm.
SymbolIndex() { update(); }
2019-07-29 01:08:52 +00:00
public:
2021-08-28 03:43:43 +00:00
/// Returns a MultiVersion<SymbolIndex>::Version handle to the index.
/// NOTE(review): presumably obtained from the holder returned by instanceImpl() — confirm against the implementation.
static MultiVersion<SymbolIndex>::Version instance();
2019-07-29 01:08:52 +00:00
/// A single symbol: an address range together with its name.
struct Symbol
{
/// Beginning of the symbol's address range.
const void * address_begin;
/// End of the symbol's address range.
/// NOTE(review): whether the end address is inclusive is not visible in this header — confirm.
const void * address_end;
2019-07-29 22:26:44 +00:00
/// Symbol name as a C string.
/// NOTE(review): non-owning pointer; the owner of the underlying storage is not visible here.
const char * name;
2019-07-29 18:38:04 +00:00
};
2019-07-29 01:08:52 +00:00
2019-07-29 18:38:04 +00:00
/// A single object: its address range, its name and a handle to its Elf data.
/// NOTE(review): presumably one entry per loaded binary / shared library — confirm.
struct Object
{
/// Beginning of the object's address range.
const void * address_begin;
/// End of the object's address range.
const void * address_end;
/// Name of the object.
std::string name;
/// Shared handle to the Elf instance associated with this object.
std::shared_ptr<Elf> elf;
2019-07-29 01:08:52 +00:00
};
2019-12-24 20:07:44 +00:00
/// An address in virtual memory should be passed. Such addresses include the offset at which the object is loaded in memory.
2019-07-29 18:38:04 +00:00
/// Looks up the Symbol corresponding to `address`.
/// NOTE(review): a pointer is returned, presumably null when nothing matches — confirm in the implementation.
const Symbol * findSymbol(const void * address) const;
/// Looks up the Object corresponding to `address`.
/// NOTE(review): a pointer is returned, presumably null when nothing matches — confirm in the implementation.
const Object * findObject(const void * address) const;
2019-07-29 01:08:52 +00:00
2019-07-29 18:38:04 +00:00
/// Read-only access to the stored symbols (returns a reference to data.symbols, no copy).
const std::vector<Symbol> & symbols() const { return data.symbols; }
/// Read-only access to the stored objects (returns a reference to data.objects, no copy).
const std::vector<Object> & objects() const { return data.objects; }
2019-07-29 01:08:52 +00:00
std::string_view getResource(String name) const
{
if (auto it = data.resources.find(name); it != data.resources.end())
return it->second.data();
return {};
}
2020-06-20 09:07:05 +00:00
/// The BuildID that is generated by the compiler; returned as a copy of the stored string.
String getBuildID() const { return data.build_id; }
/// NOTE(review): presumably the same BuildID in hexadecimal form — defined elsewhere, confirm.
String getBuildIDHex() const;
Fix parsing symbols from resources (for shared builds) In case of shared builds the symbol can be present in multiple shared objects, and so if one SO contains the _end symbol first, then its address can be subtracted from the symbol from another SO, and this gives an overflow. Here is an example of such a symbol: $ llvm-readelf -a programs/server/libclickhouse-server-libd.so | fgrep schemata_sql 552: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size 1271: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 3627: 000000000010b480 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start 5822: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 5828: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size 5831: 000000000010b480 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start $ llvm-readelf -W -a src/TableFunctions/libclickhouse_table_functionsd.* | fgrep schemata_sql 1709: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 5107: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size 6090: 00000000001fc1b8 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start 10975: 00000000001fc1b8 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start 10976: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 10977: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size To address this, add the base address of the Elf into the resources to compare with. Follow-up for: #33134 (cc @alexey-milovidov)
2021-12-28 18:18:55 +00:00
struct ResourcesBlob
{
/// A symbol can be present in multiple shared objects;
/// base_address is used to compare only symbols from the same SO.
ElfW(Addr) base_address = 0;
Fix parsing symbols from resources (for shared builds) In case of shared builds the symbol can be present in multiple shared objects, and so if one SO contains the _end symbol first, then its address can be subtracted from the symbol from another SO, and this gives an overflow. Here is an example of such a symbol: $ llvm-readelf -a programs/server/libclickhouse-server-libd.so | fgrep schemata_sql 552: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size 1271: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 3627: 000000000010b480 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start 5822: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 5828: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size 5831: 000000000010b480 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start $ llvm-readelf -W -a src/TableFunctions/libclickhouse_table_functionsd.* | fgrep schemata_sql 1709: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 5107: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size 6090: 00000000001fc1b8 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start 10975: 00000000001fc1b8 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start 10976: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 10977: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size To address this, add the base address of the Elf into the resources to compare with. Follow-up for: #33134 (cc @alexey-milovidov)
2021-12-28 18:18:55 +00:00
/// Human-readable name of the SO.
std::string_view object_name;
/// Data blob boundaries. data() below reads only the data() pointers of these two views
/// and returns the bytes from start.data() up to end.data().
std::string_view start;
std::string_view end;
/// View over the bytes lying between the start marker and the end marker.
std::string_view data() const
{
    const char * const first = start.data();
    const char * const last = end.data();

    /// The end marker must not precede the start marker.
    assert(last >= first);

    const auto length = static_cast<size_t>(last - first);
    return std::string_view{first, length};
}
Fix parsing symbols from resources (for shared builds) In case of shared builds the symbol can be present in multiple shared objects, and so if one SO contains the _end symbol first, then its address can be subtracted from the symbol from another SO, and this gives an overflow. Here is an example of such a symbol: $ llvm-readelf -a programs/server/libclickhouse-server-libd.so | fgrep schemata_sql 552: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size 1271: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 3627: 000000000010b480 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start 5822: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 5828: 000000000010b869 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size 5831: 000000000010b480 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start $ llvm-readelf -W -a src/TableFunctions/libclickhouse_table_functionsd.* | fgrep schemata_sql 1709: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 5107: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size 6090: 00000000001fc1b8 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start 10975: 00000000001fc1b8 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_start 10976: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_end 10977: 00000000001fc5a1 0 NOTYPE GLOBAL DEFAULT 22 _binary_schemata_sql_size To address this, add the base address of the Elf into the resources to compare with. Follow-up for: #33134 (cc @alexey-milovidov)
2021-12-28 18:18:55 +00:00
};
/// Resource blobs keyed by symbol name.
using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;
2019-07-29 18:38:04 +00:00
/// Everything the index stores: symbols, objects, the build id and embedded resources.
struct Data
{
/// Returned by SymbolIndex::symbols().
std::vector<Symbol> symbols;
/// Returned by SymbolIndex::objects().
std::vector<Object> objects;
2020-06-20 09:07:05 +00:00
/// Returned (as a copy) by SymbolIndex::getBuildID().
String build_id;
/// Resources (embedded binary data) are located by symbols of the form _binary_name_start and _binary_name_end.
Resources resources;
2019-07-29 18:38:04 +00:00
};
2019-07-29 01:08:52 +00:00
private:
2019-07-29 18:38:04 +00:00
/// All indexed data; exposed read-only through the public accessors.
Data data;
2019-07-29 22:26:44 +00:00
/// Called from the constructor.
/// NOTE(review): presumably (re)builds `data` — implementation is not visible in this header.
void update();
2021-08-28 03:43:43 +00:00
/// NOTE(review): presumably returns the shared MultiVersion holder used by instance() — confirm.
static MultiVersion<SymbolIndex> & instanceImpl();
2019-07-29 01:08:52 +00:00
};
}
2019-08-21 00:48:34 +00:00
#endif