Add support for DWARF-5 (without emitting it in binaries)

ClickHouse changes to the folly parser:
- use camel_case
- add NOLINT
- avoid using folly:: (use std:: instead)
- avoid using boost:: (use std:: instead)

Note, however, that emitting DWARF-5 is not enabled by default for now
(as it was initially), because you may need a recent debugger to support
DWARF-5 correctly; to make debugging easier, let's do this later.

A good example is gdb 10: even though it looks like it should support
DWARF-5, it still produces some errors, like here [1]:

    Dwarf Error: DW_FORM_strx1 found in non-DWO CU [in module /usr/bin/clickhouse]

  [1]: https://github.com/ClickHouse/ClickHouse/pull/40772#issuecomment-1236331323

And not only does it complain; apparently it can also "activate" SDT
probes (replace "nop" with "int3"), and I believe this is what happens
here [2].

  [2]: https://github.com/ClickHouse/ClickHouse/pull/41063#issuecomment-1242992314

There you can see int3 in the case where ClickHouse got SIGTRAP:

<details>

```
    0x7f494705e093 <+1139>: jne    0x7f494705e450            ; <+2096> [inlined] update_tls_slotinfo at dl-open.c:732
    0x7f494705e099 <+1145>: testl  %r13d, %r13d
    0x7f494705e09c <+1148>: je     0x7f494705e09f            ; <+1151> at dl-open.c:744:6
    0x7f494705e09e <+1150>: int3
->  0x7f494705e09f <+1151>: movl   -0x54(%rbp), %eax
    0x7f494705e0a2 <+1154>: testl  %eax, %eax
    0x7f494705e0a4 <+1156>: jne    0x7f494705e410            ; <+2032> at dl-open.c:745:5

But if I repeat the query it does not:

    0x7ffff7fe5093 <+1139>: jne    0x7ffff7fe5450            ; <+2096> [inlined] update_tls_slotinfo at dl-open.c:732
    0x7ffff7fe5099 <+1145>: testl  %r13d, %r13d
    0x7ffff7fe509c <+1148>: je     0x7ffff7fe509f            ; <+1151> at dl-open.c:744:6
    0x7ffff7fe509e <+1150>: nop
->  0x7ffff7fe509f <+1151>: movl   -0x54(%rbp), %eax
    0x7ffff7fe50a2 <+1154>: testl  %eax, %eax
    0x7ffff7fe50a4 <+1156>: jne    0x7ffff7fe5410            ; <+2032> at dl-open.c:745:5
```

</details>

Test command was:

    clickhouse local --stacktrace -q "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'"

*P.S. I did this because I have libraries compiled with DWARF-5 (e.g. glibc), and the DWARF parser simply fails in my dev env.*

Refs: 490b287ca3
(cherry picked from commit ee5696bb32)
(cherry picked from commit e03870bc8b)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2022-08-27 18:50:09 +02:00
parent 9a0892c40c
commit 7e130aeb69
2 changed files with 899 additions and 233 deletions

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,7 @@
*/ */
/** This file was edited for ClickHouse. /** This file was edited for ClickHouse.
* Original is from folly library.
*/ */
#include <functional> #include <functional>
@ -171,8 +172,6 @@ public:
private: private:
static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset); static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset);
void init();
std::shared_ptr<const Elf> elf_; /// NOLINT std::shared_ptr<const Elf> elf_; /// NOLINT
// DWARF section made up of chunks, each prefixed with a length header. // DWARF section made up of chunks, each prefixed with a length header.
@ -228,6 +227,7 @@ private:
{ {
uint64_t name = 0; uint64_t name = 0;
uint64_t form = 0; uint64_t form = 0;
int64_t implicitConst = 0; // only set when form=DW_FORM_implicit_const
explicit operator bool() const { return name != 0 || form != 0; } explicit operator bool() const { return name != 0 || form != 0; }
}; };
@ -239,25 +239,43 @@ private:
std::variant<uint64_t, std::string_view> attr_value; std::variant<uint64_t, std::string_view> attr_value;
}; };
enum
{
DW_UT_compile = 0x01,
DW_UT_skeleton = 0x04,
};
struct CompilationUnit struct CompilationUnit
{ {
bool is64Bit; /// NOLINT bool is64Bit = false; /// NOLINT
uint8_t version; uint8_t version = 0;
uint8_t addr_size; uint8_t unit_type = DW_UT_compile; // DW_UT_compile or DW_UT_skeleton
uint8_t addr_size = 0;
// Offset in .debug_info of this compilation unit. // Offset in .debug_info of this compilation unit.
uint32_t offset; uint32_t offset = 0;
uint32_t size; uint32_t size = 0;
// Offset in .debug_info for the first DIE in this compilation unit. // Offset in .debug_info for the first DIE in this compilation unit.
uint32_t first_die; uint32_t first_die = 0;
uint64_t abbrev_offset; uint64_t abbrev_offset = 0;
// The beginning of the CU's contribution to .debug_addr
std::optional<uint64_t> addr_base; // DW_AT_addr_base (DWARF 5)
// The beginning of the offsets table (immediately following the
// header) of the CU's contribution to .debug_loclists
std::optional<uint64_t> loclists_base; // DW_AT_loclists_base (DWARF 5)
// The beginning of the offsets table (immediately following the
// header) of the CU's contribution to .debug_rnglists
std::optional<uint64_t> rnglists_base; // DW_AT_rnglists_base (DWARF 5)
// Points to the first string offset of the compilation unit's
// contribution to the .debug_str_offsets (or .debug_str_offsets.dwo) section.
std::optional<uint64_t> str_offsets_base; // DW_AT_str_offsets_base (DWARF 5)
// Only the CompilationUnit that contains the caller functions needs this cache. // Only the CompilationUnit that contains the caller functions needs this cache.
// Indexed by (abbr.code - 1) if (abbr.code - 1) < abbrCache.size(); // Indexed by (abbr.code - 1) if (abbr.code - 1) < abbrCache.size();
std::vector<DIEAbbreviation> abbr_cache; std::vector<DIEAbbreviation> abbr_cache;
}; };
static CompilationUnit getCompilationUnit(std::string_view info, uint64_t offset); /** cu must exist during the life cycle of created Die. */
/** cu must exist during the life cycle of created detail::Die. */
Die getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const; Die getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const;
bool findLocation( bool findLocation(
@ -278,16 +296,16 @@ private:
class LineNumberVM class LineNumberVM
{ {
public: public:
LineNumberVM(std::string_view data, std::string_view compilationDirectory); LineNumberVM(
std::string_view data,
std::string_view compilationDirectory,
std::string_view debugStr,
std::string_view debugLineStr);
bool findAddress(uintptr_t target, Path & file, uint64_t & line); bool findAddress(uintptr_t target, Path & file, uint64_t & line);
/** Gets full file name at given index including directory. */ /** Gets full file name at given index including directory. */
Path getFullFileName(uint64_t index) const Path getFullFileName(uint64_t index) const;
{
auto fn = getFileName(index);
return Path({}, getIncludeDirectory(fn.directoryIndex), fn.relativeName);
}
private: private:
void init(); void init();
@ -330,6 +348,8 @@ private:
bool is64Bit_; /// NOLINT bool is64Bit_; /// NOLINT
std::string_view data_; /// NOLINT std::string_view data_; /// NOLINT
std::string_view compilationDirectory_; /// NOLINT std::string_view compilationDirectory_; /// NOLINT
std::string_view debugStr_; // needed for DWARF 5 /// NOLINT
std::string_view debugLineStr_; // DWARF 5 /// NOLINT
// Header // Header
uint16_t version_; /// NOLINT uint16_t version_; /// NOLINT
@ -340,11 +360,27 @@ private:
uint8_t opcodeBase_; /// NOLINT uint8_t opcodeBase_; /// NOLINT
const uint8_t * standardOpcodeLengths_; /// NOLINT const uint8_t * standardOpcodeLengths_; /// NOLINT
std::string_view includeDirectories_; /// NOLINT // 6.2.4 The Line Number Program Header.
size_t includeDirectoryCount_; /// NOLINT struct
{
size_t includeDirectoryCount;
std::string_view includeDirectories;
size_t fileNameCount;
std::string_view fileNames;
} v4_;
std::string_view fileNames_; /// NOLINT struct
size_t fileNameCount_; /// NOLINT {
uint8_t directoryEntryFormatCount;
std::string_view directoryEntryFormat;
uint64_t directoriesCount;
std::string_view directories;
uint8_t fileNameEntryFormatCount;
std::string_view fileNameEntryFormat;
uint64_t fileNamesCount;
std::string_view fileNames;
} v5_;
// State machine registers // State machine registers
uint64_t address_; /// NOLINT uint64_t address_; /// NOLINT
@ -397,20 +433,26 @@ private:
*/ */
size_t forEachAttribute(const CompilationUnit & cu, const Die & die, std::function<bool(const Attribute & die)> f) const; size_t forEachAttribute(const CompilationUnit & cu, const Die & die, std::function<bool(const Attribute & die)> f) const;
Attribute readAttribute(const Die & die, AttributeSpec spec, std::string_view & info) const; Attribute readAttribute(
const CompilationUnit & cu,
const Die & die,
AttributeSpec spec,
std::string_view & info) const;
// Read one attribute <name, form> pair, remove_prefix sp; returns <0, 0> at end. // Read one attribute <name, form> pair, remove_prefix sp; returns <0, 0> at end.
static AttributeSpec readAttributeSpec(std::string_view & sp); static AttributeSpec readAttributeSpec(std::string_view & sp);
// Read one attribute value, remove_prefix sp // Read one attribute value, remove_prefix sp
using AttributeValue = std::variant<uint64_t, std::string_view>; using AttributeValue = std::variant<uint64_t, std::string_view>;
AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const; AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64_bit) const;
// Get an ELF section by name, return true if found // Get an ELF section by name, return true if found
bool getSection(const char * name, std::string_view * section) const; std::string_view getSection(const char * name) const;
CompilationUnit getCompilationUnit(uint64_t offset) const;
// Finds the Compilation Unit starting at offset.
CompilationUnit findCompilationUnit(uint64_t targetOffset) const;
// Get a string from the .debug_str section
std::string_view getStringFromStringSection(uint64_t offset) const;
template <class T> template <class T>
std::optional<T> getAttribute(const CompilationUnit & cu, const Die & die, uint64_t attr_name) const std::optional<T> getAttribute(const CompilationUnit & cu, const Die & die, uint64_t attr_name) const
@ -429,17 +471,24 @@ private:
} }
// Check if the given address is in the range list at the given offset in .debug_ranges. // Check if the given address is in the range list at the given offset in .debug_ranges.
bool isAddrInRangeList(uint64_t address, std::optional<uint64_t> base_addr, size_t offset, uint8_t addr_size) const; bool isAddrInRangeList(
const CompilationUnit & cu,
uint64_t address,
std::optional<uint64_t> base_addr,
size_t offset,
uint8_t addr_size) const;
// Finds the Compilation Unit starting at offset.
static CompilationUnit findCompilationUnit(std::string_view info, uint64_t targetOffset);
std::string_view info_; // .debug_info /// NOLINT
std::string_view abbrev_; // .debug_abbrev /// NOLINT std::string_view abbrev_; // .debug_abbrev /// NOLINT
std::string_view addr_; // .debug_addr (DWARF 5) /// NOLINT
std::string_view aranges_; // .debug_aranges /// NOLINT std::string_view aranges_; // .debug_aranges /// NOLINT
std::string_view info_; // .debug_info /// NOLINT
std::string_view line_; // .debug_line /// NOLINT std::string_view line_; // .debug_line /// NOLINT
std::string_view strings_; // .debug_str /// NOLINT std::string_view line_str_; // .debug_line_str (DWARF 5) /// NOLINT
std::string_view loclists_; // .debug_loclists (DWARF 5) /// NOLINT
std::string_view ranges_; // .debug_ranges /// NOLINT std::string_view ranges_; // .debug_ranges /// NOLINT
std::string_view rnglists_; // .debug_rnglists (DWARF 5) /// NOLINT
std::string_view str_; // .debug_str /// NOLINT
std::string_view str_offsets_; // .debug_str_offsets (DWARF 5) /// NOLINT
}; };
} }