From 69387acffa114a824dc76c7b9e77643694ce60e4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 29 Aug 2022 14:25:53 +0300 Subject: [PATCH] Revert "Support for DWARF-5 in in house DWARF parser" --- CMakeLists.txt | 3 +- src/Common/Dwarf.cpp | 985 ++++++++----------------------------------- src/Common/Dwarf.h | 141 ++----- 3 files changed, 232 insertions(+), 897 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bbea1ef3f13..dbbec2a600d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -368,7 +368,8 @@ endif() set (COMPILER_FLAGS "${COMPILER_FLAGS}") -set (DEBUG_INFO_FLAGS "-g") +# Our built-in unwinder only supports DWARF version up to 4. +set (DEBUG_INFO_FLAGS "-g -gdwarf-4") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp index a912c49d2ae..c8ffe7d46a8 100644 --- a/src/Common/Dwarf.cpp +++ b/src/Common/Dwarf.cpp @@ -26,7 +26,6 @@ #include #define DW_CHILDREN_no 0 - #define DW_FORM_addr 1 #define DW_FORM_block1 0x0a #define DW_FORM_block2 3 @@ -52,25 +51,6 @@ #define DW_FORM_string 0x08 #define DW_FORM_strp 0x0e #define DW_FORM_indirect 0x16 -#define DW_FORM_strx 0x1a -#define DW_FORM_addrx 0x1b -#define DW_FORM_ref_sup4 0x1c -#define DW_FORM_strp_sup 0x1d -#define DW_FORM_data16 0x1e -#define DW_FORM_line_strp 0x1f -#define DW_FORM_implicit_const 0x21 -#define DW_FORM_rnglistx 0x23 -#define DW_FORM_loclistx 0x22 -#define DW_FORM_ref_sup8 0x24 -#define DW_FORM_strx1 0x25 -#define DW_FORM_strx2 0x26 -#define DW_FORM_strx3 0x27 -#define DW_FORM_strx4 0x28 -#define DW_FORM_addrx1 0x29 -#define DW_FORM_addrx2 0x2a -#define DW_FORM_addrx3 0x2b -#define DW_FORM_addrx4 0x2c - #define DW_TAG_compile_unit 0x11 #define DW_TAG_subprogram 0x2e #define DW_TAG_try_block 0x32 @@ -78,7 +58,6 @@ #define DW_TAG_entry_point 0x03 #define DW_TAG_common_block 0x1a #define DW_TAG_lexical_block 0x0b - #define DW_AT_stmt_list 0x10 #define DW_AT_comp_dir 0x1b #define DW_AT_name 0x03 @@ -91,13 +70,6 @@ #define DW_AT_call_file 0x58 #define DW_AT_linkage_name 0x6e #define DW_AT_specification 0x47 -#define DW_AT_str_offsets_base 0x72 -#define DW_AT_addr_base 0x73 -#define DW_AT_rnglists_base 0x74 -#define DW_AT_loclists_base 0x8c -#define DW_AT_GNU_ranges_base 0x2132 -#define DW_AT_GNU_addr_base 0x2133 - #define DW_LNE_define_file 0x03 #define DW_LNS_copy 0x01 #define DW_LNS_advance_pc 0x02 @@ -115,21 +87,6 @@ #define DW_LNE_set_address 0x02 #define DW_LNE_set_discriminator 0x04 -#define DW_LNCT_path 0x1 -#define DW_LNCT_directory_index 0x2 -#define DW_LNCT_timestamp 0x3 -#define DW_LNCT_size 0x4 -#define DW_LNCT_MD5 0x5 - -#define DW_RLE_end_of_list 0x0 -#define DW_RLE_base_addressx 0x1 -#define DW_RLE_startx_endx 0x2 -#define DW_RLE_startx_length 0x3 -#define DW_RLE_offset_pair 0x4 -#define DW_RLE_base_address 0x5 -#define DW_RLE_start_end 0x6 -#define DW_RLE_start_length 0x7 - namespace DB { @@ -140,31 +97,9 @@ namespace ErrorCodes } -Dwarf::Dwarf(const std::shared_ptr & elf) - : elf_(elf) - , abbrev_(getSection(".debug_abbrev")) - , addr_(getSection(".debug_addr")) - , aranges_(getSection(".debug_aranges")) - , info_(getSection(".debug_info")) - , line_(getSection(".debug_line")) - , line_str_(getSection(".debug_line_str")) - , loclists_(getSection(".debug_loclists")) - , ranges_(getSection(".debug_ranges")) - , rnglists_(getSection(".debug_rnglists")) - , str_(getSection(".debug_str")) - , str_offsets_(getSection(".debug_str_offsets")) +Dwarf::Dwarf(const std::shared_ptr & elf) : elf_(elf) { - // Optional sections: - // - debugAranges_: for fast address range lookup. - // If missing .debug_info can be used - but it's much slower (linear - // scan). - // - debugRanges_ (DWARF 4) / debugRnglists_ (DWARF 5): non-contiguous - // address ranges of debugging information entries. - // Used for inline function address lookup. - if (info_.empty() || abbrev_.empty() || line_.empty() || str_.empty()) - { - elf_ = nullptr; - } + init(); } Dwarf::Section::Section(std::string_view d) : is64_bit(false), data(d) @@ -172,7 +107,7 @@ Dwarf::Section::Section(std::string_view d) : is64_bit(false), data(d) } -#define SAFE_CHECK(cond, ...) do { if (!(cond)) throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, __VA_ARGS__); } while (false) +#define SAFE_CHECK(cond, message) do { if (!(cond)) throw Exception(message, ErrorCodes::CANNOT_PARSE_DWARF); } while (false) namespace @@ -189,24 +124,13 @@ template requires std::is_trivial_v && std::is_standard_layout_v T read(std::string_view & sp) { - SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), sp.size()); + SAFE_CHECK(sp.size() >= sizeof(T), fmt::format("underflow: expected bytes {}, got bytes {}", sizeof(T), sp.size())); T x; memcpy(&x, sp.data(), sizeof(T)); sp.remove_prefix(sizeof(T)); return x; } -// Read (bitwise) an unsigned number of N bytes (N in 1, 2, 3, 4). -template -uint64_t readU64(std::string_view & sp) -{ - SAFE_CHECK(sp.size() >= N, "underflow"); - uint64_t x = 0; - memcpy(&x, sp.data(), N); - sp.remove_prefix(N); - return x; -} - // Read ULEB (unsigned) varint value; algorithm from the DWARF spec uint64_t readULEB(std::string_view & sp, uint8_t & shift, uint8_t & val) { @@ -244,9 +168,9 @@ int64_t readSLEB(std::string_view & sp) } // Read a value of "section offset" type, which may be 4 or 8 bytes -uint64_t readOffset(std::string_view & sp, bool is64_bit) +uint64_t readOffset(std::string_view & sp, bool is64Bit) { - return is64_bit ? read(sp) : read(sp); + return is64Bit ? read(sp) : read(sp); } // Read "len" bytes @@ -268,15 +192,6 @@ std::string_view readNullTerminated(std::string_view & sp) return ret; } -// Get a string from the section -std::string_view getStringFromStringSection(std::string_view section, uint64_t offset) -{ - SAFE_CHECK(offset < section.size(), "invalid section offset"); - std::string_view sp(section); - sp.remove_prefix(offset); - return readNullTerminated(sp); -} - // Skip over padding until sp.data() - start is a multiple of alignment void skipPadding(std::string_view & sp, const char * start, size_t alignment) { @@ -444,18 +359,38 @@ bool Dwarf::Section::next(std::string_view & chunk) return true; } -std::string_view Dwarf::getSection(const char * name) const +bool Dwarf::getSection(const char * name, std::string_view * section) const { std::optional elf_section = elf_->findSectionByName(name); if (!elf_section) - return {}; + return false; #ifdef SHF_COMPRESSED if (elf_section->header.sh_flags & SHF_COMPRESSED) - return {}; + return false; #endif - return { elf_section->begin(), elf_section->size()}; + *section = { elf_section->begin(), elf_section->size()}; + return true; +} + +void Dwarf::init() +{ + // Make sure that all .debug_* sections exist + if (!getSection(".debug_info", &info_) + || !getSection(".debug_abbrev", &abbrev_) + || !getSection(".debug_line", &line_) + || !getSection(".debug_str", &strings_)) + { + elf_.reset(); + return; + } + + // Optional: fast address range lookup. If missing .debug_info can + // be used - but it's much slower (linear scan). + getSection(".debug_aranges", &aranges_); + + getSection(".debug_ranges", &ranges_); } // static @@ -538,7 +473,7 @@ size_t Dwarf::forEachAttribute(const CompilationUnit & cu, const Die & die, std: auto values = std::string_view{info_.data() + die.offset + die.attr_offset, cu.offset + cu.size - die.offset - die.attr_offset}; while (auto spec = readAttributeSpec(attrs)) { - auto attr = readAttribute(cu, die, spec, values); + auto attr = readAttribute(die, spec, values); if (!f(attr)) { return static_cast(-1); @@ -547,49 +482,8 @@ size_t Dwarf::forEachAttribute(const CompilationUnit & cu, const Die & die, std: return values.data() - info_.data(); } -Dwarf::Attribute Dwarf::readAttribute(const CompilationUnit & cu, - const Die & die, - AttributeSpec spec, - std::string_view & info) const +Dwarf::Attribute Dwarf::readAttribute(const Die & die, AttributeSpec spec, std::string_view & info) const { - // DWARF 5 introduces new FORMs whose values are relative to some base attrs: - // DW_AT_str_offsets_base, DW_AT_rnglists_base, DW_AT_addr_base. - // Debug Fission DWARF 4 uses GNU DW_AT_GNU_ranges_base & DW_AT_GNU_addr_base. - // - // The order in which attributes appear in a CU is not defined. - // The DW_AT_*_base attrs may appear after attributes that need them. - // The DW_AT_*_base attrs are CU specific; so we read them just after - // reading the CU header. During this first pass return empty values - // when encountering a FORM that depends on DW_AT_*_base. - auto get_string_using_offset_table = [&](uint64_t index) - { - if (!cu.str_offsets_base.has_value()) - { - return std::string_view(); - } - // DWARF 5: 7.26 String Offsets Table - // The DW_AT_str_offsets_base attribute points to the first entry following - // the header. The entries are indexed sequentially from this base entry, - // starting from 0. - auto sp = str_offsets_.substr(*cu.str_offsets_base + index * (cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t))); - uint64_t str_offset = readOffset(sp, cu.is64Bit); - return getStringFromStringSection(str_, str_offset); - }; - - auto read_debug_addr = [&](uint64_t index) - { - if (!cu.addr_base.has_value()) - { - return uint64_t(0); - } - // DWARF 5: 7.27 Address Table - // The DW_AT_addr_base attribute points to the first entry following the - // header. The entries are indexed sequentially from this base entry, - // starting from 0. - auto sp = addr_.substr(*cu.addr_base + index * sizeof(uint64_t)); - return read(sp); - }; - switch (spec.form) { case DW_FORM_addr: @@ -623,7 +517,7 @@ Dwarf::Attribute Dwarf::readAttribute(const CompilationUnit & cu, case DW_FORM_ref_sig8: return {spec, die, read(info)}; case DW_FORM_sdata: - return {spec, die, static_cast(readSLEB(info))}; + return {spec, die, uint64_t(readSLEB(info))}; case DW_FORM_udata: [[fallthrough]]; case DW_FORM_ref_udata: @@ -631,7 +525,7 @@ Dwarf::Attribute Dwarf::readAttribute(const CompilationUnit & cu, case DW_FORM_flag: return {spec, die, read(info)}; case DW_FORM_flag_present: - return {spec, die, 1ULL}; + return {spec, die, 1u}; case DW_FORM_sec_offset: [[fallthrough]]; case DW_FORM_ref_addr: @@ -639,215 +533,49 @@ Dwarf::Attribute Dwarf::readAttribute(const CompilationUnit & cu, case DW_FORM_string: return {spec, die, readNullTerminated(info)}; case DW_FORM_strp: - return {spec, die, getStringFromStringSection(str_, readOffset(info, die.is64Bit))}; + return {spec, die, getStringFromStringSection(readOffset(info, die.is64Bit))}; case DW_FORM_indirect: // form is explicitly specified // Update spec with the actual FORM. spec.form = readULEB(info); - return readAttribute(cu, die, spec, info); - - // DWARF 5: - case DW_FORM_implicit_const: // form is explicitly specified - // For attributes with this form, the attribute specification contains a - // third part, which is a signed LEB128 number. The value of this number - // is used as the value of the attribute, and no value is stored in the - // .debug_info section. - return {spec, die, static_cast(spec.implicitConst)}; - - case DW_FORM_addrx: - return {spec, die, read_debug_addr(readULEB(info))}; - case DW_FORM_addrx1: - return {spec, die, read_debug_addr(readU64<1>(info))}; - case DW_FORM_addrx2: - return {spec, die, read_debug_addr(readU64<2>(info))}; - case DW_FORM_addrx3: - return {spec, die, read_debug_addr(readU64<3>(info))}; - case DW_FORM_addrx4: - return {spec, die, read_debug_addr(readU64<4>(info))}; - - case DW_FORM_line_strp: - return {spec, die, getStringFromStringSection(line_str_, readOffset(info, die.is64Bit))}; - - case DW_FORM_strx: - return {spec, die, get_string_using_offset_table(readULEB(info))}; - case DW_FORM_strx1: - return {spec, die, get_string_using_offset_table(readU64<1>(info))}; - case DW_FORM_strx2: - return {spec, die, get_string_using_offset_table(readU64<2>(info))}; - case DW_FORM_strx3: - return {spec, die, get_string_using_offset_table(readU64<3>(info))}; - case DW_FORM_strx4: - return {spec, die, get_string_using_offset_table(readU64<4>(info))}; - - case DW_FORM_rnglistx: { - auto index = readULEB(info); - if (!cu.rnglists_base.has_value()) - { - return {spec, die, 0ULL}; - } - const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t); - auto sp = rnglists_.substr(*cu.rnglists_base + index * offset_size); - auto offset = readOffset(sp, cu.is64Bit); - return {spec, die, *cu.rnglists_base + offset}; - } - - case DW_FORM_loclistx: { - auto index = readULEB(info); - if (!cu.loclists_base.has_value()) - { - return {spec, die, 0ULL}; - } - const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t); - auto sp = loclists_.substr(*cu.loclists_base + index * offset_size); - auto offset = readOffset(sp, cu.is64Bit); - return {spec, die, *cu.loclists_base + offset}; - } - - case DW_FORM_data16: - return {spec, die, readBytes(info, 16)}; - - case DW_FORM_ref_sup4: - case DW_FORM_ref_sup8: - case DW_FORM_strp_sup: - SAFE_CHECK(false, "Unexpected DWARF5 supplimentary object files"); - + return readAttribute(die, spec, info); default: SAFE_CHECK(false, "invalid attribute form"); } - return {spec, die, 0ULL}; + + return {spec, die, 0u}; } // static Dwarf::AttributeSpec Dwarf::readAttributeSpec(std::string_view & sp) { - Dwarf::AttributeSpec spec; - spec.name = readULEB(sp); - spec.form = readULEB(sp); - if (spec.form == DW_FORM_implicit_const) - { - spec.implicitConst = readSLEB(sp); - } - return spec; + return {readULEB(sp), readULEB(sp)}; } -Dwarf::CompilationUnit Dwarf::getCompilationUnit(uint64_t offset) const +// static +Dwarf::CompilationUnit Dwarf::getCompilationUnit(std::string_view info, uint64_t offset) { - // SAFE_CHECK(offset < info_.size(), "unexpected offset"); + SAFE_CHECK(offset < info.size(), "unexpected offset"); CompilationUnit cu; - std::string_view chunk(info_); + std::string_view chunk(info); cu.offset = offset; chunk.remove_prefix(offset); - // 1) unit_length auto initial_length = read(chunk); cu.is64Bit = (initial_length == uint32_t(-1)); cu.size = cu.is64Bit ? read(chunk) : initial_length; SAFE_CHECK(cu.size <= chunk.size(), "invalid chunk size"); cu.size += cu.is64Bit ? 12 : 4; - // 2) version cu.version = read(chunk); - SAFE_CHECK(cu.version >= 2 && cu.version <= 5, "invalid info version"); + SAFE_CHECK(cu.version >= 2 && cu.version <= 4, "invalid info version"); + cu.abbrev_offset = readOffset(chunk, cu.is64Bit); + cu.addr_size = read(chunk); + SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); - if (cu.version == 5) - { - // DWARF5: 7.5.1.1 Full and Partial Compilation Unit Headers - // 3) unit_type (new DWARF 5) - cu.unit_type = read(chunk); - if (cu.unit_type != DW_UT_compile && cu.unit_type != DW_UT_skeleton) - { - return cu; - } - // 4) address_size - cu.addr_size = read(chunk); - SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); - - // 5) debug_abbrev_offset - cu.abbrev_offset = readOffset(chunk, cu.is64Bit); - - if (cu.unit_type == DW_UT_skeleton) - { - // 6) dwo_id - read(chunk); - } - } - else - { - // DWARF4 has a single type of unit in .debug_info - cu.unit_type = DW_UT_compile; - // 3) debug_abbrev_offset - cu.abbrev_offset = readOffset(chunk, cu.is64Bit); - // 4) address_size - cu.addr_size = read(chunk); - SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); - } - cu.first_die = chunk.data() - info_.data(); - if (cu.version < 5) - { - return cu; - } - - Die die = getDieAtOffset(cu, cu.first_die); - if (die.abbr.tag != DW_TAG_compile_unit) - { - return cu; - } - - // Read the DW_AT_*_base attributes. - // Attributes which use FORMs relative to these base attrs - // will not have valid values during this first pass! - forEachAttribute( - cu, - die, - [&](const Attribute & attr) - { - switch (attr.spec.name) - { - case DW_AT_addr_base: - case DW_AT_GNU_addr_base: - cu.addr_base = std::get(attr.attr_value); - break; - case DW_AT_loclists_base: - cu.loclists_base = std::get(attr.attr_value); - break; - case DW_AT_rnglists_base: - case DW_AT_GNU_ranges_base: - cu.rnglists_base = std::get(attr.attr_value); - break; - case DW_AT_str_offsets_base: - cu.str_offsets_base = std::get(attr.attr_value); - break; - } - return true; // continue forEachAttribute - }); + cu.first_die = chunk.data() - info.data(); return cu; } -// Finds the Compilation Unit starting at offset. -Dwarf::CompilationUnit Dwarf::findCompilationUnit(uint64_t targetOffset) const -{ - // SAFE_CHECK(targetOffset < info_.size(), "unexpected target address"); - uint64_t offset = 0; - while (offset < info_.size()) - { - std::string_view chunk(info_); - chunk.remove_prefix(offset); - - auto initial_length = read(chunk); - auto is64_bit = (initial_length == static_cast(-1)); - auto size = is64_bit ? read(chunk) : initial_length; - SAFE_CHECK(size <= chunk.size(), "invalid chunk size"); - size += is64_bit ? 12 : 4; - - if (offset + size > targetOffset) - { - break; - } - offset += size; - } - return getCompilationUnit(offset); -} - - Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) const { // Linear search in the .debug_abbrev section, starting at offset @@ -862,7 +590,7 @@ Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) co SAFE_CHECK(false, "could not find abbreviation code"); } -Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t form, bool is64_bit) const +Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const { switch (form) { @@ -900,18 +628,26 @@ Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t return uint64_t(1); case DW_FORM_sec_offset: [[fallthrough]]; case DW_FORM_ref_addr: - return readOffset(sp, is64_bit); + return readOffset(sp, is64Bit); case DW_FORM_string: return readNullTerminated(sp); case DW_FORM_strp: - return getStringFromStringSection(str_, readOffset(sp, is64_bit)); + return getStringFromStringSection(readOffset(sp, is64Bit)); case DW_FORM_indirect: // form is explicitly specified - return readAttributeValue(sp, readULEB(sp), is64_bit); + return readAttributeValue(sp, readULEB(sp), is64Bit); default: SAFE_CHECK(false, "invalid attribute form"); } } +std::string_view Dwarf::getStringFromStringSection(uint64_t offset) const +{ + SAFE_CHECK(offset < strings_.size(), "invalid strp offset"); + std::string_view sp(strings_); + sp.remove_prefix(offset); + return readNullTerminated(sp); +} + /** * Find @address in .debug_aranges and return the offset in * .debug_info for compilation unit to which this address belongs. @@ -988,7 +724,7 @@ bool Dwarf::findLocation( // Partial compilation unit (DW_TAG_partial_unit) is not supported. SAFE_CHECK(die.abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry"); - // Offset in .debug_line for the line number VM program for this CU + // Read attributes, extracting the few we care about std::optional line_offset = 0; std::string_view compilation_directory; std::optional main_file_name; @@ -1036,7 +772,7 @@ bool Dwarf::findLocation( std::string_view line_section(line_); line_section.remove_prefix(*line_offset); - LineNumberVM line_vm(line_section, compilation_directory, str_, line_str_); + LineNumberVM line_vm(line_section, compilation_directory); // Execute line number VM program to find file and line info.has_file_and_line = line_vm.findAddress(address, info.file, info.line); @@ -1127,11 +863,8 @@ bool Dwarf::findLocation( return info.has_file_and_line; } -void Dwarf::findSubProgramDieForAddress(const CompilationUnit & cu, - const Die & die, - uint64_t address, - std::optional base_addr_cu, - Die & subprogram) const +void Dwarf::findSubProgramDieForAddress( + const CompilationUnit & cu, const Die & die, uint64_t address, std::optional base_addr_cu, Die & subprogram) const { forEachChild(cu, die, [&](const Die & child_die) { @@ -1152,14 +885,9 @@ void Dwarf::findSubProgramDieForAddress(const CompilationUnit & cu, low_pc = std::get(attr.attr_value); break; case DW_AT_high_pc: - // The value of the DW_AT_high_pc attribute can be - // an address (DW_FORM_addr*) or an offset (DW_FORM_data*). - is_high_pc_addr = attr.spec.form == DW_FORM_addr || // - attr.spec.form == DW_FORM_addrx || // - attr.spec.form == DW_FORM_addrx1 || // - attr.spec.form == DW_FORM_addrx2 || // - attr.spec.form == DW_FORM_addrx3 || // - attr.spec.form == DW_FORM_addrx4; + // Value of DW_AT_high_pc attribute can be an address + // (DW_FORM_addr) or an offset (DW_FORM_data). + is_high_pc_addr = (attr.spec.form == DW_FORM_addr); high_pc = std::get(attr.attr_value); break; } @@ -1168,7 +896,7 @@ void Dwarf::findSubProgramDieForAddress(const CompilationUnit & cu, }); bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc && (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc)); - bool range_match = range_offset && isAddrInRangeList(cu, address, base_addr_cu, range_offset.value(), cu.addr_size); + bool range_match = range_offset && isAddrInRangeList(address, base_addr_cu, range_offset.value(), cu.addr_size); if (pc_match || range_match) { subprogram = child_die; @@ -1243,14 +971,9 @@ void Dwarf::findInlinedSubroutineDieForAddress( low_pc = std::get(attr.attr_value); break; case DW_AT_high_pc: - // The value of the DW_AT_high_pc attribute can be - // an address (DW_FORM_addr*) or an offset (DW_FORM_data*). - is_high_pc_addr = attr.spec.form == DW_FORM_addr || // - attr.spec.form == DW_FORM_addrx || // - attr.spec.form == DW_FORM_addrx1 || // - attr.spec.form == DW_FORM_addrx2 || // - attr.spec.form == DW_FORM_addrx3 || // - attr.spec.form == DW_FORM_addrx4; + // Value of DW_AT_high_pc attribute can be an address + // (DW_FORM_addr) or an offset (DW_FORM_data). + is_high_pc_addr = (attr.spec.form == DW_FORM_addr); high_pc = std::get(attr.attr_value); break; case DW_AT_abstract_origin: @@ -1282,7 +1005,7 @@ void Dwarf::findInlinedSubroutineDieForAddress( // TODO: Support relocated address which requires lookup in relocation map. bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc && (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc)); - bool range_match = range_offset && isAddrInRangeList(cu, address, base_addr_cu, range_offset.value(), cu.addr_size); + bool range_match = range_offset && isAddrInRangeList(address, base_addr_cu, range_offset.value(), cu.addr_size); if (!pc_match && !range_match) { // Address doesn't match. Keep searching other children. @@ -1384,7 +1107,7 @@ void Dwarf::findInlinedSubroutineDieForAddress( // Not applicable for DW_AT_abstract_origin. location.name = (*abstract_origin_ref_type != DW_FORM_ref_addr) ? get_function_name(cu, cu.offset + *abstract_origin) - : get_function_name(findCompilationUnit(*abstract_origin), *abstract_origin); + : get_function_name(findCompilationUnit(info_, *abstract_origin), *abstract_origin); /// FIXME: see comment above if (die_for_inline_broken) @@ -1421,11 +1144,7 @@ bool Dwarf::findAddress( if (findDebugInfoOffset(address, aranges_, offset)) { // Read compilation unit header from .debug_info - auto unit = getCompilationUnit(offset); - if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) - { - return false; - } + auto unit = getCompilationUnit(info_, offset); findLocation(address, mode, unit, locationInfo, inline_frames); return locationInfo.has_file_and_line; } @@ -1449,160 +1168,84 @@ bool Dwarf::findAddress( uint64_t offset = 0; while (offset < info_.size() && !locationInfo.has_file_and_line) { - auto unit = getCompilationUnit(offset); + auto unit = getCompilationUnit(info_, offset); offset += unit.size; - if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) - { - continue; - } findLocation(address, mode, unit, locationInfo, inline_frames); } return locationInfo.has_file_and_line; } -bool Dwarf::isAddrInRangeList(const CompilationUnit & cu, - uint64_t address, - std::optional base_addr, - size_t offset, - uint8_t addr_size) const +bool Dwarf::isAddrInRangeList(uint64_t address, std::optional base_addr, size_t offset, uint8_t addr_size) const { SAFE_CHECK(addr_size == 4 || addr_size == 8, "wrong address size"); - if (cu.version <= 4 && !ranges_.empty()) + if (ranges_.empty()) { - const bool is64_bit_addr = addr_size == 8; - std::string_view sp = ranges_; - sp.remove_prefix(offset); - const uint64_t max_addr = is64_bit_addr ? std::numeric_limits::max() : std::numeric_limits::max(); - while (!sp.empty()) - { - uint64_t begin = readOffset(sp, is64_bit_addr); - uint64_t end = readOffset(sp, is64_bit_addr); - // The range list entry is a base address selection entry. - if (begin == max_addr) - { - base_addr = end; - continue; - } - // The range list entry is an end of list entry. - if (begin == 0 && end == 0) - { - break; - } + return false; + } - // Check if the given address falls in the range list entry. - // 2.17.3 Non-Contiguous Address Ranges - // The applicable base address of a range list entry is determined by the - // closest preceding base address selection entry (see below) in the same - // range list. If there is no such selection entry, then the applicable - // base address defaults to the base address of the compilation unit. - if (base_addr && address >= begin + *base_addr && address < end + *base_addr) - { - return true; - } + const bool is_64bit_addr = addr_size == 8; + std::string_view sp = ranges_; + sp.remove_prefix(offset); + const uint64_t max_addr = is_64bit_addr ? std::numeric_limits::max() : std::numeric_limits::max(); + while (!sp.empty()) + { + uint64_t begin = readOffset(sp, is_64bit_addr); + uint64_t end = readOffset(sp, is_64bit_addr); + // The range list entry is a base address selection entry. + if (begin == max_addr) + { + base_addr = end; + continue; + } + // The range list entry is an end of list entry. + if (begin == 0 && end == 0) + { + break; + } + // Check if the given address falls in the range list entry. + // 2.17.3 Non-Contiguous Address Ranges + // The applicable base address of a range list entry is determined by the + // closest preceding base address selection entry (see below) in the same + // range list. If there is no such selection entry, then the applicable base + // address defaults to the base address of the compilation unit. + if (base_addr && address >= begin + *base_addr && address < end + *base_addr) + { + return true; } } - if (cu.version == 5 && !rnglists_.empty() && cu.addr_base.has_value()) - { - auto rnglists = rnglists_; - rnglists.remove_prefix(offset); - - while (!rnglists.empty()) - { - auto kind = read(rnglists); - switch (kind) - { - case DW_RLE_end_of_list: - return false; - case DW_RLE_base_addressx: { - auto index = readULEB(rnglists); - auto sp = addr_.substr(*cu.addr_base + index * sizeof(uint64_t)); - base_addr = read(sp); - } - break; - - case DW_RLE_startx_endx: { - auto index_start = readULEB(rnglists); - auto index_end = readULEB(rnglists); - auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t)); - auto start = read(sp_start); - - auto sp_end = addr_.substr(*cu.addr_base + index_end * sizeof(uint64_t)); - auto end = read(sp_end); - if (address >= start && address < end) - { - return true; - } - } - break; - - case DW_RLE_startx_length: { - auto index_start = readULEB(rnglists); - auto length = readULEB(rnglists); - auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t)); - auto start = read(sp_start); - - auto sp_end = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t) + length); - auto end = read(sp_end); - if (start != end && address >= start && address < end) - { - return true; - } - } - break; - - case DW_RLE_offset_pair: { - auto offset_start = readULEB(rnglists); - auto offset_end = readULEB(rnglists); - if (base_addr && address >= (*base_addr + offset_start) && address < (*base_addr + offset_end)) - { - return true; - } - } - break; - - case DW_RLE_base_address: - base_addr = read(rnglists); - break; - - case DW_RLE_start_end: { - uint64_t start = read(rnglists); - uint64_t end = read(rnglists); - if (address >= start && address < end) - { - return true; - } - } - break; - - case DW_RLE_start_length: { - uint64_t start = read(rnglists); - uint64_t end = start + readULEB(rnglists); - if (address >= start && address < end) - { - return true; - } - } - break; - - default: - SAFE_CHECK(false, "Unexpected debug_rnglists entry kind"); - } - } - } return false; } +// static +Dwarf::CompilationUnit Dwarf::findCompilationUnit(std::string_view info, uint64_t targetOffset) +{ + SAFE_CHECK(targetOffset < info.size(), "unexpected target address"); + uint64_t offset = 0; + while (offset < info.size()) + { + std::string_view chunk(info); + chunk.remove_prefix(offset); -Dwarf::LineNumberVM::LineNumberVM( - std::string_view data, - std::string_view compilationDirectory, - std::string_view debugStr, - std::string_view debugLineStr) + auto initial_length = read(chunk); + auto is_64bit = (initial_length == uint32_t(-1)); + auto size = is_64bit ? read(chunk) : initial_length; + SAFE_CHECK(size <= chunk.size(), "invalid chunk size"); + size += is_64bit ? 12 : 4; + + if (offset + size > targetOffset) + { + break; + } + offset += size; + } + return getCompilationUnit(info, offset); +} + + +Dwarf::LineNumberVM::LineNumberVM(std::string_view data, std::string_view compilationDirectory) : compilationDirectory_(compilationDirectory) - , debugStr_(debugStr) - , debugLineStr_(debugLineStr) { Section section(data); SAFE_CHECK(section.next(data_), "invalid line number VM"); @@ -1626,154 +1269,17 @@ void Dwarf::LineNumberVM::reset() discriminator_ = 0; } -struct LineNumberAttribute -{ - uint64_t content_type_code; - uint64_t form_code; - std::variant attr_value; -}; - -LineNumberAttribute readLineNumberAttribute( - bool is64_bit, std::string_view & format, std::string_view & entries, std::string_view debugStr, std::string_view debugLineStr) -{ - uint64_t content_type_code = readULEB(format); - uint64_t form_code = readULEB(format); - std::variant attr_value; - - switch (content_type_code) - { - case DW_LNCT_path: { - switch (form_code) - { - case DW_FORM_string: - attr_value = readNullTerminated(entries); - break; - case DW_FORM_line_strp: { - auto off = readOffset(entries, is64_bit); - attr_value = getStringFromStringSection(debugLineStr, off); - } - break; - case DW_FORM_strp: - attr_value = getStringFromStringSection(debugStr, readOffset(entries, is64_bit)); - break; - case DW_FORM_strp_sup: - SAFE_CHECK(false, "Unexpected DW_FORM_strp_sup"); - break; - default: - SAFE_CHECK(false, "Unexpected form for DW_LNCT_path"); - break; - } - } - break; - - case DW_LNCT_directory_index: { - switch (form_code) - { - case DW_FORM_data1: - attr_value = read(entries); - break; - case DW_FORM_data2: - attr_value = read(entries); - break; - case DW_FORM_udata: - attr_value = readULEB(entries); - break; - default: - SAFE_CHECK(false, "Unexpected form for DW_LNCT_directory_index"); - break; - } - } - break; - - case DW_LNCT_timestamp: { - switch (form_code) - { - case DW_FORM_udata: - attr_value = readULEB(entries); - break; - case DW_FORM_data4: - attr_value = read(entries); - break; - case DW_FORM_data8: - attr_value = read(entries); - break; - case DW_FORM_block: - attr_value = readBytes(entries, readULEB(entries)); - break; - default: - SAFE_CHECK(false, "Unexpected form for DW_LNCT_timestamp"); - } - } - break; - - case DW_LNCT_size: { - switch (form_code) - { - case DW_FORM_udata: - attr_value = readULEB(entries); - break; - case DW_FORM_data1: - attr_value = read(entries); - break; - case DW_FORM_data2: - attr_value = read(entries); - break; - case DW_FORM_data4: - attr_value = read(entries); - break; - case DW_FORM_data8: - attr_value = read(entries); - break; - default: - SAFE_CHECK(false, "Unexpected form for DW_LNCT_size"); - break; - } - } - break; - - case DW_LNCT_MD5: { - switch (form_code) - { - case DW_FORM_data16: - attr_value = readBytes(entries, 16); - break; - default: - SAFE_CHECK(false, "Unexpected form for DW_LNCT_MD5"); - break; - } - } - break; - - default: - // TODO: skip over vendor data as specified by the form instead. - SAFE_CHECK(false, "Unexpected vendor content type code"); - break; - } - return { - .content_type_code = content_type_code, - .form_code = form_code, - .attr_value = attr_value, - }; -} - void Dwarf::LineNumberVM::init() { version_ = read(data_); - SAFE_CHECK(version_ >= 2 && version_ <= 5, "invalid version in line number VM: {}", version_); - if (version_ == 5) - { - auto address_size = read(data_); - SAFE_CHECK(address_size == sizeof(uintptr_t), "Unexpected Line Number Table address_size"); - auto segment_selector_size = read(data_); - SAFE_CHECK(segment_selector_size == 0, "Segments not supported"); - } + SAFE_CHECK(version_ >= 2 && version_ <= 4, "invalid version in line number VM"); uint64_t header_length = readOffset(data_, is64Bit_); SAFE_CHECK(header_length <= data_.size(), "invalid line number VM header length"); std::string_view header(data_.data(), header_length); data_ = std::string_view(header.end(), data_.end() - header.end()); minLength_ = read(header); - if (version_ >= 4) + if (version_ == 4) { // Version 2 and 3 records don't have this uint8_t max_ops_per_instruction = read(header); SAFE_CHECK(max_ops_per_instruction == 1, "VLIW not supported"); @@ -1786,75 +1292,26 @@ void Dwarf::LineNumberVM::init() standardOpcodeLengths_ = reinterpret_cast(header.data()); //-V506 header.remove_prefix(opcodeBase_ - 1); - if (version_ <= 4) + // We don't want to use heap, so we don't keep an unbounded amount of state. + // We'll just skip over include directories and file names here, and + // we'll loop again when we actually need to retrieve one. + std::string_view sp; + const char * tmp = header.data(); + includeDirectoryCount_ = 0; + while (!(sp = readNullTerminated(header)).empty()) { - // We don't want to use heap, so we don't keep an unbounded amount of state. - // We'll just skip over include directories and file names here, and - // we'll loop again when we actually need to retrieve one. - std::string_view sp; - const char * tmp = header.data(); - v4_.includeDirectoryCount = 0; - while (!(sp = readNullTerminated(header)).empty()) - { - ++v4_.includeDirectoryCount; - } - v4_.includeDirectories = {tmp, header.data()}; - - tmp = header.data(); - FileName fn; - v4_.fileNameCount = 0; - while (readFileName(header, fn)) - { - ++v4_.fileNameCount; - } - v4_.fileNames = {tmp, header.data()}; + ++includeDirectoryCount_; } - else if (version_ == 5) + includeDirectories_ = std::string_view(tmp, header.data() - tmp); + + tmp = header.data(); + FileName fn; + fileNameCount_ = 0; + while (readFileName(header, fn)) { - v5_.directoryEntryFormatCount = read(header); - const char * tmp = header.data(); - for (uint8_t i = 0; i < v5_.directoryEntryFormatCount; i++) - { - // A sequence of directory entry format descriptions. Each description - // consists of a pair of ULEB128 values: - readULEB(header); // A content type code - readULEB(header); // A form code using the attribute form codes - } - v5_.directoryEntryFormat = {tmp, header.data()}; - v5_.directoriesCount = readULEB(header); - tmp = header.data(); - for (uint64_t i = 0; i < v5_.directoriesCount; i++) - { - std::string_view format = v5_.directoryEntryFormat; - for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) - { - readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_); - } - } - v5_.directories = {tmp, header.data()}; - - v5_.fileNameEntryFormatCount = read(header); - tmp = header.data(); - for (uint8_t i = 0; i < v5_.fileNameEntryFormatCount; i++) - { - // A sequence of file entry format descriptions. Each description - // consists of a pair of ULEB128 values: - readULEB(header); // A content type code - readULEB(header); // A form code using the attribute form codes - } - v5_.fileNameEntryFormat = {tmp, header.data()}; - v5_.fileNamesCount = readULEB(header); - tmp = header.data(); - for (uint64_t i = 0; i < v5_.fileNamesCount; i++) - { - std::string_view format = v5_.fileNameEntryFormat; - for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) - { - readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_); - } - } - v5_.fileNames = {tmp, header.data()}; + ++fileNameCount_; } + fileNames_ = std::string_view(tmp, header.data() - tmp); } bool Dwarf::LineNumberVM::next(std::string_view & program) @@ -1870,110 +1327,54 @@ bool Dwarf::LineNumberVM::next(std::string_view & program) Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index) const { - if (version_ <= 4) + SAFE_CHECK(index != 0, "invalid file index 0"); + + FileName fn; + if (index <= fileNameCount_) { - SAFE_CHECK(index != 0, "invalid file index 0"); - FileName fn; - if (index <= v4_.fileNameCount) - { - std::string_view file_names = v4_.fileNames; - for (; index; --index) - { - if (!readFileName(file_names, fn)) - { - abort(); - } - } - return fn; - } - - index -= v4_.fileNameCount; - - std::string_view program = data_; + std::string_view file_names = fileNames_; for (; index; --index) { - SAFE_CHECK(nextDefineFile(program, fn), "invalid file index"); - } - - return fn; - } - else - { - FileName fn; - SAFE_CHECK(index < v5_.fileNamesCount, "invalid file index"); - std::string_view file_names = v5_.fileNames; - for (uint64_t i = 0; i < v5_.fileNamesCount; i++) - { - std::string_view format = v5_.fileNameEntryFormat; - for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) + if (!readFileName(file_names, fn)) { - auto attr = readLineNumberAttribute(is64Bit_, format, file_names, debugStr_, debugLineStr_); - if (i == index) - { - switch (attr.content_type_code) - { - case DW_LNCT_path: - fn.relativeName = std::get(attr.attr_value); - break; - case DW_LNCT_directory_index: - fn.directoryIndex = std::get(attr.attr_value); - break; - } - } + abort(); } } return fn; } + + index -= fileNameCount_; + + std::string_view program = data_; + for (; index; --index) + { + SAFE_CHECK(nextDefineFile(program, fn), "invalid file index"); + } + + return fn; } std::string_view Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index) const { - if (version_ <= 4) + if (index == 0) { - if (index == 0) - { - // In DWARF <= 4 the current directory is not represented in the - // directories field and a directory index of 0 implicitly referred to - // that directory as found in the DW_AT_comp_dir attribute of the - // compilation unit debugging information entry. - return {}; - } - - SAFE_CHECK(index <= v4_.includeDirectoryCount, "invalid include directory"); - - std::string_view include_directories = v4_.includeDirectories; - std::string_view dir; - for (; index; --index) - { - dir = readNullTerminated(include_directories); - if (dir.empty()) - { - abort(); // BUG - } - } - - return dir; + return std::string_view(); } - else + + SAFE_CHECK(index <= includeDirectoryCount_, "invalid include directory"); + + std::string_view include_directories = includeDirectories_; + std::string_view dir; + for (; index; --index) { - SAFE_CHECK(index < v5_.directoriesCount, "invalid file index"); - std::string_view directories = v5_.directories; - for (uint64_t i = 0; i < v5_.directoriesCount; i++) + dir = readNullTerminated(include_directories); + if (dir.empty()) { - std::string_view format = v5_.directoryEntryFormat; - for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) - { - auto attr = readLineNumberAttribute(is64Bit_, format, directories, debugStr_, debugLineStr_); - if (i == index && attr.content_type_code == DW_LNCT_path) - { - return std::get(attr.attr_value); - } - } + abort(); // BUG } - // This could only happen if DWARF5's directory_entry_format doesn't contain - // a DW_LNCT_path. Highly unlikely, but we shouldn't crash. - return std::string_view(""); } + + return dir; } bool Dwarf::LineNumberVM::readFileName(std::string_view & program, FileName & fn) @@ -2021,7 +1422,6 @@ bool Dwarf::LineNumberVM::nextDefineFile(std::string_view & program, FileName & if (opcode == DW_LNE_define_file) { - SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5"); SAFE_CHECK(readFileName(program, fn), "invalid empty file in DW_LNE_define_file"); return true; } @@ -2135,7 +1535,6 @@ Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & pro address_ = read(program); return CONTINUE; case DW_LNE_define_file: - SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5"); // We can't process DW_LNE_define_file here, as it would require us to // use unbounded amounts of state (ie. use the heap). We'll do a second // pass (using nextDefineFile()) if necessary. @@ -2150,16 +1549,6 @@ Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & pro return CONTINUE; } -Dwarf::Path Dwarf::LineNumberVM::getFullFileName(uint64_t index) const -{ - auto fn = getFileName(index); - // DWARF <= 4: the current dir is not represented in the CU's Line Number - // Program Header and relies on the CU's DW_AT_comp_dir. - // DWARF 5: the current directory is explicitly present. - const std::string_view base_dir = version_ == 5 ? "" : compilationDirectory_; - return Path(base_dir, getIncludeDirectory(fn.directoryIndex), fn.relativeName); -} - bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path & file, uint64_t & line) { std::string_view program = data_; @@ -2199,18 +1588,12 @@ bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path & file, uint64_t & // Found it! Note that ">" is indeed correct (not ">="), as each // sequence is guaranteed to have one entry past-the-end (emitted by // DW_LNE_end_sequence) - // - // NOTE: In DWARF <= 4 the file register is non-zero. - // See DWARF 4: 6.2.4 The Line Number Program Header - // "The line number program assigns numbers to each of the file - // entries in order, beginning with 1, and uses those numbers instead - // of file names in the file register." - // DWARF 5 has a different include directory/file header and 0 is valid. - if (version_ <= 4 && prev_file == 0) + if (prev_file == 0) { return false; } - file = getFullFileName(prev_file); + auto fn = getFileName(prev_file); + file = Path(compilationDirectory_, getIncludeDirectory(fn.directoryIndex), fn.relativeName); line = prev_line; return true; } diff --git a/src/Common/Dwarf.h b/src/Common/Dwarf.h index 09178c66d47..6e3d3e74e81 100644 --- a/src/Common/Dwarf.h +++ b/src/Common/Dwarf.h @@ -19,7 +19,6 @@ */ /** This file was edited for ClickHouse. - * Original is from folly library. */ #include @@ -114,8 +113,8 @@ public: // seems as the same path can be represented in multiple ways private: std::string_view baseDir_; /// NOLINT - std::string_view subDir_; /// NOLINT - std::string_view file_; /// NOLINT + std::string_view subDir_; /// NOLINT + std::string_view file_; /// NOLINT }; // Indicates inline function `name` is called at `line@file`. @@ -172,6 +171,8 @@ public: private: static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset); + void init(); + std::shared_ptr elf_; /// NOLINT // DWARF section made up of chunks, each prefixed with a length header. @@ -227,7 +228,6 @@ private: { uint64_t name = 0; uint64_t form = 0; - int64_t implicitConst = 0; // only set when form=DW_FORM_implicit_const explicit operator bool() const { return name != 0 || form != 0; } }; @@ -239,43 +239,25 @@ private: std::variant attr_value; }; - enum - { - DW_UT_compile = 0x01, - DW_UT_skeleton = 0x04, - }; - struct CompilationUnit { - bool is64Bit = false; /// NOLINT - uint8_t version = 0; - uint8_t unit_type = DW_UT_compile; // DW_UT_compile or DW_UT_skeleton - uint8_t addr_size = 0; + bool is64Bit; /// NOLINT + uint8_t version; + uint8_t addr_size; // Offset in .debug_info of this compilation unit. - uint32_t offset = 0; - uint32_t size = 0; + uint32_t offset; + uint32_t size; // Offset in .debug_info for the first DIE in this compilation unit. - uint32_t first_die = 0; - uint64_t abbrev_offset = 0; - - // The beginning of the CU's contribution to .debug_addr - std::optional addr_base; // DW_AT_addr_base (DWARF 5) - // The beginning of the offsets table (immediately following the - // header) of the CU's contribution to .debug_loclists - std::optional loclists_base; // DW_AT_loclists_base (DWARF 5) - // The beginning of the offsets table (immediately following the - // header) of the CU's contribution to .debug_rnglists - std::optional rnglists_base; // DW_AT_rnglists_base (DWARF 5) - // Points to the first string offset of the compilation unit’s - // contribution to the .debug_str_offsets (or .debug_str_offsets.dwo) section. - std::optional str_offsets_base; // DW_AT_str_offsets_base (DWARF 5) - + uint32_t first_die; + uint64_t abbrev_offset; // Only the CompilationUnit that contains the caller functions needs this cache. // Indexed by (abbr.code - 1) if (abbr.code - 1) < abbrCache.size(); std::vector abbr_cache; }; - /** cu must exist during the life cycle of created Die. */ + static CompilationUnit getCompilationUnit(std::string_view info, uint64_t offset); + + /** cu must exist during the life cycle of created detail::Die. */ Die getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const; bool findLocation( @@ -296,16 +278,16 @@ private: class LineNumberVM { public: - LineNumberVM( - std::string_view data, - std::string_view compilationDirectory, - std::string_view debugStr, - std::string_view debugLineStr); + LineNumberVM(std::string_view data, std::string_view compilationDirectory); bool findAddress(uintptr_t target, Path & file, uint64_t & line); /** Gets full file name at given index including directory. */ - Path getFullFileName(uint64_t index) const; + Path getFullFileName(uint64_t index) const + { + auto fn = getFileName(index); + return Path({}, getIncludeDirectory(fn.directoryIndex), fn.relativeName); + } private: void init(); @@ -345,42 +327,24 @@ private: bool nextDefineFile(std::string_view & program, FileName & fn) const; // Initialization - bool is64Bit_; /// NOLINT - std::string_view data_; /// NOLINT - std::string_view compilationDirectory_; /// NOLINT - std::string_view debugStr_; // needed for DWARF 5 /// NOLINT - std::string_view debugLineStr_; // DWARF 5 /// NOLINT + bool is64Bit_; /// NOLINT + std::string_view data_; /// NOLINT + std::string_view compilationDirectory_; /// NOLINT // Header - uint16_t version_; /// NOLINT - uint8_t minLength_; /// NOLINT + uint16_t version_; /// NOLINT + uint8_t minLength_; /// NOLINT bool defaultIsStmt_; /// NOLINT - int8_t lineBase_; /// NOLINT - uint8_t lineRange_; /// NOLINT + int8_t lineBase_; /// NOLINT + uint8_t lineRange_; /// NOLINT uint8_t opcodeBase_; /// NOLINT const uint8_t * standardOpcodeLengths_; /// NOLINT - // 6.2.4 The Line Number Program Header. - struct - { - size_t includeDirectoryCount; - std::string_view includeDirectories; - size_t fileNameCount; - std::string_view fileNames; - } v4_; + std::string_view includeDirectories_; /// NOLINT + size_t includeDirectoryCount_; /// NOLINT - struct - { - uint8_t directoryEntryFormatCount; - std::string_view directoryEntryFormat; - uint64_t directoriesCount; - std::string_view directories; - - uint8_t fileNameEntryFormatCount; - std::string_view fileNameEntryFormat; - uint64_t fileNamesCount; - std::string_view fileNames; - } v5_; + std::string_view fileNames_; /// NOLINT + size_t fileNameCount_; /// NOLINT // State machine registers uint64_t address_; /// NOLINT @@ -433,26 +397,20 @@ private: */ size_t forEachAttribute(const CompilationUnit & cu, const Die & die, std::function f) const; - Attribute readAttribute( - const CompilationUnit & cu, - const Die & die, - AttributeSpec spec, - std::string_view & info) const; + Attribute readAttribute(const Die & die, AttributeSpec spec, std::string_view & info) const; // Read one attribute pair, remove_prefix sp; returns <0, 0> at end. static AttributeSpec readAttributeSpec(std::string_view & sp); // Read one attribute value, remove_prefix sp using AttributeValue = std::variant; - AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64_bit) const; + AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const; // Get an ELF section by name, return true if found - std::string_view getSection(const char * name) const; - - CompilationUnit getCompilationUnit(uint64_t offset) const; - // Finds the Compilation Unit starting at offset. - CompilationUnit findCompilationUnit(uint64_t targetOffset) const; + bool getSection(const char * name, std::string_view * section) const; + // Get a string from the .debug_str section + std::string_view getStringFromStringSection(uint64_t offset) const; template std::optional getAttribute(const CompilationUnit & cu, const Die & die, uint64_t attr_name) const @@ -471,24 +429,17 @@ private: } // Check if the given address is in the range list at the given offset in .debug_ranges. - bool isAddrInRangeList( - const CompilationUnit & cu, - uint64_t address, - std::optional base_addr, - size_t offset, - uint8_t addr_size) const; + bool isAddrInRangeList(uint64_t address, std::optional base_addr, size_t offset, uint8_t addr_size) const; - std::string_view abbrev_; // .debug_abbrev /// NOLINT - std::string_view addr_; // .debug_addr (DWARF 5) /// NOLINT - std::string_view aranges_; // .debug_aranges /// NOLINT - std::string_view info_; // .debug_info /// NOLINT - std::string_view line_; // .debug_line /// NOLINT - std::string_view line_str_; // .debug_line_str (DWARF 5) /// NOLINT - std::string_view loclists_; // .debug_loclists (DWARF 5) /// NOLINT - std::string_view ranges_; // .debug_ranges /// NOLINT - std::string_view rnglists_; // .debug_rnglists (DWARF 5) /// NOLINT - std::string_view str_; // .debug_str /// NOLINT - std::string_view str_offsets_; // .debug_str_offsets (DWARF 5) /// NOLINT + // Finds the Compilation Unit starting at offset. + static CompilationUnit findCompilationUnit(std::string_view info, uint64_t targetOffset); + + std::string_view info_; // .debug_info /// NOLINT + std::string_view abbrev_; // .debug_abbrev /// NOLINT + std::string_view aranges_; // .debug_aranges /// NOLINT + std::string_view line_; // .debug_line /// NOLINT + std::string_view strings_; // .debug_str /// NOLINT + std::string_view ranges_; // .debug_ranges /// NOLINT }; }