mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-12 10:34:21 +00:00
Imported Dwarf parser from Facebook folly
This commit is contained in:
parent
957b59f0d0
commit
daa36650fb
1097
dbms/src/Common/Dwarf.cpp
Normal file
1097
dbms/src/Common/Dwarf.cpp
Normal file
File diff suppressed because it is too large
Load Diff
287
dbms/src/Common/Dwarf.h
Normal file
287
dbms/src/Common/Dwarf.h
Normal file
@ -0,0 +1,287 @@
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* Copyright 2012-present Facebook, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** This file was edited for ClickHouse.
|
||||
*/
|
||||
|
||||
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
#include <variant>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Elf;
|
||||
|
||||
/**
|
||||
* DWARF record parser.
|
||||
*
|
||||
* We only implement enough DWARF functionality to convert from PC address
|
||||
* to file and line number information.
|
||||
*
|
||||
* This means (although they're not part of the public API of this class), we
|
||||
* can parse Debug Information Entries (DIEs), abbreviations, attributes (of
|
||||
* all forms), and we can interpret bytecode for the line number VM.
|
||||
*
|
||||
* We can interpret DWARF records of version 2, 3, or 4, although we don't
|
||||
* actually support many of the version 4 features (such as VLIW, multiple
|
||||
* operations per instruction)
|
||||
*
|
||||
* Note that the DWARF record parser does not allocate heap memory at all.
|
||||
* This is on purpose: you can use the parser from
|
||||
* memory-constrained situations (such as an exception handler for
|
||||
* std::bad_alloc). If it weren't for this requirement, some things would
|
||||
* be much simpler: the Path class would be unnecessary and would be replaced
|
||||
* with a std::string; the list of file names in the line number VM would be
|
||||
* kept as a vector of strings instead of re-executing the program to look for
|
||||
* DW_LNE_define_file instructions, etc.
|
||||
*/
|
||||
class Dwarf
|
||||
{
|
||||
// Note that Dwarf uses (and returns) std::string_view a lot.
|
||||
// The std::string_views point into sections of the ELF file, and so will
|
||||
// be live for as long as the passed-in Elf is live.
|
||||
public:
|
||||
/** Create a DWARF parser around an ELF file. */
|
||||
explicit Dwarf(const Elf & elf);
|
||||
|
||||
/**
|
||||
* Represent a file path as a collection of three parts (base directory,
|
||||
* subdirectory, and file).
|
||||
*/
|
||||
class Path
{
public:
    // An empty path: all three parts are default-constructed (empty) views.
    Path() = default;

    // Build a path from its three parts. Defined out of line, so any
    // normalization applied to the parts is not visible from this header.
    Path(std::string_view baseDir, std::string_view subDir, std::string_view file);

    // Accessors for the three stored parts. Each returns a non-owning view;
    // nothing is copied.
    std::string_view baseDir() const { return baseDir_; }
    std::string_view subDir() const { return subDir_; }
    std::string_view file() const { return file_; }

    // Defined out of line.
    // NOTE(review): presumably the length of the path that toBuffer() /
    // toString() would produce - confirm against the implementation.
    size_t size() const;

    /**
     * Copy the Path to a buffer of size bufSize.
     *
     * toBuffer behaves like snprintf: It will always null-terminate the
     * buffer (so it will copy at most bufSize-1 bytes), and it will return
     * the number of bytes that would have been written if there had been
     * enough room, so, if toBuffer returns a value >= bufSize, the output
     * was truncated.
     */
    size_t toBuffer(char * buf, size_t bufSize) const;

    // Write the path into dest. Defined out of line.
    // NOTE(review): whether dest is overwritten or appended to is not visible
    // here - confirm against the implementation.
    void toString(std::string & dest) const;

    // Convenience overload returning the path as a new std::string. Building
    // a std::string may allocate; use toBuffer() where allocation is not
    // acceptable.
    std::string toString() const
    {
        std::string result;
        toString(result);
        return result;
    }

    // TODO(tudorb): Implement operator==, operator!=; not as easy as it
    // seems as the same path can be represented in multiple ways
private:
    std::string_view baseDir_;
    std::string_view subDir_;
    std::string_view file_;
};
|
||||
|
||||
// Selects how much effort findAddress() spends on resolving an address to a
// source location.
enum class LocationInfoMode
{
    // Don't resolve location info.
    DISABLED,
    // Perform CU lookup using .debug_aranges (might be incomplete).
    FAST,
    // Scan all CU in .debug_info (slow!) on .debug_aranges lookup failure.
    FULL,
};
|
||||
|
||||
struct LocationInfo
|
||||
{
|
||||
bool hasMainFile = false;
|
||||
Path mainFile;
|
||||
|
||||
bool hasFileAndLine = false;
|
||||
Path file;
|
||||
uint64_t line = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* Find the file and line number information corresponding to address.
|
||||
*/
|
||||
bool findAddress(uintptr_t address, LocationInfo & info, LocationInfoMode mode) const;
|
||||
|
||||
private:
|
||||
static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset);
|
||||
|
||||
void init();
|
||||
bool findLocation(uintptr_t address, std::string_view & infoEntry, LocationInfo & info) const;
|
||||
|
||||
const Elf * elf_;
|
||||
|
||||
// DWARF section made up of chunks, each prefixed with a length header.
|
||||
// The length indicates whether the chunk is DWARF-32 or DWARF-64, which
|
||||
// guides interpretation of "section offset" records.
|
||||
// (yes, DWARF-32 and DWARF-64 sections may coexist in the same file)
|
||||
class Section
{
public:
    // An empty section: no data, and not flagged as 64-bit.
    Section() = default;

    // Wrap the raw bytes of a section. Defined out of line.
    explicit Section(std::string_view d);

    // Return next chunk, if any; the 4- or 12-byte length was already
    // parsed and isn't part of the chunk.
    bool next(std::string_view & chunk);

    // Is the current chunk 64 bit?
    bool is64Bit() const { return is64Bit_; }

private:
    // Yes, 32- and 64- bit sections may coexist. Yikes!
    bool is64Bit_ = false;
    std::string_view data_;
};
|
||||
|
||||
// Abbreviation for a Debugging Information Entry.
|
||||
// All fields carry default initializers so that a default-constructed
// abbreviation has a defined value before a reader fills it in.
struct DIEAbbreviation
{
    uint64_t code = 0;
    uint64_t tag = 0;
    bool hasChildren = false;

    // One <name, form> pair of an abbreviation's attribute list.
    struct Attribute
    {
        uint64_t name = 0;
        uint64_t form = 0;
    };

    // Undecoded attribute list of this abbreviation.
    // NOTE(review): presumably decoded pair by pair with readAttribute() -
    // confirm against the implementation.
    std::string_view attributes;
};
|
||||
|
||||
// Interpreter for the line number bytecode VM
|
||||
class LineNumberVM
|
||||
{
|
||||
public:
|
||||
LineNumberVM(std::string_view data, std::string_view compilationDirectory);
|
||||
|
||||
bool findAddress(uintptr_t address, Path & file, uint64_t & line);
|
||||
|
||||
private:
|
||||
void init();
|
||||
void reset();
|
||||
|
||||
// Execute until we commit one new row to the line number matrix
|
||||
bool next(std::string_view & program);
|
||||
enum StepResult
|
||||
{
|
||||
CONTINUE, // Continue feeding opcodes
|
||||
COMMIT, // Commit new <address, file, line> tuple
|
||||
END, // End of sequence
|
||||
};
|
||||
// Execute one opcode
|
||||
StepResult step(std::string_view & program);
|
||||
|
||||
struct FileName
|
||||
{
|
||||
std::string_view relativeName;
|
||||
// 0 = current compilation directory
|
||||
// otherwise, 1-based index in the list of include directories
|
||||
uint64_t directoryIndex;
|
||||
};
|
||||
// Read one FileName object, remove_prefix sp
|
||||
static bool readFileName(std::string_view & sp, FileName & fn);
|
||||
|
||||
// Get file name at given index; may be in the initial table
|
||||
// (fileNames_) or defined using DW_LNE_define_file (and we reexecute
|
||||
// enough of the program to find it, if so)
|
||||
FileName getFileName(uint64_t index) const;
|
||||
|
||||
// Get include directory at given index
|
||||
std::string_view getIncludeDirectory(uint64_t index) const;
|
||||
|
||||
// Execute opcodes until finding a DW_LNE_define_file and return true;
|
||||
// return file at the end.
|
||||
bool nextDefineFile(std::string_view & program, FileName & fn) const;
|
||||
|
||||
// Initialization
|
||||
bool is64Bit_;
|
||||
std::string_view data_;
|
||||
std::string_view compilationDirectory_;
|
||||
|
||||
// Header
|
||||
uint16_t version_;
|
||||
uint8_t minLength_;
|
||||
bool defaultIsStmt_;
|
||||
int8_t lineBase_;
|
||||
uint8_t lineRange_;
|
||||
uint8_t opcodeBase_;
|
||||
const uint8_t * standardOpcodeLengths_;
|
||||
|
||||
std::string_view includeDirectories_;
|
||||
size_t includeDirectoryCount_;
|
||||
|
||||
std::string_view fileNames_;
|
||||
size_t fileNameCount_;
|
||||
|
||||
// State machine registers
|
||||
uint64_t address_;
|
||||
uint64_t file_;
|
||||
uint64_t line_;
|
||||
uint64_t column_;
|
||||
bool isStmt_;
|
||||
bool basicBlock_;
|
||||
bool endSequence_;
|
||||
bool prologueEnd_;
|
||||
bool epilogueBegin_;
|
||||
uint64_t isa_;
|
||||
uint64_t discriminator_;
|
||||
};
|
||||
|
||||
// Read an abbreviation from a std::string_view, return true if at end; remove_prefix sp
|
||||
static bool readAbbreviation(std::string_view & sp, DIEAbbreviation & abbr);
|
||||
|
||||
// Get abbreviation corresponding to a code, in the chunk starting at
|
||||
// offset in the .debug_abbrev section
|
||||
DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const;
|
||||
|
||||
// Read one attribute <name, form> pair, remove_prefix sp; returns <0, 0> at end.
|
||||
static DIEAbbreviation::Attribute readAttribute(std::string_view & sp);
|
||||
|
||||
// Read one attribute value, remove_prefix sp
|
||||
// Value of one attribute: either an unsigned integer or a string view.
using AttributeValue = std::variant<uint64_t, std::string_view>;
|
||||
AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const;
|
||||
|
||||
// Get an ELF section by name, return true if found
|
||||
bool getSection(const char * name, std::string_view * section) const;
|
||||
|
||||
// Get a string from the .debug_str section
|
||||
std::string_view getStringFromStringSection(uint64_t offset) const;
|
||||
|
||||
std::string_view info_; // .debug_info
|
||||
std::string_view abbrev_; // .debug_abbrev
|
||||
std::string_view aranges_; // .debug_aranges
|
||||
std::string_view line_; // .debug_line
|
||||
std::string_view strings_; // .debug_str
|
||||
};
|
||||
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
#include <Common/Elf.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -16,8 +16,6 @@ namespace ErrorCodes
|
||||
Elf::Elf(const std::string & path)
|
||||
: in(path, 0)
|
||||
{
|
||||
std::cerr << "Processing path " << path << "\n";
|
||||
|
||||
/// Check if it's an elf.
|
||||
size = in.buffer().size();
|
||||
if (size < sizeof(ElfEhdr))
|
||||
@ -98,6 +96,12 @@ std::optional<Elf::Section> Elf::findSection(std::function<bool(const Section &
|
||||
}
|
||||
|
||||
|
||||
std::optional<Elf::Section> Elf::findSectionByName(const char * name) const
|
||||
{
|
||||
return findSection([&](const Section & section, size_t) { return 0 == strcmp(name, section.name()); });
|
||||
}
|
||||
|
||||
|
||||
const char * Elf::Section::name() const
|
||||
{
|
||||
if (!elf.section_names)
|
||||
@ -115,7 +119,12 @@ const char * Elf::Section::begin() const
|
||||
|
||||
const char * Elf::Section::end() const
|
||||
{
|
||||
return elf.mapped + header.sh_offset + header.sh_size;
|
||||
return begin() + size();
|
||||
}
|
||||
|
||||
size_t Elf::Section::size() const
|
||||
{
|
||||
return header.sh_size;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -21,7 +21,9 @@ using ElfSym = ElfW(Sym);
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Elf
|
||||
/** Allow to navigate sections in ELF.
|
||||
*/
|
||||
class Elf final
|
||||
{
|
||||
public:
|
||||
struct Section
|
||||
@ -31,6 +33,7 @@ public:
|
||||
|
||||
const char * begin() const;
|
||||
const char * end() const;
|
||||
size_t size() const;
|
||||
|
||||
Section(const ElfShdr & header, const Elf & elf);
|
||||
|
||||
@ -40,8 +43,9 @@ public:
|
||||
|
||||
Elf(const std::string & path);
|
||||
|
||||
std::optional<Section> findSection(std::function<bool(const Section & section, size_t idx)> && pred) const;
|
||||
bool iterateSections(std::function<bool(const Section & section, size_t idx)> && pred) const;
|
||||
std::optional<Section> findSection(std::function<bool(const Section & section, size_t idx)> && pred) const;
|
||||
std::optional<Section> findSectionByName(const char * name) const;
|
||||
|
||||
const char * end() const { return mapped + size; }
|
||||
|
||||
|
@ -439,6 +439,7 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_DELETE_TIMER = 462;
|
||||
extern const int CANNOT_FCNTL = 463;
|
||||
extern const int CANNOT_PARSE_ELF = 464;
|
||||
extern const int CANNOT_PARSE_DWARF = 465;
|
||||
|
||||
extern const int KEEPER_EXCEPTION = 999;
|
||||
extern const int POCO_EXCEPTION = 1000;
|
||||
|
Loading…
Reference in New Issue
Block a user