diff --git a/build-logic/conventions/src/main/kotlin/com/datadoghq/native/scanbuild/ScanBuildPlugin.kt b/build-logic/conventions/src/main/kotlin/com/datadoghq/native/scanbuild/ScanBuildPlugin.kt index aff275b52..30151b8e4 100644 --- a/build-logic/conventions/src/main/kotlin/com/datadoghq/native/scanbuild/ScanBuildPlugin.kt +++ b/build-logic/conventions/src/main/kotlin/com/datadoghq/native/scanbuild/ScanBuildPlugin.kt @@ -72,6 +72,11 @@ class ScanBuildPlugin : Plugin { "scan-build", "-o", outputDir.absolutePath, "--force-analyze-debug-code", + // core.StackAddressEscape fires on the intentional setjmp/longjmp pattern in + // StackWalker::walkVM: the jmp_buf address is stored in vm_thread->exception() + // for the duration of the stack walk and is always restored before the function + // returns. The analyzer cannot prove the lifetime is safe, but we can. + "-disable-checker", "core.StackAddressEscape", "--use-analyzer", analyzer, "make", "-j$parallelJobs" ) diff --git a/ddprof-lib/src/main/cpp/codeCache.cpp b/ddprof-lib/src/main/cpp/codeCache.cpp index 30e271466..d94d3f26d 100644 --- a/ddprof-lib/src/main/cpp/codeCache.cpp +++ b/ddprof-lib/src/main/cpp/codeCache.cpp @@ -99,7 +99,7 @@ void CodeCache::copyFrom(const CodeCache& other) { _imports_patchable = other._imports_patchable; _dwarf_table_length = other._dwarf_table_length; - _dwarf_table = new FrameDesc[_dwarf_table_length]; + _dwarf_table = (FrameDesc*)malloc(_dwarf_table_length * sizeof(FrameDesc)); memcpy(_dwarf_table, other._dwarf_table, _dwarf_table_length * sizeof(FrameDesc)); _default_frame = other._default_frame; @@ -120,7 +120,7 @@ CodeCache &CodeCache::operator=(const CodeCache &other) { } NativeFunc::destroy(_name); - delete[] _dwarf_table; + free(_dwarf_table); delete[] _blobs; free(_build_id); @@ -135,7 +135,7 @@ CodeCache::~CodeCache() { } NativeFunc::destroy(_name); delete[] _blobs; - delete[] _dwarf_table; + free(_dwarf_table); free(_build_id); // Free build-id memory } diff --git 
a/ddprof-lib/src/main/cpp/dwarf.cpp b/ddprof-lib/src/main/cpp/dwarf.cpp
index b2c18ae67..14d8f4bad 100644
--- a/ddprof-lib/src/main/cpp/dwarf.cpp
+++ b/ddprof-lib/src/main/cpp/dwarf.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright 2021 Andrei Pangin
+ * Copyright 2026, Datadog, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -92,8 +93,8 @@ FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | LINKED_FRAME_SIZE << 8,
 FrameDesc FrameDesc::default_clang_frame = {0, DW_REG_FP | LINKED_FRAME_CLANG_SIZE << 8, -LINKED_FRAME_CLANG_SIZE, -LINKED_FRAME_CLANG_SIZE + DW_STACK_SLOT};
 FrameDesc FrameDesc::no_dwarf_frame = {0, DW_REG_INVALID, DW_REG_INVALID, DW_REG_INVALID};
 
-DwarfParser::DwarfParser(const char *name, const char *image_base,
-                         const char *eh_frame_hdr) {
+// Common constructor initialization shared by both parsing entry points.
+void DwarfParser::init(const char *name, const char *image_base) {
   _name = name;
   _image_base = image_base;
 
@@ -105,10 +106,23 @@ DwarfParser::DwarfParser(const char *name, const char *image_base,
   _code_align = sizeof(instruction_t);
   _data_align = -(int)sizeof(void *);
   _linked_frame_size = -1;
+  _has_z_augmentation = false;
+}
 
+// Builds the frame table from an .eh_frame_hdr section (see parse()).
+DwarfParser::DwarfParser(const char *name, const char *image_base,
+                         const char *eh_frame_hdr) {
+  init(name, image_base);
   parse(eh_frame_hdr);
 }
 
+// Builds the frame table by scanning eh_frame_size bytes of a raw .eh_frame section (see parseEhFrame()).
+DwarfParser::DwarfParser(const char *name, const char *image_base,
+                         const char *eh_frame, size_t eh_frame_size) {
+  init(name, image_base);
+  parseEhFrame(eh_frame, eh_frame_size);
+}
+
 static constexpr u8 omit_sign_bit(u8 value) {
   // each signed flag = unsigned equivalent | 0x80
   return value & 0xf7;
@@ -144,6 +158,104 @@ void DwarfParser::parse(const char *eh_frame_hdr) {
   }
 }
 
+// Parse raw .eh_frame (or __eh_frame on macOS) without a binary-search index.
+// Records are CIE/FDE sequences laid out linearly; terminated by a 4-byte zero or EOF.
+// Record header fields are bounds-checked before they are read; a malformed record is
+// either skipped or ends the scan.
+void DwarfParser::parseEhFrame(const char *eh_frame, size_t size) {
+  if (eh_frame == NULL || size < 4) {
+    return;
+  }
+  const char *section_end = eh_frame + size;
+  _ptr = eh_frame;
+
+  while (section_end - _ptr >= 4) {
+    const char *record_start = _ptr;
+    u32 length = get32();
+    if (length == 0) {
+      break; // terminator
+    }
+    if (length == 0xffffffff) {
+      break; // 64-bit DWARF not supported
+    }
+
+    if (length > (size_t)(section_end - record_start) - 4) {
+      break;
+    }
+    const char *record_end = record_start + 4 + length;
+
+    if (length < 4) {
+      // Too short to hold the 4-byte CIE id / CIE pointer: reading it would run past
+      // record_end (and past section_end for the last record). Skip the record.
+      _ptr = record_end;
+      continue;
+    }
+    u32 cie_id = get32();
+
+    if (cie_id == 0) {
+      // CIE: update code and data alignment factors.
+      // Layout after cie_id: [1-byte version][augmentation string \0][code_align LEB][data_align SLEB]
+      //   [return_address_register][augmentation data (if 'z')]...
+      // return_address_register and everything after data_align are not consumed; _ptr = record_end
+      // at the bottom of the loop skips them.
+      //
+      // _has_z_augmentation is overwritten by every CIE encountered. The DWARF spec allows
+      // multiple CIEs with different augmentation strings in a single .eh_frame section, so
+      // strictly speaking each FDE should resolve its own CIE via the backward cie_id offset.
+      // We intentionally skip that: macOS binaries compiled by clang typically emit a single CIE
+      // per module, and this parser is only called for macOS __eh_frame sections. Multi-CIE
+      // binaries are not produced by the toolchains we target here.
+      if (_ptr >= record_end) {
+        _ptr = record_end;
+        continue;
+      }
+      _ptr++; // skip version
+      if (_ptr >= record_end) {
+        _ptr = record_end;
+        continue;
+      }
+      _has_z_augmentation = (*_ptr == 'z');
+      while (_ptr < record_end && *_ptr++) {
+      } // skip null-terminated augmentation string
+      if (_ptr >= record_end) {
+        _ptr = record_end;
+        continue;
+      }
+      _code_align = getLeb(record_end);
+      _data_align = getSLeb(record_end);
+    } else {
+      // FDE: parse frame description for the covered PC range.
+      // After cie_id: [pcrel-range-start 4 bytes][range-len 4 bytes][aug-data-len LEB][aug-data][instructions]
+      // Assumes DW_EH_PE_pcrel | DW_EH_PE_sdata4 encoding for range-start (clang macOS default).
+      // The augmentation data length field (and the data itself) is only present when the CIE
+      // augmentation string starts with 'z'.
+      if (record_end - _ptr < 8) {
+        break;
+      }
+      u32 range_start = (u32)(getPtr() - _image_base);
+      u32 range_len = get32();
+      if (_has_z_augmentation) {
+        // Validate the augmentation data length before advancing: adding an unchecked
+        // value to _ptr could move it far outside the section before any comparison runs.
+        u32 aug_len = getLeb(record_end);
+        if (aug_len > (size_t)(record_end - _ptr)) {
+          break;
+        }
+        _ptr += aug_len;
+      }
+      parseInstructions(range_start, record_end);
+      addRecord(range_start + range_len, DW_REG_FP, LINKED_FRAME_SIZE,
+                -LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT);
+    }
+
+    _ptr = record_end;
+  }
+
+  if (_count > 1) {
+    qsort(_table, _count, sizeof(FrameDesc), FrameDesc::comparator);
+  }
+}
+
 void DwarfParser::parseCie() {
   u32 cie_len = get32();
   if (cie_len == 0 || cie_len == 0xffffffff) {
diff --git a/ddprof-lib/src/main/cpp/dwarf.h b/ddprof-lib/src/main/cpp/dwarf.h
index fd65b21f2..d31e72fdf 100644
--- a/ddprof-lib/src/main/cpp/dwarf.h
+++ b/ddprof-lib/src/main/cpp/dwarf.h
@@ -1,6 +1,6 @@
 /*
  * Copyright The async-profiler authors
- * Copyright 2025, Datadog, Inc.
+ * Copyright 2025, 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0 */ @@ -116,6 +116,7 @@ class DwarfParser { u32 _code_align; int _data_align; int _linked_frame_size; // detected from FP-based DWARF entries; -1 = undetected + bool _has_z_augmentation; const char* add(size_t size) { const char* ptr = _ptr; @@ -152,6 +153,18 @@ class DwarfParser { } } + u32 getLeb(const char* end) { + u32 result = 0; + for (u32 shift = 0; _ptr < end; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + return result; + } + } + return result; + } + int getSLeb() { int result = 0; for (u32 shift = 0; ; shift += 7) { @@ -166,6 +179,21 @@ class DwarfParser { } } + int getSLeb(const char* end) { + int result = 0; + for (u32 shift = 0; _ptr < end; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + if ((b & 0x40) != 0 && (shift += 7) < 32) { + result |= ~0U << shift; + } + return result; + } + } + return result; + } + void skipLeb() { while (*_ptr++ & 0x80) {} } @@ -178,7 +206,9 @@ class DwarfParser { return ptr + offset; } + void init(const char* name, const char* image_base); void parse(const char* eh_frame_hdr); + void parseEhFrame(const char* eh_frame, size_t size); void parseCie(); void parseFde(); void parseInstructions(u32 loc, const char* end); @@ -189,7 +219,11 @@ class DwarfParser { public: DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr); + DwarfParser(const char* name, const char* image_base, const char* eh_frame, size_t eh_frame_size); + // Ownership of the returned pointer transfers to the caller. + // The caller is responsible for freeing it with free() (not delete[]). + // DwarfParser has no destructor; _table is left dangling after this call is used. 
FrameDesc* table() const { return _table; } diff --git a/ddprof-lib/src/main/cpp/symbols_macos.cpp b/ddprof-lib/src/main/cpp/symbols_macos.cpp index f7f4c5c76..e09c0f149 100644 --- a/ddprof-lib/src/main/cpp/symbols_macos.cpp +++ b/ddprof-lib/src/main/cpp/symbols_macos.cpp @@ -1,5 +1,6 @@ /* * Copyright The async-profiler authors + * Copyright 2026, Datadog, Inc. * SPDX-License-Identifier: Apache-2.0 */ @@ -11,8 +12,8 @@ #include #include #include -#include "symbols.h" #include "dwarf.h" +#include "symbols.h" #include "log.h" UnloadProtection::UnloadProtection(const CodeCache *cc) { @@ -139,15 +140,19 @@ class MachOParser { const symtab_command* symtab = NULL; const dysymtab_command* dysymtab = NULL; const section_64* stubs_section = NULL; - bool has_eh_frame = false; - + const char* eh_frame = NULL; + size_t eh_frame_size = 0; for (uint32_t i = 0; i < header->ncmds; i++) { if (lc->cmd == LC_SEGMENT_64) { const segment_command_64* sc = (const segment_command_64*)lc; if (strcmp(sc->segname, "__TEXT") == 0) { _cc->updateBounds(_image_base, add(_image_base, sc->vmsize)); stubs_section = findSection(sc, "__stubs"); - has_eh_frame = findSection(sc, "__eh_frame") != NULL; + const section_64* eh_frame_section = findSection(sc, "__eh_frame"); + if (eh_frame_section != NULL) { + eh_frame = _vmaddr_slide + eh_frame_section->addr; + eh_frame_size = eh_frame_section->size; + } } else if (strcmp(sc->segname, "__LINKEDIT") == 0) { link_base = _vmaddr_slide + sc->vmaddr - sc->fileoff; } else if (strcmp(sc->segname, "__DATA") == 0 || strcmp(sc->segname, "__DATA_CONST") == 0) { @@ -171,11 +176,16 @@ class MachOParser { } } - // GCC emits __eh_frame (DWARF CFI); clang emits __unwind_info (compact unwind). - // On aarch64, GCC and clang use different frame layouts, so detecting the - // compiler matters. On x86_64 both use the same layout (no-op distinction). - const FrameDesc& frame = has_eh_frame ? 
FrameDesc::default_frame : FrameDesc::fallback_default_frame(); - _cc->setDwarfTable(NULL, 0, frame); + if (DWARF_SUPPORTED && eh_frame != NULL && eh_frame_size > 0) { + DwarfParser dwarf(_cc->name(), _vmaddr_slide, eh_frame, eh_frame_size); + _cc->setDwarfTable(dwarf.table(), dwarf.count(), dwarf.detectedDefaultFrame()); + } else { + // No __eh_frame (clang compact-unwind-only libraries): fall back to the + // library-wide default frame. On aarch64, clang uses a different frame + // layout from GCC, so we must pass fallback_default_frame() rather than + // letting CodeCache keep its constructor default of FrameDesc::default_frame. + _cc->setDwarfTable(NULL, 0, FrameDesc::fallback_default_frame()); + } return true; } @@ -239,4 +249,8 @@ bool Symbols::isLibcOrPthreadAddress(uintptr_t pc) { return false; } +void Symbols::clearParsingCaches() { + _parsed_libraries.clear(); +} + #endif // __APPLE__ diff --git a/ddprof-lib/src/test/cpp/dwarf_ut.cpp b/ddprof-lib/src/test/cpp/dwarf_ut.cpp new file mode 100644 index 000000000..f6877e148 --- /dev/null +++ b/ddprof-lib/src/test/cpp/dwarf_ut.cpp @@ -0,0 +1,193 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "dwarf.h" +#include "../../main/cpp/gtest_crash_handler.h" + +#include +#include +#include + +// Test name for crash handler +static constexpr char DWARF_TEST_NAME[] = "DwarfTest"; + +class DwarfGlobalSetup { + public: + DwarfGlobalSetup() { + installGtestCrashHandler(); + } + ~DwarfGlobalSetup() { + restoreDefaultSignalHandlers(); + } +}; +static DwarfGlobalSetup dwarf_global_setup; + +#if DWARF_SUPPORTED + +// Helpers to write little-endian integers into a byte buffer. 
+static void put32(std::vector& buf, uint32_t v) { + buf.push_back(static_cast(v)); + buf.push_back(static_cast(v >> 8)); + buf.push_back(static_cast(v >> 16)); + buf.push_back(static_cast(v >> 24)); +} + +static void put8(std::vector& buf, uint8_t v) { + buf.push_back(v); +} + +// Append a minimal CIE with "z" augmentation to buf. +// Layout: [4-len=11][4-cie_id=0][1-ver=1][2-aug="z\0"][1-code_align=4][1-data_align=-8][1-ra=30][1-aug_data_len=0] +// Total: 15 bytes. +static void appendCie(std::vector& buf) { + // body size = cie_id(4) + version(1) + "z\0"(2) + code_align(1) + data_align(1) + ra_col(1) + aug_data_len(1) = 11 + put32(buf, 11); // length + put32(buf, 0); // cie_id = 0 + put8(buf, 1); // version + put8(buf, 'z'); // augmentation "z" + put8(buf, 0); // null terminator + put8(buf, 4); // code_align = 4 (LEB128) + put8(buf, 0x78); // data_align = -8 (SLEB128: 0x78) + put8(buf, 30); // return address column = 30 (lr) + put8(buf, 0); // augmentation data length = 0 (LEB128) +} + +// Append an FDE referencing the CIE at cie_start_offset from the buf start. +// cie_offset in the FDE = offset from FDE's cie_id field to the CIE start. +// range_start is encoded as a 4-byte PC-relative signed integer (pcrel). +// With pcrel=0 and image_base=&buf[0]: range_start = offset_of_pcrel_field_within_buf. +// Layout: [4-len=13][4-cie_offset][4-pcrel=0][4-range_len][1-aug_data_len=0] +// Total: 17 bytes. +static void appendFde(std::vector& buf, uint32_t cie_start_offset, uint32_t range_len) { + // The FDE's cie_id field will be at buf.size() + 4 (after length field). 
+ uint32_t cie_id_field_offset = static_cast(buf.size()) + 4; + uint32_t cie_offset = cie_id_field_offset - cie_start_offset; + + // body = cie_offset(4) + range_start(4) + range_len(4) + aug_data_len(1) = 13 + put32(buf, 13); // length + put32(buf, cie_offset); // cie_offset from this field back to CIE start + put32(buf, 0); // range_start pcrel = 0 (absolute value = field_address - image_base) + put32(buf, range_len); // range_len + put8(buf, 0); // aug data length = 0 (LEB128, for "z" augmentation) + // no DWARF call frame instructions +} + +static void appendTerminator(std::vector& buf) { + put32(buf, 0); +} + +// Parse a raw __eh_frame section using the linear DwarfParser constructor. +// image_base is set to buf.data() so that pcrel=0 yields range_start = field_offset_in_buf. +static DwarfParser* parseBuf(const std::vector& buf) { + const char* base = reinterpret_cast(buf.data()); + return new DwarfParser("test", base, base, buf.size()); +} + +TEST(DwarfEhFrame, EmptySection) { + std::vector buf; + DwarfParser* dwarf = parseBuf(buf); + EXPECT_EQ(dwarf->count(), 0); + free(dwarf->table()); + delete dwarf; +} + +TEST(DwarfEhFrame, TerminatorOnly) { + std::vector buf; + appendTerminator(buf); + DwarfParser* dwarf = parseBuf(buf); + EXPECT_EQ(dwarf->count(), 0); + free(dwarf->table()); + delete dwarf; +} + +TEST(DwarfEhFrame, CieOnly) { + std::vector buf; + appendCie(buf); + appendTerminator(buf); + DwarfParser* dwarf = parseBuf(buf); + // CIE alone generates no frame records. + EXPECT_EQ(dwarf->count(), 0); + free(dwarf->table()); + delete dwarf; +} + +TEST(DwarfEhFrame, CieAndFde) { + // CIE starts at offset 0. 
+ std::vector buf; + appendCie(buf); // 15 bytes + appendFde(buf, 0, 256); // 17 bytes (cie_offset = 19) + appendTerminator(buf); // 4 bytes + ASSERT_EQ(buf.size(), static_cast(36)); + + DwarfParser* dwarf = parseBuf(buf); + // The FDE with no instructions generates two records: + // one from parseInstructions (initial state at range_start) and one sentinel (at range_start + range_len). + EXPECT_EQ(dwarf->count(), 2); + + // Table must be in ascending loc order (sorted). + const FrameDesc* table = dwarf->table(); + ASSERT_NE(table, nullptr); + EXPECT_LT(table[0].loc, table[1].loc); + + // Sentinel record covers the end of the FDE's range. + // range_start = offset of pcrel field in buf = 15+4+4 = 23; range_end = 23+256 = 279. + EXPECT_EQ(table[1].loc, static_cast(279)); + + free(dwarf->table()); + delete dwarf; +} + +// --- Bounds-guard tests --- + +TEST(DwarfEhFrame, TruncatedRecord) { + // Build a valid CIE then truncate the buffer so record_end > section_end. + // The length-overflow guard should fire and produce no records. + std::vector buf; + appendCie(buf); // 15 bytes: length=11, so record_end = 15 + buf.resize(10); // section_end = 10 < record_end → overflow guard triggers + DwarfParser* dwarf = parseBuf(buf); + EXPECT_EQ(dwarf->count(), 0); + free(dwarf->table()); + delete dwarf; +} + +TEST(DwarfEhFrame, ShortCieBody) { + // CIE with length=4: body is exactly cie_id (4 bytes), nothing else. + // After reading cie_id, _ptr == record_end; the version/augmentation guard triggers. + std::vector buf; + put32(buf, 4); // length = 4 + put32(buf, 0); // cie_id = 0 → CIE + appendTerminator(buf); + DwarfParser* dwarf = parseBuf(buf); + EXPECT_EQ(dwarf->count(), 0); + free(dwarf->table()); + delete dwarf; +} + +TEST(DwarfEhFrame, FdeAugDataOverrun) { + // CIE with 'z' augmentation followed by an FDE whose aug-data-length encodes + // a value (100) larger than remaining bytes in the record (0). + // The FDE should be skipped without a crash. 
+ std::vector buf; + appendCie(buf); // 15 bytes; _has_z_augmentation = true + + // FDE body: cie_offset(4) + range_start(4) + range_len(4) + aug_data_len(1) = 13 + // aug_data_len = 100 but no aug data bytes follow → _ptr += 100 > record_end → break + uint32_t cie_id_field_offset = static_cast(buf.size()) + 4; + put32(buf, 13); // length + put32(buf, cie_id_field_offset - 0); // cie_offset back to CIE at offset 0 + put32(buf, 0); // range_start pcrel + put32(buf, 128); // range_len + put8(buf, 100); // aug_data_len = 100 but 0 bytes of aug data follow + appendTerminator(buf); + DwarfParser* dwarf = parseBuf(buf); + EXPECT_EQ(dwarf->count(), 0); + free(dwarf->table()); + delete dwarf; +} + +#endif // DWARF_SUPPORTED