Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ class ScanBuildPlugin : Plugin<Project> {
"scan-build",
"-o", outputDir.absolutePath,
"--force-analyze-debug-code",
// core.StackAddressEscape fires on the intentional setjmp/longjmp pattern in
// StackWalker::walkVM: the jmp_buf address is stored in vm_thread->exception()
// for the duration of the stack walk and is always restored before the function
// returns. The analyzer cannot prove the lifetime is safe, but we can.
"-disable-checker", "core.StackAddressEscape",
"--use-analyzer", analyzer,
"make", "-j$parallelJobs"
)
Expand Down
6 changes: 3 additions & 3 deletions ddprof-lib/src/main/cpp/codeCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ void CodeCache::copyFrom(const CodeCache& other) {
_imports_patchable = other._imports_patchable;

_dwarf_table_length = other._dwarf_table_length;
_dwarf_table = new FrameDesc[_dwarf_table_length];
_dwarf_table = (FrameDesc*)malloc(_dwarf_table_length * sizeof(FrameDesc));
memcpy(_dwarf_table, other._dwarf_table,
_dwarf_table_length * sizeof(FrameDesc));
_default_frame = other._default_frame;
Expand All @@ -120,7 +120,7 @@ CodeCache &CodeCache::operator=(const CodeCache &other) {
}

NativeFunc::destroy(_name);
delete[] _dwarf_table;
free(_dwarf_table);
delete[] _blobs;
free(_build_id);

Expand All @@ -135,7 +135,7 @@ CodeCache::~CodeCache() {
}
NativeFunc::destroy(_name);
delete[] _blobs;
delete[] _dwarf_table;
free(_dwarf_table);
free(_build_id); // Free build-id memory
}

Expand Down
102 changes: 100 additions & 2 deletions ddprof-lib/src/main/cpp/dwarf.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright 2021 Andrei Pangin
* Copyright 2026, Datadog, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -92,8 +93,7 @@ FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | LINKED_FRAME_SIZE << 8,
FrameDesc FrameDesc::default_clang_frame = {0, DW_REG_FP | LINKED_FRAME_CLANG_SIZE << 8, -LINKED_FRAME_CLANG_SIZE, -LINKED_FRAME_CLANG_SIZE + DW_STACK_SLOT};
FrameDesc FrameDesc::no_dwarf_frame = {0, DW_REG_INVALID, DW_REG_INVALID, DW_REG_INVALID};

DwarfParser::DwarfParser(const char *name, const char *image_base,
const char *eh_frame_hdr) {
void DwarfParser::init(const char *name, const char *image_base) {
_name = name;
_image_base = image_base;

Expand All @@ -105,10 +105,21 @@ DwarfParser::DwarfParser(const char *name, const char *image_base,
_code_align = sizeof(instruction_t);
_data_align = -(int)sizeof(void *);
_linked_frame_size = -1;
_has_z_augmentation = false;
}

// Constructs a parser from an eh_frame_hdr pointer: init() sets up the parser state for
// this module (name, image base, default alignment factors), then parse() consumes
// eh_frame_hdr.
DwarfParser::DwarfParser(const char *name, const char *image_base,
const char *eh_frame_hdr) {
init(name, image_base);
parse(eh_frame_hdr);
}

// Constructs a parser from a raw .eh_frame section of eh_frame_size bytes (no
// binary-search index): init() sets up the parser state, then parseEhFrame() walks the
// section's CIE/FDE records linearly.
DwarfParser::DwarfParser(const char *name, const char *image_base,
const char *eh_frame, size_t eh_frame_size) {
init(name, image_base);
parseEhFrame(eh_frame, eh_frame_size);
}

static constexpr u8 omit_sign_bit(u8 value) {
// each signed flag = unsigned equivalent | 0x08; the 0xf7 mask below clears that bit
return value & 0xf7;
Expand Down Expand Up @@ -144,6 +155,93 @@ void DwarfParser::parse(const char *eh_frame_hdr) {
}
}

// Parse raw .eh_frame (or __eh_frame on macOS) without a binary-search index.
// Records are CIE/FDE sequences laid out linearly; terminated by a 4-byte zero or EOF.
void DwarfParser::parseEhFrame(const char *eh_frame, size_t size) {
if (eh_frame == NULL || size < 4) {
return;
}
const char *section_end = eh_frame + size;
_ptr = eh_frame;

while (_ptr + 4 <= section_end) {
const char *record_start = _ptr;
u32 length = get32();
if (length == 0) {
break; // terminator
}
if (length == 0xffffffff) {
break; // 64-bit DWARF not supported
}

if (length > (size_t)(section_end - record_start) - 4) {
break;
}
const char *record_end = record_start + 4 + length;

u32 cie_id = get32();

if (cie_id == 0) {
// CIE: update code and data alignment factors.
// Layout after cie_id: [1-byte version][augmentation string \0][code_align LEB][data_align SLEB]
// [return_address_register][augmentation data (if 'z')]...
// return_address_register and everything after data_align are not consumed; _ptr = record_end
// at the bottom of the loop skips them.
//
// _has_z_augmentation is overwritten by every CIE encountered. The DWARF spec allows
// multiple CIEs with different augmentation strings in a single .eh_frame section, so
// strictly speaking each FDE should resolve its own CIE via the backward cie_id offset.
// We intentionally skip that: macOS binaries compiled by clang typically emit a single CIE
// per module, and this parser is only called for macOS __eh_frame sections. Multi-CIE
// binaries are not produced by the toolchains we target here.
if (_ptr >= record_end) {
_ptr = record_end;
continue;
}
_ptr++; // skip version
Comment on lines +196 to +201
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CIE parsing advances and dereferences _ptr (_ptr++ / *_ptr == 'z') without first ensuring there’s at least 1 byte available in the record. With a short/empty CIE body this can read past record_end. Add explicit if (_ptr >= record_end) { _ptr = record_end; continue; } (or similar) before skipping the version / reading the augmentation.

Suggested change
// binaries are not produced by the toolchains we target here.
_ptr++; // skip version
// binaries are not produced by the toolchains we target here.
if (_ptr >= record_end) {
_ptr = record_end;
continue;
}
_ptr++; // skip version
if (_ptr >= record_end) {
_ptr = record_end;
continue;
}

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Added if (_ptr >= record_end) { _ptr = record_end; continue; } guards before the version skip and before the augmentation string read.

if (_ptr >= record_end) {
_ptr = record_end;
continue;
}
_has_z_augmentation = (*_ptr == 'z');
while (_ptr < record_end && *_ptr++) {
} // skip null-terminated augmentation string
if (_ptr >= record_end) {
_ptr = record_end;
continue;
}
_code_align = getLeb(record_end);
_data_align = getSLeb(record_end);
} else {
// FDE: parse frame description for the covered PC range.
// After cie_id: [pcrel-range-start 4 bytes][range-len 4 bytes][aug-data-len LEB][aug-data][instructions]
// Assumes DW_EH_PE_pcrel | DW_EH_PE_sdata4 encoding for range-start (clang macOS default).
// The augmentation data length field (and the data itself) is only present when the CIE
// augmentation string starts with 'z'.
if (_ptr + 8 > record_end) {
break;
}
u32 range_start = (u32)(getPtr() - _image_base);
u32 range_len = get32();
if (_has_z_augmentation) {
_ptr += getLeb(record_end); // getLeb reads the length; advance past the augmentation data bytes
if (_ptr > record_end) {
break;
}
Comment on lines +226 to +230
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the FDE path, _ptr += getLeb() assumes the augmentation-data-length LEB is fully readable. If the record ends mid-LEB, getLeb() can read past record_end (same issue as in CIE parsing). Use a bounded LEB decoder (or at least check _ptr < record_end per-byte) and fail the record if the LEB doesn’t complete within record_end.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. The FDE augmentation-data-length read now uses getLeb(record_end) (the bounded overload).

}
parseInstructions(range_start, record_end);
addRecord(range_start + range_len, DW_REG_FP, LINKED_FRAME_SIZE,
-LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT);
}

_ptr = record_end;
}

if (_count > 1) {
qsort(_table, _count, sizeof(FrameDesc), FrameDesc::comparator);
}
}

void DwarfParser::parseCie() {
u32 cie_len = get32();
if (cie_len == 0 || cie_len == 0xffffffff) {
Expand Down
36 changes: 35 additions & 1 deletion ddprof-lib/src/main/cpp/dwarf.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright The async-profiler authors
* Copyright 2025, Datadog, Inc.
* Copyright 2025, 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -116,6 +116,7 @@ class DwarfParser {
u32 _code_align;
int _data_align;
int _linked_frame_size; // detected from FP-based DWARF entries; -1 = undetected
bool _has_z_augmentation;

const char* add(size_t size) {
const char* ptr = _ptr;
Expand Down Expand Up @@ -152,6 +153,18 @@ class DwarfParser {
}
}

// Bounded unsigned LEB128 decode: reads bytes from _ptr but never at or beyond `end`.
// Returns the decoded value truncated to 32 bits. If `end` is reached before a byte
// without the continuation bit (0x80) is seen, the bits gathered so far are returned and
// _ptr is left equal to `end`.
u32 getLeb(const char* end) {
    u32 result = 0;
    for (u32 shift = 0; _ptr < end; shift += 7) {
        u8 b = *_ptr++;
        // Shifting by >= 32 is undefined behaviour; bytes of an overlong encoding are still
        // consumed, but payload bits that cannot fit in 32 bits are dropped.
        if (shift < 32) {
            result |= (u32)(b & 0x7f) << shift;
        }
        if ((b & 0x80) == 0) {
            return result;
        }
    }
    return result;
}

int getSLeb() {
int result = 0;
for (u32 shift = 0; ; shift += 7) {
Expand All @@ -166,6 +179,21 @@ class DwarfParser {
}
}

int getSLeb(const char* end) {
int result = 0;
for (u32 shift = 0; _ptr < end; shift += 7) {
u8 b = *_ptr++;
result |= (b & 0x7f) << shift;
if ((b & 0x80) == 0) {
if ((b & 0x40) != 0 && (shift += 7) < 32) {
result |= ~0U << shift;
}
return result;
}
}
return result;
}

// Advances _ptr past one LEB128-encoded value by consuming bytes until one without the
// continuation bit (0x80) has been read. Unlike getLeb(const char* end), this performs
// no end-of-buffer check.
void skipLeb() {
while (*_ptr++ & 0x80) {}
}
Expand All @@ -178,7 +206,9 @@ class DwarfParser {
return ptr + offset;
}

void init(const char* name, const char* image_base);
void parse(const char* eh_frame_hdr);
void parseEhFrame(const char* eh_frame, size_t size);
void parseCie();
void parseFde();
void parseInstructions(u32 loc, const char* end);
Expand All @@ -189,7 +219,11 @@ class DwarfParser {

public:
DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr);
DwarfParser(const char* name, const char* image_base, const char* eh_frame, size_t eh_frame_size);

// Returns the raw _table pointer. Ownership of the returned pointer transfers to the caller.
// The caller is responsible for freeing it with free() (not delete[]).
// DwarfParser has no destructor, so it never releases _table itself; the parser keeps
// holding the same raw pointer, which becomes dangling once the caller frees the table.
FrameDesc* table() const {
return _table;
}
Expand Down
32 changes: 23 additions & 9 deletions ddprof-lib/src/main/cpp/symbols_macos.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright The async-profiler authors
* Copyright 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -11,8 +12,8 @@
#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include "symbols.h"
#include "dwarf.h"
#include "symbols.h"
#include "log.h"

UnloadProtection::UnloadProtection(const CodeCache *cc) {
Expand Down Expand Up @@ -139,15 +140,19 @@ class MachOParser {
const symtab_command* symtab = NULL;
const dysymtab_command* dysymtab = NULL;
const section_64* stubs_section = NULL;
bool has_eh_frame = false;

const char* eh_frame = NULL;
size_t eh_frame_size = 0;
for (uint32_t i = 0; i < header->ncmds; i++) {
if (lc->cmd == LC_SEGMENT_64) {
const segment_command_64* sc = (const segment_command_64*)lc;
if (strcmp(sc->segname, "__TEXT") == 0) {
_cc->updateBounds(_image_base, add(_image_base, sc->vmsize));
stubs_section = findSection(sc, "__stubs");
has_eh_frame = findSection(sc, "__eh_frame") != NULL;
const section_64* eh_frame_section = findSection(sc, "__eh_frame");
if (eh_frame_section != NULL) {
eh_frame = _vmaddr_slide + eh_frame_section->addr;
eh_frame_size = eh_frame_section->size;
}
} else if (strcmp(sc->segname, "__LINKEDIT") == 0) {
link_base = _vmaddr_slide + sc->vmaddr - sc->fileoff;
} else if (strcmp(sc->segname, "__DATA") == 0 || strcmp(sc->segname, "__DATA_CONST") == 0) {
Expand All @@ -171,11 +176,16 @@ class MachOParser {
}
}

// GCC emits __eh_frame (DWARF CFI); clang emits __unwind_info (compact unwind).
// On aarch64, GCC and clang use different frame layouts, so detecting the
// compiler matters. On x86_64 both use the same layout (no-op distinction).
const FrameDesc& frame = has_eh_frame ? FrameDesc::default_frame : FrameDesc::fallback_default_frame();
_cc->setDwarfTable(NULL, 0, frame);
if (DWARF_SUPPORTED && eh_frame != NULL && eh_frame_size > 0) {
DwarfParser dwarf(_cc->name(), _vmaddr_slide, eh_frame, eh_frame_size);
_cc->setDwarfTable(dwarf.table(), dwarf.count(), dwarf.detectedDefaultFrame());
} else {
// No __eh_frame (clang compact-unwind-only libraries): fall back to the
// library-wide default frame. On aarch64, clang uses a different frame
// layout from GCC, so we must pass fallback_default_frame() rather than
// letting CodeCache keep its constructor default of FrameDesc::default_frame.
_cc->setDwarfTable(NULL, 0, FrameDesc::fallback_default_frame());
}

return true;
}
Expand Down Expand Up @@ -239,4 +249,8 @@ bool Symbols::isLibcOrPthreadAddress(uintptr_t pc) {
return false;
}

// Empties _parsed_libraries by calling its clear() member.
// NOTE(review): presumably this lets previously seen libraries be parsed again on the next
// pass — confirm against the code that populates _parsed_libraries.
void Symbols::clearParsingCaches() {
_parsed_libraries.clear();
}

#endif // __APPLE__
Loading
Loading