|
1 | 1 | /* |
2 | 2 | * Copyright 2021 Andrei Pangin |
| 3 | + * Copyright 2026, Datadog, Inc. |
3 | 4 | * |
4 | 5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | 6 | * you may not use this file except in compliance with the License. |
@@ -105,10 +106,29 @@ DwarfParser::DwarfParser(const char *name, const char *image_base, |
105 | 106 | _code_align = sizeof(instruction_t); |
106 | 107 | _data_align = -(int)sizeof(void *); |
107 | 108 | _linked_frame_size = -1; |
| 109 | + _has_z_augmentation = false; |
108 | 110 |
|
109 | 111 | parse(eh_frame_hdr); |
110 | 112 | } |
111 | 113 |
|
| 114 | +DwarfParser::DwarfParser(const char *name, const char *image_base, |
| 115 | + const char *eh_frame, size_t eh_frame_size) { |
| 116 | + _name = name; |
| 117 | + _image_base = image_base; |
| 118 | + |
| 119 | + _capacity = 128; |
| 120 | + _count = 0; |
| 121 | + _table = (FrameDesc *)malloc(_capacity * sizeof(FrameDesc)); |
| 122 | + _prev = NULL; |
| 123 | + |
| 124 | + _code_align = sizeof(instruction_t); |
| 125 | + _data_align = -(int)sizeof(void *); |
| 126 | + _linked_frame_size = -1; |
| 127 | + _has_z_augmentation = false; |
| 128 | + |
| 129 | + parseEhFrame(eh_frame, eh_frame_size); |
| 130 | +} |
| 131 | + |
112 | 132 | static constexpr u8 omit_sign_bit(u8 value) { |
113 | 133 | // each signed flag = unsigned equivalent | 0x80 |
114 | 134 | return value & 0xf7; |
@@ -144,6 +164,81 @@ void DwarfParser::parse(const char *eh_frame_hdr) { |
144 | 164 | } |
145 | 165 | } |
146 | 166 |
|
| 167 | +// Parse raw .eh_frame (or __eh_frame on macOS) without a binary-search index. |
| 168 | +// Records are CIE/FDE sequences laid out linearly; terminated by a 4-byte zero. |
| 169 | +void DwarfParser::parseEhFrame(const char *eh_frame, size_t size) { |
| 170 | + if (eh_frame == NULL || size < 4) { |
| 171 | + return; |
| 172 | + } |
| 173 | + const char *section_end = eh_frame + size; |
| 174 | + _ptr = eh_frame; |
| 175 | + |
| 176 | + while (_ptr + 4 <= section_end) { |
| 177 | + const char *record_start = _ptr; |
| 178 | + u32 length = get32(); |
| 179 | + if (length == 0) { |
| 180 | + break; // terminator |
| 181 | + } |
| 182 | + if (length == 0xffffffff) { |
| 183 | + break; // 64-bit DWARF not supported |
| 184 | + } |
| 185 | + |
| 186 | + const char *record_end = record_start + 4 + length; |
| 187 | + if (record_end > section_end) { |
| 188 | + break; |
| 189 | + } |
| 190 | + |
| 191 | + u32 cie_id = get32(); |
| 192 | + |
| 193 | + if (cie_id == 0) { |
| 194 | + // CIE: update code and data alignment factors. |
| 195 | + // Layout after cie_id: [1-byte version][augmentation string][code_align LEB][data_align SLEB]... |
| 196 | + // |
| 197 | + // _has_z_augmentation is overwritten by every CIE encountered. The DWARF spec allows |
| 198 | + // multiple CIEs with different augmentation strings in a single .eh_frame section, so |
| 199 | + // strictly speaking each FDE should resolve its own CIE via the backward cie_id offset. |
| 200 | + // We intentionally skip that: macOS binaries compiled by clang always emit a single CIE |
| 201 | + // per module, and this parser is only called for macOS __eh_frame sections. Multi-CIE |
| 202 | + // binaries are not produced by the toolchains we target here. |
| 203 | + _ptr++; // skip version |
| 204 | + _has_z_augmentation = (*_ptr == 'z'); |
| 205 | + while (_ptr < record_end && *_ptr++) { |
| 206 | + } // skip null-terminated augmentation string |
| 207 | + if (_ptr + 2 > record_end) { |
| 208 | + _ptr = record_end; |
| 209 | + continue; |
| 210 | + } |
| 211 | + _code_align = getLeb(); |
| 212 | + _data_align = getSLeb(); |
| 213 | + } else { |
| 214 | + // FDE: parse frame description for the covered PC range. |
| 215 | + // After cie_id: [pcrel-range-start 4 bytes][range-len 4 bytes][aug-data-len LEB][aug-data][instructions] |
| 216 | + // The augmentation data length field (and the data itself) is only present when the CIE |
| 217 | + // augmentation string starts with 'z'. |
| 218 | + if (_ptr + 8 > record_end) { |
| 219 | + break; |
| 220 | + } |
| 221 | + u32 range_start = (u32)(getPtr() - _image_base); |
| 222 | + u32 range_len = get32(); |
| 223 | + if (_has_z_augmentation) { |
| 224 | + _ptr += getLeb(); // skip augmentation data length + data |
| 225 | + if (_ptr > record_end) { |
| 226 | + break; |
| 227 | + } |
| 228 | + } |
| 229 | + parseInstructions(range_start, record_end); |
| 230 | + addRecord(range_start + range_len, DW_REG_FP, LINKED_FRAME_SIZE, |
| 231 | + -LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT); |
| 232 | + } |
| 233 | + |
| 234 | + _ptr = record_end; |
| 235 | + } |
| 236 | + |
| 237 | + if (_count > 1) { |
| 238 | + qsort(_table, _count, sizeof(FrameDesc), FrameDesc::comparator); |
| 239 | + } |
| 240 | +} |
| 241 | + |
147 | 242 | void DwarfParser::parseCie() { |
148 | 243 | u32 cie_len = get32(); |
149 | 244 | if (cie_len == 0 || cie_len == 0xffffffff) { |
|
0 commit comments