Skip to content

Commit 2e93e3e

Browse files
committed
Add and use readLE/writeLE helpers
According to godbolt.org these functions optimize to a simple *(T *)ptr in many cases while ensuring that memory alignment requirements and endianess does not effect the behavior. The unroll pragma is needed for GCC to properly optimize the code. The approach of using bit shifts is also used in multiple other parts of binaryen (eg. WasmBinaryReader::getInt16) but usage of the helper function doesn't seem to be that easy there. Ref #2983
1 parent 216ea0b commit 2e93e3e

8 files changed

Lines changed: 77 additions & 55 deletions

File tree

src/literal.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <array>
2121
#include <iostream>
2222

23+
#include "support/bits.h"
2324
#include "support/hash.h"
2425
#include "support/name.h"
2526
#include "support/small_vector.h"
@@ -823,7 +824,8 @@ template<> struct hash<wasm::Literal> {
823824
return digest;
824825
case wasm::Type::v128:
825826
uint64_t chunks[2];
826-
memcpy(&chunks, a.getv128Ptr(), 16);
827+
chunks[0] = wasm::Bits::readLE<uint64_t>(a.getv128Ptr());
828+
chunks[1] = wasm::Bits::readLE<uint64_t>(&a.getv128Ptr()[8]);
827829
wasm::rehash(digest, chunks[0]);
828830
wasm::rehash(digest, chunks[1]);
829831
return digest;

src/shell-interface.h

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "interpreter/exception.h"
2626
#include "ir/module-utils.h"
2727
#include "shared-constants.h"
28+
#include "support/bits.h"
2829
#include "support/name.h"
2930
#include "support/utilities.h"
3031
#include "wasm-interpreter.h"
@@ -33,20 +34,9 @@
3334
namespace wasm {
3435

3536
struct ShellExternalInterface : ModuleRunner::ExternalInterface {
36-
// The underlying memory can be accessed through unaligned pointers which
37-
// isn't well-behaved in C++. WebAssembly nonetheless expects it to behave
38-
// properly. Avoid emitting unaligned load/store by checking for alignment
39-
// explicitly, and performing memcpy if unaligned.
40-
//
41-
// The allocated memory tries to have the same alignment as the memory being
42-
// simulated.
4337
class Memory {
4438
// Use char because it doesn't run afoul of aliasing rules.
4539
std::vector<char> memory;
46-
template<typename T> static bool aligned(const char* address) {
47-
static_assert(!(sizeof(T) & (sizeof(T) - 1)), "must be a power of 2");
48-
return 0 == (reinterpret_cast<uintptr_t>(address) & (sizeof(T) - 1));
49-
}
5040

5141
public:
5242
Memory() = default;
@@ -65,20 +55,10 @@ struct ShellExternalInterface : ModuleRunner::ExternalInterface {
6555
}
6656
}
6757
template<typename T> void set(size_t address, T value) {
68-
if (aligned<T>(&memory[address])) {
69-
*reinterpret_cast<T*>(&memory[address]) = value;
70-
} else {
71-
std::memcpy(&memory[address], &value, sizeof(T));
72-
}
58+
Bits::writeLE<T>(value, &memory[address]);
7359
}
7460
template<typename T> T get(size_t address) {
75-
if (aligned<T>(&memory[address])) {
76-
return *reinterpret_cast<T*>(&memory[address]);
77-
} else {
78-
T loaded;
79-
std::memcpy(&loaded, &memory[address], sizeof(T));
80-
return loaded;
81-
}
61+
return Bits::readLE<T>(&memory[address]);
8262
}
8363
};
8464

src/support/bits.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
#ifndef wasm_support_bits_h
1818
#define wasm_support_bits_h
1919

20+
#include <array>
2021
#include <climits>
2122
#include <cstdint>
23+
#include <cstring>
2224
#include <type_traits>
2325

2426
/*
@@ -94,6 +96,56 @@ template<typename T, typename U> inline static T rotateRight(T val, U count) {
9496
uint32_t log2(uint32_t v);
9597
uint32_t pow2(uint32_t v);
9698

99+
template<
100+
typename T,
101+
typename std::enable_if<
102+
std::is_same<T, typename std::array<uint8_t, std::tuple_size<T>::value>>::
103+
value,
104+
bool>::type = true>
105+
void writeLE(T val, void* ptr) {
106+
memcpy(ptr, val.data(), sizeof(T));
107+
}
108+
109+
template<typename T,
110+
typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
111+
void writeLE(T val, void* ptr) {
112+
auto v = typename std::conditional<std::is_signed<T>::value,
113+
typename std::make_unsigned<T>::type,
114+
T>::type(val);
115+
unsigned char* buf = reinterpret_cast<unsigned char*>(ptr);
116+
#pragma GCC unroll 10
117+
for (size_t i = 0; i < sizeof(T); ++i) {
118+
buf[i] = v >> (CHAR_BIT * i);
119+
}
120+
}
121+
122+
template<
123+
typename T,
124+
typename std::enable_if<
125+
std::is_same<T, typename std::array<uint8_t, std::tuple_size<T>::value>>::
126+
value,
127+
bool>::type = true>
128+
T readLE(const void* ptr) {
129+
T v;
130+
memcpy(v.data(), ptr, sizeof(T));
131+
return v;
132+
}
133+
134+
template<typename T,
135+
typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
136+
T readLE(const void* ptr) {
137+
using TU = typename std::conditional<std::is_signed<T>::value,
138+
typename std::make_unsigned<T>::type,
139+
T>::type;
140+
TU v = 0;
141+
const unsigned char* buf = reinterpret_cast<const unsigned char*>(ptr);
142+
#pragma GCC unroll 10
143+
for (size_t i = 0; i < sizeof(T); ++i) {
144+
v += (TU)buf[i] << (CHAR_BIT * i);
145+
}
146+
return v;
147+
}
148+
97149
} // namespace wasm::Bits
98150

99151
#endif // wasm_support_bits_h

src/tools/wasm-ctor-eval.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "ir/memory-utils.h"
3333
#include "ir/names.h"
3434
#include "pass.h"
35+
#include "support/bits.h"
3536
#include "support/colors.h"
3637
#include "support/file.h"
3738
#include "support/insert_ordered.h"
@@ -497,15 +498,11 @@ struct CtorEvalExternalInterface : EvallingModuleRunner::ExternalInterface {
497498
}
498499

499500
template<typename T> void doStore(Address address, T value, Name memoryName) {
500-
// Use memcpy to avoid UB if unaligned.
501-
memcpy(getMemory(address, memoryName, sizeof(T)), &value, sizeof(T));
501+
Bits::writeLE<T>(value, getMemory(address, memoryName, sizeof(T)));
502502
}
503503

504504
template<typename T> T doLoad(Address address, Name memoryName) {
505-
// Use memcpy to avoid UB if unaligned.
506-
T ret;
507-
memcpy(&ret, getMemory(address, memoryName, sizeof(T)), sizeof(T));
508-
return ret;
505+
return Bits::readLE<T>(getMemory(address, memoryName, sizeof(T)));
509506
}
510507

511508
// Clear the state of the operation of applying the interpreter's runtime

src/tools/wasm-fuzz-lattices.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "analysis/reaching-definitions-transfer-function.h"
3737
#include "analysis/transfer-function.h"
3838

39+
#include "support/bits.h"
3940
#include "support/command-line.h"
4041
#include "tools/fuzzing.h"
4142
#include "tools/fuzzing/random.h"
@@ -995,7 +996,7 @@ struct Fuzzer {
995996
// Fewer bytes are needed to generate three random lattices.
996997
std::vector<char> funcBytes(128);
997998
for (size_t i = 0; i < funcBytes.size(); i += sizeof(uint64_t)) {
998-
*(uint64_t*)(funcBytes.data() + i) = getFuncRand();
999+
Bits::writeLE<uint64_t>(getFuncRand(), funcBytes.data() + i);
9991000
}
10001001

10011002
Random rand(std::move(funcBytes));
@@ -1030,7 +1031,7 @@ struct Fuzzer {
10301031
// 4kb of random bytes should be enough for anyone!
10311032
std::vector<char> bytes(4096);
10321033
for (size_t i = 0; i < bytes.size(); i += sizeof(uint64_t)) {
1033-
*(uint64_t*)(bytes.data() + i) = getRand();
1034+
Bits::writeLE<uint64_t>(getRand(), bytes.data() + i);
10341035
}
10351036

10361037
Module testModule;

src/tools/wasm-fuzz-types.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <string>
2121
#include <variant>
2222

23+
#include "support/bits.h"
2324
#include "support/command-line.h"
2425
#include "tools/fuzzing/heap-types.h"
2526
#include "tools/fuzzing/random.h"
@@ -68,7 +69,7 @@ void Fuzzer::run(uint64_t seed) {
6869
// 4kb of random bytes should be enough for anyone!
6970
std::vector<char> bytes(4096);
7071
for (size_t i = 0; i < bytes.size(); i += sizeof(uint64_t)) {
71-
*(uint64_t*)(bytes.data() + i) = getRand();
72+
Bits::writeLE<uint64_t>(getRand(), bytes.data() + i);
7273
}
7374
rand = Random(std::move(bytes));
7475

src/wasm-interpreter.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2777,14 +2777,12 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
27772777
case Field::NotPacked:
27782778
return Literal::makeFromMemory(p, field.type);
27792779
case Field::i8: {
2780-
int8_t i;
2781-
memcpy(&i, p, sizeof(i));
2782-
return truncateForPacking(Literal(int32_t(i)), field);
2780+
return truncateForPacking(Literal(int32_t(Bits::readLE<int8_t>(p))),
2781+
field);
27832782
}
27842783
case Field::i16: {
2785-
int16_t i;
2786-
memcpy(&i, p, sizeof(i));
2787-
return truncateForPacking(Literal(int32_t(i)), field);
2784+
return truncateForPacking(Literal(int32_t(Bits::readLE<int16_t>(p))),
2785+
field);
27882786
}
27892787
case Field::WaitQueue: {
27902788
WASM_UNREACHABLE("waitqueue not implemented");

src/wasm/literal.cpp

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,7 @@ static void extractBytes(uint8_t (&dest)[16], const LaneArray<Lanes>& lanes) {
240240
for (size_t lane_index = 0; lane_index < Lanes; ++lane_index) {
241241
uint8_t bits[16];
242242
lanes[lane_index].getBits(bits);
243-
LaneT lane;
244-
memcpy(&lane, bits, sizeof(lane));
243+
LaneT lane = Bits::readLE<LaneT>(bits);
245244
for (size_t offset = 0; offset < lane_width; ++offset) {
246245
bytes.at(lane_index * lane_width + offset) =
247246
uint8_t(lane >> (8 * offset));
@@ -316,24 +315,16 @@ Literal Literal::makeFromMemory(void* p, Type type) {
316315
assert(type.isNumber());
317316
switch (type.getBasic()) {
318317
case Type::i32: {
319-
int32_t i;
320-
memcpy(&i, p, sizeof(i));
321-
return Literal(i);
318+
return Literal(Bits::readLE<int32_t>(p));
322319
}
323320
case Type::i64: {
324-
int64_t i;
325-
memcpy(&i, p, sizeof(i));
326-
return Literal(i);
321+
return Literal(Bits::readLE<int64_t>(p));
327322
}
328323
case Type::f32: {
329-
int32_t i;
330-
memcpy(&i, p, sizeof(i));
331-
return Literal(bit_cast<float>(i));
324+
return Literal(bit_cast<float>(Bits::readLE<int32_t>(p)));
332325
}
333326
case Type::f64: {
334-
int64_t i;
335-
memcpy(&i, p, sizeof(i));
336-
return Literal(bit_cast<double>(i));
327+
return Literal(bit_cast<double>(Bits::readLE<int64_t>(p)));
337328
}
338329
case Type::v128: {
339330
uint8_t bytes[16];
@@ -460,11 +451,11 @@ void Literal::getBits(uint8_t (&buf)[16]) const {
460451
switch (type.getBasic()) {
461452
case Type::i32:
462453
case Type::f32:
463-
memcpy(buf, &i32, sizeof(i32));
454+
Bits::writeLE<int32_t>(i32, buf);
464455
break;
465456
case Type::i64:
466457
case Type::f64:
467-
memcpy(buf, &i64, sizeof(i64));
458+
Bits::writeLE<int64_t>(i64, buf);
468459
break;
469460
case Type::v128:
470461
memcpy(buf, &v128, sizeof(v128));

0 commit comments

Comments
 (0)