From caaf83e5194f2f8982455d6d328e5391f8158643 Mon Sep 17 00:00:00 2001 From: Sertonix Date: Sat, 14 Mar 2026 20:48:06 +0100 Subject: [PATCH] Add and use readLE/writeLE helpers According to godbolt.org these functions optimize to a simple *(T *)ptr in many cases while ensuring that memory alignment requirements and endianness do not affect the behavior. The unroll pragma is needed for GCC to properly optimize the code. The approach of using bit shifts is also used in multiple other parts of binaryen (e.g. WasmBinaryReader::getInt16) but usage of the helper function doesn't seem to be that easy there. Ref https://github.com/WebAssembly/binaryen/issues/2983 --- src/literal.h | 3 ++- src/shell-interface.h | 25 ++---------------------- src/support/utilities.h | 33 ++++++++++++++++++++++++++++++++ src/tools/wasm-ctor-eval.cpp | 9 +++------ src/tools/wasm-fuzz-lattices.cpp | 5 +++-- src/tools/wasm-fuzz-types.cpp | 3 ++- src/wasm-interpreter.h | 9 +++------ src/wasm/literal.cpp | 23 +++++++--------------- 8 files changed, 55 insertions(+), 55 deletions(-) diff --git a/src/literal.h b/src/literal.h index 9eb27177d75..52970ee11f7 100644 --- a/src/literal.h +++ b/src/literal.h @@ -823,7 +823,8 @@ template<> struct hash { return digest; case wasm::Type::v128: uint64_t chunks[2]; - memcpy(&chunks, a.getv128Ptr(), 16); + chunks[0] = wasm::readLE(a.getv128Ptr()); + chunks[1] = wasm::readLE(&a.getv128Ptr()[8]); wasm::rehash(digest, chunks[0]); wasm::rehash(digest, chunks[1]); return digest; diff --git a/src/shell-interface.h b/src/shell-interface.h index 9a16499f4e3..e25c3412748 100644 --- a/src/shell-interface.h +++ b/src/shell-interface.h @@ -33,20 +33,9 @@ namespace wasm { struct ShellExternalInterface : ModuleRunner::ExternalInterface { - // The underlying memory can be accessed through unaligned pointers which - // isn't well-behaved in C++. WebAssembly nonetheless expects it to behave - // properly. 
Avoid emitting unaligned load/store by checking for alignment - // explicitly, and performing memcpy if unaligned. - // - // The allocated memory tries to have the same alignment as the memory being - // simulated. class Memory { // Use char because it doesn't run afoul of aliasing rules. std::vector memory; - template static bool aligned(const char* address) { - static_assert(!(sizeof(T) & (sizeof(T) - 1)), "must be a power of 2"); - return 0 == (reinterpret_cast(address) & (sizeof(T) - 1)); - } public: Memory() = default; @@ -65,20 +54,10 @@ struct ShellExternalInterface : ModuleRunner::ExternalInterface { } } template void set(size_t address, T value) { - if (aligned(&memory[address])) { - *reinterpret_cast(&memory[address]) = value; - } else { - std::memcpy(&memory[address], &value, sizeof(T)); - } + writeLE(value, &memory[address]); } template T get(size_t address) { - if (aligned(&memory[address])) { - return *reinterpret_cast(&memory[address]); - } else { - T loaded; - std::memcpy(&loaded, &memory[address], sizeof(T)); - return loaded; - } + return readLE(&memory[address]); } }; diff --git a/src/support/utilities.h b/src/support/utilities.h index ca2e1b89ba0..864b694fa3b 100644 --- a/src/support/utilities.h +++ b/src/support/utilities.h @@ -20,6 +20,7 @@ #include "compiler-support.h" #include +#include #include #include #include @@ -103,6 +104,38 @@ class Fatal { #define WASM_UNREACHABLE(msg) wasm::handle_unreachable() #endif +template::value>>::value, bool>::type = true> +void writeLE(T val, void *ptr) { + memcpy(ptr, val.data(), sizeof(T)); +} + +template::value, bool>::type = true> +void writeLE(T val, void *ptr) { + auto v = typename std::conditional::value, typename std::make_unsigned::type, T>::type(val); + unsigned char *buf = reinterpret_cast(ptr); +#pragma GCC unroll 10 + for (size_t i = 0; i < sizeof(T); ++i) + buf[i] = v >> (CHAR_BIT * i); +} + +template::value>>::value, bool>::type = true> +T readLE(const void *ptr) { + T v; + memcpy(v.data(), 
ptr, sizeof(T)); + return v; +} + +template::value, bool>::type = true> +T readLE(const void *ptr) { + using TU = typename std::conditional::value, typename std::make_unsigned::type, T>::type; + TU v = 0; + const unsigned char *buf = reinterpret_cast(ptr); +#pragma GCC unroll 10 + for (size_t i = 0; i < sizeof(T); ++i) + v += (TU)buf[i] << (CHAR_BIT * i); + return v; +} + } // namespace wasm #endif // wasm_support_utilities_h diff --git a/src/tools/wasm-ctor-eval.cpp b/src/tools/wasm-ctor-eval.cpp index 9b2800a2d84..5968df7ace7 100644 --- a/src/tools/wasm-ctor-eval.cpp +++ b/src/tools/wasm-ctor-eval.cpp @@ -37,6 +37,7 @@ #include "support/insert_ordered.h" #include "support/string.h" #include "support/topological_sort.h" +#include "support/utilities.h" #include "tool-options.h" #include "wasm-builder.h" #include "wasm-interpreter.h" @@ -497,15 +498,11 @@ struct CtorEvalExternalInterface : EvallingModuleRunner::ExternalInterface { } template void doStore(Address address, T value, Name memoryName) { - // Use memcpy to avoid UB if unaligned. - memcpy(getMemory(address, memoryName, sizeof(T)), &value, sizeof(T)); + writeLE(value, getMemory(address, memoryName, sizeof(T))); } template T doLoad(Address address, Name memoryName) { - // Use memcpy to avoid UB if unaligned. - T ret; - memcpy(&ret, getMemory(address, memoryName, sizeof(T)), sizeof(T)); - return ret; + return readLE(getMemory(address, memoryName, sizeof(T))); } // Clear the state of the operation of applying the interpreter's runtime diff --git a/src/tools/wasm-fuzz-lattices.cpp b/src/tools/wasm-fuzz-lattices.cpp index a6231cd42ea..25d09c1392d 100644 --- a/src/tools/wasm-fuzz-lattices.cpp +++ b/src/tools/wasm-fuzz-lattices.cpp @@ -37,6 +37,7 @@ #include "analysis/transfer-function.h" #include "support/command-line.h" +#include "support/utilities.h" #include "tools/fuzzing.h" #include "tools/fuzzing/random.h" @@ -995,7 +996,7 @@ struct Fuzzer { // Fewer bytes are needed to generate three random lattices. 
std::vector funcBytes(128); for (size_t i = 0; i < funcBytes.size(); i += sizeof(uint64_t)) { - *(uint64_t*)(funcBytes.data() + i) = getFuncRand(); + writeLE(getFuncRand(), funcBytes.data() + i); } Random rand(std::move(funcBytes)); @@ -1030,7 +1031,7 @@ struct Fuzzer { // 4kb of random bytes should be enough for anyone! std::vector bytes(4096); for (size_t i = 0; i < bytes.size(); i += sizeof(uint64_t)) { - *(uint64_t*)(bytes.data() + i) = getRand(); + writeLE(getRand(), bytes.data() + i); } Module testModule; diff --git a/src/tools/wasm-fuzz-types.cpp b/src/tools/wasm-fuzz-types.cpp index dc04ae96733..bca4745fdae 100644 --- a/src/tools/wasm-fuzz-types.cpp +++ b/src/tools/wasm-fuzz-types.cpp @@ -21,6 +21,7 @@ #include #include "support/command-line.h" +#include "support/utilities.h" #include "tools/fuzzing/heap-types.h" #include "tools/fuzzing/random.h" #include "wasm-type-printing.h" @@ -68,7 +69,7 @@ void Fuzzer::run(uint64_t seed) { // 4kb of random bytes should be enough for anyone! 
std::vector bytes(4096); for (size_t i = 0; i < bytes.size(); i += sizeof(uint64_t)) { - *(uint64_t*)(bytes.data() + i) = getRand(); + writeLE(getRand(), bytes.data() + i); } rand = Random(std::move(bytes)); diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 369745d0b67..cc6feedb3c2 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -47,6 +47,7 @@ #include "support/safe_integer.h" #include "support/stdckdint.h" #include "support/string.h" +#include "support/utilities.h" #include "wasm-builder.h" #include "wasm-limits.h" #include "wasm-traversal.h" @@ -2777,14 +2778,10 @@ class ExpressionRunner : public OverriddenVisitor { case Field::NotPacked: return Literal::makeFromMemory(p, field.type); case Field::i8: { - int8_t i; - memcpy(&i, p, sizeof(i)); - return truncateForPacking(Literal(int32_t(i)), field); + return truncateForPacking(Literal(int32_t(readLE(p))), field); } case Field::i16: { - int16_t i; - memcpy(&i, p, sizeof(i)); - return truncateForPacking(Literal(int32_t(i)), field); + return truncateForPacking(Literal(int32_t(readLE(p))), field); } case Field::WaitQueue: { WASM_UNREACHABLE("waitqueue not implemented"); diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 1bf14432c90..a57d486b786 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -240,8 +240,7 @@ static void extractBytes(uint8_t (&dest)[16], const LaneArray& lanes) { for (size_t lane_index = 0; lane_index < Lanes; ++lane_index) { uint8_t bits[16]; lanes[lane_index].getBits(bits); - LaneT lane; - memcpy(&lane, bits, sizeof(lane)); + LaneT lane = readLE(bits); for (size_t offset = 0; offset < lane_width; ++offset) { bytes.at(lane_index * lane_width + offset) = uint8_t(lane >> (8 * offset)); @@ -316,24 +315,16 @@ Literal Literal::makeFromMemory(void* p, Type type) { assert(type.isNumber()); switch (type.getBasic()) { case Type::i32: { - int32_t i; - memcpy(&i, p, sizeof(i)); - return Literal(i); + return Literal(readLE(p)); } case Type::i64: { - 
int64_t i; - memcpy(&i, p, sizeof(i)); - return Literal(i); + return Literal(readLE(p)); } case Type::f32: { - int32_t i; - memcpy(&i, p, sizeof(i)); - return Literal(bit_cast(i)); + return Literal(bit_cast(readLE(p))); } case Type::f64: { - int64_t i; - memcpy(&i, p, sizeof(i)); - return Literal(bit_cast(i)); + return Literal(bit_cast(readLE(p))); } case Type::v128: { uint8_t bytes[16]; @@ -460,11 +451,11 @@ void Literal::getBits(uint8_t (&buf)[16]) const { switch (type.getBasic()) { case Type::i32: case Type::f32: - memcpy(buf, &i32, sizeof(i32)); + writeLE(i32, buf); break; case Type::i64: case Type::f64: - memcpy(buf, &i64, sizeof(i64)); + writeLE(i64, buf); break; case Type::v128: memcpy(buf, &v128, sizeof(v128));