From 2818fd14c5f9fcce57e410cd9a03dead5a6e2fe7 Mon Sep 17 00:00:00 2001 From: Ernesto Lanchares Date: Thu, 13 Mar 2025 22:51:56 +0000 Subject: Fixed wrong calculation of locals size when parsing a wasm binary The fix involves moving the function leb128Decode over to parser. To me it makes more sense for the function to belong in that module so I think of it as a positive change. However I do not think that returning two values is really necessary. I think a proper solution would be either to parse the code or wrap the stream so we can count how many bytes are readed. Therefore we could use std.leb.readUleb128 which should be less error-prone. --- src/vm/parse.zig | 55 +++++++++++++++++++++++++++++++++++++++++++++++++------ src/vm/vm.zig | 35 ++++------------------------------- 2 files changed, 53 insertions(+), 37 deletions(-) (limited to 'src') diff --git a/src/vm/parse.zig b/src/vm/parse.zig index 40eefc1..7080e66 100644 --- a/src/vm/parse.zig +++ b/src/vm/parse.zig @@ -2,6 +2,46 @@ const std = @import("std"); const wasm = @import("wasm.zig"); const Allocator = std.mem.Allocator; +pub fn leb128Result(T: type) type { + return struct { len: usize, val: T }; +} + +pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) { + switch (@typeInfo(T)) { + .int => {}, + else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)), + } + if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) { + @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits})); + } + + var result: T = 0; + // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day... + var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0; + var byte: u8 = undefined; + var len: usize = 0; + while (stream.readByte()) |b| { + len += 1; + result |= @as(T, @intCast((b & 0x7f))) << shift; + if ((b & (0x1 << 7)) == 0) { + byte = b; + break; + } + shift += 7; + } else |err| { + return err; + } + + if (@typeInfo(T).int.signedness == .signed) { + const size = @sizeOf(T) * 8; + if (shift < size and (byte & 0x40) != 0) { + result |= (~@as(T, 0) << shift); + } + } + + return .{ .len = len, .val = result }; +} + pub const Error = error{ malformed_wasm, invalid_utf8, @@ -242,20 +282,23 @@ pub fn parseWasm(allocator: Allocator, stream: anytype) !Module { code = try allocator.alloc(FunctionBody, code_count); for (0..code_count) |i| { const code_size = try std.leb.readULEB128(u32, stream); - const local_count = try std.leb.readULEB128(u32, stream); - const locals = try allocator.alloc(Local, local_count); + var locals_size: usize = 0; + const local_count = try leb128Decode(u32, stream); + locals_size += local_count.len; + const locals = try allocator.alloc(Local, local_count.val); for (locals) |*l| { - const n = try std.leb.readULEB128(u32, stream); - l.types = try allocator.alloc(u8, n); + const n = try leb128Decode(u32, stream); + l.types = try allocator.alloc(u8, n.val); @memset(l.types, try stream.readByte()); + locals_size += n.len + 1; } code[i].locals = locals; // TODO: maybe is better to parse code into ast here and not do it every frame? // FIXME: This calculation is plain wrong. Resolving above TODO should help - code[i].code = try allocator.alloc(u8, code_size - local_count - 1); + code[i].code = try allocator.alloc(u8, code_size - locals_size); // TODO: better error reporting - if (try stream.read(code[i].code) != code_size - local_count - 1) return Error.malformed_wasm; + if (try stream.read(code[i].code) != code_size - locals_size) return Error.malformed_wasm; const f = Function{ .internal = @intCast(i) }; try funcs.append(f); diff --git a/src/vm/vm.zig b/src/vm/vm.zig index dfa457c..f8c7db5 100644 --- a/src/vm/vm.zig +++ b/src/vm/vm.zig @@ -4,37 +4,10 @@ const Parser = @import("parse.zig"); const Allocator = std.mem.Allocator; const AllocationError = error{OutOfMemory}; -pub fn leb128Decode(comptime T: type, bytes: []u8) struct { len: usize, val: T } { - switch (@typeInfo(T)) { - .int => {}, - else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)), - } - if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) { - @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits})); - } - - var result: T = 0; - // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day... - var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0; - var byte: u8 = undefined; - var len: usize = 0; - for (bytes) |b| { - len += 1; - result |= @as(T, @intCast((b & 0x7f))) << shift; - if ((b & (0x1 << 7)) == 0) { - byte = b; - break; - } - shift += 7; - } - if (@typeInfo(T).int.signedness == .signed) { - const size = @sizeOf(T) * 8; - if (shift < size and (byte & 0x40) != 0) { - result |= (~@as(T, 0) << shift); - } - } - - return .{ .len = len, .val = result }; +fn leb128Decode(comptime T: type, bytes: []u8) Parser.leb128Result(T) { + var fbs = std.io.fixedBufferStream(bytes); + // TODO: this catch should be unrecheable + return Parser.leb128Decode(T, fbs.reader()) catch .{ .len = 0, .val = 0 }; } pub fn decodeLittleEndian(comptime T: type, bytes: []u8) T { -- cgit v1.2.3