diff options
author | Ernesto Lanchares <elancha98@proton.me> | 2025-03-13 22:51:56 +0000 |
---|---|---|
committer | Lorenzo Torres <torres@sideros.org> | 2025-03-14 16:50:59 +0100 |
commit | 2818fd14c5f9fcce57e410cd9a03dead5a6e2fe7 (patch) | |
tree | 6fd1cd91c4627f1f3dd93b227c01dc1bd4214b40 /src/vm/parse.zig | |
parent | 1c3f9702afa0638c386c6217fa57873823567887 (diff) |
Fixed wrong calculation of locals size when parsing a wasm binary
The fix involves moving the function leb128Decode over to parser. To me
it makes more sense for the function to belong in that module so I think
of it as a positive change. However I do not think that returning two
values is really necessary. I think a proper solution would be either
to parse the code or wrap the stream so we can count how many bytes are
readed. Therefore we could use std.leb.readUleb128 which should be less
error-prone.
Diffstat (limited to 'src/vm/parse.zig')
-rw-r--r-- | src/vm/parse.zig | 55 |
1 files changed, 49 insertions, 6 deletions
diff --git a/src/vm/parse.zig b/src/vm/parse.zig index 40eefc1..7080e66 100644 --- a/src/vm/parse.zig +++ b/src/vm/parse.zig @@ -2,6 +2,46 @@ const std = @import("std"); const wasm = @import("wasm.zig"); const Allocator = std.mem.Allocator; +pub fn leb128Result(T: type) type { + return struct { len: usize, val: T }; +} + +pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) { + switch (@typeInfo(T)) { + .int => {}, + else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)), + } + if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) { + @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits})); + } + + var result: T = 0; + // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day... + var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0; + var byte: u8 = undefined; + var len: usize = 0; + while (stream.readByte()) |b| { + len += 1; + result |= @as(T, @intCast((b & 0x7f))) << shift; + if ((b & (0x1 << 7)) == 0) { + byte = b; + break; + } + shift += 7; + } else |err| { + return err; + } + + if (@typeInfo(T).int.signedness == .signed) { + const size = @sizeOf(T) * 8; + if (shift < size and (byte & 0x40) != 0) { + result |= (~@as(T, 0) << shift); + } + } + + return .{ .len = len, .val = result }; +} + pub const Error = error{ malformed_wasm, invalid_utf8, @@ -242,20 +282,23 @@ pub fn parseWasm(allocator: Allocator, stream: anytype) !Module { code = try allocator.alloc(FunctionBody, code_count); for (0..code_count) |i| { const code_size = try std.leb.readULEB128(u32, stream); - const local_count = try std.leb.readULEB128(u32, stream); - const locals = try allocator.alloc(Local, local_count); + var locals_size: usize = 0; + const local_count = try leb128Decode(u32, stream); + locals_size += local_count.len; + const locals = try allocator.alloc(Local, local_count.val); for (locals) |*l| { - const n = try std.leb.readULEB128(u32, stream); - l.types = try allocator.alloc(u8, n); + const n = try leb128Decode(u32, stream); + l.types = try allocator.alloc(u8, n.val); @memset(l.types, try stream.readByte()); + locals_size += n.len + 1; } code[i].locals = locals; // TODO: maybe is better to parse code into ast here and not do it every frame? // FIXME: This calculation is plain wrong. Resolving above TODO should help - code[i].code = try allocator.alloc(u8, code_size - local_count - 1); + code[i].code = try allocator.alloc(u8, code_size - locals_size); // TODO: better error reporting - if (try stream.read(code[i].code) != code_size - local_count - 1) return Error.malformed_wasm; + if (try stream.read(code[i].code) != code_size - locals_size) return Error.malformed_wasm; const f = Function{ .internal = @intCast(i) }; try funcs.append(f); |