Fixed wrong calculation of locals size when parsing a wasm binary

The fix involves moving the function leb128Decode over to parser. To me it makes more sense for the function to belong in that module so I think of it as a positive change. However I do not think that returning two values is really necessary. I think a proper solution would be either to parse the code or wrap the stream so we can count how many bytes are readed. Therefore we could use std.leb.readUleb128 which should be less error-prone.
author: Ernesto Lanchares <elancha98@proton.me> 2025-03-13 22:51:56 +0000
committer: Lorenzo Torres <torres@sideros.org> 2025-03-14 16:50:59 +0100
commit: 2818fd14c5f9fcce57e410cd9a03dead5a6e2fe7 (patch)
tree: 6fd1cd91c4627f1f3dd93b227c01dc1bd4214b40 /src/vm/parse.zig
parent: 1c3f9702afa0638c386c6217fa57873823567887 (diff)
1 files changed, 49 insertions, 6 deletions
diff --git a/src/vm/parse.zig b/src/vm/parse.zig
index 40eefc1..7080e66 100644
--- a/src/vm/parse.zig
+++ b/src/vm/parse.zig
@@ -2,6 +2,46 @@ const std = @import("std");
 const wasm = @import("wasm.zig");
 const Allocator = std.mem.Allocator;
 
+pub fn leb128Result(T: type) type {
+    return struct { len: usize, val: T };
+}
+
+pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) {
+    switch (@typeInfo(T)) {
+        .int => {},
+        else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
+    }
+    if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) {
+        @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits}));
+    }
+
+    var result: T = 0;
+    // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day...
+    var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0;
+    var byte: u8 = undefined;
+    var len: usize = 0;
+    while (stream.readByte()) |b| {
+        len += 1;
+        result |= @as(T, @intCast((b & 0x7f))) << shift;
+        if ((b & (0x1 << 7)) == 0) {
+            byte = b;
+            break;
+        }
+        shift += 7;
+    } else |err| {
+        return err;
+    }
+
+    if (@typeInfo(T).int.signedness == .signed) {
+        const size = @sizeOf(T) * 8;
+        if (shift < size and (byte & 0x40) != 0) {
+            result |= (~@as(T, 0) << shift);
+        }
+    }
+
+    return .{ .len = len, .val = result };
+}
+
 pub const Error = error{
     malformed_wasm,
     invalid_utf8,
@@ -242,20 +282,23 @@ pub fn parseWasm(allocator: Allocator, stream: anytype) !Module {
                 code = try allocator.alloc(FunctionBody, code_count);
                 for (0..code_count) |i| {
                     const code_size = try std.leb.readULEB128(u32, stream);
-                    const local_count = try std.leb.readULEB128(u32, stream);
-                    const locals = try allocator.alloc(Local, local_count);
+                    var locals_size: usize = 0;
+                    const local_count = try leb128Decode(u32, stream);
+                    locals_size += local_count.len;
+                    const locals = try allocator.alloc(Local, local_count.val);
                     for (locals) |*l| {
-                        const n = try std.leb.readULEB128(u32, stream);
-                        l.types = try allocator.alloc(u8, n);
+                        const n = try leb128Decode(u32, stream);
+                        l.types = try allocator.alloc(u8, n.val);
                         @memset(l.types, try stream.readByte());
+                        locals_size += n.len + 1;
                     }
                     code[i].locals = locals;
 
                     // TODO: maybe is better to parse code into ast here and not do it every frame?
                     // FIXME: This calculation is plain wrong. Resolving above TODO should help
-                    code[i].code = try allocator.alloc(u8, code_size - local_count - 1);
+                    code[i].code = try allocator.alloc(u8, code_size - locals_size);
                     // TODO: better error reporting
-                    if (try stream.read(code[i].code) != code_size - local_count - 1) return Error.malformed_wasm;
+                    if (try stream.read(code[i].code) != code_size - locals_size) return Error.malformed_wasm;
 
                     const f = Function{ .internal = @intCast(i) };
                     try funcs.append(f);
author	Ernesto Lanchares <elancha98@proton.me>	2025-03-13 22:51:56 +0000
committer	Lorenzo Torres <torres@sideros.org>	2025-03-14 16:50:59 +0100
commit	2818fd14c5f9fcce57e410cd9a03dead5a6e2fe7 (patch)
tree	6fd1cd91c4627f1f3dd93b227c01dc1bd4214b40 /src/vm/parse.zig
parent	1c3f9702afa0638c386c6217fa57873823567887 (diff)