author    Ernesto Lanchares <elancha98@proton.me>  2025-03-23 13:38:57 +0000
committer Lorenzo Torres <torres@sideros.org>      2025-03-23 14:39:49 +0100
commit    b7854d7325dfe35ca41e56dcccfb8fb7b7d0aa22 (patch)
tree      407925432c7c092ef763ae205c1936fa50bfb5e7 /src
parent    00d695e5f08ddff7ba66f2dd1aea4cdaf14f45e7 (diff)
Big rework of the parser!
It now follows a more functional style, but it should be waaay easier to add functionality. The parser is probably a bit slower than the previous one, but the code is much cleaner, and a good enough compiler should be able to inline the function calls and bring it on par with the previous one. As a TODO, runtime structs should not depend on the parser, but I think that is a topic for another commit.
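For reference, here is a minimal sketch of driving the reworked parser end to end, assembled from the commented-out block in main.zig below (the asset path, the 1 MB read limit, and the "preinit" export are the same placeholders used there; the loadCore helper name is only for illustration):

const std = @import("std");
const Parser = @import("mods/Parser.zig");
const vm = @import("mods/vm.zig");
const wasm = @import("mods/wasm.zig");

fn loadCore(allocator: std.mem.Allocator, global_runtime: *wasm.GlobalRuntime) !void {
    // Read the whole module into memory; the new parser works on a byte slice.
    const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
    defer file.close();
    const bytes = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB

    var parser = Parser{ .bytes = bytes, .byte_idx = 0, .allocator = allocator };
    const module = try parser.parseModule();

    var runtime = try vm.Runtime.init(allocator, module, global_runtime);
    defer runtime.deinit(allocator);

    // Call an exported function by name, as main.zig does with "preinit".
    var parameters = [_]usize{};
    try runtime.callExternal(allocator, "preinit", &parameters);
}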
Diffstat (limited to 'src')
-rw-r--r--  src/main.zig         19
-rw-r--r--  src/mods/Parser.zig  486
-rw-r--r--  src/mods/parse.zig   334
-rw-r--r--  src/mods/vm.zig      128
4 files changed, 605 insertions, 362 deletions
diff --git a/src/main.zig b/src/main.zig
index 597011d..97aa5bf 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -5,7 +5,7 @@ const window = @import("rendering/window.zig");
const config = @import("config");
const Renderer = @import("rendering/renderer_vulkan.zig");
const math = @import("math.zig");
-const Parser = @import("mods/parse.zig");
+const Parser = @import("mods/Parser.zig");
const vm = @import("mods/vm.zig");
const wasm = @import("mods/wasm.zig");
const components = @import("ecs/components.zig");
@@ -28,10 +28,19 @@ pub fn main() !void {
//defer global_runtime.deinit();
//try global_runtime.addFunction("debug", wasm.debug);
- //const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
- //const module = try Parser.parseWasm(allocator, file.reader());
- //var runtime = try vm.Runtime.init(allocator, module, &global_runtime);
- //defer runtime.deinit(allocator);
+ // const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
+ // const all = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB
+ // var parser = Parser{
+ // .bytes = all,
+ // .byte_idx = 0,
+ // .allocator = allocator,
+ // };
+ // const module = parser.parseModule() catch |err| {
+ // std.debug.print("[ERROR]: error at byte {x}(0x{x})\n", .{ parser.byte_idx, parser.bytes[parser.byte_idx] });
+ // return err;
+ // };
+ // var runtime = try vm.Runtime.init(allocator, module, &global_runtime);
+ // defer runtime.deinit(allocator);
//var parameters = [_]usize{};
//try runtime.callExternal(allocator, "preinit", &parameters);
diff --git a/src/mods/Parser.zig b/src/mods/Parser.zig
new file mode 100644
index 0000000..29f18d8
--- /dev/null
+++ b/src/mods/Parser.zig
@@ -0,0 +1,486 @@
+const std = @import("std");
+const vm = @import("vm.zig");
+const Allocator = std.mem.Allocator;
+
+bytes: []const u8,
+byte_idx: usize,
+allocator: Allocator,
+
+// TODO: We don't really need ArrayLists
+types: std.ArrayListUnmanaged(Functype) = .{},
+imports: std.ArrayListUnmanaged(Import) = .{},
+exports: std.StringHashMapUnmanaged(u32) = .{},
+functions: std.ArrayListUnmanaged(u32) = .{},
+memory: ?Memtype = null,
+code: std.ArrayListUnmanaged(Func) = .{},
+funcs: std.ArrayListUnmanaged(vm.Func) = .{},
+
+pub const FunctionType = struct {
+ parameters: []u8,
+ results: []u8,
+
+ pub fn deinit(self: FunctionType, allocator: Allocator) void {
+ allocator.free(self.parameters);
+ allocator.free(self.results);
+ }
+};
+
+pub const FunctionBody = struct {
+ locals: []Local,
+ code: []u8,
+};
+
+pub const FunctionScope = enum {
+ external,
+ internal,
+};
+
+const Parser = @This();
+
+pub const Error = error{
+ invalid_magic,
+ invalid_version,
+ invalid_section,
+ invalid_functype,
+ invalid_vectype,
+ invalid_numtype,
+ invalid_reftype,
+ invalid_valtype,
+ invalid_string,
+ invalid_limits,
+ invalid_globaltype,
+ invalid_importdesc,
+ invalid_exportdesc,
+ unterminated_wasm,
+};
+
+// TODO: This function should not exist
+fn warn(self: Parser, s: []const u8) void {
+ std.debug.print("[WARN]: Parsing of {s} unimplemented at byte index {d}\n", .{ s, self.byte_idx });
+}
+
+// TODO: remove peek
+fn peek(self: Parser) ?u8 {
+ return if (self.byte_idx < self.bytes.len) self.bytes[self.byte_idx] else null;
+}
+
+fn read(self: *Parser, n: usize) ![]const u8 {
+ if (self.byte_idx + n > self.bytes.len) return Error.unterminated_wasm;
+ defer self.byte_idx += n;
+ return self.bytes[self.byte_idx .. self.byte_idx + n];
+}
+
+// ==========
+// = VALUES =
+// ==========
+
+pub fn readByte(self: *Parser) !u8 {
+ return (try self.read(1))[0];
+}
+
+fn readU32(self: *Parser) !u32 {
+ return std.leb.readUleb128(u32, self);
+}
+
+fn readName(self: *Parser) ![]const u8 {
+ // NOTE: This should be the only vector not parsed through parseVector
+ const size = try self.readU32();
+ const str = try self.allocator.alloc(u8, size);
+ @memcpy(str, try self.read(size));
+ if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_string;
+ return str;
+}
+
+// =========
+// = TYPES =
+// =========
+// NOTE: This should return a value
+
+fn VectorFnResult(parse_fn: anytype) type {
+ const type_info = @typeInfo(@TypeOf(parse_fn));
+ if (type_info != .@"fn") {
+ @compileError("cannot determine return type of " ++ @typeName(@TypeOf(parse_fn)));
+ }
+ const ret_type = type_info.@"fn".return_type.?;
+ const ret_type_info = @typeInfo(ret_type);
+ return switch (ret_type_info) {
+ .error_union => ret_type_info.error_union.payload,
+ else => ret_type,
+ };
+}
+fn parseVector(self: *Parser, parse_fn: anytype) ![]VectorFnResult(parse_fn) {
+ const n = try self.readU32();
+ const ret = try self.allocator.alloc(VectorFnResult(parse_fn), n);
+ for (ret) |*i| {
+ i.* = try parse_fn(self);
+ }
+ return ret;
+}
+
+fn parseNumtype(self: *Parser) !std.wasm.Valtype {
+ return switch (try self.readByte()) {
+ 0x7F => .i32,
+ 0x7E => .i64,
+ 0x7D => .f32,
+ 0x7C => .f64,
+ else => Error.invalid_numtype,
+ };
+}
+
+fn parseVectype(self: *Parser) !std.wasm.Valtype {
+ return switch (try self.readByte()) {
+ 0x7B => .v128,
+ else => Error.invalid_vectype,
+ };
+}
+
+fn parseReftype(self: *Parser) !std.wasm.RefType {
+ return switch (try self.readByte()) {
+ 0x70 => .funcref,
+ 0x6F => .externref,
+ else => Error.invalid_reftype,
+ };
+}
+
+// NOTE: Parsing of Valtype can be improved but it makes it less close to spec so...
+// TODO: Do we really need Valtype?
+const Valtype = union(enum) {
+ val: std.wasm.Valtype,
+ ref: std.wasm.RefType,
+};
+fn parseValtype(self: *Parser) !Valtype {
+ const pb = self.peek() orelse return Error.unterminated_wasm;
+ return switch (pb) {
+ 0x7F, 0x7E, 0x7D, 0x7C => .{ .val = try self.parseNumtype() },
+ 0x7B => .{ .val = try self.parseVectype() },
+ 0x70, 0x6F => .{ .ref = try self.parseReftype() },
+ else => Error.invalid_valtype,
+ };
+}
+
+fn parseResultType(self: *Parser) ![]Valtype {
+ return try self.parseVector(Parser.parseValtype);
+}
+
+pub const Functype = struct {
+ parameters: []Valtype,
+ rt2: []Valtype,
+
+ pub fn deinit(self: Functype, allocator: Allocator) void {
+ allocator.free(self.parameters);
+ allocator.free(self.rt2);
+ }
+};
+fn parseFunctype(self: *Parser) !Functype {
+ if (try self.readByte() != 0x60) return Error.invalid_functype;
+ return .{
+ .parameters = try self.parseResultType(),
+ .rt2 = try self.parseResultType(),
+ };
+}
+
+const Limits = struct {
+ min: u32,
+ max: ?u32,
+};
+
+fn parseLimits(self: *Parser) !Limits {
+ return switch (try self.readByte()) {
+ 0x00 => .{
+ .min = try self.readU32(),
+ .max = null,
+ },
+ 0x01 => .{
+ .min = try self.readU32(),
+ .max = try self.readU32(),
+ },
+ else => Error.invalid_limits,
+ };
+}
+
+const Memtype = struct {
+ lim: Limits,
+};
+fn parseMemtype(self: *Parser) !Memtype {
+ return .{ .lim = try self.parseLimits() };
+}
+
+const Tabletype = struct {
+ et: std.wasm.RefType,
+ lim: Limits,
+};
+fn parseTabletype(self: *Parser) !Tabletype {
+ return .{
+ .et = try self.parseReftype(),
+ .lim = try self.parseLimits(),
+ };
+}
+
+const Globaltype = struct {
+ t: Valtype,
+ m: enum {
+ @"const",
+ @"var",
+ },
+};
+fn parseGlobaltype(self: *Parser) !Globaltype {
+ return .{
+ .t = try self.parseValtype(),
+ .m = switch (try self.readByte()) {
+ 0x00 => .@"const",
+ 0x01 => .@"var",
+ else => return Error.invalid_globaltype,
+ },
+ };
+}
+
+// ===========
+// = MODULES =
+// ===========
+// NOTE: This should not return anything but modify IR
+
+pub fn parseModule(self: *Parser) !vm.Module {
+ if (!std.mem.eql(u8, try self.read(4), &.{ 0x00, 0x61, 0x73, 0x6d })) return Error.invalid_magic;
+ if (!std.mem.eql(u8, try self.read(4), &.{ 0x01, 0x00, 0x00, 0x00 })) return Error.invalid_version;
+ // TODO: Ensure only one section of each type (except for custom section), some code depends on it
+ while (self.byte_idx < self.bytes.len) {
+ try switch (try self.readByte()) {
+ 0 => self.parseCustomsec(),
+ 1 => self.parseTypesec(),
+ 2 => self.parseImportsec(),
+ 3 => self.parseFuncsec(),
+ 4 => self.parseTablesec(),
+ 5 => self.parseMemsec(),
+ 6 => self.parseGlobalsec(),
+ 7 => self.parseExportsec(),
+ 8 => self.parseStartsec(),
+ 9 => self.parseElemsec(),
+ 10 => self.parseCodesec(),
+ 11 => self.parseDatasec(),
+ 12 => self.parseDatacountsec(),
+ else => return Error.invalid_section,
+ };
+ }
+
+ return .{
+ .memory = .{
+ .min = self.memory.?.lim.min,
+ .max = self.memory.?.lim.max,
+ },
+ .exports = self.exports,
+ .funcs = try self.funcs.toOwnedSlice(self.allocator),
+ .types = try self.types.toOwnedSlice(self.allocator),
+ .functions = try self.functions.toOwnedSlice(self.allocator),
+ .imports = try self.imports.toOwnedSlice(self.allocator),
+ .code = try self.code.toOwnedSlice(self.allocator),
+ };
+}
+
+fn parseCustomsec(self: *Parser) !void {
+ self.warn("customsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+fn parseTypesec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const ft = try self.parseVector(Parser.parseFunctype);
+ // TODO: Maybe the interface should be better?
+ try self.types.appendSlice(self.allocator, ft);
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+pub const Import = struct {
+ name: []const u8,
+ module: []const u8,
+ importdesc: union { func: u32, table: Tabletype, mem: Memtype, global: Globaltype },
+ pub fn deinit(self: Import, allocator: Allocator) void {
+ allocator.free(self.name);
+ allocator.free(self.module);
+ }
+};
+fn parseImport(self: *Parser) !Import {
+ return .{
+ .module = try self.readName(),
+ .name = try self.readName(),
+ .importdesc = switch (try self.readByte()) {
+ 0x00 => .{ .func = try self.readU32() },
+ 0x01 => .{ .table = try self.parseTabletype() },
+ 0x02 => .{ .mem = try self.parseMemtype() },
+ 0x03 => .{ .global = try self.parseGlobaltype() },
+ else => return Error.invalid_importdesc,
+ },
+ };
+}
+
+fn parseImportsec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const imports = try self.parseVector(Parser.parseImport);
+ try self.imports.appendSlice(self.allocator, imports);
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseFuncsec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const types = try self.parseVector(Parser.readU32);
+ try self.functions.appendSlice(self.allocator, types);
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseTablesec(self: *Parser) !void {
+ self.warn("tablesec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+fn parseMemsec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const mems = try self.parseVector(Parser.parseMemtype);
+ if (mems.len == 0) {
+ // WTF?
+ } else if (mems.len == 1) {
+ self.memory = mems[0];
+ } else {
+ std.debug.print("[WARN]: Parsing more than one memory is not yet supported\n", .{});
+ }
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseGlobalsec(self: *Parser) !void {
+ self.warn("globalsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+pub const Export = struct {
+ name: []const u8,
+ exportdesc: union(enum) { func: u32, table: u32, mem: u32, global: u32 },
+ pub fn deinit(self: Export, allocator: Allocator) void {
+ allocator.free(self.name);
+ }
+};
+
+fn parseExport(self: *Parser) !Export {
+ return .{
+ .name = try self.readName(),
+ .exportdesc = switch (try self.readByte()) {
+ 0x00 => .{ .func = try self.readU32() },
+ 0x01 => .{ .table = try self.readU32() },
+ 0x02 => .{ .mem = try self.readU32() },
+ 0x03 => .{ .global = try self.readU32() },
+ else => return Error.invalid_exportdesc,
+ },
+ };
+}
+
+fn parseExportsec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const exports = try self.parseVector(Parser.parseExport);
+ for (exports) |e| {
+ switch (e.exportdesc) {
+ .func => try self.exports.put(self.allocator, e.name, e.exportdesc.func),
+ else => std.debug.print("[WARN]: export ignored\n", .{}),
+ }
+ }
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseStartsec(self: *Parser) !void {
+ self.warn("startsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+fn parseElemsec(self: *Parser) !void {
+ self.warn("elemsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+pub const Func = struct {
+ locals: []Valtype,
+ code: []const u8,
+};
+const Local = struct {
+ n: u32,
+ t: Valtype,
+};
+fn parseLocal(self: *Parser) !Local {
+ return .{
+ .n = try self.readU32(),
+ .t = try self.parseValtype(),
+ };
+}
+
+fn parseCode(self: *Parser) !Func {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const locals = try self.parseVector(Parser.parseLocal);
+ var local_count: usize = 0;
+ for (locals) |l| {
+ local_count += l.n;
+ }
+
+ const func = Func{
+ .locals = try self.allocator.alloc(Valtype, local_count),
+ .code = try self.read(end_idx - self.byte_idx),
+ };
+
+ var li: usize = 0;
+ for (locals) |l| {
+ @memset(func.locals[li .. li + l.n], l.t);
+ li += l.n;
+ }
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+
+ return func;
+}
+
+fn parseCodesec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const codes = try self.parseVector(Parser.parseCode);
+ for (codes, 0..) |_, i| {
+ try self.funcs.append(self.allocator, .{ .internal = @intCast(i) });
+ }
+ try self.code.appendSlice(self.allocator, codes);
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseDatasec(self: *Parser) !void {
+ self.warn("datasec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+fn parseDatacountsec(self: *Parser) !void {
+ self.warn("datacountsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
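As an illustration of the commit message's point that adding functionality is now mostly plumbing, here is a hypothetical parseTablesec that actually parses instead of warning and skipping. It is only a sketch: the `tables` list is an assumed extra field (e.g. `tables: std.ArrayListUnmanaged(Tabletype) = .{}`) that this commit does not add, but the body would reuse parseVector and the existing parseTabletype unchanged, following the same pattern as parseFuncsec and parseMemsec.

fn parseTablesec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;

    // A table section is just a vector of table types.
    const tables = try self.parseVector(Parser.parseTabletype);
    // Assumed field; not part of this commit.
    try self.tables.appendSlice(self.allocator, tables);

    std.debug.assert(self.byte_idx == end_idx);
}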
diff --git a/src/mods/parse.zig b/src/mods/parse.zig
deleted file mode 100644
index f125303..0000000
--- a/src/mods/parse.zig
+++ /dev/null
@@ -1,334 +0,0 @@
-const std = @import("std");
-const wasm = @import("wasm.zig");
-const Allocator = std.mem.Allocator;
-
-pub fn leb128Result(T: type) type {
- return struct { len: usize, val: T };
-}
-
-pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) {
- switch (@typeInfo(T)) {
- .int => {},
- else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
- }
- if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) {
- @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits}));
- }
-
- var result: T = 0;
- // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day...
- var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0;
- var byte: u8 = undefined;
- var len: usize = 0;
- while (stream.readByte()) |b| {
- len += 1;
- result |= @as(T, @intCast((b & 0x7f))) << shift;
- if ((b & (0x1 << 7)) == 0) {
- byte = b;
- break;
- }
- shift += 7;
- } else |err| {
- return err;
- }
-
- if (@typeInfo(T).int.signedness == .signed) {
- const size = @sizeOf(T) * 8;
- if (shift < size and (byte & 0x40) != 0) {
- result |= (~@as(T, 0) << shift);
- }
- }
-
- return .{ .len = len, .val = result };
-}
-
-pub const Error = error{
- malformed_wasm,
- invalid_utf8,
-};
-
-pub const Module = struct {
- types: []FunctionType,
- imports: std.ArrayList(Import),
- exports: std.StringHashMap(u32),
- functions: []u32,
- memory: Memory,
- code: []FunctionBody,
- funcs: std.ArrayList(Function),
-
- pub fn deinit(self: *Module, allocator: Allocator) void {
- for (self.types) |t| {
- t.deinit(allocator);
- }
- allocator.free(self.types);
-
- for (self.imports.items) |i| {
- i.deinit(allocator);
- }
- self.imports.deinit();
-
- var iter = self.exports.iterator();
- while (iter.next()) |entry| {
- allocator.free(entry.key_ptr.*);
- }
- self.exports.deinit();
-
- allocator.free(self.functions);
-
- for (self.code) |f| {
- for (f.locals) |l| {
- allocator.free(l.types);
- }
- allocator.free(f.code);
- }
- allocator.free(self.code);
-
- self.funcs.deinit();
- }
-};
-
-pub const FunctionScope = enum {
- external,
- internal,
-};
-
-pub const Function = union(FunctionScope) {
- external: u8,
- internal: u8,
-};
-
-// TODO: refactor locals
-pub const Local = struct {
- types: []u8,
-};
-
-pub const FunctionBody = struct {
- locals: []Local,
- code: []u8,
-};
-
-pub const Memory = struct {
- initial: u32,
- max: u32,
-};
-
-pub const FunctionType = struct {
- parameters: []u8,
- results: []u8,
-
- pub fn deinit(self: FunctionType, allocator: Allocator) void {
- allocator.free(self.parameters);
- allocator.free(self.results);
- }
-};
-
-pub const Import = struct {
- name: []u8,
- module: []u8,
- signature: u32,
-
- pub fn deinit(self: Import, allocator: Allocator) void {
- allocator.free(self.name);
- allocator.free(self.module);
- }
-};
-
-pub fn parseType(t: u8) wasm.Type {
- return @enumFromInt(t);
-}
-
-pub fn parseName(allocator: Allocator, stream: anytype) ![]u8 {
- const size = try std.leb.readULEB128(u32, stream);
- const str = try allocator.alloc(u8, size);
- if (try stream.read(str) != size) {
- // TODO: better error
- return Error.malformed_wasm;
- }
-
- if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_utf8;
-
- return str;
-}
-
-// TODO: parse Global Section
-// TODO: Consider Arena allocator
-pub fn parseWasm(allocator: Allocator, stream: anytype) !Module {
- var types: []FunctionType = undefined;
- var imports = std.ArrayList(Import).init(allocator);
- var exports = std.StringHashMap(u32).init(allocator);
- var funcs = std.ArrayList(Function).init(allocator);
- var functions: []u32 = undefined;
- var memory: Memory = undefined;
- var code: []FunctionBody = undefined;
-
- // Parse magic
- if (!(try stream.isBytes(&[_]u8{ 0x00, 0x61, 0x73, 0x6d }))) return Error.malformed_wasm;
- // Parse version
- if (!(try stream.isBytes(&[_]u8{ 0x01, 0x00, 0x00, 0x00 }))) return Error.malformed_wasm;
-
- // NOTE: This ensures that (in this block) illegal behavior is safety-checked.
- // This slows down the code but since this function is only called at the start
- // I believe it is better to take the ``hit'' in performance (should only be @enumFromInt)
- // rather than having undefined behavior when user provides an invalid wasm file.
- @setRuntimeSafety(true);
- loop: while (stream.readByte()) |byte| {
- const section_size = try std.leb.readULEB128(u32, stream);
- switch (@as(std.wasm.Section, @enumFromInt(byte))) {
- std.wasm.Section.custom => {
- // TODO: unimplemented
- break :loop;
- },
- std.wasm.Section.type => {
- const type_count = try std.leb.readULEB128(u32, stream);
- types = try allocator.alloc(FunctionType, type_count);
- for (types) |*t| {
- if (!(try stream.isBytes(&.{0x60}))) return Error.malformed_wasm;
- const params_count = try std.leb.readULEB128(u32, stream);
- t.parameters = try allocator.alloc(u8, params_count);
- if (try stream.read(t.parameters) != params_count) {
- // TODO: better errors
- return Error.malformed_wasm;
- }
- const results = try std.leb.readULEB128(u32, stream);
- t.results = try allocator.alloc(u8, results);
- if (try stream.read(t.results) != results) {
- // TODO: better errors
- return Error.malformed_wasm;
- }
- }
- },
- std.wasm.Section.import => {
- // Can there be more than one import section?
- const import_count = try std.leb.readULEB128(u32, stream);
- for (0..import_count) |i| {
- const mod = try parseName(allocator, stream);
- const nm = try parseName(allocator, stream);
-
- const b = try stream.readByte();
- switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
- std.wasm.ExternalKind.function => {
- try funcs.append(.{ .external = @intCast(i) });
-
- const idx = try std.leb.readULEB128(u32, stream);
- try imports.append(.{
- .module = mod,
- .name = nm,
- .signature = idx,
- });
- },
- // TODO: not implemented
- std.wasm.ExternalKind.table => try stream.skipBytes(3, .{}),
- std.wasm.ExternalKind.memory => try stream.skipBytes(2, .{}),
- std.wasm.ExternalKind.global => try stream.skipBytes(2, .{}),
- }
- }
- },
- std.wasm.Section.function => {
- const function_count = try std.leb.readULEB128(u32, stream);
- functions = try allocator.alloc(u32, function_count);
- for (functions) |*f| {
- f.* = try std.leb.readULEB128(u32, stream);
- }
- },
- std.wasm.Section.table => {
- // TODO: not implemented
- try stream.skipBytes(section_size, .{});
- },
- std.wasm.Section.memory => {
- const memory_count = try std.leb.readULEB128(u32, stream);
- for (0..memory_count) |_| {
- const b = try stream.readByte();
- const n = try std.leb.readULEB128(u32, stream);
- var m: u32 = 0;
- switch (b) {
- 0x00 => {},
- 0x01 => m = try std.leb.readULEB128(u32, stream),
- else => return Error.malformed_wasm,
- }
- // TODO: support multiple memories
- memory = .{
- .initial = n,
- .max = m,
- };
- }
- },
- std.wasm.Section.global => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- // TODO: Can there be more than one export section? Otherwise we can optimize allocations
- std.wasm.Section.@"export" => {
- const export_count = try std.leb.readULEB128(u32, stream);
- for (0..export_count) |_| {
- const nm = try parseName(allocator, stream);
- const b = try stream.readByte();
- const idx = try std.leb.readULEB128(u32, stream);
- switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
- std.wasm.ExternalKind.function => try exports.put(nm, idx),
- // TODO: unimplemented,
- std.wasm.ExternalKind.table => allocator.free(nm),
- std.wasm.ExternalKind.memory => allocator.free(nm),
- std.wasm.ExternalKind.global => allocator.free(nm),
- }
- }
- },
- std.wasm.Section.start => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- std.wasm.Section.element => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- std.wasm.Section.code => {
- const code_count = try std.leb.readULEB128(u32, stream);
- code = try allocator.alloc(FunctionBody, code_count);
- for (0..code_count) |i| {
- const code_size = try std.leb.readULEB128(u32, stream);
- var locals_size: usize = 0;
- const local_count = try leb128Decode(u32, stream);
- locals_size += local_count.len;
- const locals = try allocator.alloc(Local, local_count.val);
- for (locals) |*l| {
- const n = try leb128Decode(u32, stream);
- l.types = try allocator.alloc(u8, n.val);
- @memset(l.types, try stream.readByte());
- locals_size += n.len + 1;
- }
- code[i].locals = locals;
-
- // TODO: maybe is better to parse code into ast here and not do it every frame?
- // FIXME: This calculation is plain wrong. Resolving above TODO should help
- code[i].code = try allocator.alloc(u8, code_size - locals_size);
- // TODO: better error reporting
- if (try stream.read(code[i].code) != code_size - locals_size) return Error.malformed_wasm;
-
- const f = Function{ .internal = @intCast(i) };
- try funcs.append(f);
- }
- },
- std.wasm.Section.data => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- std.wasm.Section.data_count => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- else => return Error.malformed_wasm,
- }
- } else |err| switch (err) {
- error.EndOfStream => {},
- else => return err,
- }
-
- return Module{
- .types = types,
- .imports = imports,
- .functions = functions,
- .memory = memory,
- .exports = exports,
- .code = code,
- .funcs = funcs,
- };
-}
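The hand-rolled LEB128 decoder from parse.zig survives below as leb128Decode_stream in vm.zig. As a quick sanity check of the encoding it handles, here is a standalone sketch (not part of the diff) that decodes the classic three-byte example with std.leb, the same helper the new Parser.zig uses for readU32:

const std = @import("std");

test "unsigned LEB128 worked example" {
    // 0xE5 0x8E 0x26 carries the 7-bit groups 0x65, 0x0E, 0x26 (the last byte
    // has its continuation bit clear), so the decoded value is
    //   0x65 + (0x0E << 7) + (0x26 << 14) = 101 + 1792 + 622592 = 624485
    var fbs = std.io.fixedBufferStream(&[_]u8{ 0xE5, 0x8E, 0x26 });
    const v = try std.leb.readUleb128(u32, fbs.reader());
    try std.testing.expectEqual(@as(u32, 624485), v);
}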
diff --git a/src/mods/vm.zig b/src/mods/vm.zig
index cbeb865..b2a373d 100644
--- a/src/mods/vm.zig
+++ b/src/mods/vm.zig
@@ -1,13 +1,83 @@
const std = @import("std");
const wasm = @import("wasm.zig");
-const Parser = @import("parse.zig");
+const Parser = @import("Parser.zig");
const Allocator = std.mem.Allocator;
const AllocationError = error{OutOfMemory};
-fn leb128Decode(comptime T: type, bytes: []u8) Parser.leb128Result(T) {
+pub const Memory = struct {
+ min: u32,
+ max: ?u32,
+};
+// TODO: Resolve function calls at parse time
+// TODO: Resolve function types at compile time
+pub const Func = union(enum) {
+ internal: u32,
+ external: u32,
+};
+
+pub const Module = struct {
+ memory: Memory,
+ funcs: []Func,
+ exports: std.StringHashMapUnmanaged(u32),
+ imports: []Parser.Import,
+ types: []Parser.Functype,
+ functions: []u32,
+ code: []Parser.Func,
+
+ fn deinit(self: *Module, allocator: Allocator) void {
+ self.exports.deinit(allocator);
+ allocator.free(self.funcs);
+ allocator.free(self.imports);
+ allocator.free(self.types);
+ allocator.free(self.functions);
+ allocator.free(self.code);
+ }
+};
+
+pub fn leb128Result(T: type) type {
+ return struct { len: usize, val: T };
+}
+
+pub fn leb128Decode_stream(comptime T: type, stream: anytype) !leb128Result(T) {
+ switch (@typeInfo(T)) {
+ .int => {},
+ else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
+ }
+ if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) {
+ @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits}));
+ }
+
+ var result: T = 0;
+ // TODO: is the type of shift important? Reading Wikipedia (not very thoroughly, though) it seems like we can use u32 and call it a day...
+ var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0;
+ var byte: u8 = undefined;
+ var len: usize = 0;
+ while (stream.readByte()) |b| {
+ len += 1;
+ result |= @as(T, @intCast((b & 0x7f))) << shift;
+ if ((b & (0x1 << 7)) == 0) {
+ byte = b;
+ break;
+ }
+ shift += 7;
+ } else |err| {
+ return err;
+ }
+
+ if (@typeInfo(T).int.signedness == .signed) {
+ const size = @sizeOf(T) * 8;
+ if (shift < size and (byte & 0x40) != 0) {
+ result |= (~@as(T, 0) << shift);
+ }
+ }
+
+ return .{ .len = len, .val = result };
+}
+
+fn leb128Decode(comptime T: type, bytes: []const u8) leb128Result(T) {
var fbs = std.io.fixedBufferStream(bytes);
// TODO: this catch should be unreachable
- return Parser.leb128Decode(T, fbs.reader()) catch .{ .len = 0, .val = 0 };
+ return leb128Decode_stream(T, fbs.reader()) catch .{ .len = 0, .val = 0 };
}
pub fn decodeLittleEndian(comptime T: type, bytes: []u8) T {
@@ -30,7 +100,7 @@ pub fn encodeLittleEndian(comptime T: type, bytes: *[]u8, value: T) void {
pub const CallFrame = struct {
program_counter: usize,
- code: []u8,
+ code: []const u8,
locals: []Value,
};
@@ -45,15 +115,20 @@ pub const Value = union(ValueType) {
};
pub const Runtime = struct {
- module: Parser.Module,
+ module: Module,
stack: std.ArrayList(Value),
call_stack: std.ArrayList(CallFrame),
memory: []u8,
global_runtime: *wasm.GlobalRuntime,
labels: std.ArrayList(usize),
- pub fn init(allocator: Allocator, module: Parser.Module, global_runtime: *wasm.GlobalRuntime) !Runtime {
- const memory = try allocator.alloc(u8, module.memory.max);
+ pub fn init(allocator: Allocator, module: Module, global_runtime: *wasm.GlobalRuntime) !Runtime {
+ // If memory max is not set, the memory is allowed to grow, but growing is not supported at the moment
+ const max = module.memory.max orelse 1_000;
+ if (module.memory.max == null) {
+ std.debug.print("[WARN]: growing memory is not yet supported, usign a default value of 1Kb\n", .{});
+ }
+ const memory = try allocator.alloc(u8, max);
return Runtime{
.module = module,
.stack = try std.ArrayList(Value).initCapacity(allocator, 10),
@@ -492,6 +567,7 @@ pub const Runtime = struct {
}
}
+ // TODO: Do name resolution
pub fn callExternal(self: *Runtime, allocator: Allocator, name: []const u8, parameters: []usize) !void {
if (self.module.exports.get(name)) |function| {
try self.call(allocator, function, parameters);
@@ -501,7 +577,7 @@ pub const Runtime = struct {
}
pub fn call(self: *Runtime, allocator: Allocator, function: usize, parameters: []usize) AllocationError!void {
- const f = self.module.funcs.items[function];
+ const f = self.module.funcs[function];
switch (f) {
.internal => {
const function_type = self.module.types[self.module.functions[f.internal]];
@@ -512,26 +588,32 @@ pub const Runtime = struct {
};
for (parameters, 0..) |p, i| {
- switch (Parser.parseType(function_type.parameters[i])) {
- .i32 => {
- frame.locals[i] = .{ .i32 = @intCast(p) };
+ switch (function_type.parameters[i]) {
+ .val => |v| switch (v) {
+ .i32 => {
+ frame.locals[i] = .{ .i32 = @intCast(p) };
+ },
+ .i64 => {
+ frame.locals[i] = .{ .i64 = @intCast(p) };
+ },
+ else => unreachable,
},
- .i64 => {
- frame.locals[i] = .{ .i64 = @intCast(p) };
- },
- else => unreachable,
+ .ref => unreachable,
}
}
for (self.module.code[f.internal].locals, function_type.parameters.len..) |local, i| {
- switch (Parser.parseType(local.types[0])) {
- .i32 => {
- frame.locals[i] = .{ .i32 = 0 };
- },
- .i64 => {
- frame.locals[i] = .{ .i64 = 0 };
+ switch (local) {
+ .val => |v| switch (v) {
+ .i32 => {
+ frame.locals[i] = .{ .i32 = 0 };
+ },
+ .i64 => {
+ frame.locals[i] = .{ .i64 = 0 };
+ },
+ else => unreachable,
},
- else => unreachable,
+ .ref => unreachable,
}
}
@@ -540,7 +622,7 @@ pub const Runtime = struct {
allocator.free(frame.locals);
},
.external => {
- const name = self.module.imports.items[f.external].name;
+ const name = self.module.imports[f.external].name;
if (self.global_runtime.functions.get(name)) |external| {
external(&self.stack);
}