author    Ernesto Lanchares <elancha98@proton.me>  2025-03-23 13:38:57 +0000
committer Lorenzo Torres <torres@sideros.org>      2025-03-23 14:39:49 +0100
commit    b7854d7325dfe35ca41e56dcccfb8fb7b7d0aa22 (patch)
tree      407925432c7c092ef763ae205c1936fa50bfb5e7 /src
parent    00d695e5f08ddff7ba66f2dd1aea4cdaf14f45e7 (diff)
Big rework of the parser!
It now follows a more functional style, but it should be waaay easier to add functionality. The parser is probably a bit slower than the previous one, but the code is much cleaner, and a good enough compiler should be able to inline the function calls and bring it on par with the previous one. As a TODO, runtime structs should not depend on the parser, but I think that is a topic for another commit.
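For reference, here is a minimal sketch of driving the reworked parser end to end, assembled from the commented-out block in main.zig below (the asset path, the 1 MB read limit, and the "preinit" export are the same placeholders used there; the loadCore helper name is only for illustration):

const std = @import("std");
const Parser = @import("mods/Parser.zig");
const vm = @import("mods/vm.zig");
const wasm = @import("mods/wasm.zig");

fn loadCore(allocator: std.mem.Allocator, global_runtime: *wasm.GlobalRuntime) !void {
    // Read the whole module into memory; the new parser works on a byte slice.
    const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
    defer file.close();
    const bytes = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB

    var parser = Parser{ .bytes = bytes, .byte_idx = 0, .allocator = allocator };
    const module = try parser.parseModule();

    var runtime = try vm.Runtime.init(allocator, module, global_runtime);
    defer runtime.deinit(allocator);

    // Call an exported function by name, as main.zig does with "preinit".
    var parameters = [_]usize{};
    try runtime.callExternal(allocator, "preinit", &parameters);
}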
Diffstat (limited to 'src')
-rw-r--r--  src/main.zig         19
-rw-r--r--  src/mods/Parser.zig  486
-rw-r--r--  src/mods/parse.zig   334
-rw-r--r--  src/mods/vm.zig      128
4 files changed, 605 insertions, 362 deletions
diff --git a/src/main.zig b/src/main.zig
index 597011d..97aa5bf 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -5,7 +5,7 @@ const window = @import("rendering/window.zig");
const config = @import("config");
const Renderer = @import("rendering/renderer_vulkan.zig");
const math = @import("math.zig");
-const Parser = @import("mods/parse.zig");
+const Parser = @import("mods/Parser.zig");
const vm = @import("mods/vm.zig");
const wasm = @import("mods/wasm.zig");
const components = @import("ecs/components.zig");
@@ -28,10 +28,19 @@ pub fn main() !void {
//defer global_runtime.deinit();
//try global_runtime.addFunction("debug", wasm.debug);
- //const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
- //const module = try Parser.parseWasm(allocator, file.reader());
- //var runtime = try vm.Runtime.init(allocator, module, &global_runtime);
- //defer runtime.deinit(allocator);
+ // const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
+ // const all = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB
+ // var parser = Parser{
+ // .bytes = all,
+ // .byte_idx = 0,
+ // .allocator = allocator,
+ // };
+ // const module = parser.parseModule() catch |err| {
+ // std.debug.print("[ERROR]: error at byte {x}(0x{x})\n", .{ parser.byte_idx, parser.bytes[parser.byte_idx] });
+ // return err;
+ // };
+ // var runtime = try vm.Runtime.init(allocator, module, &global_runtime);
+ // defer runtime.deinit(allocator);
//var parameters = [_]usize{};
//try runtime.callExternal(allocator, "preinit", &parameters);
diff --git a/src/mods/Parser.zig b/src/mods/Parser.zig
new file mode 100644
index 0000000..29f18d8
--- /dev/null
+++ b/src/mods/Parser.zig
@@ -0,0 +1,486 @@
+const std = @import("std");
+const vm = @import("vm.zig");
+const Allocator = std.mem.Allocator;
+
+bytes: []const u8,
+byte_idx: usize,
+allocator: Allocator,
+
+// TODO: We don't really need ArrayLists
+types: std.ArrayListUnmanaged(Functype) = .{},
+imports: std.ArrayListUnmanaged(Import) = .{},
+exports: std.StringHashMapUnmanaged(u32) = .{},
+functions: std.ArrayListUnmanaged(u32) = .{},
+memory: ?Memtype = null,
+code: std.ArrayListUnmanaged(Func) = .{},
+funcs: std.ArrayListUnmanaged(vm.Func) = .{},
+
+pub const FunctionType = struct {
+ parameters: []u8,
+ results: []u8,
+
+ pub fn deinit(self: FunctionType, allocator: Allocator) void {
+ allocator.free(self.parameters);
+ allocator.free(self.results);
+ }
+};
+
+pub const FunctionBody = struct {
+ locals: []Local,
+ code: []u8,
+};
+
+pub const FunctionScope = enum {
+ external,
+ internal,
+};
+
+const Parser = @This();
+
+pub const Error = error{
+ invalid_magic,
+ invalid_version,
+ invalid_section,
+ invalid_functype,
+ invalid_vectype,
+ invalid_numtype,
+ invalid_reftype,
+ invalid_valtype,
+ invalid_string,
+ invalid_limits,
+ invalid_globaltype,
+ invalid_importdesc,
+ invalid_exportdesc,
+ unterminated_wasm,
+};
+
+// TODO: This function should not exist
+fn warn(self: Parser, s: []const u8) void {
+ std.debug.print("[WARN]: Parsing of {s} unimplemented at byte index {d}\n", .{ s, self.byte_idx });
+}
+
+// TODO: remove peek
+fn peek(self: Parser) ?u8 {
+ return if (self.byte_idx < self.bytes.len) self.bytes[self.byte_idx] else null;
+}
+
+fn read(self: *Parser, n: usize) ![]const u8 {
+ if (self.byte_idx + n > self.bytes.len) return Error.unterminated_wasm;
+ defer self.byte_idx += n;
+ return self.bytes[self.byte_idx .. self.byte_idx + n];
+}
+
+// ==========
+// = VALUES =
+// ==========
+
+pub fn readByte(self: *Parser) !u8 {
+ return (try self.read(1))[0];
+}
+
+fn readU32(self: *Parser) !u32 {
+ return std.leb.readUleb128(u32, self);
+}
+
+fn readName(self: *Parser) ![]const u8 {
+ // NOTE: This should be the only vector not parsed through parseVector
+ const size = try self.readU32();
+ const str = try self.allocator.alloc(u8, size);
+ @memcpy(str, try self.read(size));
+ if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_string;
+ return str;
+}
+
+// =========
+// = TYPES =
+// =========
+// NOTE: This should return a value
+
+fn VectorFnResult(parse_fn: anytype) type {
+ const type_info = @typeInfo(@TypeOf(parse_fn));
+ if (type_info != .@"fn") {
+ @compileError("cannot determine return type of " ++ @typeName(@TypeOf(parse_fn)));
+ }
+ const ret_type = type_info.@"fn".return_type.?;
+ const ret_type_info = @typeInfo(ret_type);
+ return switch (ret_type_info) {
+ .error_union => ret_type_info.error_union.payload,
+ else => ret_type,
+ };
+}
+fn parseVector(self: *Parser, parse_fn: anytype) ![]VectorFnResult(parse_fn) {
+ const n = try self.readU32();
+ const ret = try self.allocator.alloc(VectorFnResult(parse_fn), n);
+ for (ret) |*i| {
+ i.* = try parse_fn(self);
+ }
+ return ret;
+}
+
+fn parseNumtype(self: *Parser) !std.wasm.Valtype {
+ return switch (try self.readByte()) {
+ 0x7F => .i32,
+ 0x7E => .i64,
+ 0x7D => .f32,
+ 0x7C => .f64,
+ else => Error.invalid_numtype,
+ };
+}
+
+fn parseVectype(self: *Parser) !std.wasm.Valtype {
+ return switch (try self.readByte()) {
+ 0x7B => .v128,
+ else => Error.invalid_vectype,
+ };
+}
+
+fn parseReftype(self: *Parser) !std.wasm.RefType {
+ return switch (try self.readByte()) {
+ 0x70 => .funcref,
+ 0x6F => .externref,
+ else => Error.invalid_reftype,
+ };
+}
+
+// NOTE: Parsing of Valtype can be improved but it makes it less close to spec so...
+// TODO: Do we really need Valtype?
+const Valtype = union(enum) {
+ val: std.wasm.Valtype,
+ ref: std.wasm.RefType,
+};
+fn parseValtype(self: *Parser) !Valtype {
+ const pb = self.peek() orelse return Error.unterminated_wasm;
+ return switch (pb) {
+ 0x7F, 0x7E, 0x7D, 0x7C => .{ .val = try self.parseNumtype() },
+ 0x7B => .{ .val = try self.parseVectype() },
+ 0x70, 0x6F => .{ .ref = try self.parseReftype() },
+ else => Error.invalid_valtype,
+ };
+}
+
+fn parseResultType(self: *Parser) ![]Valtype {
+ return try self.parseVector(Parser.parseValtype);
+}
+
+pub const Functype = struct {
+ parameters: []Valtype,
+ rt2: []Valtype,
+
+ pub fn deinit(self: Functype, allocator: Allocator) void {
+ allocator.free(self.parameters);
+ allocator.free(self.rt2);
+ }
+};
+fn parseFunctype(self: *Parser) !Functype {
+ if (try self.readByte() != 0x60) return Error.invalid_functype;
+ return .{
+ .parameters = try self.parseResultType(),
+ .rt2 = try self.parseResultType(),
+ };
+}
+
+const Limits = struct {
+ min: u32,
+ max: ?u32,
+};
+
+fn parseLimits(self: *Parser) !Limits {
+ return switch (try self.readByte()) {
+ 0x00 => .{
+ .min = try self.readU32(),
+ .max = null,
+ },
+ 0x01 => .{
+ .min = try self.readU32(),
+ .max = try self.readU32(),
+ },
+ else => Error.invalid_limits,
+ };
+}
+
+const Memtype = struct {
+ lim: Limits,
+};
+fn parseMemtype(self: *Parser) !Memtype {
+ return .{ .lim = try self.parseLimits() };
+}
+
+const Tabletype = struct {
+ et: std.wasm.RefType,
+ lim: Limits,
+};
+fn parseTabletype(self: *Parser) !Tabletype {
+ return .{
+ .et = try self.parseReftype(),
+ .lim = try self.parseLimits(),
+ };
+}
+
+const Globaltype = struct {
+ t: Valtype,
+ m: enum {
+ @"const",
+ @"var",
+ },
+};
+fn parseGlobaltype(self: *Parser) !Globaltype {
+ return .{
+ .t = try self.parseValtype(),
+ .m = switch (try self.readByte()) {
+ 0x00 => .@"const",
+ 0x01 => .@"var",
+ else => return Error.invalid_globaltype,
+ },
+ };
+}
+
+// ===========
+// = MODULES =
+// ===========
+// NOTE: This should not return anything but modify IR
+
+pub fn parseModule(self: *Parser) !vm.Module {
+ if (!std.mem.eql(u8, try self.read(4), &.{ 0x00, 0x61, 0x73, 0x6d })) return Error.invalid_magic;
+ if (!std.mem.eql(u8, try self.read(4), &.{ 0x01, 0x00, 0x00, 0x00 })) return Error.invalid_version;
+ // TODO: Ensure only one section of each type (except for custom section), some code depends on it
+ while (self.byte_idx < self.bytes.len) {
+ try switch (try self.readByte()) {
+ 0 => self.parseCustomsec(),
+ 1 => self.parseTypesec(),
+ 2 => self.parseImportsec(),
+ 3 => self.parseFuncsec(),
+ 4 => self.parseTablesec(),
+ 5 => self.parseMemsec(),
+ 6 => self.parseGlobalsec(),
+ 7 => self.parseExportsec(),
+ 8 => self.parseStartsec(),
+ 9 => self.parseElemsec(),
+ 10 => self.parseCodesec(),
+ 11 => self.parseDatasec(),
+ 12 => self.parseDatacountsec(),
+ else => return Error.invalid_section,
+ };
+ }
+
+ return .{
+ .memory = .{
+ .min = self.memory.?.lim.min,
+ .max = self.memory.?.lim.max,
+ },
+ .exports = self.exports,
+ .funcs = try self.funcs.toOwnedSlice(self.allocator),
+ .types = try self.types.toOwnedSlice(self.allocator),
+ .functions = try self.functions.toOwnedSlice(self.allocator),
+ .imports = try self.imports.toOwnedSlice(self.allocator),
+ .code = try self.code.toOwnedSlice(self.allocator),
+ };
+}
+
+fn parseCustomsec(self: *Parser) !void {
+ self.warn("customsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+fn parseTypesec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const ft = try self.parseVector(Parser.parseFunctype);
+ // TODO: Maybe the interface should be better?
+ try self.types.appendSlice(self.allocator, ft);
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+pub const Import = struct {
+ name: []const u8,
+ module: []const u8,
+ importdesc: union { func: u32, table: Tabletype, mem: Memtype, global: Globaltype },
+ pub fn deinit(self: Import, allocator: Allocator) void {
+ allocator.free(self.name);
+ allocator.free(self.module);
+ }
+};
+fn parseImport(self: *Parser) !Import {
+ return .{
+ .module = try self.readName(),
+ .name = try self.readName(),
+ .importdesc = switch (try self.readByte()) {
+ 0x00 => .{ .func = try self.readU32() },
+ 0x01 => .{ .table = try self.parseTabletype() },
+ 0x02 => .{ .mem = try self.parseMemtype() },
+ 0x03 => .{ .global = try self.parseGlobaltype() },
+ else => return Error.invalid_importdesc,
+ },
+ };
+}
+
+fn parseImportsec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const imports = try self.parseVector(Parser.parseImport);
+ try self.imports.appendSlice(self.allocator, imports);
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseFuncsec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const types = try self.parseVector(Parser.readU32);
+ try self.functions.appendSlice(self.allocator, types);
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseTablesec(self: *Parser) !void {
+ self.warn("tablesec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+fn parseMemsec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const mems = try self.parseVector(Parser.parseMemtype);
+ if (mems.len == 0) {
+ // WTF?
+ } else if (mems.len == 1) {
+ self.memory = mems[0];
+ } else {
+ std.debug.print("[WARN]: Parsing more than one memory is not yet supported\n", .{});
+ }
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseGlobalsec(self: *Parser) !void {
+ self.warn("globalsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+pub const Export = struct {
+ name: []const u8,
+ exportdesc: union(enum) { func: u32, table: u32, mem: u32, global: u32 },
+ pub fn deinit(self: Export, allocator: Allocator) void {
+ allocator.free(self.name);
+ }
+};
+
+fn parseExport(self: *Parser) !Export {
+ return .{
+ .name = try self.readName(),
+ .exportdesc = switch (try self.readByte()) {
+ 0x00 => .{ .func = try self.readU32() },
+ 0x01 => .{ .table = try self.readU32() },
+ 0x02 => .{ .mem = try self.readU32() },
+ 0x03 => .{ .global = try self.readU32() },
+ else => return Error.invalid_exportdesc,
+ },
+ };
+}
+
+fn parseExportsec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const exports = try self.parseVector(Parser.parseExport);
+ for (exports) |e| {
+ switch (e.exportdesc) {
+ .func => try self.exports.put(self.allocator, e.name, e.exportdesc.func),
+ else => std.debug.print("[WARN]: export ignored\n", .{}),
+ }
+ }
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseStartsec(self: *Parser) !void {
+ self.warn("startsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+fn parseElemsec(self: *Parser) !void {
+ self.warn("elemsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+pub const Func = struct {
+ locals: []Valtype,
+ code: []const u8,
+};
+const Local = struct {
+ n: u32,
+ t: Valtype,
+};
+fn parseLocal(self: *Parser) !Local {
+ return .{
+ .n = try self.readU32(),
+ .t = try self.parseValtype(),
+ };
+}
+
+fn parseCode(self: *Parser) !Func {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const locals = try self.parseVector(Parser.parseLocal);
+ var local_count: usize = 0;
+ for (locals) |l| {
+ local_count += l.n;
+ }
+
+ const func = Func{
+ .locals = try self.allocator.alloc(Valtype, local_count),
+ .code = try self.read(end_idx - self.byte_idx),
+ };
+
+ var li: usize = 0;
+ for (locals) |l| {
+ @memset(func.locals[li .. li + l.n], l.t);
+ li += l.n;
+ }
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+
+ return func;
+}
+
+fn parseCodesec(self: *Parser) !void {
+ const size = try self.readU32();
+ const end_idx = self.byte_idx + size;
+
+ const codes = try self.parseVector(Parser.parseCode);
+ for (codes, 0..) |_, i| {
+ try self.funcs.append(self.allocator, .{ .internal = @intCast(i) });
+ }
+ try self.code.appendSlice(self.allocator, codes);
+
+ // TODO: run this check not only on debug
+ std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseDatasec(self: *Parser) !void {
+ self.warn("datasec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
+
+fn parseDatacountsec(self: *Parser) !void {
+ self.warn("datacountsec");
+ const size = try self.readU32();
+ _ = try self.read(size);
+}
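As an illustration of the commit message's point that adding functionality is now mostly plumbing, here is a hypothetical parseTablesec that actually parses instead of warning and skipping. It is only a sketch: the `tables` list is an assumed extra field (e.g. `tables: std.ArrayListUnmanaged(Tabletype) = .{}`) that this commit does not add, but the body would reuse parseVector and the existing parseTabletype unchanged, following the same pattern as parseFuncsec and parseMemsec.

fn parseTablesec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;

    // A table section is just a vector of table types.
    const tables = try self.parseVector(Parser.parseTabletype);
    // Assumed field; not part of this commit.
    try self.tables.appendSlice(self.allocator, tables);

    std.debug.assert(self.byte_idx == end_idx);
}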
diff --git a/src/mods/parse.zig b/src/mods/parse.zig
deleted file mode 100644
index f125303..0000000
--- a/src/mods/parse.zig
+++ /dev/null
@@ -1,334 +0,0 @@
-const std = @import("std");
-const wasm = @import("wasm.zig");
-const Allocator = std.mem.Allocator;
-
-pub fn leb128Result(T: type) type {
- return struct { len: usize, val: T };
-}
-
-pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) {
- switch (@typeInfo(T)) {
- .int => {},
- else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
- }
- if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) {
- @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits}));
- }
-
- var result: T = 0;
- // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day...
- var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0;
- var byte: u8 = undefined;
- var len: usize = 0;
- while (stream.readByte()) |b| {
- len += 1;
- result |= @as(T, @intCast((b & 0x7f))) << shift;
- if ((b & (0x1 << 7)) == 0) {
- byte = b;
- break;
- }
- shift += 7;
- } else |err| {
- return err;
- }
-
- if (@typeInfo(T).int.signedness == .signed) {
- const size = @sizeOf(T) * 8;
- if (shift < size and (byte & 0x40) != 0) {
- result |= (~@as(T, 0) << shift);
- }
- }
-
- return .{ .len = len, .val = result };
-}
-
-pub const Error = error{
- malformed_wasm,
- invalid_utf8,
-};
-
-pub const Module = struct {
- types: []FunctionType,
- imports: std.ArrayList(Import),
- exports: std.StringHashMap(u32),
- functions: []u32,
- memory: Memory,
- code: []FunctionBody,
- funcs: std.ArrayList(Function),
-
- pub fn deinit(self: *Module, allocator: Allocator) void {
- for (self.types) |t| {
- t.deinit(allocator);
- }
- allocator.free(self.types);
-
- for (self.imports.items) |i| {
- i.deinit(allocator);
- }
- self.imports.deinit();
-
- var iter = self.exports.iterator();
- while (iter.next()) |entry| {
- allocator.free(entry.key_ptr.*);
- }
- self.exports.deinit();
-
- allocator.free(self.functions);
-
- for (self.code) |f| {
- for (f.locals) |l| {
- allocator.free(l.types);
- }
- allocator.free(f.code);
- }
- allocator.free(self.code);
-
- self.funcs.deinit();
- }
-};
-
-pub const FunctionScope = enum {
- external,
- internal,
-};
-
-pub const Function = union(FunctionScope) {
- external: u8,
- internal: u8,
-};
-
-// TODO: refactor locals
-pub const Local = struct {
- types: []u8,
-};
-
-pub const FunctionBody = struct {
- locals: []Local,
- code: []u8,
-};
-
-pub const Memory = struct {
- initial: u32,
- max: u32,
-};
-
-pub const FunctionType = struct {
- parameters: []u8,
- results: []u8,
-
- pub fn deinit(self: FunctionType, allocator: Allocator) void {
- allocator.free(self.parameters);
- allocator.free(self.results);
- }
-};
-
-pub const Import = struct {
- name: []u8,
- module: []u8,
- signature: u32,
-
- pub fn deinit(self: Import, allocator: Allocator) void {
- allocator.free(self.name);
- allocator.free(self.module);
- }
-};
-
-pub fn parseType(t: u8) wasm.Type {
- return @enumFromInt(t);
-}
-
-pub fn parseName(allocator: Allocator, stream: anytype) ![]u8 {
- const size = try std.leb.readULEB128(u32, stream);
- const str = try allocator.alloc(u8, size);
- if (try stream.read(str) != size) {
- // TODO: better error
- return Error.malformed_wasm;
- }
-
- if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_utf8;
-
- return str;
-}
-
-// TODO: parse Global Section
-// TODO: Consider Arena allocator
-pub fn parseWasm(allocator: Allocator, stream: anytype) !Module {
- var types: []FunctionType = undefined;
- var imports = std.ArrayList(Import).init(allocator);
- var exports = std.StringHashMap(u32).init(allocator);
- var funcs = std.ArrayList(Function).init(allocator);
- var functions: []u32 = undefined;
- var memory: Memory = undefined;
- var code: []FunctionBody = undefined;
-
- // Parse magic
- if (!(try stream.isBytes(&[_]u8{ 0x00, 0x61, 0x73, 0x6d }))) return Error.malformed_wasm;
- // Parse version
- if (!(try stream.isBytes(&[_]u8{ 0x01, 0x00, 0x00, 0x00 }))) return Error.malformed_wasm;
-
- // NOTE: This ensures that (in this block) illegal behavior is safety-checked.
- // This slows down the code but since this function is only called at the start
- // I believe it is better to take the ``hit'' in performance (should only be @enumFromInt)
- // rather than having undefined behavior when user provides an invalid wasm file.
- @setRuntimeSafety(true);
- loop: while (stream.readByte()) |byte| {
- const section_size = try std.leb.readULEB128(u32, stream);
- switch (@as(std.wasm.Section, @enumFromInt(byte))) {
- std.wasm.Section.custom => {
- // TODO: unimplemented
- break :loop;
- },
- std.wasm.Section.type => {
- const type_count = try std.leb.readULEB128(u32, stream);
- types = try allocator.alloc(FunctionType, type_count);
- for (types) |*t| {
- if (!(try stream.isBytes(&.{0x60}))) return Error.malformed_wasm;
- const params_count = try std.leb.readULEB128(u32, stream);
- t.parameters = try allocator.alloc(u8, params_count);
- if (try stream.read(t.parameters) != params_count) {
- // TODO: better errors
- return Error.malformed_wasm;
- }
- const results = try std.leb.readULEB128(u32, stream);
- t.results = try allocator.alloc(u8, results);
- if (try stream.read(t.results) != results) {
- // TODO: better errors
- return Error.malformed_wasm;
- }
- }
- },
- std.wasm.Section.import => {
- // Can there be more than one import section?
- const import_count = try std.leb.readULEB128(u32, stream);
- for (0..import_count) |i| {
- const mod = try parseName(allocator, stream);
- const nm = try parseName(allocator, stream);
-
- const b = try stream.readByte();
- switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
- std.wasm.ExternalKind.function => {
- try funcs.append(.{ .external = @intCast(i) });
-
- const idx = try std.leb.readULEB128(u32, stream);
- try imports.append(.{
- .module = mod,
- .name = nm,
- .signature = idx,
- });
- },
- // TODO: not implemented
- std.wasm.ExternalKind.table => try stream.skipBytes(3, .{}),
- std.wasm.ExternalKind.memory => try stream.skipBytes(2, .{}),
- std.wasm.ExternalKind.global => try stream.skipBytes(2, .{}),
- }
- }
- },
- std.wasm.Section.function => {
- const function_count = try std.leb.readULEB128(u32, stream);
- functions = try allocator.alloc(u32, function_count);
- for (functions) |*f| {
- f.* = try std.leb.readULEB128(u32, stream);
- }
- },
- std.wasm.Section.table => {
- // TODO: not implemented
- try stream.skipBytes(section_size, .{});
- },
- std.wasm.Section.memory => {
- const memory_count = try std.leb.readULEB128(u32, stream);
- for (0..memory_count) |_| {
- const b = try stream.readByte();
- const n = try std.leb.readULEB128(u32, stream);
- var m: u32 = 0;
- switch (b) {
- 0x00 => {},
- 0x01 => m = try std.leb.readULEB128(u32, stream),
- else => return Error.malformed_wasm,
- }
- // TODO: support multiple memories
- memory = .{
- .initial = n,
- .max = m,
- };
- }
- },
- std.wasm.Section.global => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- // TODO: Can there be more than one export section? Otherwise we can optimize allocations
- std.wasm.Section.@"export" => {
- const export_count = try std.leb.readULEB128(u32, stream);
- for (0..export_count) |_| {
- const nm = try parseName(allocator, stream);
- const b = try stream.readByte();
- const idx = try std.leb.readULEB128(u32, stream);
- switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
- std.wasm.ExternalKind.function => try exports.put(nm, idx),
- // TODO: unimplemented,
- std.wasm.ExternalKind.table => allocator.free(nm),
- std.wasm.ExternalKind.memory => allocator.free(nm),
- std.wasm.ExternalKind.global => allocator.free(nm),
- }
- }
- },
- std.wasm.Section.start => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- std.wasm.Section.element => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- std.wasm.Section.code => {
- const code_count = try std.leb.readULEB128(u32, stream);
- code = try allocator.alloc(FunctionBody, code_count);
- for (0..code_count) |i| {
- const code_size = try std.leb.readULEB128(u32, stream);
- var locals_size: usize = 0;
- const local_count = try leb128Decode(u32, stream);
- locals_size += local_count.len;
- const locals = try allocator.alloc(Local, local_count.val);
- for (locals) |*l| {
- const n = try leb128Decode(u32, stream);
- l.types = try allocator.alloc(u8, n.val);
- @memset(l.types, try stream.readByte());
- locals_size += n.len + 1;
- }
- code[i].locals = locals;
-
- // TODO: maybe is better to parse code into ast here and not do it every frame?
- // FIXME: This calculation is plain wrong. Resolving above TODO should help
- code[i].code = try allocator.alloc(u8, code_size - locals_size);
- // TODO: better error reporting
- if (try stream.read(code[i].code) != code_size - locals_size) return Error.malformed_wasm;
-
- const f = Function{ .internal = @intCast(i) };
- try funcs.append(f);
- }
- },
- std.wasm.Section.data => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- std.wasm.Section.data_count => {
- // TODO: unimplemented
- try stream.skipBytes(section_size, .{});
- },
- else => return Error.malformed_wasm,
- }
- } else |err| switch (err) {
- error.EndOfStream => {},
- else => return err,
- }
-
- return Module{
- .types = types,
- .imports = imports,
- .functions = functions,
- .memory = memory,
- .exports = exports,
- .code = code,
- .funcs = funcs,
- };
-}
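The hand-rolled LEB128 decoder from parse.zig survives below as leb128Decode_stream in vm.zig. As a quick sanity check of the encoding it handles, here is a standalone sketch (not part of the diff) that decodes the classic three-byte example with std.leb, the same helper the new Parser.zig uses for readU32:

const std = @import("std");

test "unsigned LEB128 worked example" {
    // 0xE5 0x8E 0x26 carries the 7-bit groups 0x65, 0x0E, 0x26 (the last byte
    // has its continuation bit clear), so the decoded value is
    //   0x65 + (0x0E << 7) + (0x26 << 14) = 101 + 1792 + 622592 = 624485
    var fbs = std.io.fixedBufferStream(&[_]u8{ 0xE5, 0x8E, 0x26 });
    const v = try std.leb.readUleb128(u32, fbs.reader());
    try std.testing.expectEqual(@as(u32, 624485), v);
}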
diff --git a/src/mods/vm.zig b/src/mods/vm.zig
index cbeb865..b2a373d 100644
--- a/src/mods/vm.zig
+++ b/src/mods/vm.zig
@@ -1,13 +1,83 @@
const std = @import("std");
const wasm = @import("wasm.zig");
-const Parser = @import("parse.zig");
+const Parser = @import("Parser.zig");
const Allocator = std.mem.Allocator;
const AllocationError = error{OutOfMemory};
-fn leb128Decode(comptime T: type, bytes: []u8) Parser.leb128Result(T) {
+pub const Memory = struct {
+ min: u32,
+ max: ?u32,
+};
+// TODO: Resolve function calls at parse time
+// TODO: Resolve function types at compile time
+pub const Func = union(enum) {
+ internal: u32,
+ external: u32,
+};
+
+pub const Module = struct {
+ memory: Memory,
+ funcs: []Func,
+ exports: std.StringHashMapUnmanaged(u32),
+ imports: []Parser.Import,
+ types: []Parser.Functype,
+ functions: []u32,
+ code: []Parser.Func,
+
+ fn deinit(self: *Module, allocator: Allocator) void {
+ self.exports.deinit(allocator);
+ allocator.free(self.funcs);
+ allocator.free(self.imports);
+ allocator.free(self.types);
+ allocator.free(self.functions);
+ allocator.free(self.code);
+ }
+};
+
+pub fn leb128Result(T: type) type {
+ return struct { len: usize, val: T };
+}
+
+pub fn leb128Decode_stream(comptime T: type, stream: anytype) !leb128Result(T) {
+ switch (@typeInfo(T)) {
+ .int => {},
+ else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
+ }
+ if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) {
+ @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits}));
+ }
+
+ var result: T = 0;
+ // TODO: is the type of shift important? Reading Wikipedia (not very thoroughly, though) it seems like we can use u32 and call it a day...
+ var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0;
+ var byte: u8 = undefined;
+ var len: usize = 0;
+ while (stream.readByte()) |b| {
+ len += 1;
+ result |= @as(T, @intCast((b & 0x7f))) << shift;
+ if ((b & (0x1 << 7)) == 0) {
+ byte = b;
+ break;
+ }
+ shift += 7;
+ } else |err| {
+ return err;
+ }
+
+ if (@typeInfo(T).int.signedness == .signed) {
+ const size = @sizeOf(T) * 8;
+ if (shift < size and (byte & 0x40) != 0) {
+ result |= (~@as(T, 0) << shift);
+ }
+ }
+
+ return .{ .len = len, .val = result };
+}
+
+fn leb128Decode(comptime T: type, bytes: []const u8) leb128Result(T) {
var fbs = std.io.fixedBufferStream(bytes);
// TODO: this catch should be unreachable
- return Parser.leb128Decode(T, fbs.reader()) catch .{ .len = 0, .val = 0 };
+ return leb128Decode_stream(T, fbs.reader()) catch .{ .len = 0, .val = 0 };
}
pub fn decodeLittleEndian(comptime T: type, bytes: []u8) T {
@@ -30,7 +100,7 @@ pub fn encodeLittleEndian(comptime T: type, bytes: *[]u8, value: T) void {
pub const CallFrame = struct {
program_counter: usize,
- code: []u8,
+ code: []const u8,
locals: []Value,
};
@@ -45,15 +115,20 @@ pub const Value = union(ValueType) {
};
pub const Runtime = struct {
- module: Parser.Module,
+ module: Module,
stack: std.ArrayList(Value),
call_stack: std.ArrayList(CallFrame),
memory: []u8,
global_runtime: *wasm.GlobalRuntime,
labels: std.ArrayList(usize),
- pub fn init(allocator: Allocator, module: Parser.Module, global_runtime: *wasm.GlobalRuntime) !Runtime {
- const memory = try allocator.alloc(u8, module.memory.max);
+ pub fn init(allocator: Allocator, module: Module, global_runtime: *wasm.GlobalRuntime) !Runtime {
+ // If memory max is not set, the memory is allowed to grow, but growing is not supported at the moment
+ const max = module.memory.max orelse 1_000;
+ if (module.memory.max == null) {
+ std.debug.print("[WARN]: growing memory is not yet supported, usign a default value of 1Kb\n", .{});
+ }
+ const memory = try allocator.alloc(u8, max);
return Runtime{
.module = module,
.stack = try std.ArrayList(Value).initCapacity(allocator, 10),
@@ -492,6 +567,7 @@ pub const Runtime = struct {
}
}
+ // TODO: Do name resolution
pub fn callExternal(self: *Runtime, allocator: Allocator, name: []const u8, parameters: []usize) !void {
if (self.module.exports.get(name)) |function| {
try self.call(allocator, function, parameters);
@@ -501,7 +577,7 @@ pub const Runtime = struct {
}
pub fn call(self: *Runtime, allocator: Allocator, function: usize, parameters: []usize) AllocationError!void {
- const f = self.module.funcs.items[function];
+ const f = self.module.funcs[function];
switch (f) {
.internal => {
const function_type = self.module.types[self.module.functions[f.internal]];
@@ -512,26 +588,32 @@ pub const Runtime = struct {
};
for (parameters, 0..) |p, i| {
- switch (Parser.parseType(function_type.parameters[i])) {
- .i32 => {
- frame.locals[i] = .{ .i32 = @intCast(p) };
+ switch (function_type.parameters[i]) {
+ .val => |v| switch (v) {
+ .i32 => {
+ frame.locals[i] = .{ .i32 = @intCast(p) };
+ },
+ .i64 => {
+ frame.locals[i] = .{ .i64 = @intCast(p) };
+ },
+ else => unreachable,
},
- .i64 => {
- frame.locals[i] = .{ .i64 = @intCast(p) };
- },
- else => unreachable,
+ .ref => unreachable,
}
}
for (self.module.code[f.internal].locals, function_type.parameters.len..) |local, i| {
- switch (Parser.parseType(local.types[0])) {
- .i32 => {
- frame.locals[i] = .{ .i32 = 0 };
- },
- .i64 => {
- frame.locals[i] = .{ .i64 = 0 };
+ switch (local) {
+ .val => |v| switch (v) {
+ .i32 => {
+ frame.locals[i] = .{ .i32 = 0 };
+ },
+ .i64 => {
+ frame.locals[i] = .{ .i64 = 0 };
+ },
+ else => unreachable,
},
- else => unreachable,
+ .ref => unreachable,
}
}
@@ -540,7 +622,7 @@ pub const Runtime = struct {
allocator.free(frame.locals);
},
.external => {
- const name = self.module.imports.items[f.external].name;
+ const name = self.module.imports[f.external].name;
if (self.global_runtime.functions.get(name)) |external| {
external(&self.stack);
}