From b7854d7325dfe35ca41e56dcccfb8fb7b7d0aa22 Mon Sep 17 00:00:00 2001 From: Ernesto Lanchares Date: Sun, 23 Mar 2025 13:38:57 +0000 Subject: Big rework of the parser! It now follows a more functional style but it should be waaay easier to add functionality. Probably the parser is a bit slower than the previous one but the code is much cleaner and a good enough compiler should be able to inline the function calls and make it on par with the previous one. As a TODO, runtime structs should not depend on the parser, but I think that is a topic for another commit. --- src/main.zig | 19 +- src/mods/Parser.zig | 486 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/mods/parse.zig | 334 ------------------------------------ src/mods/vm.zig | 128 +++++++++++--- 4 files changed, 605 insertions(+), 362 deletions(-) create mode 100644 src/mods/Parser.zig delete mode 100644 src/mods/parse.zig (limited to 'src') diff --git a/src/main.zig b/src/main.zig index 597011d..97aa5bf 100644 --- a/src/main.zig +++ b/src/main.zig @@ -5,7 +5,7 @@ const window = @import("rendering/window.zig"); const config = @import("config"); const Renderer = @import("rendering/renderer_vulkan.zig"); const math = @import("math.zig"); -const Parser = @import("mods/parse.zig"); +const Parser = @import("mods/Parser.zig"); const vm = @import("mods/vm.zig"); const wasm = @import("mods/wasm.zig"); const components = @import("ecs/components.zig"); @@ -28,10 +28,19 @@ pub fn main() !void { //defer global_runtime.deinit(); //try global_runtime.addFunction("debug", wasm.debug); - //const file = try std.fs.cwd().openFile("assets/core.wasm", .{}); - //const module = try Parser.parseWasm(allocator, file.reader()); - //var runtime = try vm.Runtime.init(allocator, module, &global_runtime); - //defer runtime.deinit(allocator); + // const file = try std.fs.cwd().openFile("assets/core.wasm", .{}); + // const all = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB + // var parser = Parser{ + // .bytes = all, + // 
.byte_idx = 0, + // .allocator = allocator, + // }; + // const module = parser.parseModule() catch |err| { + // std.debug.print("[ERROR]: error at byte {x}(0x{x})\n", .{ parser.byte_idx, parser.bytes[parser.byte_idx] }); + // return err; + // }; + // var runtime = try vm.Runtime.init(allocator, module, &global_runtime); + // defer runtime.deinit(allocator); //var parameters = [_]usize{}; //try runtime.callExternal(allocator, "preinit", &parameters); diff --git a/src/mods/Parser.zig b/src/mods/Parser.zig new file mode 100644 index 0000000..29f18d8 --- /dev/null +++ b/src/mods/Parser.zig @@ -0,0 +1,486 @@ +const std = @import("std"); +const vm = @import("vm.zig"); +const Allocator = std.mem.Allocator; + +bytes: []const u8, +byte_idx: usize, +allocator: Allocator, + +// TODO: We don't really need ArrayLists +types: std.ArrayListUnmanaged(Functype) = .{}, +imports: std.ArrayListUnmanaged(Import) = .{}, +exports: std.StringHashMapUnmanaged(u32) = .{}, +functions: std.ArrayListUnmanaged(u32) = .{}, +memory: ?Memtype = null, +code: std.ArrayListUnmanaged(Func) = .{}, +funcs: std.ArrayListUnmanaged(vm.Func) = .{}, + +pub const FunctionType = struct { + parameters: []u8, + results: []u8, + + pub fn deinit(self: FunctionType, allocator: Allocator) void { + allocator.free(self.parameters); + allocator.free(self.results); + } +}; + +pub const FunctionBody = struct { + locals: []Local, + code: []u8, +}; + +pub const FunctionScope = enum { + external, + internal, +}; + +const Parser = @This(); + +pub const Error = error{ + invalid_magic, + invalid_version, + invalid_section, + invalid_functype, + invalid_vectype, + invalid_numtype, + invalid_reftype, + invalid_valtype, + invalid_string, + invalid_limits, + invalid_globaltype, + invalid_importdesc, + invalid_exportdesc, + unterminated_wasm, +}; + +// TODO: This function should not exist +fn warn(self: Parser, s: []const u8) void { + std.debug.print("[WARN]: Parsing of {s} unimplemented at byte index {d}\n", .{ s, self.byte_idx }); 
+}
+
+// TODO: remove peek
+fn peek(self: Parser) ?u8 {
+    return if (self.byte_idx < self.bytes.len) self.bytes[self.byte_idx] else null;
+}
+
+fn read(self: *Parser, n: usize) ![]const u8 {
+    if (self.byte_idx + n > self.bytes.len) return Error.unterminated_wasm;
+    defer self.byte_idx += n; // advance the cursor only after the slice below is taken
+    return self.bytes[self.byte_idx .. self.byte_idx + n];
+}
+
+// ==========
+// = VALUES =
+// ==========
+
+pub fn readByte(self: *Parser) !u8 {
+    return (try self.read(1))[0];
+}
+
+fn readU32(self: *Parser) !u32 {
+    return std.leb.readUleb128(u32, self); // uses this parser's readByte as the byte source
+}
+
+fn readName(self: *Parser) ![]const u8 {
+    // NOTE: This should be the only vector not parsed through parseVector
+    const size = try self.readU32();
+    // Bounds-check and validate the borrowed bytes first so nothing is allocated (or leaked) on error
+    const raw = try self.read(size);
+    if (!std.unicode.utf8ValidateSlice(raw)) return Error.invalid_string;
+    return try self.allocator.dupe(u8, raw); // caller owns the returned copy
+}
+
+// =========
+// = TYPES =
+// =========
+// NOTE: This should return a value
+
+fn VectorFnResult(parse_fn: anytype) type {
+    const type_info = @typeInfo(@TypeOf(parse_fn));
+    if (type_info != .@"fn") {
+        @compileError("cannot determine return type of " ++ @typeName(@TypeOf(parse_fn)));
+    }
+    const ret_type = type_info.@"fn".return_type.?;
+    const ret_type_info = @typeInfo(ret_type);
+    return switch (ret_type_info) {
+        .error_union => ret_type_info.error_union.payload,
+        else => ret_type,
+    };
+}
+fn parseVector(self: *Parser, parse_fn: anytype) ![]VectorFnResult(parse_fn) {
+    const n = try self.readU32();
+    const ret = try self.allocator.alloc(VectorFnResult(parse_fn), n);
+    for (ret) |*i| {
+        i.* = try parse_fn(self);
+    }
+    return ret;
+}
+
+fn parseNumtype(self: *Parser) !std.wasm.Valtype {
+    return switch (try self.readByte()) {
+        0x7F => .i32,
+        0x7E => .i64, // was .i32: 0x7E is the i64 type byte, so i64 values were mistyped
+        0x7D => .f32,
+        0x7C => .f64,
+        else => Error.invalid_numtype,
+    };
+}
+
+fn parseVectype(self: *Parser) !std.wasm.Valtype {
+    return switch (try self.readByte()) {
+        0x7B => .v128,
+        else => Error.invalid_vectype,
+    };
+}
+
+fn 
parseReftype(self: *Parser) !std.wasm.RefType { + return switch (try self.readByte()) { + 0x70 => .funcref, + 0x6F => .externref, + else => Error.invalid_reftype, + }; +} + +// NOTE: Parsing of Valtype can be improved but it makes it less close to spec so... +// TODO: Do we really need Valtype? +const Valtype = union(enum) { + val: std.wasm.Valtype, + ref: std.wasm.RefType, +}; +fn parseValtype(self: *Parser) !Valtype { + const pb = self.peek() orelse return Error.unterminated_wasm; + return switch (pb) { + 0x7F, 0x7E, 0x7D, 0x7C => .{ .val = try self.parseNumtype() }, + 0x7B => .{ .val = try self.parseVectype() }, + 0x70, 0x6F => .{ .ref = try self.parseReftype() }, + else => Error.invalid_valtype, + }; +} + +fn parseResultType(self: *Parser) ![]Valtype { + return try self.parseVector(Parser.parseValtype); +} + +pub const Functype = struct { + parameters: []Valtype, + rt2: []Valtype, + + pub fn deinit(self: Functype, allocator: Allocator) void { + allocator.free(self.parameters); + allocator.free(self.rt2); + } +}; +fn parseFunctype(self: *Parser) !Functype { + if (try self.readByte() != 0x60) return Error.invalid_functype; + return .{ + .parameters = try self.parseResultType(), + .rt2 = try self.parseResultType(), + }; +} + +const Limits = struct { + min: u32, + max: ?u32, +}; + +fn parseLimits(self: *Parser) !Limits { + return switch (try self.readByte()) { + 0x00 => .{ + .min = try self.readU32(), + .max = null, + }, + 0x01 => .{ + .min = try self.readU32(), + .max = try self.readU32(), + }, + else => Error.invalid_limits, + }; +} + +const Memtype = struct { + lim: Limits, +}; +fn parseMemtype(self: *Parser) !Memtype { + return .{ .lim = try self.parseLimits() }; +} + +const Tabletype = struct { + et: std.wasm.RefType, + lim: Limits, +}; +fn parseTabletype(self: *Parser) !Tabletype { + return .{ + .et = try self.parseReftype(), + .lim = try self.parseLimits(), + }; +} + +const Globaltype = struct { + t: Valtype, + m: enum { + @"const", + @"var", + }, +}; +fn 
parseGlobaltype(self: *Parser) !Globaltype { + return .{ + .t = try self.parseValtype(), + .m = switch (try self.readByte()) { + 0x00 => .@"const", + 0x01 => .@"var", + else => return Error.invalid_globaltype, + }, + }; +} + +// =========== +// = MODULES = +// =========== +// NOTE: This should not return anything but modify IR + +pub fn parseModule(self: *Parser) !vm.Module { + if (!std.mem.eql(u8, try self.read(4), &.{ 0x00, 0x61, 0x73, 0x6d })) return Error.invalid_magic; + if (!std.mem.eql(u8, try self.read(4), &.{ 0x01, 0x00, 0x00, 0x00 })) return Error.invalid_version; + // TODO: Ensure only one section of each type (except for custom section), some code depends on it + while (self.byte_idx < self.bytes.len) { + try switch (try self.readByte()) { + 0 => self.parseCustomsec(), + 1 => self.parseTypesec(), + 2 => self.parseImportsec(), + 3 => self.parseFuncsec(), + 4 => self.parseTablesec(), + 5 => self.parseMemsec(), + 6 => self.parseGlobalsec(), + 7 => self.parseExportsec(), + 8 => self.parseStartsec(), + 9 => self.parseElemsec(), + 10 => self.parseCodesec(), + 11 => self.parseDatasec(), + 12 => self.parseDatacountsec(), + else => return Error.invalid_section, + }; + } + + return .{ + .memory = .{ + .min = self.memory.?.lim.min, + .max = self.memory.?.lim.max, + }, + .exports = self.exports, + .funcs = try self.funcs.toOwnedSlice(self.allocator), + .types = try self.types.toOwnedSlice(self.allocator), + .functions = try self.functions.toOwnedSlice(self.allocator), + .imports = try self.imports.toOwnedSlice(self.allocator), + .code = try self.code.toOwnedSlice(self.allocator), + }; +} + +fn parseCustomsec(self: *Parser) !void { + self.warn("customsec"); + const size = try self.readU32(); + _ = try self.read(size); +} + +fn parseTypesec(self: *Parser) !void { + const size = try self.readU32(); + const end_idx = self.byte_idx + size; + + const ft = try self.parseVector(Parser.parseFunctype); + // TODO: Maybe the interface should be better? 
+ try self.types.appendSlice(self.allocator, ft); + + // TODO: run this check not only on debug + std.debug.assert(self.byte_idx == end_idx); +} + +pub const Import = struct { + name: []const u8, + module: []const u8, + importdesc: union { func: u32, table: Tabletype, mem: Memtype, global: Globaltype }, + pub fn deinit(self: Import, allocator: Allocator) void { + allocator.free(self.name); + allocator.free(self.module); + } +}; +fn parseImport(self: *Parser) !Import { + return .{ + .name = try self.readName(), + .module = try self.readName(), + .importdesc = switch (try self.readByte()) { + 0x00 => .{ .func = try self.readU32() }, + 0x01 => .{ .table = try self.parseTabletype() }, + 0x02 => .{ .mem = try self.parseMemtype() }, + 0x03 => .{ .global = try self.parseGlobaltype() }, + else => return Error.invalid_importdesc, + }, + }; +} + +fn parseImportsec(self: *Parser) !void { + const size = try self.readU32(); + const end_idx = self.byte_idx + size; + + const imports = try self.parseVector(Parser.parseImport); + try self.imports.appendSlice(self.allocator, imports); + + // TODO: run this check not only on debug + std.debug.assert(self.byte_idx == end_idx); +} + +fn parseFuncsec(self: *Parser) !void { + const size = try self.readU32(); + const end_idx = self.byte_idx + size; + + const types = try self.parseVector(Parser.readU32); + try self.functions.appendSlice(self.allocator, types); + + // TODO: run this check not only on debug + std.debug.assert(self.byte_idx == end_idx); +} + +fn parseTablesec(self: *Parser) !void { + self.warn("tablesec"); + const size = try self.readU32(); + _ = try self.read(size); +} + +fn parseMemsec(self: *Parser) !void { + const size = try self.readU32(); + const end_idx = self.byte_idx + size; + + const mems = try self.parseVector(Parser.parseMemtype); + if (mems.len == 0) { + // WTF? 
+    } else if (mems.len == 1) {
+        self.memory = mems[0];
+    } else {
+        std.debug.print("[WARN]: Parsing more than one memory is not yet supported\n", .{});
+    }
+
+    // TODO: run this check not only on debug
+    std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseGlobalsec(self: *Parser) !void {
+    self.warn("globalsec");
+    const size = try self.readU32();
+    _ = try self.read(size);
+}
+
+pub const Export = struct {
+    name: []const u8,
+    exportdesc: union(enum) { func: u32, table: u32, mem: u32, global: u32 },
+    pub fn deinit(self: Export, allocator: Allocator) void { // receiver was mistakenly typed as Import
+        allocator.free(self.name);
+    }
+};
+
+fn parseExport(self: *Parser) !Export {
+    return .{
+        .name = try self.readName(),
+        .exportdesc = switch (try self.readByte()) {
+            0x00 => .{ .func = try self.readU32() },
+            0x01 => .{ .table = try self.readU32() },
+            0x02 => .{ .mem = try self.readU32() },
+            0x03 => .{ .global = try self.readU32() },
+            else => return Error.invalid_exportdesc,
+        },
+    };
+}
+
+fn parseExportsec(self: *Parser) !void {
+    const size = try self.readU32();
+    const end_idx = self.byte_idx + size;
+
+    const exports = try self.parseVector(Parser.parseExport);
+    for (exports) |e| {
+        switch (e.exportdesc) {
+            .func => try self.exports.put(self.allocator, e.name, e.exportdesc.func),
+            else => std.debug.print("[WARN]: export ignored\n", .{}),
+        }
+    }
+
+    // TODO: run this check not only on debug
+    std.debug.assert(self.byte_idx == end_idx);
+}
+
+fn parseStartsec(self: *Parser) !void {
+    self.warn("startsec");
+    const size = try self.readU32();
+    _ = try self.read(size);
+}
+
+fn parseElemsec(self: *Parser) !void {
+    self.warn("elemsec");
+    const size = try self.readU32();
+    _ = try self.read(size);
+}
+
+pub const Func = struct {
+    locals: []Valtype,
+    code: []const u8,
+};
+const Local = struct {
+    n: u32,
+    t: Valtype,
+};
+fn parseLocal(self: *Parser) !Local {
+    return .{
+        .n = try self.readU32(),
+        .t = try self.parseValtype(),
+    };
+}
+
+fn parseCode(self: *Parser) !Func {
+    const 
size = try self.readU32(); + const end_idx = self.byte_idx + size; + + const locals = try self.parseVector(Parser.parseLocal); + var local_count: usize = 0; + for (locals) |l| { + local_count += l.n; + } + + const func = Func{ + .locals = try self.allocator.alloc(Valtype, local_count), + .code = try self.read(end_idx - self.byte_idx), + }; + + var li: usize = 0; + for (locals) |l| { + @memset(func.locals[li .. li + l.n], l.t); + li += l.n; + } + + // TODO: run this check not only on debug + std.debug.assert(self.byte_idx == end_idx); + + return func; +} + +fn parseCodesec(self: *Parser) !void { + const size = try self.readU32(); + const end_idx = self.byte_idx + size; + + const codes = try self.parseVector(Parser.parseCode); + for (codes, 0..) |_, i| { + try self.funcs.append(self.allocator, .{ .internal = @intCast(i) }); + } + try self.code.appendSlice(self.allocator, codes); + + // TODO: run this check not only on debug + std.debug.assert(self.byte_idx == end_idx); +} + +fn parseDatasec(self: *Parser) !void { + self.warn("datasec"); + const size = try self.readU32(); + _ = try self.read(size); +} + +fn parseDatacountsec(self: *Parser) !void { + self.warn("datacountsec"); + const size = try self.readU32(); + _ = try self.read(size); +} diff --git a/src/mods/parse.zig b/src/mods/parse.zig deleted file mode 100644 index f125303..0000000 --- a/src/mods/parse.zig +++ /dev/null @@ -1,334 +0,0 @@ -const std = @import("std"); -const wasm = @import("wasm.zig"); -const Allocator = std.mem.Allocator; - -pub fn leb128Result(T: type) type { - return struct { len: usize, val: T }; -} - -pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) { - switch (@typeInfo(T)) { - .int => {}, - else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)), - } - if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) { - @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ 
std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits})); - } - - var result: T = 0; - // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day... - var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0; - var byte: u8 = undefined; - var len: usize = 0; - while (stream.readByte()) |b| { - len += 1; - result |= @as(T, @intCast((b & 0x7f))) << shift; - if ((b & (0x1 << 7)) == 0) { - byte = b; - break; - } - shift += 7; - } else |err| { - return err; - } - - if (@typeInfo(T).int.signedness == .signed) { - const size = @sizeOf(T) * 8; - if (shift < size and (byte & 0x40) != 0) { - result |= (~@as(T, 0) << shift); - } - } - - return .{ .len = len, .val = result }; -} - -pub const Error = error{ - malformed_wasm, - invalid_utf8, -}; - -pub const Module = struct { - types: []FunctionType, - imports: std.ArrayList(Import), - exports: std.StringHashMap(u32), - functions: []u32, - memory: Memory, - code: []FunctionBody, - funcs: std.ArrayList(Function), - - pub fn deinit(self: *Module, allocator: Allocator) void { - for (self.types) |t| { - t.deinit(allocator); - } - allocator.free(self.types); - - for (self.imports.items) |i| { - i.deinit(allocator); - } - self.imports.deinit(); - - var iter = self.exports.iterator(); - while (iter.next()) |entry| { - allocator.free(entry.key_ptr.*); - } - self.exports.deinit(); - - allocator.free(self.functions); - - for (self.code) |f| { - for (f.locals) |l| { - allocator.free(l.types); - } - allocator.free(f.code); - } - allocator.free(self.code); - - self.funcs.deinit(); - } -}; - -pub const FunctionScope = enum { - external, - internal, -}; - -pub const Function = union(FunctionScope) { - external: u8, - internal: u8, -}; - -// TODO: refactor locals -pub const Local = struct { - types: []u8, -}; - -pub const FunctionBody = struct { - locals: []Local, - code: []u8, -}; - -pub const Memory = struct { - initial: u32, - max: u32, -}; - -pub const FunctionType = 
struct { - parameters: []u8, - results: []u8, - - pub fn deinit(self: FunctionType, allocator: Allocator) void { - allocator.free(self.parameters); - allocator.free(self.results); - } -}; - -pub const Import = struct { - name: []u8, - module: []u8, - signature: u32, - - pub fn deinit(self: Import, allocator: Allocator) void { - allocator.free(self.name); - allocator.free(self.module); - } -}; - -pub fn parseType(t: u8) wasm.Type { - return @enumFromInt(t); -} - -pub fn parseName(allocator: Allocator, stream: anytype) ![]u8 { - const size = try std.leb.readULEB128(u32, stream); - const str = try allocator.alloc(u8, size); - if (try stream.read(str) != size) { - // TODO: better error - return Error.malformed_wasm; - } - - if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_utf8; - - return str; -} - -// TODO: parse Global Section -// TODO: Consider Arena allocator -pub fn parseWasm(allocator: Allocator, stream: anytype) !Module { - var types: []FunctionType = undefined; - var imports = std.ArrayList(Import).init(allocator); - var exports = std.StringHashMap(u32).init(allocator); - var funcs = std.ArrayList(Function).init(allocator); - var functions: []u32 = undefined; - var memory: Memory = undefined; - var code: []FunctionBody = undefined; - - // Parse magic - if (!(try stream.isBytes(&[_]u8{ 0x00, 0x61, 0x73, 0x6d }))) return Error.malformed_wasm; - // Parse version - if (!(try stream.isBytes(&[_]u8{ 0x01, 0x00, 0x00, 0x00 }))) return Error.malformed_wasm; - - // NOTE: This ensures that (in this block) illegal behavior is safety-checked. - // This slows down the code but since this function is only called at the start - // I believe it is better to take the ``hit'' in performance (should only be @enumFromInt) - // rather than having undefined behavior when user provides an invalid wasm file. 
- @setRuntimeSafety(true); - loop: while (stream.readByte()) |byte| { - const section_size = try std.leb.readULEB128(u32, stream); - switch (@as(std.wasm.Section, @enumFromInt(byte))) { - std.wasm.Section.custom => { - // TODO: unimplemented - break :loop; - }, - std.wasm.Section.type => { - const type_count = try std.leb.readULEB128(u32, stream); - types = try allocator.alloc(FunctionType, type_count); - for (types) |*t| { - if (!(try stream.isBytes(&.{0x60}))) return Error.malformed_wasm; - const params_count = try std.leb.readULEB128(u32, stream); - t.parameters = try allocator.alloc(u8, params_count); - if (try stream.read(t.parameters) != params_count) { - // TODO: better errors - return Error.malformed_wasm; - } - const results = try std.leb.readULEB128(u32, stream); - t.results = try allocator.alloc(u8, results); - if (try stream.read(t.results) != results) { - // TODO: better errors - return Error.malformed_wasm; - } - } - }, - std.wasm.Section.import => { - // Can there be more than one import section? 
- const import_count = try std.leb.readULEB128(u32, stream); - for (0..import_count) |i| { - const mod = try parseName(allocator, stream); - const nm = try parseName(allocator, stream); - - const b = try stream.readByte(); - switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) { - std.wasm.ExternalKind.function => { - try funcs.append(.{ .external = @intCast(i) }); - - const idx = try std.leb.readULEB128(u32, stream); - try imports.append(.{ - .module = mod, - .name = nm, - .signature = idx, - }); - }, - // TODO: not implemented - std.wasm.ExternalKind.table => try stream.skipBytes(3, .{}), - std.wasm.ExternalKind.memory => try stream.skipBytes(2, .{}), - std.wasm.ExternalKind.global => try stream.skipBytes(2, .{}), - } - } - }, - std.wasm.Section.function => { - const function_count = try std.leb.readULEB128(u32, stream); - functions = try allocator.alloc(u32, function_count); - for (functions) |*f| { - f.* = try std.leb.readULEB128(u32, stream); - } - }, - std.wasm.Section.table => { - // TODO: not implemented - try stream.skipBytes(section_size, .{}); - }, - std.wasm.Section.memory => { - const memory_count = try std.leb.readULEB128(u32, stream); - for (0..memory_count) |_| { - const b = try stream.readByte(); - const n = try std.leb.readULEB128(u32, stream); - var m: u32 = 0; - switch (b) { - 0x00 => {}, - 0x01 => m = try std.leb.readULEB128(u32, stream), - else => return Error.malformed_wasm, - } - // TODO: support multiple memories - memory = .{ - .initial = n, - .max = m, - }; - } - }, - std.wasm.Section.global => { - // TODO: unimplemented - try stream.skipBytes(section_size, .{}); - }, - // TODO: Can there be more than one export section? 
Otherwise we can optimize allocations - std.wasm.Section.@"export" => { - const export_count = try std.leb.readULEB128(u32, stream); - for (0..export_count) |_| { - const nm = try parseName(allocator, stream); - const b = try stream.readByte(); - const idx = try std.leb.readULEB128(u32, stream); - switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) { - std.wasm.ExternalKind.function => try exports.put(nm, idx), - // TODO: unimplemented, - std.wasm.ExternalKind.table => allocator.free(nm), - std.wasm.ExternalKind.memory => allocator.free(nm), - std.wasm.ExternalKind.global => allocator.free(nm), - } - } - }, - std.wasm.Section.start => { - // TODO: unimplemented - try stream.skipBytes(section_size, .{}); - }, - std.wasm.Section.element => { - // TODO: unimplemented - try stream.skipBytes(section_size, .{}); - }, - std.wasm.Section.code => { - const code_count = try std.leb.readULEB128(u32, stream); - code = try allocator.alloc(FunctionBody, code_count); - for (0..code_count) |i| { - const code_size = try std.leb.readULEB128(u32, stream); - var locals_size: usize = 0; - const local_count = try leb128Decode(u32, stream); - locals_size += local_count.len; - const locals = try allocator.alloc(Local, local_count.val); - for (locals) |*l| { - const n = try leb128Decode(u32, stream); - l.types = try allocator.alloc(u8, n.val); - @memset(l.types, try stream.readByte()); - locals_size += n.len + 1; - } - code[i].locals = locals; - - // TODO: maybe is better to parse code into ast here and not do it every frame? - // FIXME: This calculation is plain wrong. 
Resolving above TODO should help - code[i].code = try allocator.alloc(u8, code_size - locals_size); - // TODO: better error reporting - if (try stream.read(code[i].code) != code_size - locals_size) return Error.malformed_wasm; - - const f = Function{ .internal = @intCast(i) }; - try funcs.append(f); - } - }, - std.wasm.Section.data => { - // TODO: unimplemented - try stream.skipBytes(section_size, .{}); - }, - std.wasm.Section.data_count => { - // TODO: unimplemented - try stream.skipBytes(section_size, .{}); - }, - else => return Error.malformed_wasm, - } - } else |err| switch (err) { - error.EndOfStream => {}, - else => return err, - } - - return Module{ - .types = types, - .imports = imports, - .functions = functions, - .memory = memory, - .exports = exports, - .code = code, - .funcs = funcs, - }; -} diff --git a/src/mods/vm.zig b/src/mods/vm.zig index cbeb865..b2a373d 100644 --- a/src/mods/vm.zig +++ b/src/mods/vm.zig @@ -1,13 +1,83 @@ const std = @import("std"); const wasm = @import("wasm.zig"); -const Parser = @import("parse.zig"); +const Parser = @import("Parser.zig"); const Allocator = std.mem.Allocator; const AllocationError = error{OutOfMemory}; -fn leb128Decode(comptime T: type, bytes: []u8) Parser.leb128Result(T) { +pub const Memory = struct { + min: u32, + max: ?u32, +}; +// TODO: Resolve function calls at parse time +// TODO: Resolve function types at compile time +pub const Func = union(enum) { + internal: u32, + external: u32, +}; + +pub const Module = struct { + memory: Memory, + funcs: []Func, + exports: std.StringHashMapUnmanaged(u32), + imports: []Parser.Import, + types: []Parser.Functype, + functions: []u32, + code: []Parser.Func, + + fn deinit(self: *Module, allocator: Allocator) void { + self.exports.deinit(allocator); + allocator.free(self.funcs); + allocator.free(self.imports); + allocator.free(self.types); + allocator.free(self.functions); + allocator.free(self.code); + } +}; + +pub fn leb128Result(T: type) type { + return struct { len: 
usize, val: T }; +} + +pub fn leb128Decode_stream(comptime T: type, stream: anytype) !leb128Result(T) { + switch (@typeInfo(T)) { + .int => {}, + else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)), + } + if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) { + @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits})); + } + + var result: T = 0; + // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day... + var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0; + var byte: u8 = undefined; + var len: usize = 0; + while (stream.readByte()) |b| { + len += 1; + result |= @as(T, @intCast((b & 0x7f))) << shift; + if ((b & (0x1 << 7)) == 0) { + byte = b; + break; + } + shift += 7; + } else |err| { + return err; + } + + if (@typeInfo(T).int.signedness == .signed) { + const size = @sizeOf(T) * 8; + if (shift < size and (byte & 0x40) != 0) { + result |= (~@as(T, 0) << shift); + } + } + + return .{ .len = len, .val = result }; +} + +fn leb128Decode(comptime T: type, bytes: []const u8) leb128Result(T) { var fbs = std.io.fixedBufferStream(bytes); // TODO: this catch should be unrecheable - return Parser.leb128Decode(T, fbs.reader()) catch .{ .len = 0, .val = 0 }; + return leb128Decode_stream(T, fbs.reader()) catch .{ .len = 0, .val = 0 }; } pub fn decodeLittleEndian(comptime T: type, bytes: []u8) T { @@ -30,7 +100,7 @@ pub fn encodeLittleEndian(comptime T: type, bytes: *[]u8, value: T) void { pub const CallFrame = struct { program_counter: usize, - code: []u8, + code: []const u8, locals: []Value, }; @@ -45,15 +115,20 @@ pub const Value = union(ValueType) { }; pub const Runtime = struct { - module: Parser.Module, + module: Module, stack: std.ArrayList(Value), call_stack: std.ArrayList(CallFrame), memory: []u8, global_runtime: *wasm.GlobalRuntime, labels: 
std.ArrayList(usize), - pub fn init(allocator: Allocator, module: Parser.Module, global_runtime: *wasm.GlobalRuntime) !Runtime { - const memory = try allocator.alloc(u8, module.memory.max); + pub fn init(allocator: Allocator, module: Module, global_runtime: *wasm.GlobalRuntime) !Runtime { + // if memory max is not set the memory is allowed to grow but it is not supported at the moment + const max = module.memory.max orelse 1_000; + if (module.memory.max == null) { + std.debug.print("[WARN]: growing memory is not yet supported, usign a default value of 1Kb\n", .{}); + } + const memory = try allocator.alloc(u8, max); return Runtime{ .module = module, .stack = try std.ArrayList(Value).initCapacity(allocator, 10), @@ -492,6 +567,7 @@ pub const Runtime = struct { } } + // TODO: Do name resolution pub fn callExternal(self: *Runtime, allocator: Allocator, name: []const u8, parameters: []usize) !void { if (self.module.exports.get(name)) |function| { try self.call(allocator, function, parameters); @@ -501,7 +577,7 @@ pub const Runtime = struct { } pub fn call(self: *Runtime, allocator: Allocator, function: usize, parameters: []usize) AllocationError!void { - const f = self.module.funcs.items[function]; + const f = self.module.funcs[function]; switch (f) { .internal => { const function_type = self.module.types[self.module.functions[f.internal]]; @@ -512,26 +588,32 @@ pub const Runtime = struct { }; for (parameters, 0..) |p, i| { - switch (Parser.parseType(function_type.parameters[i])) { - .i32 => { - frame.locals[i] = .{ .i32 = @intCast(p) }; + switch (function_type.parameters[i]) { + .val => |v| switch (v) { + .i32 => { + frame.locals[i] = .{ .i32 = @intCast(p) }; + }, + .i64 => { + frame.locals[i] = .{ .i64 = @intCast(p) }; + }, + else => unreachable, }, - .i64 => { - frame.locals[i] = .{ .i64 = @intCast(p) }; - }, - else => unreachable, + .ref => unreachable, } } for (self.module.code[f.internal].locals, function_type.parameters.len..) 
|local, i| { - switch (Parser.parseType(local.types[0])) { - .i32 => { - frame.locals[i] = .{ .i32 = 0 }; - }, - .i64 => { - frame.locals[i] = .{ .i64 = 0 }; + switch (local) { + .val => |v| switch (v) { + .i32 => { + frame.locals[i] = .{ .i32 = 0 }; + }, + .i64 => { + frame.locals[i] = .{ .i64 = 0 }; + }, + else => unreachable, }, - else => unreachable, + .ref => unreachable, } } @@ -540,7 +622,7 @@ pub const Runtime = struct { allocator.free(frame.locals); }, .external => { - const name = self.module.imports.items[f.external].name; + const name = self.module.imports[f.external].name; if (self.global_runtime.functions.get(name)) |external| { external(&self.stack); } -- cgit v1.2.3