From 09691ec4d93cda6ab31d28d6e478257209fe625e Mon Sep 17 00:00:00 2001 From: Ernesto Lanchares Date: Mon, 24 Mar 2025 21:18:40 +0000 Subject: Some progress on IR parsing. Although IR parsing is technically called while parsing, since we lack the ability to parse blocks or labels or if or any hard stuff really, it does not affect code parsing. However it is nice to have it there as zig compiles it :) --- src/main.zig | 88 +++++++++++++++---------------- src/mods/Parser.zig | 24 ++++++++- src/mods/ir.zig | 147 +++++++++++++++++++++++++++++++++++++++++++++------- src/mods/mods.zig | 3 ++ 4 files changed, 196 insertions(+), 66 deletions(-) diff --git a/src/main.zig b/src/main.zig index ea74ca4..aaeaf38 100644 --- a/src/main.zig +++ b/src/main.zig @@ -21,58 +21,54 @@ fn testSystem2(pool: *ecs.Pool) void { pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; const allocator = gpa.allocator(); - { - //var global_runtime = wasm.GlobalRuntime.init(allocator); - //defer global_runtime.deinit(); - //try global_runtime.addFunction("debug", wasm.debug); + defer if (gpa.deinit() != .ok) @panic("Leaked memory"); - // const file = try std.fs.cwd().openFile("assets/core.wasm", .{}); - // const all = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB - // var parser = mods.Parser{ - // .bytes = all, - // .byte_idx = 0, - // .allocator = allocator, - // }; - // const module = parser.parseModule() catch |err| { - // std.debug.print("[ERROR]: error at byte {x}(0x{x})\n", .{ parser.byte_idx, parser.bytes[parser.byte_idx] }); - // return err; - // }; - // var runtime = try vm.Runtime.init(allocator, module, &global_runtime); - // defer runtime.deinit(allocator); + var global_runtime = mods.GlobalRuntime.init(allocator); + defer global_runtime.deinit(); + try global_runtime.addFunction("debug", mods.Wasm.debug); - //var parameters = [_]usize{}; - //try runtime.callExternal(allocator, "preinit", &parameters); - const w = try window.Window.create(800, 600, "sideros"); - 
defer w.destroy(); + const file = try std.fs.cwd().openFile("assets/core.wasm", .{}); + const all = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB + var parser = mods.Parser{ + .bytes = all, + .byte_idx = 0, + .allocator = allocator, + }; + const module = parser.parseModule() catch |err| { + std.debug.print("[ERROR]: error at byte {x}(0x{x})\n", .{ parser.byte_idx, parser.bytes[parser.byte_idx] }); + return err; + }; + var runtime = try mods.Runtime.init(allocator, module, &global_runtime); + defer runtime.deinit(allocator); - //var pool = try ecs.Pool.init(allocator); - //defer pool.deinit(allocator); + var parameters = [_]usize{}; + try runtime.callExternal(allocator, "preinit", &parameters); + const w = try window.Window.create(800, 600, "sideros"); + defer w.destroy(); - ////try pool.addSystemGroup(&[_]entities.System{ - //// testSystem, - ////}); - //try pool.addSystemGroup(&[_]ecs.System{ - // testSystem2, - //}); + // var pool = try ecs.Pool.init(allocator); + // defer pool.deinit(allocator); - //for (0..1000) |_| { - // const entity = try pool.createEntity(); - // try pool.addComponent(entity, ecs.components.Position{ .x = 1.0, .y = 0.5, .z = 3.0 }); - // try pool.addComponent(entity, ecs.components.Speed{ .speed = 5.0 }); - //} + //try pool.addSystemGroup(&[_]entities.System{ + // testSystem, + //}); + // try pool.addSystemGroup(&[_]ecs.System{ + // testSystem2, + // }); - // TODO(luccie-cmd): Renderer.create shouldn't return an error - var r = try Renderer.create(allocator, w); - defer r.destroy(); + // for (0..1000) |_| { + // const entity = try pool.createEntity(); + // try pool.addComponent(entity, ecs.components.Position{ .x = 1.0, .y = 0.5, .z = 3.0 }); + // try pool.addComponent(entity, ecs.components.Speed{ .speed = 5.0 }); + // } - while (!w.shouldClose()) { - c.glfwPollEvents(); - try r.tick(); - //pool.tick(); - } - } + // TODO(luccie-cmd): Renderer.create shouldn't return an error + // var r = try Renderer.create(allocator, w); + // defer 
r.destroy(); - if (gpa.detectLeaks()) { - return error.leaked_memory; - } + // while (!w.shouldClose()) { + // c.glfwPollEvents(); + // try r.tick(); + // pool.tick(); + // } } diff --git a/src/mods/Parser.zig b/src/mods/Parser.zig index d9f7ccf..2e3c434 100644 --- a/src/mods/Parser.zig +++ b/src/mods/Parser.zig @@ -1,5 +1,6 @@ const std = @import("std"); const vm = @import("vm.zig"); +const IR = @import("ir.zig"); const Allocator = std.mem.Allocator; bytes: []const u8, @@ -38,6 +39,7 @@ pub const FunctionScope = enum { const Parser = @This(); pub const Error = error{ + invalid_instruction, invalid_magic, invalid_version, invalid_section, @@ -78,10 +80,28 @@ pub fn readByte(self: *Parser) !u8 { return (try self.read(1))[0]; } -fn readU32(self: *Parser) !u32 { +pub fn readU32(self: *Parser) !u32 { return std.leb.readUleb128(u32, self); } +pub fn readI32(self: *Parser) !i32 { + return std.leb.readIleb128(i32, self); +} + +pub fn readI64(self: *Parser) !i64 { + return std.leb.readIleb128(i64, self); +} + +pub fn readF32(self: *Parser) !f32 { + const bytes = try self.read(@sizeOf(f32)); + return std.mem.bytesAsValue(f32, bytes).*; +} + +pub fn readF64(self: *Parser) !f64 { + const bytes = try self.read(@sizeOf(f64)); + return std.mem.bytesAsValue(f64, bytes).*; +} + fn readName(self: *Parser) ![]const u8 { // NOTE: This should be the only vector not parsed through parseVector const size = try self.readU32(); @@ -442,6 +462,8 @@ fn parseCode(self: *Parser) !Func { local_count += l.n; } + _ = try IR.parse(self); + const func = Func{ .locals = try self.allocator.alloc(Valtype, local_count), .code = try self.read(end_idx - self.byte_idx), diff --git a/src/mods/ir.zig b/src/mods/ir.zig index 7509cf4..c261da6 100644 --- a/src/mods/ir.zig +++ b/src/mods/ir.zig @@ -3,6 +3,8 @@ const Parser = @import("Parser.zig"); const Allocator = std.mem.Allocator; +const IR = @This(); + const VectorIndex = packed struct { opcode: VectorOpcode, laneidx: u8, @@ -46,7 +48,7 @@ select_valtypes: 
[]Parser.Valtype, /// Opcodes. /// This is a mix of wasm opcodes mixed with a few of our own. -/// Mainly for `0xFC` opcodes we use `0xD3` to `0xF5`. +/// Mainly for `0xFC` opcodes we use `0xD3` to `0xE4`. pub const Opcode = enum(u8) { // CONTROL INSTRUCTIONS // The rest of instructions should be implemented in terms of these ones @@ -97,17 +99,17 @@ pub const Opcode = enum(u8) { /// Index: `u32`. Meaning: index into table index tableset = 0x26, /// Index: `DIndex`. Meaning: TODO - tableinit = 0xD3, + tableinit = 0xDF, /// Index: `u32`. Meaning: TODO - elemdrop = 0xD4, + elemdrop = 0xE0, /// Index: `DIndex`. Meaning: `DIndex.x` is destination `DIndex.y` is source - tablecopy = 0xD5, + tablecopy = 0xE1, /// Index: `u32`. Meaning: tableidx - tablegrow = 0xD6, + tablegrow = 0xE2, /// Index: `u32`. Meaning: tableidx - tablesize = 0xD7, + tablesize = 0xE3, /// Index: `u32`. Meaning: tableidx - tablefill = 0xD8, + tablefill = 0xE4, // MEMORY INSTRUCTIONS /// Index: `Memarg`. Meaning: memarg @@ -159,11 +161,11 @@ pub const Opcode = enum(u8) { memorysize = 0x3F, memorygrow = 0x40, /// Index: `u32`. Meaning: dataidx - memoryinit = 0xD9, + memoryinit = 0xDB, /// Index: `u32`. Meaning: dataidx - datadrop = 0xDA, - memorycopy = 0xDB, - memoryfill = 0xDC, + datadrop = 0xDC, + memorycopy = 0xDD, + memoryfill = 0xDE, // NUMERIC INSTRUCTION /// Index: `i32`. 
Meaning: constant @@ -313,14 +315,14 @@ pub const Opcode = enum(u8) { i64_extend16_s = 0xC3, i64_extend32_s = 0xC4, - i32_trunc_sat_f32_s = 0xDD, - i32_trunc_sat_f32_u = 0xDF, - i32_trunc_sat_f64_s = 0xF0, - i32_trunc_sat_f64_u = 0xF1, - i64_trunc_sat_f32_s = 0xF2, - i64_trunc_sat_f32_u = 0xF3, - i64_trunc_sat_f64_s = 0xF4, - i64_trunc_sat_f64_u = 0xF5, + i32_trunc_sat_f32_s = 0xD3, + i32_trunc_sat_f32_u = 0xD4, + i32_trunc_sat_f64_s = 0xD5, + i32_trunc_sat_f64_u = 0xD6, + i64_trunc_sat_f32_s = 0xD7, + i64_trunc_sat_f32_u = 0xD8, + i64_trunc_sat_f64_s = 0xD9, + i64_trunc_sat_f64_u = 0xDA, // VECTOR INSTRUCTIONS /// Index: `VectorIndex`. Meaning: See `VectorOpcode` @@ -584,3 +586,110 @@ const VectorOpcode = enum(u8) { f32x4_demote_f64x2_zero = 94, f64x2_promote_low_f32x4 = 95, }; + +const IRParserState = struct { + parser: *Parser, + allocator: Allocator, + + opcodes: std.ArrayListUnmanaged(Opcode), + indices: std.ArrayListUnmanaged(Index), + + fn parseExpression(self: *IRParserState) !void { + const b = try self.parser.readByte(); + try switch (b) { + 0x00...0x01 => {}, // TODO + 0x02...0x04 => {}, // TODO + 0x0C...0x11 => {}, // TODO + 0xD0...0xD2 => {}, // TODO + 0x1A...0x1C => {}, // TODO + 0x20...0x24 => self.push(@enumFromInt(b), .{ .u32 = try self.parser.readU32() }), + 0x25...0x26 => self.push(@enumFromInt(b), .{ .u32 = try self.parser.readU32() }), + 0x28...0x3E => self.push(@enumFromInt(b), .{ .memarg = try self.parseMemarg() }), + 0x3F...0x40 => self.parseMemsizeorgrow(b), + 0x41...0x44 => self.parseConst(b), + 0x45...0xC4 => self.push(@enumFromInt(b), .{ .u64 = 0 }), + 0xFD => self.parseVector(), + 0xFC => self.parseMisc(), + else => { + std.log.err("Invalid instruction {x} at position {d}\n", .{ b, self.parser.byte_idx }); + return Parser.Error.invalid_instruction; + }, + }; + } + + fn push(self: *IRParserState, opcode: Opcode, index: Index) !void { + try self.opcodes.append(self.allocator, opcode); + try self.indices.append(self.allocator, index); + } 
+ + fn parseMemarg(self: *IRParserState) !Memarg { + return .{ + // TODO: assert this intCast does not fail + .alignment = @intCast(try self.parser.readU32()), + .offset = try self.parser.readU32(), + }; + } + + fn parseMemsizeorgrow(self: *IRParserState, b: u8) !void { + if (try self.parser.readByte() != 0x00) return Parser.Error.invalid_instruction; + try self.push(@enumFromInt(b), .{ .u64 = 0 }); + } + + fn parseConst(self: *IRParserState, b: u8) !void { + try switch (b) { + 0x41 => self.push(.i32_const, .{ .i32 = try self.parser.readI32() }), + 0x42 => self.push(.i64_const, .{ .i64 = try self.parser.readI64() }), + 0x43 => self.push(.f32_const, .{ .f32 = try self.parser.readF32() }), + 0x44 => self.push(.f64_const, .{ .f64 = try self.parser.readF64() }), + else => unreachable, + }; + } + + fn parseMisc(self: *IRParserState) !void { + const n = try self.parser.readU32(); + try switch (n) { + 0...7 => self.push(@enumFromInt(0xD3 + @as(u8, @intCast(n))), .{ .u64 = 0 }), + 8...11 => {}, // TODO + 12...17 => {}, // TODO + else => { + std.log.err("Invalid misc instruction {d} at position {d}\n", .{ n, self.parser.byte_idx }); + return Parser.Error.invalid_instruction; + }, + }; + } + + fn parseVector(self: *IRParserState) !void { + const n = try self.parser.readU32(); + try switch (n) { + 0...10, 92...93, 11 => self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = try self.parseMemarg(), .laneidx = 0 } }), + 84...91 => self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = try self.parseMemarg(), .laneidx = try self.parser.readByte() } }), + 12 => {}, + 13 => {}, + 21...34 => self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = .{ .alignment = 0, .offset = 0 }, .laneidx = try self.parser.readByte() } }), + // Yes, there are these random gaps in the wasm vector instructions, don't ask me how I know... 
+ 14...20, 35...83, 94...153, 155...161, 163...164, 167...174, 177, 181...186, 188...193, 195...196, 199...206, 209, 213...225, 227...237, 239...255 => { + try self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = .{ .alignment = 0, .offset = 0 }, .laneidx = 0 } }); + }, + else => { + std.log.err("Invalid vector instruction {d} at position {d}\n", .{ n, self.parser.byte_idx }); + return Parser.Error.invalid_instruction; + }, + }; + } +}; + +pub fn parse(parser: *Parser) !IR { + var state = IRParserState{ + .opcodes = .{}, + .indices = .{}, + .parser = parser, + .allocator = parser.allocator, + }; + std.debug.print("Parsing\n", .{}); + try state.parseExpression(); + return .{ + .opcodes = try state.opcodes.toOwnedSlice(state.allocator), + .indices = try state.indices.toOwnedSlice(state.allocator), + .select_valtypes = &.{}, + }; +} diff --git a/src/mods/mods.zig b/src/mods/mods.zig index 9d845e1..f91fa7d 100644 --- a/src/mods/mods.zig +++ b/src/mods/mods.zig @@ -3,3 +3,6 @@ pub const VM = @import("vm.zig"); // TODO: is this really needed? pub const Wasm = @import("wasm.zig"); pub const IR = @import("ir.zig"); + +pub const GlobalRuntime = Wasm.GlobalRuntime; +pub const Runtime = VM.Runtime; -- cgit v1.2.3