From 6fe4855a67d025c7a283827d566130e1c6ade638 Mon Sep 17 00:00:00 2001 From: Ernesto Lanchares Date: Thu, 27 Mar 2025 17:05:50 +0100 Subject: IR can now parse itself. :) Now we have to reimplement the vm. :_) --- src/mods/Parser.zig | 20 +++++-- src/mods/ir.zig | 148 ++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 153 insertions(+), 15 deletions(-) diff --git a/src/mods/Parser.zig b/src/mods/Parser.zig index 080b407..6883fd3 100644 --- a/src/mods/Parser.zig +++ b/src/mods/Parser.zig @@ -39,6 +39,8 @@ pub const FunctionScope = enum { const Parser = @This(); pub const Error = error{ + OutOfMemory, + Overflow, invalid_instruction, invalid_magic, invalid_version, @@ -53,6 +55,8 @@ pub const Error = error{ invalid_globaltype, invalid_importdesc, invalid_exportdesc, + double_else, + unresolved_branch, unterminated_wasm, }; @@ -61,8 +65,8 @@ fn warn(self: Parser, s: []const u8) void { std.debug.print("[WARN]: Parsing of {s} unimplemented at byte index {d}\n", .{ s, self.byte_idx }); } -// TODO: remove peek -fn peek(self: Parser) ?u8 { +// TODO: remove peek? +pub fn peek(self: Parser) ?u8 { return if (self.byte_idx < self.bytes.len) self.bytes[self.byte_idx] else null; } @@ -92,6 +96,10 @@ pub fn readI64(self: *Parser) !i64 { return std.leb.readIleb128(i64, self); } +pub fn readI33(self: *Parser) !i33 { + return std.leb.readIleb128(i33, self); +} + pub fn readF32(self: *Parser) !f32 { const bytes = try self.read(@sizeOf(f32)); return std.mem.bytesAsValue(f32, bytes).*; @@ -154,7 +162,7 @@ fn parseVectype(self: *Parser) !std.wasm.Valtype { }; } -fn parseReftype(self: *Parser) !std.wasm.RefType { +pub fn parseReftype(self: *Parser) !std.wasm.RefType { return switch (try self.readByte()) { 0x70 => .funcref, 0x6F => .externref, @@ -462,11 +470,13 @@ fn parseCode(self: *Parser) !Func { local_count += l.n; } - // _ = try IR.parse(self); + const ir = try IR.parse(self); + const stdout = std.io.getStdOut().writer(); + try ir.print(stdout); const func = Func{ .locals = try self.allocator.alloc(Valtype, local_count), - .code = try self.read(end_idx - self.byte_idx), + .code = &.{}, }; var li: usize = 0; diff --git a/src/mods/ir.zig b/src/mods/ir.zig index c261da6..25409e4 100644 --- a/src/mods/ir.zig +++ b/src/mods/ir.zig @@ -46,6 +46,16 @@ indices: []Index, // TODO: this could be a byte array and v128.const and i8x16.shuffle could live here too select_valtypes: []Parser.Valtype, +pub fn print(self: IR, writer: anytype) !void { + for (self.opcodes, 0..) |op, i| { + try writer.print("{x:3} {s}", .{ i, @tagName(op) }); + if (op == .br or op == .br_if) { + try writer.print(" {x:3}", .{self.indices[i].u32}); + } + _ = try writer.write("\n"); + } +} + /// Opcodes. /// This is a mix of wasm opcodes mixed with a few of our own. /// Mainly for `0xFC` opcodes we use `0xD3` to `0xE4`. @@ -591,17 +601,39 @@ const IRParserState = struct { parser: *Parser, allocator: Allocator, + branches: std.AutoHashMapUnmanaged(u32, u32), + opcodes: std.ArrayListUnmanaged(Opcode), indices: std.ArrayListUnmanaged(Index), - fn parseExpression(self: *IRParserState) !void { + fn parseFunction(self: *IRParserState) !void { + while (true) { + const op = self.parser.peek() orelse return Parser.Error.unterminated_wasm; + if (op == 0x0B) { + _ = try self.parser.readByte(); + break; + } else { + try self.parseExpression(); + } + } + } + + fn parseExpression(self: *IRParserState) Parser.Error!void { const b = try self.parser.readByte(); try switch (b) { - 0x00...0x01 => {}, // TODO - 0x02...0x04 => {}, // TODO - 0x0C...0x11 => {}, // TODO - 0xD0...0xD2 => {}, // TODO - 0x1A...0x1C => {}, // TODO + 0x00 => {}, // TODO + 0x01 => {}, + 0x02...0x03 => self.parseBlock(b), + 0x04 => self.parseIf(), + 0x0C...0x0D => self.parseBranch(b), + 0x0E => @panic("UNIMPLEMENTED"), + 0x0F => self.push(@enumFromInt(b), .{ .u64 = 0 }), + 0x10 => self.push(@enumFromInt(b), .{ .u32 = try self.parser.readU32() }), + 0x11 => @panic("UNIMPLEMENTED"), + 0xD0 => self.push(@enumFromInt(b), .{ .reftype = try self.parser.parseReftype() }), + 0xD1 => self.push(@enumFromInt(b), .{ .u64 = 0 }), + 0xD2 => self.push(@enumFromInt(b), .{ .u32 = try self.parser.readU32() }), + 0x1A...0x1C => @panic("UNIMPLEMENTED"), 0x20...0x24 => self.push(@enumFromInt(b), .{ .u32 = try self.parser.readU32() }), 0x25...0x26 => self.push(@enumFromInt(b), .{ .u32 = try self.parser.readU32() }), 0x28...0x3E => self.push(@enumFromInt(b), .{ .memarg = try self.parseMemarg() }), @@ -649,8 +681,8 @@ const IRParserState = struct { const n = try self.parser.readU32(); try switch (n) { 0...7 => self.push(@enumFromInt(0xD3 + @as(u8, @intCast(n))), .{ .u64 = 0 }), - 8...11 => {}, // TODO - 12...17 => {}, // TODO + 8...11 => @panic("UNIMPLEMENTED"), + 12...17 => @panic("UNIMPLEMENTED"), else => { std.log.err("Invalid misc instruction {d} at position {d}\n", .{ n, self.parser.byte_idx }); return Parser.Error.invalid_instruction; @@ -658,6 +690,101 @@ const IRParserState = struct { }; } + fn parseBlockType(self: *IRParserState) !void { + const b = self.parser.peek() orelse return Parser.Error.unterminated_wasm; + switch (b) { + 0x40 => _ = try self.parser.readByte(), + 0x6F...0x70, 0x7B...0x7F => _ = try self.parser.readByte(), + else => _ = try self.parser.readI33(), + } + } + + fn parseBlock(self: *IRParserState, b: u8) !void { + // TODO: Should we do something with this? + _ = try self.parseBlockType(); + const start: u32 = @intCast(self.opcodes.items.len); + while (true) { + const op = self.parser.peek() orelse return Parser.Error.unterminated_wasm; + if (op == 0x0B) { + _ = try self.parser.readByte(); + break; + } else { + try self.parseExpression(); + } + } + const end: u32 = @intCast(self.opcodes.items.len); + const jump_addr: u32 = switch (b) { + 0x02 => end, + 0x03 => start, + else => unreachable, + }; + try self.fix_branches_for_block(start, end, jump_addr); + } + + fn parseIf(self: *IRParserState) !void { + // TODO: Should we do something with this? + _ = try self.parseBlockType(); + + try self.push(.br_if, .{ .u32 = @intCast(self.opcodes.items.len + 2) }); + const start: u32 = @intCast(self.opcodes.items.len); + try self.push(.br, .{ .u32 = 0 }); + + var else_addr: u32 = 0; + while (true) { + const op = self.parser.peek() orelse return Parser.Error.unterminated_wasm; + + if (op == 0x05) { + if (else_addr != 0) return Parser.Error.double_else; + _ = try self.parser.readByte(); + else_addr = @intCast(self.opcodes.items.len); + try self.push(.br, .{ .u32 = 0 }); + } else if (op == 0x0B) { + _ = try self.parser.readByte(); + break; + } else { + try self.parseExpression(); + } + } + const end: u32 = @intCast(self.opcodes.items.len); + + if (else_addr > 0) { + self.indices.items[start].u32 = else_addr + 1; + self.indices.items[else_addr].u32 = end; + } else { + self.indices.items[start].u32 = end; + } + + try self.fix_branches_for_block(start, end, end); + } + + fn fix_branches_for_block(self: *IRParserState, start: u32, end: u32, jump_addr: u32) !void { + var todel: std.ArrayListUnmanaged(u32) = .{}; + defer todel.deinit(self.allocator); + + var it = self.branches.iterator(); + while (it.next()) |branch| { + if (start < branch.key_ptr.* and branch.key_ptr.* < end) { + if (branch.value_ptr.* == 0) { + self.indices.items[branch.key_ptr.*].u32 = jump_addr; + try todel.append(self.allocator, branch.key_ptr.*); + } else { + branch.value_ptr.* -= 1; + } + } + } + + for (todel.items) |d| { + // TODO: Do we need to assert this is true? + _ = self.branches.remove(d); + } + } + + fn parseBranch(self: *IRParserState, b: u8) !void { + const idx = try self.parser.readU32(); + try self.branches.put(self.allocator, @intCast(self.opcodes.items.len), idx); + try self.push(@enumFromInt(b), .{ .u64 = 0 }); + } + fn parseVector(self: *IRParserState) !void { const n = try self.parser.readU32(); try switch (n) { @@ -682,11 +809,12 @@ pub fn parse(parser: *Parser) !IR { var state = IRParserState{ .opcodes = .{}, .indices = .{}, + .branches = .{}, .parser = parser, .allocator = parser.allocator, }; - std.debug.print("Parsing\n", .{}); - try state.parseExpression(); + try state.parseFunction(); + if (state.branches.count() != 0) return Parser.Error.unresolved_branch; return .{ .opcodes = try state.opcodes.toOwnedSlice(state.allocator), .indices = try state.indices.toOwnedSlice(state.allocator), -- cgit v1.2.3