diff options
author | Lorenzo Torres <torres@sideros.org> | 2025-03-17 19:44:08 +0100 |
---|---|---|
committer | Lorenzo Torres <torres@sideros.org> | 2025-03-17 19:44:08 +0100 |
commit | 1d64275dee5e5716b1a32f22e2f0ccba885898db (patch) | |
tree | f1aaf8d487cff5853495a1e19563aa09e5889e23 /src/mods/parse.zig | |
parent | 5bab2c4bcf5870e421ae40123963f0c3b13af88a (diff) |
Refactored source code structure.
Diffstat (limited to 'src/mods/parse.zig')
-rw-r--r-- | src/mods/parse.zig | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/src/mods/parse.zig b/src/mods/parse.zig new file mode 100644 index 0000000..7080e66 --- /dev/null +++ b/src/mods/parse.zig @@ -0,0 +1,331 @@ +const std = @import("std"); +const wasm = @import("wasm.zig"); +const Allocator = std.mem.Allocator; + +pub fn leb128Result(T: type) type { + return struct { len: usize, val: T }; +} + +pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) { + switch (@typeInfo(T)) { + .int => {}, + else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)), + } + if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) { + @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits})); + } + + var result: T = 0; + // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day... + var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0; + var byte: u8 = undefined; + var len: usize = 0; + while (stream.readByte()) |b| { + len += 1; + result |= @as(T, @intCast((b & 0x7f))) << shift; + if ((b & (0x1 << 7)) == 0) { + byte = b; + break; + } + shift += 7; + } else |err| { + return err; + } + + if (@typeInfo(T).int.signedness == .signed) { + const size = @sizeOf(T) * 8; + if (shift < size and (byte & 0x40) != 0) { + result |= (~@as(T, 0) << shift); + } + } + + return .{ .len = len, .val = result }; +} + +pub const Error = error{ + malformed_wasm, + invalid_utf8, +}; + +pub const Module = struct { + types: []FunctionType, + imports: std.ArrayList(Import), + exports: std.StringHashMap(u32), + functions: []u32, + memory: Memory, + code: []FunctionBody, + funcs: std.ArrayList(Function), + + pub fn deinit(self: *Module, allocator: Allocator) void { + for (self.types) |t| { + t.deinit(allocator); + } + allocator.free(self.types); + + for (self.imports.items) |i| { + i.deinit(allocator); + } + self.imports.deinit(); + + var iter = self.exports.iterator(); + while (iter.next()) |entry| { + allocator.free(entry.key_ptr.*); + } + self.exports.deinit(); + + allocator.free(self.functions); + + for (self.code) |f| { + for (f.locals) |l| { + allocator.free(l.types); + } + allocator.free(f.code); + } + allocator.free(self.code); + + self.funcs.deinit(); + } +}; + +pub const FunctionScope = enum { + external, + internal, +}; + +pub const Function = union(FunctionScope) { + external: u8, + internal: u8, +}; + +// TODO: refactor locals +pub const Local = struct { + types: []u8, +}; + +pub const FunctionBody = struct { + locals: []Local, + code: []u8, +}; + +pub const Memory = struct { + initial: u32, + max: u32, +}; + +pub const FunctionType = struct { + parameters: []u8, + results: []u8, + + pub fn deinit(self: FunctionType, allocator: Allocator) void { + allocator.free(self.parameters); + allocator.free(self.results); + } +}; + +pub const Import = struct { + name: []u8, + module: []u8, + signature: u32, + + pub fn deinit(self: Import, allocator: Allocator) void { + allocator.free(self.name); + allocator.free(self.module); + } +}; + +pub fn parseType(t: u8) wasm.Type { + return @enumFromInt(t); +} + +pub fn parseName(allocator: Allocator, stream: anytype) ![]u8 { + const size = try std.leb.readULEB128(u32, stream); + const str = try allocator.alloc(u8, size); + if (try stream.read(str) != size) { + // TODO: better error + return Error.malformed_wasm; + } + + if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_utf8; + + return str; +} + +// TODO: parse Global Section +// TODO: Consider Arena allocator +pub fn parseWasm(allocator: Allocator, stream: anytype) !Module { + var types: []FunctionType = undefined; + var imports = std.ArrayList(Import).init(allocator); + var exports = std.StringHashMap(u32).init(allocator); + var funcs = std.ArrayList(Function).init(allocator); + var functions: []u32 = undefined; + var memory: Memory = undefined; + var code: []FunctionBody = undefined; + + // Parse magic + if (!(try stream.isBytes(&[_]u8{ 0x00, 0x61, 0x73, 0x6d }))) return Error.malformed_wasm; + // Parse version + if (!(try stream.isBytes(&[_]u8{ 0x01, 0x00, 0x00, 0x00 }))) return Error.malformed_wasm; + + // NOTE: This ensures that (in this block) illegal behavior is safety-checked. + // This slows down the code but since this function is only called at the start + // I believe it is better to take the ``hit'' in performance (should only be @enumFromInt) + // rather than having undefined behavior when user provides an invalid wasm file. + @setRuntimeSafety(true); + loop: while (stream.readByte()) |byte| { + const section_size = try std.leb.readULEB128(u32, stream); + switch (@as(std.wasm.Section, @enumFromInt(byte))) { + std.wasm.Section.custom => { + // TODO: unimplemented + break :loop; + }, + std.wasm.Section.type => { + const type_count = try std.leb.readULEB128(u32, stream); + types = try allocator.alloc(FunctionType, type_count); + for (types) |*t| { + if (!(try stream.isBytes(&.{0x60}))) return Error.malformed_wasm; + const params_count = try std.leb.readULEB128(u32, stream); + t.parameters = try allocator.alloc(u8, params_count); + if (try stream.read(t.parameters) != params_count) { + // TODO: better errors + return Error.malformed_wasm; + } + const results = try std.leb.readULEB128(u32, stream); + t.results = try allocator.alloc(u8, results); + if (try stream.read(t.results) != results) { + // TODO: better errors + return Error.malformed_wasm; + } + } + }, + std.wasm.Section.import => { + // Can there be more than one import section? + const import_count = try std.leb.readULEB128(u32, stream); + for (0..import_count) |i| { + const mod = try parseName(allocator, stream); + const nm = try parseName(allocator, stream); + + const b = try stream.readByte(); + switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) { + std.wasm.ExternalKind.function => try funcs.append(.{ .external = @intCast(i) }), + // TODO: not implemented + std.wasm.ExternalKind.table => {}, + std.wasm.ExternalKind.memory => {}, + std.wasm.ExternalKind.global => {}, + } + const idx = try std.leb.readULEB128(u32, stream); + try imports.append(.{ + .module = mod, + .name = nm, + .signature = idx, + }); + } + }, + std.wasm.Section.function => { + const function_count = try std.leb.readULEB128(u32, stream); + functions = try allocator.alloc(u32, function_count); + for (functions) |*f| { + f.* = try std.leb.readULEB128(u32, stream); + } + }, + std.wasm.Section.table => { + // TODO: not implemented + try stream.skipBytes(section_size, .{}); + }, + std.wasm.Section.memory => { + const memory_count = try std.leb.readULEB128(u32, stream); + for (0..memory_count) |_| { + const b = try stream.readByte(); + const n = try std.leb.readULEB128(u32, stream); + var m: u32 = 0; + switch (b) { + 0x00 => {}, + 0x01 => m = try std.leb.readULEB128(u32, stream), + else => return Error.malformed_wasm, + } + // TODO: support multiple memories + memory = .{ + .initial = n, + .max = m, + }; + } + }, + std.wasm.Section.global => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + // TODO: Can there be more than one export section? Otherwise we can optimize allocations + std.wasm.Section.@"export" => { + const export_count = try std.leb.readULEB128(u32, stream); + for (0..export_count) |_| { + const nm = try parseName(allocator, stream); + const b = try stream.readByte(); + const idx = try std.leb.readULEB128(u32, stream); + switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) { + std.wasm.ExternalKind.function => try exports.put(nm, idx), + // TODO: unimplemented, + std.wasm.ExternalKind.table => allocator.free(nm), + std.wasm.ExternalKind.memory => allocator.free(nm), + std.wasm.ExternalKind.global => allocator.free(nm), + } + } + }, + std.wasm.Section.start => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + std.wasm.Section.element => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + std.wasm.Section.code => { + const code_count = try std.leb.readULEB128(u32, stream); + code = try allocator.alloc(FunctionBody, code_count); + for (0..code_count) |i| { + const code_size = try std.leb.readULEB128(u32, stream); + var locals_size: usize = 0; + const local_count = try leb128Decode(u32, stream); + locals_size += local_count.len; + const locals = try allocator.alloc(Local, local_count.val); + for (locals) |*l| { + const n = try leb128Decode(u32, stream); + l.types = try allocator.alloc(u8, n.val); + @memset(l.types, try stream.readByte()); + locals_size += n.len + 1; + } + code[i].locals = locals; + + // TODO: maybe is better to parse code into ast here and not do it every frame? + // FIXME: This calculation is plain wrong. Resolving above TODO should help + code[i].code = try allocator.alloc(u8, code_size - locals_size); + // TODO: better error reporting + if (try stream.read(code[i].code) != code_size - locals_size) return Error.malformed_wasm; + + const f = Function{ .internal = @intCast(i) }; + try funcs.append(f); + } + }, + std.wasm.Section.data => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + std.wasm.Section.data_count => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + else => return Error.malformed_wasm, + } + } else |err| switch (err) { + error.EndOfStream => {}, + else => return err, + } + + return Module{ + .types = types, + .imports = imports, + .functions = functions, + .memory = memory, + .exports = exports, + .code = code, + .funcs = funcs, + }; +} |