diff options
author | Lorenzo Torres <lorenzotorres@outlook.it> | 2025-03-12 19:56:19 +0100 |
---|---|---|
committer | Lorenzo Torres <lorenzotorres@outlook.it> | 2025-03-12 19:56:19 +0100 |
commit | 4f45899a3c28440724ffcaa3a604ad48f18a25c8 (patch) | |
tree | cecd79da4e3e3cb17ffd86ec1a58d6e77a44cb83 /src/vm/parse.zig | |
parent | fc39b277155044366f1647b238a415fab4028d83 (diff) |
base code
Diffstat (limited to 'src/vm/parse.zig')
-rw-r--r-- | src/vm/parse.zig | 288 |
1 files changed, 288 insertions, 0 deletions
diff --git a/src/vm/parse.zig b/src/vm/parse.zig new file mode 100644 index 0000000..40eefc1 --- /dev/null +++ b/src/vm/parse.zig @@ -0,0 +1,288 @@ +const std = @import("std"); +const wasm = @import("wasm.zig"); +const Allocator = std.mem.Allocator; + +pub const Error = error{ + malformed_wasm, + invalid_utf8, +}; + +pub const Module = struct { + types: []FunctionType, + imports: std.ArrayList(Import), + exports: std.StringHashMap(u32), + functions: []u32, + memory: Memory, + code: []FunctionBody, + funcs: std.ArrayList(Function), + + pub fn deinit(self: *Module, allocator: Allocator) void { + for (self.types) |t| { + t.deinit(allocator); + } + allocator.free(self.types); + + for (self.imports.items) |i| { + i.deinit(allocator); + } + self.imports.deinit(); + + var iter = self.exports.iterator(); + while (iter.next()) |entry| { + allocator.free(entry.key_ptr.*); + } + self.exports.deinit(); + + allocator.free(self.functions); + + for (self.code) |f| { + for (f.locals) |l| { + allocator.free(l.types); + } + allocator.free(f.code); + } + allocator.free(self.code); + + self.funcs.deinit(); + } +}; + +pub const FunctionScope = enum { + external, + internal, +}; + +pub const Function = union(FunctionScope) { + external: u8, + internal: u8, +}; + +// TODO: refactor locals +pub const Local = struct { + types: []u8, +}; + +pub const FunctionBody = struct { + locals: []Local, + code: []u8, +}; + +pub const Memory = struct { + initial: u32, + max: u32, +}; + +pub const FunctionType = struct { + parameters: []u8, + results: []u8, + + pub fn deinit(self: FunctionType, allocator: Allocator) void { + allocator.free(self.parameters); + allocator.free(self.results); + } +}; + +pub const Import = struct { + name: []u8, + module: []u8, + signature: u32, + + pub fn deinit(self: Import, allocator: Allocator) void { + allocator.free(self.name); + allocator.free(self.module); + } +}; + +pub fn parseType(t: u8) wasm.Type { + return @enumFromInt(t); +} + +pub fn parseName(allocator: Allocator, stream: anytype) ![]u8 { + const size = try std.leb.readULEB128(u32, stream); + const str = try allocator.alloc(u8, size); + if (try stream.read(str) != size) { + // TODO: better error + return Error.malformed_wasm; + } + + if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_utf8; + + return str; +} + +// TODO: parse Global Section +// TODO: Consider Arena allocator +pub fn parseWasm(allocator: Allocator, stream: anytype) !Module { + var types: []FunctionType = undefined; + var imports = std.ArrayList(Import).init(allocator); + var exports = std.StringHashMap(u32).init(allocator); + var funcs = std.ArrayList(Function).init(allocator); + var functions: []u32 = undefined; + var memory: Memory = undefined; + var code: []FunctionBody = undefined; + + // Parse magic + if (!(try stream.isBytes(&[_]u8{ 0x00, 0x61, 0x73, 0x6d }))) return Error.malformed_wasm; + // Parse version + if (!(try stream.isBytes(&[_]u8{ 0x01, 0x00, 0x00, 0x00 }))) return Error.malformed_wasm; + + // NOTE: This ensures that (in this block) illegal behavior is safety-checked. + // This slows down the code but since this function is only called at the start + // I believe it is better to take the ``hit'' in performance (should only be @enumFromInt) + // rather than having undefined behavior when user provides an invalid wasm file. + @setRuntimeSafety(true); + loop: while (stream.readByte()) |byte| { + const section_size = try std.leb.readULEB128(u32, stream); + switch (@as(std.wasm.Section, @enumFromInt(byte))) { + std.wasm.Section.custom => { + // TODO: unimplemented + break :loop; + }, + std.wasm.Section.type => { + const type_count = try std.leb.readULEB128(u32, stream); + types = try allocator.alloc(FunctionType, type_count); + for (types) |*t| { + if (!(try stream.isBytes(&.{0x60}))) return Error.malformed_wasm; + const params_count = try std.leb.readULEB128(u32, stream); + t.parameters = try allocator.alloc(u8, params_count); + if (try stream.read(t.parameters) != params_count) { + // TODO: better errors + return Error.malformed_wasm; + } + const results = try std.leb.readULEB128(u32, stream); + t.results = try allocator.alloc(u8, results); + if (try stream.read(t.results) != results) { + // TODO: better errors + return Error.malformed_wasm; + } + } + }, + std.wasm.Section.import => { + // Can there be more than one import section? + const import_count = try std.leb.readULEB128(u32, stream); + for (0..import_count) |i| { + const mod = try parseName(allocator, stream); + const nm = try parseName(allocator, stream); + + const b = try stream.readByte(); + switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) { + std.wasm.ExternalKind.function => try funcs.append(.{ .external = @intCast(i) }), + // TODO: not implemented + std.wasm.ExternalKind.table => {}, + std.wasm.ExternalKind.memory => {}, + std.wasm.ExternalKind.global => {}, + } + const idx = try std.leb.readULEB128(u32, stream); + try imports.append(.{ + .module = mod, + .name = nm, + .signature = idx, + }); + } + }, + std.wasm.Section.function => { + const function_count = try std.leb.readULEB128(u32, stream); + functions = try allocator.alloc(u32, function_count); + for (functions) |*f| { + f.* = try std.leb.readULEB128(u32, stream); + } + }, + std.wasm.Section.table => { + // TODO: not implemented + try stream.skipBytes(section_size, .{}); + }, + std.wasm.Section.memory => { + const memory_count = try std.leb.readULEB128(u32, stream); + for (0..memory_count) |_| { + const b = try stream.readByte(); + const n = try std.leb.readULEB128(u32, stream); + var m: u32 = 0; + switch (b) { + 0x00 => {}, + 0x01 => m = try std.leb.readULEB128(u32, stream), + else => return Error.malformed_wasm, + } + // TODO: support multiple memories + memory = .{ + .initial = n, + .max = m, + }; + } + }, + std.wasm.Section.global => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + // TODO: Can there be more than one export section? Otherwise we can optimize allocations + std.wasm.Section.@"export" => { + const export_count = try std.leb.readULEB128(u32, stream); + for (0..export_count) |_| { + const nm = try parseName(allocator, stream); + const b = try stream.readByte(); + const idx = try std.leb.readULEB128(u32, stream); + switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) { + std.wasm.ExternalKind.function => try exports.put(nm, idx), + // TODO: unimplemented, + std.wasm.ExternalKind.table => allocator.free(nm), + std.wasm.ExternalKind.memory => allocator.free(nm), + std.wasm.ExternalKind.global => allocator.free(nm), + } + } + }, + std.wasm.Section.start => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + std.wasm.Section.element => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + std.wasm.Section.code => { + const code_count = try std.leb.readULEB128(u32, stream); + code = try allocator.alloc(FunctionBody, code_count); + for (0..code_count) |i| { + const code_size = try std.leb.readULEB128(u32, stream); + const local_count = try std.leb.readULEB128(u32, stream); + const locals = try allocator.alloc(Local, local_count); + for (locals) |*l| { + const n = try std.leb.readULEB128(u32, stream); + l.types = try allocator.alloc(u8, n); + @memset(l.types, try stream.readByte()); + } + code[i].locals = locals; + + // TODO: maybe is better to parse code into ast here and not do it every frame? + // FIXME: This calculation is plain wrong. Resolving above TODO should help + code[i].code = try allocator.alloc(u8, code_size - local_count - 1); + // TODO: better error reporting + if (try stream.read(code[i].code) != code_size - local_count - 1) return Error.malformed_wasm; + + const f = Function{ .internal = @intCast(i) }; + try funcs.append(f); + } + }, + std.wasm.Section.data => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + std.wasm.Section.data_count => { + // TODO: unimplemented + try stream.skipBytes(section_size, .{}); + }, + else => return Error.malformed_wasm, + } + } else |err| switch (err) { + error.EndOfStream => {}, + else => return err, + } + + return Module{ + .types = types, + .imports = imports, + .functions = functions, + .memory = memory, + .exports = exports, + .code = code, + .funcs = funcs, + }; +} |