summaryrefslogtreecommitdiff
path: root/src/vm/parse.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm/parse.zig')
-rw-r--r--src/vm/parse.zig288
1 files changed, 288 insertions, 0 deletions
diff --git a/src/vm/parse.zig b/src/vm/parse.zig
new file mode 100644
index 0000000..40eefc1
--- /dev/null
+++ b/src/vm/parse.zig
@@ -0,0 +1,288 @@
+const std = @import("std");
+const wasm = @import("wasm.zig");
+const Allocator = std.mem.Allocator;
+
+/// Errors raised by this parser itself. Allocator, LEB128 and stream errors
+/// are propagated separately through the inferred error sets of the parse
+/// functions in this file.
+pub const Error = error{
+ /// Bad magic or version, a read that came up short, or an unexpected byte.
+ malformed_wasm,
+ /// A name read by `parseName` is not valid UTF-8.
+ invalid_utf8,
+};
+
+/// In-memory result of `parseWasm`.
+///
+/// The module owns every slice, list and map key it holds. Release it with
+/// `deinit`, passing the allocator that was handed to `parseWasm`.
+pub const Module = struct {
+    /// Function signatures read from the type section.
+    types: []FunctionType,
+    /// One entry per record of the import section, in file order.
+    imports: std.ArrayList(Import),
+    /// Exported function name -> index read from the export section.
+    /// Keys are heap-allocated and owned by the module.
+    exports: std.StringHashMap(u32),
+    /// The ULEB128 values read from the function section, in file order.
+    functions: []u32,
+    /// Limits of the (last) memory declared in the memory section.
+    memory: Memory,
+    /// Function bodies read from the code section.
+    code: []FunctionBody,
+    /// Imported functions followed by defined functions, in parse order.
+    funcs: std.ArrayList(Function),
+
+    /// Frees everything the module owns. `allocator` must be the allocator
+    /// that produced the module's slices.
+    pub fn deinit(self: *Module, allocator: Allocator) void {
+        for (self.types) |t| {
+            t.deinit(allocator);
+        }
+        allocator.free(self.types);
+
+        for (self.imports.items) |i| {
+            i.deinit(allocator);
+        }
+        self.imports.deinit();
+
+        // The map does not own its keys, so release them before the table.
+        var iter = self.exports.iterator();
+        while (iter.next()) |entry| {
+            allocator.free(entry.key_ptr.*);
+        }
+        self.exports.deinit();
+
+        allocator.free(self.functions);
+
+        for (self.code) |f| {
+            for (f.locals) |l| {
+                allocator.free(l.types);
+            }
+            // The `locals` slice is a separate allocation from the `types`
+            // buffers it points at; it used to be leaked here.
+            allocator.free(f.locals);
+            allocator.free(f.code);
+        }
+        allocator.free(self.code);
+
+        self.funcs.deinit();
+    }
+};
+
+/// Tag type of `Function`: tells an imported function from one whose body
+/// comes from this module's code section.
+pub const FunctionScope = enum {
+ /// Function recorded while parsing the import section.
+ external,
+ /// Function recorded while parsing the code section.
+ internal,
+};
+
+/// One entry of `Module.funcs`.
+///
+/// NOTE(review): both payloads are `u8`, and `parseWasm` fills them with
+/// `@intCast`, so an index above 255 trips a runtime safety check.
+pub const Function = union(FunctionScope) {
+ /// Position of the import record within the import section.
+ external: u8,
+ /// Position of the body within the code section (index into `Module.code`).
+ internal: u8,
+};
+
+// TODO: refactor locals
+/// One run of locals from a function body. `parseWasm` allocates one byte per
+/// local in the run and fills all of them with the run's single type byte.
+pub const Local = struct {
+ /// Owned buffer; every element holds the same raw type byte.
+ types: []u8,
+};
+
+/// One entry of the code section, as stored in `Module.code`.
+pub const FunctionBody = struct {
+ /// Local declaration runs, in file order. Owned.
+ locals: []Local,
+ /// Raw bytes read after the local declarations. Owned.
+ code: []u8,
+};
+
+/// Limits read from the memory section.
+/// NOTE(review): units are presumably WebAssembly pages; nothing in this file
+/// says so, confirm against the consumer.
+pub const Memory = struct {
+ /// First limit value (always present).
+ initial: u32,
+ /// Second limit value; `parseWasm` stores 0 when the flag byte is 0x00,
+ /// i.e. when the module declares no maximum.
+ max: u32,
+};
+
+/// A function signature as read from the type section.
+///
+/// Both slices hold the raw bytes read from the stream and are owned by the
+/// struct.
+pub const FunctionType = struct {
+    /// Raw bytes of the parameter list.
+    parameters: []u8,
+    /// Raw bytes of the result list.
+    results: []u8,
+
+    /// Releases both slices. `allocator` must be the one that allocated them.
+    pub fn deinit(self: FunctionType, allocator: Allocator) void {
+        const owned = [_][]u8{ self.parameters, self.results };
+        for (owned) |slice| allocator.free(slice);
+    }
+};
+
+/// One record of the import section.
+///
+/// `name` and `module` are heap-allocated by `parseName` and owned by the
+/// struct.
+pub const Import = struct {
+    /// Second name of the import record (the imported item's own name).
+    name: []u8,
+    /// First name of the import record (the module it is imported from).
+    module: []u8,
+    /// The ULEB128 index read after the import's kind byte.
+    signature: u32,
+
+    /// Releases both names. `allocator` must be the one that allocated them.
+    pub fn deinit(self: Import, allocator: Allocator) void {
+        const owned = [_][]u8{ self.name, self.module };
+        for (owned) |text| allocator.free(text);
+    }
+};
+
+/// Reinterprets a raw type byte as a `wasm.Type` using `@enumFromInt`.
+/// No validation is performed here.
+pub fn parseType(t: u8) wasm.Type {
+    const parsed: wasm.Type = @enumFromInt(t);
+    return parsed;
+}
+
+/// Reads a length-prefixed name: a ULEB128 `u32` byte count followed by that
+/// many bytes.
+///
+/// Returns a freshly allocated slice; the caller owns it and must free it
+/// with `allocator`. Nothing is leaked when an error is returned.
+///
+/// Errors: `Error.malformed_wasm` when the stream ends before the whole name
+/// has been read, `Error.invalid_utf8` when the bytes are not valid UTF-8,
+/// plus any allocator, LEB128 or stream error.
+pub fn parseName(allocator: Allocator, stream: anytype) ![]u8 {
+    const size = try std.leb.readULEB128(u32, stream);
+    const str = try allocator.alloc(u8, size);
+    // Every failure below would otherwise leak the buffer.
+    errdefer allocator.free(str);
+
+    // `readAll` keeps reading until the buffer is full or the stream ends, so
+    // a short read from the underlying reader is not mistaken for truncation.
+    if (try stream.readAll(str) != size) {
+        // TODO: better error
+        return Error.malformed_wasm;
+    }
+
+    if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_utf8;
+
+    return str;
+}
+
+// TODO: parse Global Section
+// TODO: Consider Arena allocator
+/// Parses a WebAssembly binary module from `stream`.
+///
+/// Checks the magic number and version, then reads sections until the stream
+/// ends. The type, import, function, memory, export and code sections are
+/// decoded; every other known section (custom sections included) is skipped
+/// using its declared size.
+///
+/// On success the caller owns the returned `Module` and must release it with
+/// `Module.deinit`, passing the same `allocator`. Sections missing from the
+/// input leave the matching field empty (`memory` is `{ 0, 0 }`). On error
+/// everything allocated so far is released.
+///
+/// Errors: `Error.malformed_wasm` for a bad header, an unknown section id, a
+/// non-custom section that appears twice, a duplicate export name, or a
+/// truncated or inconsistent payload; `Error.invalid_utf8` for a bad name;
+/// plus any allocator, LEB128 or stream error.
+///
+/// An import/export kind byte outside `std.wasm.ExternalKind`, or a function
+/// index that does not fit the `u8` payload of `Function`, trips a runtime
+/// safety check instead of returning an error.
+pub fn parseWasm(allocator: Allocator, stream: anytype) !Module {
+    // Start from empty values instead of `undefined` so a module lacking a
+    // section can still be cleaned up safely.
+    var types: []FunctionType = &[_]FunctionType{};
+    var imports = std.ArrayList(Import).init(allocator);
+    var exports = std.StringHashMap(u32).init(allocator);
+    var funcs = std.ArrayList(Function).init(allocator);
+    var functions: []u32 = &[_]u32{};
+    var memory: Memory = .{ .initial = 0, .max = 0 };
+    var code: []FunctionBody = &[_]FunctionBody{};
+
+    // Release everything built so far if parsing fails part-way. Elements are
+    // always initialised to empty slices before any fallible read, so each
+    // free below is valid no matter where the error occurred.
+    errdefer {
+        for (types) |t| t.deinit(allocator);
+        allocator.free(types);
+
+        for (imports.items) |imp| imp.deinit(allocator);
+        imports.deinit();
+
+        var export_iter = exports.iterator();
+        while (export_iter.next()) |entry| allocator.free(entry.key_ptr.*);
+        exports.deinit();
+
+        allocator.free(functions);
+
+        for (code) |body| {
+            for (body.locals) |l| allocator.free(l.types);
+            allocator.free(body.locals);
+            allocator.free(body.code);
+        }
+        allocator.free(code);
+
+        funcs.deinit();
+    }
+
+    // Parse magic
+    if (!(try stream.isBytes(&[_]u8{ 0x00, 0x61, 0x73, 0x6d }))) return Error.malformed_wasm;
+    // Parse version
+    if (!(try stream.isBytes(&[_]u8{ 0x01, 0x00, 0x00, 0x00 }))) return Error.malformed_wasm;
+
+    // Bit `id` is set once the non-custom section with that id has been seen.
+    // Each such section may occur at most once; accepting a repeat would
+    // overwrite (and leak) what the first occurrence allocated.
+    var seen: u16 = 0;
+
+    // NOTE: This ensures that (in this block) illegal behavior is safety-checked.
+    // This slows down the code but since this function is only called at the start
+    // I believe it is better to take the ``hit'' in performance (should only be @enumFromInt)
+    // rather than having undefined behavior when user provides an invalid wasm file.
+    @setRuntimeSafety(true);
+    while (stream.readByte()) |byte| {
+        const section_size = try std.leb.readULEB128(u32, stream);
+
+        if (byte >= 1 and byte <= 12) {
+            const bit = @as(u16, 1) << @as(u4, @intCast(byte));
+            if ((seen & bit) != 0) return Error.malformed_wasm;
+            seen |= bit;
+        }
+
+        switch (@as(std.wasm.Section, @enumFromInt(byte))) {
+            std.wasm.Section.custom => {
+                // TODO: unimplemented
+                // Custom sections may appear anywhere in the module, so skip
+                // the payload rather than stopping the whole parse.
+                try stream.skipBytes(section_size, .{});
+            },
+            std.wasm.Section.type => {
+                const type_count = try std.leb.readULEB128(u32, stream);
+                types = try allocator.alloc(FunctionType, type_count);
+                // Make every entry freeable before the first fallible read.
+                for (types) |*t| t.* = .{ .parameters = &[_]u8{}, .results = &[_]u8{} };
+
+                for (types) |*t| {
+                    // 0x60 introduces a function type.
+                    if (!(try stream.isBytes(&.{0x60}))) return Error.malformed_wasm;
+
+                    const params_count = try std.leb.readULEB128(u32, stream);
+                    t.parameters = try allocator.alloc(u8, params_count);
+                    if (try stream.readAll(t.parameters) != params_count) {
+                        // TODO: better errors
+                        return Error.malformed_wasm;
+                    }
+
+                    const results = try std.leb.readULEB128(u32, stream);
+                    t.results = try allocator.alloc(u8, results);
+                    if (try stream.readAll(t.results) != results) {
+                        // TODO: better errors
+                        return Error.malformed_wasm;
+                    }
+                }
+            },
+            std.wasm.Section.import => {
+                const import_count = try std.leb.readULEB128(u32, stream);
+                for (0..import_count) |i| {
+                    // Both names stay owned by this iteration until the
+                    // final `imports.append` succeeds.
+                    const mod = try parseName(allocator, stream);
+                    errdefer allocator.free(mod);
+                    const nm = try parseName(allocator, stream);
+                    errdefer allocator.free(nm);
+
+                    const b = try stream.readByte();
+                    switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
+                        std.wasm.ExternalKind.function => try funcs.append(.{ .external = @intCast(i) }),
+                        // TODO: not implemented
+                        std.wasm.ExternalKind.table => {},
+                        std.wasm.ExternalKind.memory => {},
+                        std.wasm.ExternalKind.global => {},
+                    }
+                    const idx = try std.leb.readULEB128(u32, stream);
+                    try imports.append(.{
+                        .module = mod,
+                        .name = nm,
+                        .signature = idx,
+                    });
+                }
+            },
+            std.wasm.Section.function => {
+                const function_count = try std.leb.readULEB128(u32, stream);
+                functions = try allocator.alloc(u32, function_count);
+                for (functions) |*f| {
+                    f.* = try std.leb.readULEB128(u32, stream);
+                }
+            },
+            std.wasm.Section.table => {
+                // TODO: not implemented
+                try stream.skipBytes(section_size, .{});
+            },
+            std.wasm.Section.memory => {
+                const memory_count = try std.leb.readULEB128(u32, stream);
+                for (0..memory_count) |_| {
+                    // Flag byte: 0x00 = minimum only, 0x01 = minimum and maximum.
+                    const b = try stream.readByte();
+                    const n = try std.leb.readULEB128(u32, stream);
+                    var m: u32 = 0;
+                    switch (b) {
+                        0x00 => {},
+                        0x01 => m = try std.leb.readULEB128(u32, stream),
+                        else => return Error.malformed_wasm,
+                    }
+                    // TODO: support multiple memories
+                    memory = .{
+                        .initial = n,
+                        .max = m,
+                    };
+                }
+            },
+            std.wasm.Section.global => {
+                // TODO: unimplemented
+                try stream.skipBytes(section_size, .{});
+            },
+            std.wasm.Section.@"export" => {
+                const export_count = try std.leb.readULEB128(u32, stream);
+                for (0..export_count) |_| {
+                    const nm = try parseName(allocator, stream);
+                    errdefer allocator.free(nm);
+
+                    const b = try stream.readByte();
+                    const idx = try std.leb.readULEB128(u32, stream);
+                    switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
+                        std.wasm.ExternalKind.function => {
+                            // A plain `put` on an existing key keeps the old
+                            // key and leaks `nm`; duplicate export names are
+                            // invalid anyway, so reject them.
+                            const slot = try exports.getOrPut(nm);
+                            if (slot.found_existing) return Error.malformed_wasm;
+                            slot.value_ptr.* = idx;
+                        },
+                        // TODO: unimplemented,
+                        std.wasm.ExternalKind.table => allocator.free(nm),
+                        std.wasm.ExternalKind.memory => allocator.free(nm),
+                        std.wasm.ExternalKind.global => allocator.free(nm),
+                    }
+                }
+            },
+            std.wasm.Section.start => {
+                // TODO: unimplemented
+                try stream.skipBytes(section_size, .{});
+            },
+            std.wasm.Section.element => {
+                // TODO: unimplemented
+                try stream.skipBytes(section_size, .{});
+            },
+            std.wasm.Section.code => {
+                const code_count = try std.leb.readULEB128(u32, stream);
+                code = try allocator.alloc(FunctionBody, code_count);
+                // Make every entry freeable before the first fallible read.
+                for (code) |*body| body.* = .{ .locals = &[_]Local{}, .code = &[_]u8{} };
+
+                for (code, 0..) |*body, i| {
+                    const code_size = try std.leb.readULEB128(u32, stream);
+
+                    // `code_size` covers the local declarations as well as the
+                    // instructions, so count how many bytes the declarations
+                    // really occupy instead of guessing.
+                    var counting = std.io.countingReader(stream);
+                    const locals_reader = counting.reader();
+
+                    const local_count = try std.leb.readULEB128(u32, locals_reader);
+                    body.locals = try allocator.alloc(Local, local_count);
+                    for (body.locals) |*l| l.* = .{ .types = &[_]u8{} };
+                    for (body.locals) |*l| {
+                        // Each entry is a run: a count followed by one type byte.
+                        const n = try std.leb.readULEB128(u32, locals_reader);
+                        l.types = try allocator.alloc(u8, n);
+                        @memset(l.types, try locals_reader.readByte());
+                    }
+
+                    // TODO: maybe is better to parse code into ast here and not do it every frame?
+                    if (counting.bytes_read > code_size) return Error.malformed_wasm;
+                    const code_len: usize = @intCast(code_size - counting.bytes_read);
+                    body.code = try allocator.alloc(u8, code_len);
+                    // TODO: better error reporting
+                    if (try stream.readAll(body.code) != code_len) return Error.malformed_wasm;
+
+                    try funcs.append(.{ .internal = @intCast(i) });
+                }
+            },
+            std.wasm.Section.data => {
+                // TODO: unimplemented
+                try stream.skipBytes(section_size, .{});
+            },
+            std.wasm.Section.data_count => {
+                // TODO: unimplemented
+                try stream.skipBytes(section_size, .{});
+            },
+            else => return Error.malformed_wasm,
+        }
+    } else |err| switch (err) {
+        // Running out of input at a section boundary is the normal end.
+        error.EndOfStream => {},
+        else => return err,
+    }
+
+    return Module{
+        .types = types,
+        .imports = imports,
+        .functions = functions,
+        .memory = memory,
+        .exports = exports,
+        .code = code,
+        .funcs = funcs,
+    };
+}