diff --git a/go.mod b/go.mod index f7114bfd6..b0165a1d9 100644 --- a/go.mod +++ b/go.mod @@ -44,6 +44,7 @@ require ( github.com/miekg/dns v1.1.59 github.com/minio/minio-go/v7 v7.0.70 github.com/mitchellh/mapstructure v1.5.0 + github.com/ncruces/go-sqlite3 v0.16.0 github.com/oklog/ulid v1.3.1 github.com/prometheus/client_golang v1.19.1 github.com/spf13/cobra v1.8.0 @@ -78,7 +79,7 @@ require ( golang.org/x/text v0.15.0 gopkg.in/mcuadros/go-syslog.v2 v2.3.0 gopkg.in/yaml.v3 v3.0.1 - modernc.org/sqlite v1.29.8 + modernc.org/sqlite v0.0.0-00010101000000-000000000000 mvdan.cc/xurls/v2 v2.5.0 ) @@ -173,6 +174,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/ncruces/go-strftime v0.1.9 // indirect + github.com/ncruces/julianday v1.0.0 // indirect github.com/opencontainers/runtime-spec v1.0.2 // indirect github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect @@ -197,6 +199,7 @@ require ( github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe // indirect github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB // indirect github.com/tdewolff/parse/v2 v2.7.14 // indirect + github.com/tetratelabs/wazero v1.7.2 // indirect github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc // indirect github.com/toqueteos/webbrowser v1.2.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect diff --git a/go.sum b/go.sum index 7c2471ed3..07de139d2 100644 --- a/go.sum +++ b/go.sum @@ -445,8 +445,12 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/moul/http2curl v1.0.0 h1:dRMWoAtb+ePxMlLkrCbAqh4TlPHXvoGUSQ323/9Zahs= github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= 
+github.com/ncruces/go-sqlite3 v0.16.0 h1:O7eULuEjvSBnS1QCN+dDL/ixLQZoUGWr466A02Gx1xc= +github.com/ncruces/go-sqlite3 v0.16.0/go.mod h1:2TmAeD93ImsKXJRsUIKohfMvt17dZSbS6pzJ3k6YYFg= github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M= +github.com/ncruces/julianday v1.0.0/go.mod h1:Dusn2KvZrrovOMJuOt0TNXL6tB7U2E8kvza5fFc9G7g= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= @@ -558,6 +562,8 @@ github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739 h1:IkjBCtQOOjIn03 github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739/go.mod h1:XPuWBzvdUzhCuxWO1ojpXsyzsA5bFoS3tO/Q3kFuTG8= github.com/technologize/otel-go-contrib v1.1.1 h1:wZH9aSPNWZWIkEh3vfaKfMb15AJ80jJ1aVj/4GZdqIw= github.com/technologize/otel-go-contrib v1.1.1/go.mod h1:dCN/wj2WyUO8aFZFdIN+6tfJHImjTML/8r2YVYAy3So= +github.com/tetratelabs/wazero v1.7.2 h1:1+z5nXJNwMLPAWaTePFi49SSTL0IMx/i3Fg8Yc25GDc= +github.com/tetratelabs/wazero v1.7.2/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y= github.com/tidwall/btree v0.0.0-20191029221954-400434d76274 h1:G6Z6HvJuPjG6XfNGi/feOATzeJrfgTNJY+rGrHbA04E= github.com/tidwall/btree v0.0.0-20191029221954-400434d76274/go.mod h1:huei1BkDWJ3/sLXmO+bsCNELL+Bp2Kks9OLyQFkzvA8= github.com/tidwall/buntdb v1.1.2 h1:noCrqQXL9EKMtcdwJcmuVKSEjqu1ua99RHHgbLTEHRo= diff --git a/internal/api/client/media/mediacreate_test.go b/internal/api/client/media/mediacreate_test.go index 00f385032..c2871aff0 100644 --- a/internal/api/client/media/mediacreate_test.go +++ b/internal/api/client/media/mediacreate_test.go @@ -38,7 +38,6 @@ 
"github.com/superseriousbusiness/gotosocial/internal/federation" "github.com/superseriousbusiness/gotosocial/internal/filter/visibility" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/media" "github.com/superseriousbusiness/gotosocial/internal/oauth" "github.com/superseriousbusiness/gotosocial/internal/processing" @@ -77,19 +76,22 @@ type MediaCreateTestSuite struct { TEST INFRASTRUCTURE */ -func (suite *MediaCreateTestSuite) SetupSuite() { - suite.state.Caches.Init() +func (suite *MediaCreateTestSuite) SetupTest() { testrig.StartNoopWorkers(&suite.state) // setup standard items testrig.InitTestConfig() testrig.InitTestLog() - suite.db = testrig.NewTestDB(&suite.state) - suite.state.DB = suite.db + suite.state.Caches.Init() + suite.storage = testrig.NewInMemoryStorage() suite.state.Storage = suite.storage + suite.db = testrig.NewTestDB(&suite.state) + testrig.StandardDBSetup(suite.db, nil) + testrig.StandardStorageSetup(suite.storage, "../../../../testrig/media") + suite.tc = typeutils.NewConverter(&suite.state) testrig.StartTimelines( @@ -106,21 +108,8 @@ func (suite *MediaCreateTestSuite) SetupSuite() { // setup module being tested suite.mediaModule = mediamodule.New(suite.processor) -} - -func (suite *MediaCreateTestSuite) TearDownSuite() { - if err := suite.db.Close(); err != nil { - log.Panicf(nil, "error closing db connection: %s", err) - } - testrig.StopWorkers(&suite.state) -} - -func (suite *MediaCreateTestSuite) SetupTest() { - suite.state.Caches.Init() - - testrig.StandardDBSetup(suite.db, nil) - testrig.StandardStorageSetup(suite.storage, "../../../../testrig/media") + // setup test data suite.testTokens = testrig.NewTestTokens() suite.testClients = testrig.NewTestClients() suite.testApplications = testrig.NewTestApplications() @@ -132,6 +121,7 @@ func (suite *MediaCreateTestSuite) SetupTest() { func (suite 
*MediaCreateTestSuite) TearDownTest() { testrig.StandardDBTeardown(suite.db) testrig.StandardStorageTeardown(suite.storage) + testrig.StopWorkers(&suite.state) } /* diff --git a/internal/api/client/media/mediaupdate_test.go b/internal/api/client/media/mediaupdate_test.go index bb4e0f4ad..bb260ae4d 100644 --- a/internal/api/client/media/mediaupdate_test.go +++ b/internal/api/client/media/mediaupdate_test.go @@ -36,7 +36,6 @@ "github.com/superseriousbusiness/gotosocial/internal/federation" "github.com/superseriousbusiness/gotosocial/internal/filter/visibility" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/media" "github.com/superseriousbusiness/gotosocial/internal/oauth" "github.com/superseriousbusiness/gotosocial/internal/processing" @@ -75,18 +74,22 @@ type MediaUpdateTestSuite struct { TEST INFRASTRUCTURE */ -func (suite *MediaUpdateTestSuite) SetupSuite() { +func (suite *MediaUpdateTestSuite) SetupTest() { testrig.StartNoopWorkers(&suite.state) // setup standard items testrig.InitTestConfig() testrig.InitTestLog() - suite.db = testrig.NewTestDB(&suite.state) - suite.state.DB = suite.db + suite.state.Caches.Init() + suite.storage = testrig.NewInMemoryStorage() suite.state.Storage = suite.storage + suite.db = testrig.NewTestDB(&suite.state) + testrig.StandardDBSetup(suite.db, nil) + testrig.StandardStorageSetup(suite.storage, "../../../../testrig/media") + suite.tc = typeutils.NewConverter(&suite.state) testrig.StartTimelines( @@ -103,21 +106,8 @@ func (suite *MediaUpdateTestSuite) SetupSuite() { // setup module being tested suite.mediaModule = mediamodule.New(suite.processor) -} - -func (suite *MediaUpdateTestSuite) TearDownSuite() { - if err := suite.db.Close(); err != nil { - log.Panicf(nil, "error closing db connection: %s", err) - } - testrig.StopWorkers(&suite.state) -} - -func (suite *MediaUpdateTestSuite) SetupTest() { - 
suite.state.Caches.Init() - - testrig.StandardDBSetup(suite.db, nil) - testrig.StandardStorageSetup(suite.storage, "../../../../testrig/media") + // setup test data suite.testTokens = testrig.NewTestTokens() suite.testClients = testrig.NewTestClients() suite.testApplications = testrig.NewTestApplications() @@ -129,6 +119,7 @@ func (suite *MediaUpdateTestSuite) SetupTest() { func (suite *MediaUpdateTestSuite) TearDownTest() { testrig.StandardDBTeardown(suite.db) testrig.StandardStorageTeardown(suite.storage) + testrig.StopWorkers(&suite.state) } /* diff --git a/internal/api/fileserver/fileserver_test.go b/internal/api/fileserver/fileserver_test.go index b58433b9f..e5f684d0c 100644 --- a/internal/api/fileserver/fileserver_test.go +++ b/internal/api/fileserver/fileserver_test.go @@ -70,8 +70,6 @@ func (suite *FileserverTestSuite) SetupSuite() { testrig.InitTestConfig() testrig.InitTestLog() - suite.db = testrig.NewTestDB(&suite.state) - suite.state.DB = suite.db suite.storage = testrig.NewInMemoryStorage() suite.state.Storage = suite.storage @@ -98,8 +96,12 @@ func (suite *FileserverTestSuite) SetupTest() { suite.state.Caches.Init() testrig.StartNoopWorkers(&suite.state) + suite.db = testrig.NewTestDB(&suite.state) + suite.state.DB = suite.db + testrig.StandardDBSetup(suite.db, nil) testrig.StandardStorageSetup(suite.storage, "../../../testrig/media") + suite.testTokens = testrig.NewTestTokens() suite.testClients = testrig.NewTestClients() suite.testApplications = testrig.NewTestApplications() diff --git a/internal/config/global.go b/internal/config/global.go index 4bc5ac3d2..57af89d05 100644 --- a/internal/config/global.go +++ b/internal/config/global.go @@ -52,3 +52,9 @@ func LoadEarlyFlags(cmd *cobra.Command) error { func BindFlags(cmd *cobra.Command) error { return global.BindFlags(cmd) } + +// Reset will totally clear global +// ConfigState{}, loading defaults. 
+func Reset() { + global.Reset() +} diff --git a/internal/config/state.go b/internal/config/state.go index c55f7b2ec..d01e853a5 100644 --- a/internal/config/state.go +++ b/internal/config/state.go @@ -37,25 +37,9 @@ type ConfigState struct { // NewState returns a new initialized ConfigState instance. func NewState() *ConfigState { - viper := viper.New() - - // Flag 'some-flag-name' becomes env var 'GTS_SOME_FLAG_NAME' - viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_")) - viper.SetEnvPrefix("gts") - - // Load appropriate named vals from env - viper.AutomaticEnv() - - // Create new ConfigState with defaults - state := &ConfigState{ - viper: viper, - config: Defaults, - } - - // Perform initial load into viper - state.reloadToViper() - - return state + st := new(ConfigState) + st.Reset() + return st } // Config provides safe access to the ConfigState's contained Configuration, @@ -116,6 +100,32 @@ func (st *ConfigState) Reload() (err error) { return } +// Reset will totally clear +// ConfigState{}, loading defaults. +func (st *ConfigState) Reset() { + // Do within lock. + st.mutex.Lock() + defer st.mutex.Unlock() + + // Create new viper. + viper := viper.New() + + // Flag 'some-flag-name' becomes env var 'GTS_SOME_FLAG_NAME' + viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_")) + viper.SetEnvPrefix("gts") + + // Load appropriate + // named vals from env. + viper.AutomaticEnv() + + // Reset variables. + st.viper = viper + st.config = Defaults + + // Load into viper. + st.reloadToViper() +} + // reloadToViper will reload Configuration{} values into viper. func (st *ConfigState) reloadToViper() { raw, err := st.config.MarshalMap() diff --git a/internal/db/bundb/admin_test.go b/internal/db/bundb/admin_test.go index da3370c4e..8018ef3fa 100644 --- a/internal/db/bundb/admin_test.go +++ b/internal/db/bundb/admin_test.go @@ -74,6 +74,7 @@ func (suite *AdminTestSuite) TestCreateInstanceAccount() { // we need to take an empty db for this... 
testrig.StandardDBTeardown(suite.db) // ...with tables created but no data + suite.db = testrig.NewTestDB(&suite.state) testrig.CreateTestTables(suite.db) // make sure there's no instance account in the db yet diff --git a/internal/db/bundb/bundb.go b/internal/db/bundb/bundb.go index b0ce575e6..e7256c276 100644 --- a/internal/db/bundb/bundb.go +++ b/internal/db/bundb/bundb.go @@ -48,8 +48,6 @@ "github.com/uptrace/bun/dialect/pgdialect" "github.com/uptrace/bun/dialect/sqlitedialect" "github.com/uptrace/bun/migrate" - - "modernc.org/sqlite" ) // DBService satisfies the DB interface @@ -133,12 +131,12 @@ func NewBunDBService(ctx context.Context, state *state.State) (db.DB, error) { switch t { case "postgres": - db, err = pgConn(ctx, state) + db, err = pgConn(ctx) if err != nil { return nil, err } case "sqlite": - db, err = sqliteConn(ctx, state) + db, err = sqliteConn(ctx) if err != nil { return nil, err } @@ -295,7 +293,7 @@ func NewBunDBService(ctx context.Context, state *state.State) (db.DB, error) { return ps, nil } -func pgConn(ctx context.Context, state *state.State) (*bun.DB, error) { +func pgConn(ctx context.Context) (*bun.DB, error) { opts, err := deriveBunDBPGOptions() //nolint:contextcheck if err != nil { return nil, fmt.Errorf("could not create bundb postgres options: %w", err) @@ -326,7 +324,7 @@ func pgConn(ctx context.Context, state *state.State) (*bun.DB, error) { return db, nil } -func sqliteConn(ctx context.Context, state *state.State) (*bun.DB, error) { +func sqliteConn(ctx context.Context) (*bun.DB, error) { // validate db address has actually been set address := config.GetDbAddress() if address == "" { @@ -339,9 +337,6 @@ func sqliteConn(ctx context.Context, state *state.State) (*bun.DB, error) { // Open new DB instance sqldb, err := sql.Open("sqlite-gts", address) if err != nil { - if errWithCode, ok := err.(*sqlite.Error); ok { - err = errors.New(sqlite.ErrorCodeString[errWithCode.Code()]) - } return nil, fmt.Errorf("could not open sqlite db 
with address %s: %w", address, err) } @@ -356,11 +351,9 @@ func sqliteConn(ctx context.Context, state *state.State) (*bun.DB, error) { // ping to check the db is there and listening if err := db.PingContext(ctx); err != nil { - if errWithCode, ok := err.(*sqlite.Error); ok { - err = errors.New(sqlite.ErrorCodeString[errWithCode.Code()]) - } return nil, fmt.Errorf("sqlite ping: %w", err) } + log.Infof(ctx, "connected to SQLITE database with address %s", address) return db, nil @@ -528,12 +521,8 @@ func buildSQLiteAddress(addr string) string { // Use random name for in-memory instead of ':memory:', so // multiple in-mem databases can be created without conflict. - addr = uuid.NewString() - - // in-mem-specific preferences - // (shared cache so that tests don't fail) - prefs.Add("mode", "memory") - prefs.Add("cache", "shared") + addr = "/" + uuid.NewString() + prefs.Add("vfs", "memdb") } if dur := config.GetDbSqliteBusyTimeout(); dur > 0 { diff --git a/internal/db/bundb/drivers.go b/internal/db/bundb/drivers.go index 1811ad533..f39189c9d 100644 --- a/internal/db/bundb/drivers.go +++ b/internal/db/bundb/drivers.go @@ -18,350 +18,14 @@ package bundb import ( - "context" "database/sql" - "database/sql/driver" - "time" - _ "unsafe" // linkname shenanigans - pgx "github.com/jackc/pgx/v5/stdlib" - "github.com/superseriousbusiness/gotosocial/internal/db" - "github.com/superseriousbusiness/gotosocial/internal/gtserror" - "modernc.org/sqlite" + "github.com/superseriousbusiness/gotosocial/internal/db/postgres" + "github.com/superseriousbusiness/gotosocial/internal/db/sqlite" ) -var ( - // global SQL driver instances. - postgresDriver = pgx.GetDefaultDriver() - sqliteDriver = getSQLiteDriver() - - // check the postgres connection - // conforms to our conn{} interface. - // (note SQLite doesn't export their - // conn type, and gets checked in - // tests very regularly anywho). 
- _ conn = (*pgx.Conn)(nil) -) - -//go:linkname getSQLiteDriver modernc.org/sqlite.newDriver -func getSQLiteDriver() *sqlite.Driver - func init() { - sql.Register("pgx-gts", &PostgreSQLDriver{}) - sql.Register("sqlite-gts", &SQLiteDriver{}) -} - -// PostgreSQLDriver is our own wrapper around the -// pgx/stdlib.Driver{} type in order to wrap further -// SQL driver types with our own err processing. -type PostgreSQLDriver struct{} - -func (d *PostgreSQLDriver) Open(name string) (driver.Conn, error) { - c, err := postgresDriver.Open(name) - if err != nil { - return nil, err - } - return &PostgreSQLConn{conn: c.(conn)}, nil -} - -type PostgreSQLConn struct{ conn } - -func (c *PostgreSQLConn) Begin() (driver.Tx, error) { - return c.BeginTx(context.Background(), driver.TxOptions{}) -} - -func (c *PostgreSQLConn) BeginTx(ctx context.Context, opts driver.TxOptions) (driver.Tx, error) { - tx, err := c.conn.BeginTx(ctx, opts) - err = processPostgresError(err) - if err != nil { - return nil, err - } - return &PostgreSQLTx{tx}, nil -} - -func (c *PostgreSQLConn) Prepare(query string) (driver.Stmt, error) { - return c.PrepareContext(context.Background(), query) -} - -func (c *PostgreSQLConn) PrepareContext(ctx context.Context, query string) (driver.Stmt, error) { - st, err := c.conn.PrepareContext(ctx, query) - err = processPostgresError(err) - if err != nil { - return nil, err - } - return &PostgreSQLStmt{stmt: st.(stmt)}, nil -} - -func (c *PostgreSQLConn) Exec(query string, args []driver.Value) (driver.Result, error) { - return c.ExecContext(context.Background(), query, toNamedValues(args)) -} - -func (c *PostgreSQLConn) ExecContext(ctx context.Context, query string, args []driver.NamedValue) (driver.Result, error) { - result, err := c.conn.ExecContext(ctx, query, args) - err = processPostgresError(err) - return result, err -} - -func (c *PostgreSQLConn) Query(query string, args []driver.Value) (driver.Rows, error) { - return c.QueryContext(context.Background(), query, 
toNamedValues(args)) -} - -func (c *PostgreSQLConn) QueryContext(ctx context.Context, query string, args []driver.NamedValue) (driver.Rows, error) { - rows, err := c.conn.QueryContext(ctx, query, args) - err = processPostgresError(err) - return rows, err -} - -func (c *PostgreSQLConn) Close() error { - return c.conn.Close() -} - -type PostgreSQLTx struct{ driver.Tx } - -func (tx *PostgreSQLTx) Commit() error { - err := tx.Tx.Commit() - return processPostgresError(err) -} - -func (tx *PostgreSQLTx) Rollback() error { - err := tx.Tx.Rollback() - return processPostgresError(err) -} - -type PostgreSQLStmt struct{ stmt } - -func (stmt *PostgreSQLStmt) Exec(args []driver.Value) (driver.Result, error) { - return stmt.ExecContext(context.Background(), toNamedValues(args)) -} - -func (stmt *PostgreSQLStmt) ExecContext(ctx context.Context, args []driver.NamedValue) (driver.Result, error) { - res, err := stmt.stmt.ExecContext(ctx, args) - err = processPostgresError(err) - return res, err -} - -func (stmt *PostgreSQLStmt) Query(args []driver.Value) (driver.Rows, error) { - return stmt.QueryContext(context.Background(), toNamedValues(args)) -} - -func (stmt *PostgreSQLStmt) QueryContext(ctx context.Context, args []driver.NamedValue) (driver.Rows, error) { - rows, err := stmt.stmt.QueryContext(ctx, args) - err = processPostgresError(err) - return rows, err -} - -// SQLiteDriver is our own wrapper around the -// sqlite.Driver{} type in order to wrap further -// SQL driver types with our own functionality, -// e.g. hooks, retries and err processing. 
-type SQLiteDriver struct{} - -func (d *SQLiteDriver) Open(name string) (driver.Conn, error) { - c, err := sqliteDriver.Open(name) - if err != nil { - return nil, err - } - return &SQLiteConn{conn: c.(conn)}, nil -} - -type SQLiteConn struct{ conn } - -func (c *SQLiteConn) Begin() (driver.Tx, error) { - return c.BeginTx(context.Background(), driver.TxOptions{}) -} - -func (c *SQLiteConn) BeginTx(ctx context.Context, opts driver.TxOptions) (tx driver.Tx, err error) { - err = retryOnBusy(ctx, func() error { - tx, err = c.conn.BeginTx(ctx, opts) - err = processSQLiteError(err) - return err - }) - if err != nil { - return nil, err - } - return &SQLiteTx{Context: ctx, Tx: tx}, nil -} - -func (c *SQLiteConn) Prepare(query string) (driver.Stmt, error) { - return c.PrepareContext(context.Background(), query) -} - -func (c *SQLiteConn) PrepareContext(ctx context.Context, query string) (st driver.Stmt, err error) { - err = retryOnBusy(ctx, func() error { - st, err = c.conn.PrepareContext(ctx, query) - err = processSQLiteError(err) - return err - }) - if err != nil { - return nil, err - } - return &SQLiteStmt{st.(stmt)}, nil -} - -func (c *SQLiteConn) Exec(query string, args []driver.Value) (driver.Result, error) { - return c.ExecContext(context.Background(), query, toNamedValues(args)) -} - -func (c *SQLiteConn) ExecContext(ctx context.Context, query string, args []driver.NamedValue) (result driver.Result, err error) { - err = retryOnBusy(ctx, func() error { - result, err = c.conn.ExecContext(ctx, query, args) - err = processSQLiteError(err) - return err - }) - return -} - -func (c *SQLiteConn) Query(query string, args []driver.Value) (driver.Rows, error) { - return c.QueryContext(context.Background(), query, toNamedValues(args)) -} - -func (c *SQLiteConn) QueryContext(ctx context.Context, query string, args []driver.NamedValue) (rows driver.Rows, err error) { - err = retryOnBusy(ctx, func() error { - rows, err = c.conn.QueryContext(ctx, query, args) - err = 
processSQLiteError(err) - return err - }) - return -} - -func (c *SQLiteConn) Close() error { - // see: https://www.sqlite.org/pragma.html#pragma_optimize - const onClose = "PRAGMA analysis_limit=1000; PRAGMA optimize;" - _, _ = c.conn.ExecContext(context.Background(), onClose, nil) - return c.conn.Close() -} - -type SQLiteTx struct { - context.Context - driver.Tx -} - -func (tx *SQLiteTx) Commit() (err error) { - err = retryOnBusy(tx.Context, func() error { - err = tx.Tx.Commit() - err = processSQLiteError(err) - return err - }) - return -} - -func (tx *SQLiteTx) Rollback() (err error) { - err = retryOnBusy(tx.Context, func() error { - err = tx.Tx.Rollback() - err = processSQLiteError(err) - return err - }) - return -} - -type SQLiteStmt struct{ stmt } - -func (stmt *SQLiteStmt) Exec(args []driver.Value) (driver.Result, error) { - return stmt.ExecContext(context.Background(), toNamedValues(args)) -} - -func (stmt *SQLiteStmt) ExecContext(ctx context.Context, args []driver.NamedValue) (res driver.Result, err error) { - err = retryOnBusy(ctx, func() error { - res, err = stmt.stmt.ExecContext(ctx, args) - err = processSQLiteError(err) - return err - }) - return -} - -func (stmt *SQLiteStmt) Query(args []driver.Value) (driver.Rows, error) { - return stmt.QueryContext(context.Background(), toNamedValues(args)) -} - -func (stmt *SQLiteStmt) QueryContext(ctx context.Context, args []driver.NamedValue) (rows driver.Rows, err error) { - err = retryOnBusy(ctx, func() error { - rows, err = stmt.stmt.QueryContext(ctx, args) - err = processSQLiteError(err) - return err - }) - return -} - -type conn interface { - driver.Conn - driver.ConnPrepareContext - driver.ExecerContext - driver.QueryerContext - driver.ConnBeginTx -} - -type stmt interface { - driver.Stmt - driver.StmtExecContext - driver.StmtQueryContext -} - -// retryOnBusy will retry given function on returned 'errBusy'. 
-func retryOnBusy(ctx context.Context, fn func() error) error { - if err := fn(); err != errBusy { - return err - } - return retryOnBusySlow(ctx, fn) -} - -// retryOnBusySlow is the outlined form of retryOnBusy, to allow the fast path (i.e. only -// 1 attempt) to be inlined, leaving the slow retry loop to be a separate function call. -func retryOnBusySlow(ctx context.Context, fn func() error) error { - var backoff time.Duration - - for i := 0; ; i++ { - // backoff according to a multiplier of 2ms * 2^2n, - // up to a maximum possible backoff time of 5 minutes. - // - // this works out as the following: - // 4ms - // 16ms - // 64ms - // 256ms - // 1.024s - // 4.096s - // 16.384s - // 1m5.536s - // 4m22.144s - backoff = 2 * time.Millisecond * (1 << (2*i + 1)) - if backoff >= 5*time.Minute { - break - } - - select { - // Context cancelled. - case <-ctx.Done(): - return ctx.Err() - - // Backoff for some time. - case <-time.After(backoff): - } - - // Perform func. - err := fn() - - if err != errBusy { - // May be nil, or may be - // some other error, either - // way return here. - return err - } - } - - return gtserror.Newf("%w (waited > %s)", db.ErrBusyTimeout, backoff) -} - -// toNamedValues converts older driver.Value types to driver.NamedValue types. -func toNamedValues(args []driver.Value) []driver.NamedValue { - if args == nil { - return nil - } - args2 := make([]driver.NamedValue, len(args)) - for i := range args { - args2[i] = driver.NamedValue{ - Ordinal: i + 1, - Value: args[i], - } - } - return args2 + // register our SQL driver implementations. 
+ sql.Register("pgx-gts", &postgres.Driver{}) + sql.Register("sqlite-gts", &sqlite.Driver{}) } diff --git a/internal/db/bundb/errors.go b/internal/db/bundb/errors.go deleted file mode 100644 index f2633786a..000000000 --- a/internal/db/bundb/errors.go +++ /dev/null @@ -1,105 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package bundb - -import ( - "database/sql/driver" - "errors" - - "github.com/jackc/pgx/v5/pgconn" - "github.com/superseriousbusiness/gotosocial/internal/db" - "modernc.org/sqlite" - sqlite3 "modernc.org/sqlite/lib" -) - -// errBusy is a sentinel error indicating -// busy database (e.g. retry needed). -var errBusy = errors.New("busy") - -// processPostgresError processes an error, replacing any postgres specific errors with our own error type -func processPostgresError(err error) error { - // Catch nil errs. 
- if err == nil { - return nil - } - - // Attempt to cast as postgres - pgErr, ok := err.(*pgconn.PgError) - if !ok { - return err - } - - // Handle supplied error code: - // (https://www.postgresql.org/docs/10/errcodes-appendix.html) - switch pgErr.Code { //nolint - case "23505" /* unique_violation */ : - return db.ErrAlreadyExists - } - - return err -} - -// processSQLiteError processes an error, replacing any sqlite specific errors with our own error type -func processSQLiteError(err error) error { - // Catch nil errs. - if err == nil { - return nil - } - - // Attempt to cast as sqlite - sqliteErr, ok := err.(*sqlite.Error) - if !ok { - return err - } - - // Handle supplied error code: - switch sqliteErr.Code() { - case sqlite3.SQLITE_CONSTRAINT_UNIQUE, - sqlite3.SQLITE_CONSTRAINT_PRIMARYKEY: - return db.ErrAlreadyExists - case sqlite3.SQLITE_BUSY, - sqlite3.SQLITE_BUSY_SNAPSHOT, - sqlite3.SQLITE_BUSY_RECOVERY: - return errBusy - case sqlite3.SQLITE_BUSY_TIMEOUT: - return db.ErrBusyTimeout - - // WORKAROUND: - // text copied from matrix dev chat: - // - // okay i've found a workaround for now. so between - // v1.29.0 and v1.29.2 (modernc.org/sqlite) is that - // slightly tweaked interruptOnDone() behaviour, which - // causes interrupt to (imo, correctly) get called when - // a context is cancelled to cancel the running query. the - // issue is that every single query after that point seems - // to still then return interrupted. so as you thought, - // maybe that query count isn't being decremented. i don't - // think it's our code, but i haven't ruled it out yet. 
- // - // the workaround for now is adding to our sqlite error - // processor to replace an SQLITE_INTERRUPTED code with - // driver.ErrBadConn, which hints to the golang sql package - // that the conn needs to be closed and a new one opened - // - case sqlite3.SQLITE_INTERRUPT: - return driver.ErrBadConn - } - - return err -} diff --git a/internal/db/bundb/tag_test.go b/internal/db/bundb/tag_test.go index 324398d27..3647c92de 100644 --- a/internal/db/bundb/tag_test.go +++ b/internal/db/bundb/tag_test.go @@ -19,6 +19,7 @@ import ( "context" + "errors" "testing" "github.com/stretchr/testify/suite" @@ -82,10 +83,20 @@ func (suite *TagTestSuite) TestPutTag() { // Subsequent inserts should fail // since all these tags are equivalent. - suite.ErrorIs(err, db.ErrAlreadyExists) + if !suite.ErrorIs(err, db.ErrAlreadyExists) { + suite.T().Logf("%T(%v) %v", err, err, unwrap(err)) + } } } func TestTagTestSuite(t *testing.T) { suite.Run(t, new(TagTestSuite)) } + +func unwrap(err error) (errs []error) { + for err != nil { + errs = append(errs, err) + err = errors.Unwrap(err) + } + return +} diff --git a/internal/db/error.go b/internal/db/error.go index b8e488297..43dd34df7 100644 --- a/internal/db/error.go +++ b/internal/db/error.go @@ -29,8 +29,4 @@ // ErrAlreadyExists is returned when a conflict was encountered in the db when doing an insert. ErrAlreadyExists = errors.New("already exists") - - // ErrBusyTimeout is returned if the database connection indicates the connection is too busy - // to complete the supplied query. This is generally intended to be handled internally by the DB. 
- ErrBusyTimeout = errors.New("busy timeout") ) diff --git a/internal/db/postgres/driver.go b/internal/db/postgres/driver.go new file mode 100644 index 000000000..994c9ffba --- /dev/null +++ b/internal/db/postgres/driver.go @@ -0,0 +1,209 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package postgres + +import ( + "context" + "database/sql/driver" + + pgx "github.com/jackc/pgx/v5/stdlib" + "github.com/superseriousbusiness/gotosocial/internal/db" +) + +var ( + // global PostgreSQL driver instances. + postgresDriver = pgx.GetDefaultDriver().(*pgx.Driver) + + // check the postgres driver types + // conforms to our interface types. + // (note SQLite doesn't export their + // driver types, and gets checked in + // tests very regularly anywho). + _ connIface = (*pgx.Conn)(nil) + _ stmtIface = (*pgx.Stmt)(nil) + _ rowsIface = (*pgx.Rows)(nil) +) + +// Driver is our own wrapper around the +// pgx/stdlib.Driver{} type in order to wrap further +// SQL driver types with our own err processing. 
+type Driver struct{} + +func (d *Driver) Open(name string) (driver.Conn, error) { + conn, err := postgresDriver.Open(name) + if err != nil { + err = processPostgresError(err) + return nil, err + } + return &postgresConn{conn.(connIface)}, nil +} + +func (d *Driver) OpenConnector(name string) (driver.Connector, error) { + cc, err := postgresDriver.OpenConnector(name) + if err != nil { + err = processPostgresError(err) + return nil, err + } + return &postgresConnector{driver: d, Connector: cc}, nil +} + +type postgresConnector struct { + driver *Driver + driver.Connector +} + +func (c *postgresConnector) Driver() driver.Driver { return c.driver } + +func (c *postgresConnector) Connect(ctx context.Context) (driver.Conn, error) { + conn, err := c.Connector.Connect(ctx) + if err != nil { + err = processPostgresError(err) + return nil, err + } + return &postgresConn{conn.(connIface)}, nil +} + +type postgresConn struct{ connIface } + +func (c *postgresConn) Begin() (driver.Tx, error) { + return c.BeginTx(context.Background(), driver.TxOptions{}) +} + +func (c *postgresConn) BeginTx(ctx context.Context, opts driver.TxOptions) (driver.Tx, error) { + tx, err := c.connIface.BeginTx(ctx, opts) + err = processPostgresError(err) + if err != nil { + return nil, err + } + return &postgresTx{tx}, nil +} + +func (c *postgresConn) Prepare(query string) (driver.Stmt, error) { + return c.PrepareContext(context.Background(), query) +} + +func (c *postgresConn) PrepareContext(ctx context.Context, query string) (driver.Stmt, error) { + st, err := c.connIface.PrepareContext(ctx, query) + err = processPostgresError(err) + if err != nil { + return nil, err + } + return &postgresStmt{st.(stmtIface)}, nil +} + +func (c *postgresConn) Exec(query string, args []driver.Value) (driver.Result, error) { + return c.ExecContext(context.Background(), query, db.ToNamedValues(args)) +} + +func (c *postgresConn) ExecContext(ctx context.Context, query string, args []driver.NamedValue) (driver.Result, 
error) { + result, err := c.connIface.ExecContext(ctx, query, args) + err = processPostgresError(err) + return result, err +} + +func (c *postgresConn) Query(query string, args []driver.Value) (driver.Rows, error) { + return c.QueryContext(context.Background(), query, db.ToNamedValues(args)) +} + +func (c *postgresConn) QueryContext(ctx context.Context, query string, args []driver.NamedValue) (driver.Rows, error) { + rows, err := c.connIface.QueryContext(ctx, query, args) + err = processPostgresError(err) + if err != nil { + return nil, err + } + return &postgresRows{rows.(rowsIface)}, nil +} + +func (c *postgresConn) Close() error { + err := c.connIface.Close() + return processPostgresError(err) +} + +type postgresTx struct{ driver.Tx } + +func (tx *postgresTx) Commit() error { + err := tx.Tx.Commit() + return processPostgresError(err) +} + +func (tx *postgresTx) Rollback() error { + err := tx.Tx.Rollback() + return processPostgresError(err) +} + +type postgresStmt struct{ stmtIface } + +func (stmt *postgresStmt) Exec(args []driver.Value) (driver.Result, error) { + return stmt.ExecContext(context.Background(), db.ToNamedValues(args)) +} + +func (stmt *postgresStmt) ExecContext(ctx context.Context, args []driver.NamedValue) (driver.Result, error) { + res, err := stmt.stmtIface.ExecContext(ctx, args) + err = processPostgresError(err) + return res, err +} + +func (stmt *postgresStmt) Query(args []driver.Value) (driver.Rows, error) { + return stmt.QueryContext(context.Background(), db.ToNamedValues(args)) +} + +func (stmt *postgresStmt) QueryContext(ctx context.Context, args []driver.NamedValue) (driver.Rows, error) { + rows, err := stmt.stmtIface.QueryContext(ctx, args) + err = processPostgresError(err) + if err != nil { + return nil, err + } + return &postgresRows{rows.(rowsIface)}, nil +} + +type postgresRows struct{ rowsIface } + +func (r *postgresRows) Next(dest []driver.Value) error { + err := r.rowsIface.Next(dest) + err = processPostgresError(err) + return err 
+} + +func (r *postgresRows) Close() error { + err := r.rowsIface.Close() + err = processPostgresError(err) + return err +} + +type connIface interface { + driver.Conn + driver.ConnPrepareContext + driver.ExecerContext + driver.QueryerContext + driver.ConnBeginTx +} + +type stmtIface interface { + driver.Stmt + driver.StmtExecContext + driver.StmtQueryContext +} + +type rowsIface interface { + driver.Rows + driver.RowsColumnTypeDatabaseTypeName + driver.RowsColumnTypeLength + driver.RowsColumnTypePrecisionScale + driver.RowsColumnTypeScanType + driver.RowsColumnTypeScanType +} diff --git a/internal/db/postgres/errors.go b/internal/db/postgres/errors.go new file mode 100644 index 000000000..cb8989a73 --- /dev/null +++ b/internal/db/postgres/errors.go @@ -0,0 +1,46 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +package postgres + +import ( + "fmt" + + "github.com/jackc/pgx/v5/pgconn" + "github.com/superseriousbusiness/gotosocial/internal/db" +) + +// processPostgresError processes an error, replacing any +// postgres specific errors with our own error type +func processPostgresError(err error) error { + // Attempt to cast as postgres + pgErr, ok := err.(*pgconn.PgError) + if !ok { + return err + } + + // Handle supplied error code: + // (https://www.postgresql.org/docs/10/errcodes-appendix.html) + switch pgErr.Code { //nolint + case "23505" /* unique_violation */ : + return db.ErrAlreadyExists + } + + // Wrap the returned error with the code and + // extended code for easier debugging later. + return fmt.Errorf("%w (code=%s)", err, pgErr.Code) +} diff --git a/internal/db/sqlite/driver.go b/internal/db/sqlite/driver.go new file mode 100644 index 000000000..11cb6b27d --- /dev/null +++ b/internal/db/sqlite/driver.go @@ -0,0 +1,197 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +//go:build !wasmsqlite3 + +package sqlite + +import ( + "context" + "database/sql/driver" + + "modernc.org/sqlite" + + "github.com/superseriousbusiness/gotosocial/internal/db" +) + +// Driver is our own wrapper around the +// sqlite.Driver{} type in order to wrap +// further SQL types with our own +// functionality, e.g. err processing. +type Driver struct{ sqlite.Driver } + +func (d *Driver) Open(name string) (driver.Conn, error) { + conn, err := d.Driver.Open(name) + if err != nil { + err = processSQLiteError(err) + return nil, err + } + return &sqliteConn{conn.(connIface)}, nil +} + +type sqliteConn struct{ connIface } + +func (c *sqliteConn) Begin() (driver.Tx, error) { + return c.BeginTx(context.Background(), driver.TxOptions{}) +} + +func (c *sqliteConn) BeginTx(ctx context.Context, opts driver.TxOptions) (tx driver.Tx, err error) { + tx, err = c.connIface.BeginTx(ctx, opts) + err = processSQLiteError(err) + if err != nil { + return nil, err + } + return &sqliteTx{tx}, nil +} + +func (c *sqliteConn) Prepare(query string) (driver.Stmt, error) { + return c.PrepareContext(context.Background(), query) +} + +func (c *sqliteConn) PrepareContext(ctx context.Context, query string) (stmt driver.Stmt, err error) { + stmt, err = c.connIface.PrepareContext(ctx, query) + err = processSQLiteError(err) + if err != nil { + return nil, err + } + return &sqliteStmt{stmtIface: stmt.(stmtIface)}, nil +} + +func (c *sqliteConn) Exec(query string, args []driver.Value) (driver.Result, error) { + return c.ExecContext(context.Background(), query, db.ToNamedValues(args)) +} + +func (c *sqliteConn) ExecContext(ctx context.Context, query string, args []driver.NamedValue) (res driver.Result, err error) { + res, err = c.connIface.ExecContext(ctx, query, args) + err = processSQLiteError(err) + return +} + +func (c *sqliteConn) Query(query string, args []driver.Value) (driver.Rows, error) { + return c.QueryContext(context.Background(), query, db.ToNamedValues(args)) +} + +func (c 
*sqliteConn) QueryContext(ctx context.Context, query string, args []driver.NamedValue) (rows driver.Rows, err error) { + rows, err = c.connIface.QueryContext(ctx, query, args) + err = processSQLiteError(err) + if err != nil { + return nil, err + } + return &sqliteRows{rows.(rowsIface)}, nil +} + +func (c *sqliteConn) Close() (err error) { + // see: https://www.sqlite.org/pragma.html#pragma_optimize + const onClose = "PRAGMA analysis_limit=1000; PRAGMA optimize;" + _, _ = c.connIface.ExecContext(context.Background(), onClose, nil) + + // Finally, close the conn. + err = c.connIface.Close() + return +} + +type sqliteTx struct{ driver.Tx } + +func (tx *sqliteTx) Commit() (err error) { + err = tx.Tx.Commit() + err = processSQLiteError(err) + return +} + +func (tx *sqliteTx) Rollback() (err error) { + err = tx.Tx.Rollback() + err = processSQLiteError(err) + return +} + +type sqliteStmt struct{ stmtIface } + +func (stmt *sqliteStmt) Exec(args []driver.Value) (driver.Result, error) { + return stmt.ExecContext(context.Background(), db.ToNamedValues(args)) +} + +func (stmt *sqliteStmt) ExecContext(ctx context.Context, args []driver.NamedValue) (res driver.Result, err error) { + res, err = stmt.stmtIface.ExecContext(ctx, args) + err = processSQLiteError(err) + return +} + +func (stmt *sqliteStmt) Query(args []driver.Value) (driver.Rows, error) { + return stmt.QueryContext(context.Background(), db.ToNamedValues(args)) +} + +func (stmt *sqliteStmt) QueryContext(ctx context.Context, args []driver.NamedValue) (rows driver.Rows, err error) { + rows, err = stmt.stmtIface.QueryContext(ctx, args) + err = processSQLiteError(err) + if err != nil { + return nil, err + } + return &sqliteRows{rows.(rowsIface)}, nil +} + +func (stmt *sqliteStmt) Close() (err error) { + err = stmt.stmtIface.Close() + err = processSQLiteError(err) + return +} + +type sqliteRows struct{ rowsIface } + +func (r *sqliteRows) Next(dest []driver.Value) (err error) { + err = r.rowsIface.Next(dest) + err = 
processSQLiteError(err) + return +} + +func (r *sqliteRows) Close() (err error) { + err = r.rowsIface.Close() + err = processSQLiteError(err) + return +} + +// connIface is the driver.Conn interface +// types (and the like) that modernc.org/sqlite.conn +// conforms to. Useful so you don't need +// to repeatedly perform checks yourself. +type connIface interface { + driver.Conn + driver.ConnBeginTx + driver.ConnPrepareContext + driver.ExecerContext + driver.QueryerContext +} + +// StmtIface is the driver.Stmt interface +// types (and the like) that modernc.org/sqlite.stmt +// conforms to. Useful so you don't need +// to repeatedly perform checks yourself. +type stmtIface interface { + driver.Stmt + driver.StmtExecContext + driver.StmtQueryContext +} + +// RowsIface is the driver.Rows interface +// types (and the like) that modernc.org/sqlite.rows +// conforms to. Useful so you don't need +// to repeatedly perform checks yourself. +type rowsIface interface { + driver.Rows + driver.RowsColumnTypeDatabaseTypeName + driver.RowsColumnTypeLength + driver.RowsColumnTypeScanType +} diff --git a/internal/db/sqlite/driver_wasmsqlite3.go b/internal/db/sqlite/driver_wasmsqlite3.go new file mode 100644 index 000000000..afe499a98 --- /dev/null +++ b/internal/db/sqlite/driver_wasmsqlite3.go @@ -0,0 +1,211 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//go:build wasmsqlite3 + +package sqlite + +import ( + "context" + "database/sql/driver" + + "github.com/superseriousbusiness/gotosocial/internal/db" + + "github.com/ncruces/go-sqlite3" + sqlite3driver "github.com/ncruces/go-sqlite3/driver" + _ "github.com/ncruces/go-sqlite3/embed" // embed wasm binary + _ "github.com/ncruces/go-sqlite3/vfs/memdb" // include memdb vfs +) + +// Driver is our own wrapper around the +// driver.SQLite{} type in order to wrap +// further SQL types with our own +// functionality, e.g. err processing. +type Driver struct{ sqlite3driver.SQLite } + +func (d *Driver) Open(name string) (driver.Conn, error) { + conn, err := d.SQLite.Open(name) + if err != nil { + err = processSQLiteError(err) + return nil, err + } + return &sqliteConn{conn.(connIface)}, nil +} + +func (d *Driver) OpenConnector(name string) (driver.Connector, error) { + cc, err := d.SQLite.OpenConnector(name) + if err != nil { + return nil, err + } + return &sqliteConnector{driver: d, Connector: cc}, nil +} + +type sqliteConnector struct { + driver *Driver + driver.Connector +} + +func (c *sqliteConnector) Driver() driver.Driver { return c.driver } + +func (c *sqliteConnector) Connect(ctx context.Context) (driver.Conn, error) { + conn, err := c.Connector.Connect(ctx) + err = processSQLiteError(err) + if err != nil { + return nil, err + } + return &sqliteConn{conn.(connIface)}, nil +} + +type sqliteConn struct{ connIface } + +func (c *sqliteConn) Begin() (driver.Tx, error) { + return c.BeginTx(context.Background(), driver.TxOptions{}) +} + +func (c *sqliteConn) BeginTx(ctx context.Context, opts driver.TxOptions) (tx driver.Tx, err error) { + tx, err = c.connIface.BeginTx(ctx, opts) + err = processSQLiteError(err) + if err != nil { + return nil, err + } + return &sqliteTx{tx}, nil +} + +func (c *sqliteConn) Prepare(query string) (driver.Stmt, error) { + 
return c.PrepareContext(context.Background(), query) +} + +func (c *sqliteConn) PrepareContext(ctx context.Context, query string) (stmt driver.Stmt, err error) { + stmt, err = c.connIface.PrepareContext(ctx, query) + err = processSQLiteError(err) + if err != nil { + return nil, err + } + return &sqliteStmt{stmtIface: stmt.(stmtIface)}, nil +} + +func (c *sqliteConn) Exec(query string, args []driver.Value) (driver.Result, error) { + return c.ExecContext(context.Background(), query, db.ToNamedValues(args)) +} + +func (c *sqliteConn) ExecContext(ctx context.Context, query string, args []driver.NamedValue) (res driver.Result, err error) { + res, err = c.connIface.ExecContext(ctx, query, args) + err = processSQLiteError(err) + return +} + +func (c *sqliteConn) Close() (err error) { + // Get acces the underlying raw sqlite3 conn. + raw := c.connIface.(sqlite3.DriverConn).Raw() + + // see: https://www.sqlite.org/pragma.html#pragma_optimize + const onClose = "PRAGMA analysis_limit=1000; PRAGMA optimize;" + _ = raw.Exec(onClose) + + // Finally, close. 
+ err = raw.Close() + return +} + +type sqliteTx struct{ driver.Tx } + +func (tx *sqliteTx) Commit() (err error) { + err = tx.Tx.Commit() + err = processSQLiteError(err) + return +} + +func (tx *sqliteTx) Rollback() (err error) { + err = tx.Tx.Rollback() + err = processSQLiteError(err) + return +} + +type sqliteStmt struct{ stmtIface } + +func (stmt *sqliteStmt) Exec(args []driver.Value) (driver.Result, error) { + return stmt.ExecContext(context.Background(), db.ToNamedValues(args)) +} + +func (stmt *sqliteStmt) ExecContext(ctx context.Context, args []driver.NamedValue) (res driver.Result, err error) { + res, err = stmt.stmtIface.ExecContext(ctx, args) + err = processSQLiteError(err) + return +} + +func (stmt *sqliteStmt) Query(args []driver.Value) (driver.Rows, error) { + return stmt.QueryContext(context.Background(), db.ToNamedValues(args)) +} + +func (stmt *sqliteStmt) QueryContext(ctx context.Context, args []driver.NamedValue) (rows driver.Rows, err error) { + rows, err = stmt.stmtIface.QueryContext(ctx, args) + err = processSQLiteError(err) + if err != nil { + return nil, err + } + return &sqliteRows{rows.(rowsIface)}, nil +} + +func (stmt *sqliteStmt) Close() (err error) { + err = stmt.stmtIface.Close() + err = processSQLiteError(err) + return +} + +type sqliteRows struct{ rowsIface } + +func (r *sqliteRows) Next(dest []driver.Value) (err error) { + err = r.rowsIface.Next(dest) + err = processSQLiteError(err) + return +} + +func (r *sqliteRows) Close() (err error) { + err = r.rowsIface.Close() + err = processSQLiteError(err) + return +} + +// connIface is the driver.Conn interface +// types (and the like) that go-sqlite3/driver.conn +// conforms to. Useful so you don't need +// to repeatedly perform checks yourself. +type connIface interface { + driver.Conn + driver.ConnBeginTx + driver.ConnPrepareContext + driver.ExecerContext +} + +// StmtIface is the driver.Stmt interface +// types (and the like) that go-sqlite3/driver.stmt +// conforms to. 
Useful so you don't need +// to repeatedly perform checks yourself. +type stmtIface interface { + driver.Stmt + driver.StmtExecContext + driver.StmtQueryContext +} + +// RowsIface is the driver.Rows interface +// types (and the like) that go-sqlite3/driver.rows +// conforms to. Useful so you don't need +// to repeatedly perform checks yourself. +type rowsIface interface { + driver.Rows + driver.RowsColumnTypeDatabaseTypeName +} diff --git a/internal/db/sqlite/errors.go b/internal/db/sqlite/errors.go new file mode 100644 index 000000000..b07b026de --- /dev/null +++ b/internal/db/sqlite/errors.go @@ -0,0 +1,62 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//go:build !wasmsqlite3 + +package sqlite + +import ( + "database/sql/driver" + "fmt" + + "modernc.org/sqlite" + sqlite3 "modernc.org/sqlite/lib" + + "github.com/superseriousbusiness/gotosocial/internal/db" +) + +// processSQLiteError processes an sqlite3.Error to +// handle conversion to any of our common db types. +func processSQLiteError(err error) error { + // Attempt to cast as sqlite error. 
+ sqliteErr, ok := err.(*sqlite.Error) + if !ok { + return err + } + + // Handle supplied error code: + switch sqliteErr.Code() { + case sqlite3.SQLITE_CONSTRAINT_UNIQUE, + sqlite3.SQLITE_CONSTRAINT_PRIMARYKEY: + return db.ErrAlreadyExists + + // Busy should be very rare, but + // on busy tell the database to close + // the connection, re-open and re-attempt + // which should give a necessary timeout. + case sqlite3.SQLITE_BUSY, + sqlite3.SQLITE_BUSY_RECOVERY, + sqlite3.SQLITE_BUSY_SNAPSHOT: + return driver.ErrBadConn + } + + // Wrap the returned error with the code and + // extended code for easier debugging later. + return fmt.Errorf("%w (code=%d)", err, + sqliteErr.Code(), + ) +} diff --git a/internal/db/sqlite/errors_wasmsqlite3.go b/internal/db/sqlite/errors_wasmsqlite3.go new file mode 100644 index 000000000..26668a898 --- /dev/null +++ b/internal/db/sqlite/errors_wasmsqlite3.go @@ -0,0 +1,60 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//go:build wasmsqlite3 + +package sqlite + +import ( + "database/sql/driver" + "fmt" + + "github.com/ncruces/go-sqlite3" + "github.com/superseriousbusiness/gotosocial/internal/db" +) + +// processSQLiteError processes an sqlite3.Error to +// handle conversion to any of our common db types. 
+func processSQLiteError(err error) error { + // Attempt to cast as sqlite error. + sqliteErr, ok := err.(*sqlite3.Error) + if !ok { + return err + } + + // Handle supplied error code: + switch sqliteErr.ExtendedCode() { + case sqlite3.CONSTRAINT_UNIQUE, + sqlite3.CONSTRAINT_PRIMARYKEY: + return db.ErrAlreadyExists + + // Busy should be very rare, but on + // busy tell the database to close the + // connection, re-open and re-attempt + // which should give necessary timeout. + case sqlite3.BUSY_RECOVERY, + sqlite3.BUSY_SNAPSHOT: + return driver.ErrBadConn + } + + // Wrap the returned error with the code and + // extended code for easier debugging later. + return fmt.Errorf("%w (code=%d extended=%d)", err, + sqliteErr.Code(), + sqliteErr.ExtendedCode(), + ) +} diff --git a/internal/db/util.go b/internal/db/util.go new file mode 100644 index 000000000..9cd29f2fc --- /dev/null +++ b/internal/db/util.go @@ -0,0 +1,35 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package db + +import "database/sql/driver" + +// ToNamedValues converts older driver.Value types to driver.NamedValue types. 
+func ToNamedValues(args []driver.Value) []driver.NamedValue { + if args == nil { + return nil + } + args2 := make([]driver.NamedValue, len(args)) + for i := range args { + args2[i] = driver.NamedValue{ + Ordinal: i + 1, + Value: args[i], + } + } + return args2 +} diff --git a/internal/gtserror/multi_test.go b/internal/gtserror/multi_test.go index 10c342415..b58c1b881 100644 --- a/internal/gtserror/multi_test.go +++ b/internal/gtserror/multi_test.go @@ -43,10 +43,6 @@ func TestMultiError(t *testing.T) { t.Error("should be db.ErrAlreadyExists") } - if errors.Is(err, db.ErrBusyTimeout) { - t.Error("should not be db.ErrBusyTimeout") - } - errString := err.Error() expected := `sql: no rows in result set oopsie woopsie we did a fucky wucky etc diff --git a/internal/util/paging_test.go b/internal/util/paging_test.go index 66c5b2c56..bab7dcb7b 100644 --- a/internal/util/paging_test.go +++ b/internal/util/paging_test.go @@ -30,6 +30,7 @@ type PagingSuite struct { } func (suite *PagingSuite) TestPagingStandard() { + config.SetProtocol("https") config.SetHost("example.org") params := util.PageableResponseParams{ @@ -52,6 +53,7 @@ func (suite *PagingSuite) TestPagingStandard() { } func (suite *PagingSuite) TestPagingNoLimit() { + config.SetProtocol("https") config.SetHost("example.org") params := util.PageableResponseParams{ @@ -73,6 +75,7 @@ func (suite *PagingSuite) TestPagingNoLimit() { } func (suite *PagingSuite) TestPagingNoNextID() { + config.SetProtocol("https") config.SetHost("example.org") params := util.PageableResponseParams{ @@ -94,6 +97,7 @@ func (suite *PagingSuite) TestPagingNoNextID() { } func (suite *PagingSuite) TestPagingNoPrevID() { + config.SetProtocol("https") config.SetHost("example.org") params := util.PageableResponseParams{ @@ -115,6 +119,7 @@ func (suite *PagingSuite) TestPagingNoPrevID() { } func (suite *PagingSuite) TestPagingNoItems() { + config.SetProtocol("https") config.SetHost("example.org") params := util.PageableResponseParams{ diff --git 
a/test/run-postgres.sh b/test/run-postgres.sh index 029a72793..8da1a3276 100755 --- a/test/run-postgres.sh +++ b/test/run-postgres.sh @@ -2,6 +2,11 @@ set -e +# Determine available docker binary +_docker=$(command -v 'podman') || \ +_docker=$(command -v 'docker') || \ +{ echo 'docker not found'; exit 1; } + # Ensure test args are set. ARGS=${@}; [ -z "$ARGS" ] && \ ARGS='./...' @@ -10,33 +15,32 @@ ARGS='./...' DB_NAME='postgres' DB_USER='postgres' DB_PASS='postgres' +DB_IP='127.0.0.1' DB_PORT=5432 # Start postgres container -CID=$(docker run --detach \ +CID=$($_docker run --detach \ + --publish "${DB_IP}:${DB_PORT}:${DB_PORT}" \ --env "POSTGRES_DB=${DB_NAME}" \ --env "POSTGRES_USER=${DB_USER}" \ --env "POSTGRES_PASSWORD=${DB_PASS}" \ --env "POSTGRES_HOST_AUTH_METHOD=trust" \ --env "PGHOST=0.0.0.0" \ --env "PGPORT=${DB_PORT}" \ - 'postgres:latest') + 'docker.io/postgres:latest') # On exit kill the container -trap "docker kill ${CID}" exit +trap "$_docker kill ${CID}" exit sleep 5 #docker exec "$CID" psql --user "$DB_USER" --password "$DB_PASS" -c "CREATE DATABASE \"${DB_NAME}\" WITH LOCALE \"C.UTF-8\" TEMPLATE \"template0\";" -docker exec "$CID" psql --user "$DB_USER" --password "$DB_PASS" -c "GRANT ALL PRIVILEGES ON DATABASE \"${DB_NAME}\" TO \"${DB_USER}\";" - -# Get running container IP -IP=$(docker container inspect "${CID}" \ - --format '{{ .NetworkSettings.IPAddress }}') +$_docker exec "$CID" psql --user "$DB_USER" --password "$DB_PASS" -c "GRANT ALL PRIVILEGES ON DATABASE \"${DB_NAME}\" TO \"${DB_USER}\";" +env \ GTS_DB_TYPE=postgres \ -GTS_DB_ADDRESS=${IP} \ +GTS_DB_ADDRESS=${DB_IP} \ GTS_DB_PORT=${DB_PORT} \ GTS_DB_USER=${DB_USER} \ GTS_DB_PASSWORD=${DB_PASS} \ GTS_DB_DATABASE=${DB_NAME} \ -go test ./... 
-p 1 ${ARGS} \ No newline at end of file +go test -p 1 ${ARGS} \ No newline at end of file diff --git a/testrig/config.go b/testrig/config.go index 93f3c5523..30beaa910 100644 --- a/testrig/config.go +++ b/testrig/config.go @@ -18,8 +18,8 @@ package testrig import ( - "cmp" "os" + "strconv" "time" "codeberg.org/gruf/go-bytesize" @@ -28,128 +28,149 @@ "github.com/superseriousbusiness/gotosocial/internal/language" ) -// InitTestConfig initializes viper configuration with test defaults. +// InitTestConfig initializes viper +// configuration with test defaults. func InitTestConfig() { - config.Config(func(cfg *config.Configuration) { - *cfg = testDefaults + config.Defaults = testDefaults() + config.Reset() +} + +func testDefaults() config.Configuration { + return config.Configuration{ + LogLevel: envStr("GTS_LOG_LEVEL", "error"), + LogTimestampFormat: "02/01/2006 15:04:05.000", + LogDbQueries: true, + ApplicationName: "gotosocial", + LandingPageUser: "", + ConfigPath: "", + Host: "localhost:8080", + AccountDomain: "localhost:8080", + Protocol: "http", + BindAddress: "127.0.0.1", + Port: 8080, + TrustedProxies: []string{"127.0.0.1/32", "::1"}, + DbType: envStr("GTS_DB_TYPE", "sqlite"), + DbAddress: envStr("GTS_DB_ADDRESS", ":memory:"), + DbPort: envInt("GTS_DB_PORT", 0), + DbUser: envStr("GTS_DB_USER", ""), + DbPassword: envStr("GTS_DB_PASSWORD", ""), + DbDatabase: envStr("GTS_DB_DATABASE", ""), + DbTLSMode: envStr("GTS_DB_TLS_MODE", ""), + DbTLSCACert: envStr("GTS_DB_TLS_CA_CERT", ""), + DbMaxOpenConnsMultiplier: 8, + DbSqliteJournalMode: "WAL", + DbSqliteSynchronous: "NORMAL", + DbSqliteCacheSize: 8 * bytesize.MiB, + DbSqliteBusyTimeout: time.Minute * 5, + + WebTemplateBaseDir: "./web/template/", + WebAssetBaseDir: "./web/assets/", + + InstanceFederationMode: config.InstanceFederationModeDefault, + InstanceFederationSpamFilter: true, + InstanceExposePeers: true, + InstanceExposeSuspended: true, + InstanceExposeSuspendedWeb: true, + InstanceDeliverToSharedInboxes: 
true, + InstanceLanguages: language.Languages{ + { + TagStr: "nl", + }, + { + TagStr: "en-gb", + }, + }, + + AccountsRegistrationOpen: true, + AccountsReasonRequired: true, + AccountsAllowCustomCSS: true, + AccountsCustomCSSLength: 10000, + + MediaImageMaxSize: 10485760, // 10MiB + MediaVideoMaxSize: 41943040, // 40MiB + MediaDescriptionMinChars: 0, + MediaDescriptionMaxChars: 500, + MediaRemoteCacheDays: 7, + MediaEmojiLocalMaxSize: 51200, // 50KiB + MediaEmojiRemoteMaxSize: 102400, // 100KiB + MediaCleanupFrom: "00:00", // midnight. + MediaCleanupEvery: 24 * time.Hour, // 1/day. + + // the testrig only uses in-memory storage, so we can + // safely set this value to 'test' to avoid running storage + // migrations, and other silly things like that + StorageBackend: "test", + StorageLocalBasePath: "", + + StatusesMaxChars: 5000, + StatusesPollMaxOptions: 6, + StatusesPollOptionMaxChars: 50, + StatusesMediaMaxFiles: 6, + + LetsEncryptEnabled: false, + LetsEncryptPort: 0, + LetsEncryptCertDir: "", + LetsEncryptEmailAddress: "", + + OIDCEnabled: false, + OIDCIdpName: "", + OIDCSkipVerification: false, + OIDCIssuer: "", + OIDCClientID: "", + OIDCClientSecret: "", + OIDCScopes: []string{oidc.ScopeOpenID, "profile", "email", "groups"}, + OIDCLinkExisting: false, + OIDCAdminGroups: []string{"adminRole"}, + OIDCAllowedGroups: []string{"allowedRole"}, + + SMTPHost: "", + SMTPPort: 0, + SMTPUsername: "", + SMTPPassword: "", + SMTPFrom: "GoToSocial", + SMTPDiscloseRecipients: false, + + TracingEnabled: false, + TracingEndpoint: "localhost:4317", + TracingTransport: "grpc", + TracingInsecureTransport: true, + + MetricsEnabled: false, + MetricsAuthEnabled: false, + + SyslogEnabled: false, + SyslogProtocol: "udp", + SyslogAddress: "localhost:514", + + AdvancedCookiesSamesite: "lax", + AdvancedRateLimitRequests: 0, // disabled + AdvancedThrottlingMultiplier: 0, // disabled + AdvancedSenderMultiplier: 0, // 1 sender only, regardless of CPU + + SoftwareVersion: "0.0.0-testrig", + + 
// simply use cache defaults. + Cache: config.Defaults.Cache, + } +} + +func envInt(key string, _default int) int { + return env(key, _default, func(value string) int { + i, _ := strconv.Atoi(value) + return i }) } -var testDefaults = config.Configuration{ - LogLevel: cmp.Or(os.Getenv("GTS_LOG_LEVEL"), "error"), - LogTimestampFormat: "02/01/2006 15:04:05.000", - LogDbQueries: true, - ApplicationName: "gotosocial", - LandingPageUser: "", - ConfigPath: "", - Host: "localhost:8080", - AccountDomain: "localhost:8080", - Protocol: "http", - BindAddress: "127.0.0.1", - Port: 8080, - TrustedProxies: []string{"127.0.0.1/32", "::1"}, - - DbType: "sqlite", - DbAddress: ":memory:", - DbPort: 5432, - DbUser: "postgres", - DbPassword: "postgres", - DbDatabase: "postgres", - DbTLSMode: "disable", - DbTLSCACert: "", - DbMaxOpenConnsMultiplier: 8, - DbSqliteJournalMode: "WAL", - DbSqliteSynchronous: "NORMAL", - DbSqliteCacheSize: 8 * bytesize.MiB, - DbSqliteBusyTimeout: time.Minute * 5, - - WebTemplateBaseDir: "./web/template/", - WebAssetBaseDir: "./web/assets/", - - InstanceFederationMode: config.InstanceFederationModeDefault, - InstanceFederationSpamFilter: true, - InstanceExposePeers: true, - InstanceExposeSuspended: true, - InstanceExposeSuspendedWeb: true, - InstanceDeliverToSharedInboxes: true, - InstanceLanguages: language.Languages{ - { - TagStr: "nl", - }, - { - TagStr: "en-gb", - }, - }, - - AccountsRegistrationOpen: true, - AccountsReasonRequired: true, - AccountsAllowCustomCSS: true, - AccountsCustomCSSLength: 10000, - - MediaImageMaxSize: 10485760, // 10MiB - MediaVideoMaxSize: 41943040, // 40MiB - MediaDescriptionMinChars: 0, - MediaDescriptionMaxChars: 500, - MediaRemoteCacheDays: 7, - MediaEmojiLocalMaxSize: 51200, // 50KiB - MediaEmojiRemoteMaxSize: 102400, // 100KiB - MediaCleanupFrom: "00:00", // midnight. - MediaCleanupEvery: 24 * time.Hour, // 1/day. 
- - // the testrig only uses in-memory storage, so we can - // safely set this value to 'test' to avoid running storage - // migrations, and other silly things like that - StorageBackend: "test", - StorageLocalBasePath: "", - - StatusesMaxChars: 5000, - StatusesPollMaxOptions: 6, - StatusesPollOptionMaxChars: 50, - StatusesMediaMaxFiles: 6, - - LetsEncryptEnabled: false, - LetsEncryptPort: 0, - LetsEncryptCertDir: "", - LetsEncryptEmailAddress: "", - - OIDCEnabled: false, - OIDCIdpName: "", - OIDCSkipVerification: false, - OIDCIssuer: "", - OIDCClientID: "", - OIDCClientSecret: "", - OIDCScopes: []string{oidc.ScopeOpenID, "profile", "email", "groups"}, - OIDCLinkExisting: false, - OIDCAdminGroups: []string{"adminRole"}, - OIDCAllowedGroups: []string{"allowedRole"}, - - SMTPHost: "", - SMTPPort: 0, - SMTPUsername: "", - SMTPPassword: "", - SMTPFrom: "GoToSocial", - SMTPDiscloseRecipients: false, - - TracingEnabled: false, - TracingEndpoint: "localhost:4317", - TracingTransport: "grpc", - TracingInsecureTransport: true, - - MetricsEnabled: false, - MetricsAuthEnabled: false, - - SyslogEnabled: false, - SyslogProtocol: "udp", - SyslogAddress: "localhost:514", - - AdvancedCookiesSamesite: "lax", - AdvancedRateLimitRequests: 0, // disabled - AdvancedThrottlingMultiplier: 0, // disabled - AdvancedSenderMultiplier: 0, // 1 sender only, regardless of CPU - AdvancedHeaderFilterMode: config.RequestHeaderFilterModeBlock, - - SoftwareVersion: "0.0.0-testrig", - - // simply use cache defaults. 
- Cache: config.Defaults.Cache, +func envStr(key string, _default string) string { + return env(key, _default, func(value string) string { + return value + }) +} + +func env[T any](key string, _default T, parse func(string) T) T { + value, ok := os.LookupEnv(key) + if ok { + return parse(value) + } + return _default } diff --git a/testrig/db.go b/testrig/db.go index 83bc46ec8..faa7a910d 100644 --- a/testrig/db.go +++ b/testrig/db.go @@ -19,10 +19,7 @@ import ( "context" - "os" - "strconv" - "github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/db/bundb" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -84,22 +81,6 @@ // If the environment variable GTS_DB_PORT is set, it will take that // value as the port instead. func NewTestDB(state *state.State) db.DB { - if alternateAddress := os.Getenv("GTS_DB_ADDRESS"); alternateAddress != "" { - config.SetDbAddress(alternateAddress) - } - - if alternateDBType := os.Getenv("GTS_DB_TYPE"); alternateDBType != "" { - config.SetDbType(alternateDBType) - } - - if alternateDBPort := os.Getenv("GTS_DB_PORT"); alternateDBPort != "" { - port, err := strconv.ParseUint(alternateDBPort, 10, 16) - if err != nil { - panic(err) - } - config.SetDbPort(int(port)) - } - state.Caches.Init() testDB, err := bundb.NewBunDBService(context.Background(), state) @@ -374,9 +355,10 @@ func StandardDBTeardown(db db.DB) { if db == nil { return } + defer db.Close() for _, m := range testModels { if err := db.DropTable(ctx, m); err != nil { - log.Panic(nil, err) + log.Error(ctx, err) } } } diff --git a/vendor/github.com/ncruces/go-sqlite3/.gitignore b/vendor/github.com/ncruces/go-sqlite3/.gitignore new file mode 100644 index 000000000..c8b2376cd --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/.gitignore @@ -0,0 +1,16 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built 
with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ +tools \ No newline at end of file diff --git a/vendor/github.com/ncruces/go-sqlite3/LICENSE b/vendor/github.com/ncruces/go-sqlite3/LICENSE new file mode 100644 index 000000000..9bdc1df48 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Nuno Cruces + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/vendor/github.com/ncruces/go-sqlite3/README.md b/vendor/github.com/ncruces/go-sqlite3/README.md new file mode 100644 index 000000000..c31414724 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/README.md @@ -0,0 +1,113 @@ +# Go bindings to SQLite using Wazero + +[![Go Reference](https://pkg.go.dev/badge/image)](https://pkg.go.dev/github.com/ncruces/go-sqlite3) +[![Go Report](https://goreportcard.com/badge/github.com/ncruces/go-sqlite3)](https://goreportcard.com/report/github.com/ncruces/go-sqlite3) +[![Go Coverage](https://github.com/ncruces/go-sqlite3/wiki/coverage.svg)](https://github.com/ncruces/go-sqlite3/wiki/Test-coverage-report) + +Go module `github.com/ncruces/go-sqlite3` is a `cgo`-free [SQLite](https://sqlite.org/) wrapper.\ +It provides a [`database/sql`](https://pkg.go.dev/database/sql) compatible driver, +as well as direct access to most of the [C SQLite API](https://sqlite.org/cintro.html). + +It wraps a [Wasm](https://webassembly.org/) [build](embed/) of SQLite, +and uses [wazero](https://wazero.io/) as the runtime.\ +Go, wazero and [`x/sys`](https://pkg.go.dev/golang.org/x/sys) are the _only_ runtime dependencies [^1]. + +### Packages + +- [`github.com/ncruces/go-sqlite3`](https://pkg.go.dev/github.com/ncruces/go-sqlite3) + wraps the [C SQLite API](https://sqlite.org/cintro.html) + ([example usage](https://pkg.go.dev/github.com/ncruces/go-sqlite3#example-package)). +- [`github.com/ncruces/go-sqlite3/driver`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/driver) + provides a [`database/sql`](https://pkg.go.dev/database/sql) driver + ([example usage](https://pkg.go.dev/github.com/ncruces/go-sqlite3/driver#example-package)). +- [`github.com/ncruces/go-sqlite3/embed`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/embed) + embeds a build of SQLite into your application. 
+- [`github.com/ncruces/go-sqlite3/vfs`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs) + wraps the [C SQLite VFS API](https://sqlite.org/vfs.html) and provides a pure Go implementation. +- [`github.com/ncruces/go-sqlite3/gormlite`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/gormlite) + provides a [GORM](https://gorm.io) driver. + +### Extensions + +- [`github.com/ncruces/go-sqlite3/ext/array`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/array) + provides the [`array`](https://sqlite.org/carray.html) table-valued function. +- [`github.com/ncruces/go-sqlite3/ext/blobio`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/blobio) + simplifies [incremental BLOB I/O](https://sqlite.org/c3ref/blob_open.html). +- [`github.com/ncruces/go-sqlite3/ext/csv`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/csv) + reads [comma-separated values](https://sqlite.org/csv.html). +- [`github.com/ncruces/go-sqlite3/ext/fileio`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/fileio) + reads, writes and lists files. +- [`github.com/ncruces/go-sqlite3/ext/hash`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/hash) + provides cryptographic hash functions. +- [`github.com/ncruces/go-sqlite3/ext/lines`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/lines) + reads data [line-by-line](https://github.com/asg017/sqlite-lines). +- [`github.com/ncruces/go-sqlite3/ext/pivot`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/pivot) + creates [pivot tables](https://github.com/jakethaw/pivot_vtab). +- [`github.com/ncruces/go-sqlite3/ext/statement`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/statement) + creates [parameterized views](https://github.com/0x09/sqlite-statement-vtab). +- [`github.com/ncruces/go-sqlite3/ext/stats`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/stats) + provides [statistics](https://www.oreilly.com/library/view/sql-in-a/9780596155322/ch04s02.html) functions. 
+- [`github.com/ncruces/go-sqlite3/ext/unicode`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/unicode) + provides [Unicode aware](https://sqlite.org/src/dir/ext/icu) functions. +- [`github.com/ncruces/go-sqlite3/ext/zorder`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/zorder) + maps multidimensional data to one dimension. +- [`github.com/ncruces/go-sqlite3/vfs/memdb`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs/memdb) + implements an in-memory VFS. +- [`github.com/ncruces/go-sqlite3/vfs/readervfs`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs/readervfs) + implements a VFS for immutable databases. +- [`github.com/ncruces/go-sqlite3/vfs/adiantum`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs/adiantum) + wraps a VFS to offer encryption at rest. + +### Advanced features + +- [incremental BLOB I/O](https://sqlite.org/c3ref/blob_open.html) +- [nested transactions](https://sqlite.org/lang_savepoint.html) +- [custom functions](https://sqlite.org/c3ref/create_function.html) +- [virtual tables](https://sqlite.org/vtab.html) +- [custom VFSes](https://sqlite.org/vfs.html) +- [online backup](https://sqlite.org/backup.html) +- [JSON support](https://sqlite.org/json1.html) +- [math functions](https://sqlite.org/lang_mathfunc.html) +- [full-text search](https://sqlite.org/fts5.html) +- [geospatial search](https://sqlite.org/geopoly.html) +- [encryption at rest](vfs/adiantum/README.md) +- [and more…](embed/README.md) + +### Caveats + +This module replaces the SQLite [OS Interface](https://sqlite.org/vfs.html) +(aka VFS) with a [pure Go](vfs/) implementation, +which has advantages and disadvantages. + +Read more about the Go VFS design [here](vfs/README.md). + +### Testing + +This project aims for [high test coverage](https://github.com/ncruces/go-sqlite3/wiki/Test-coverage-report). 
+It also benefits greatly from [SQLite's](https://sqlite.org/testing.html) and +[wazero's](https://tetrate.io/blog/introducing-wazero-from-tetrate/#:~:text=Rock%2Dsolid%20test%20approach) thorough testing. + +Every commit is [tested](.github/workflows/test.yml) on +Linux (amd64/arm64/386/riscv64/s390x), macOS (amd64/arm64), +Windows (amd64), FreeBSD (amd64), illumos (amd64), and Solaris (amd64). + +The Go VFS is tested by running SQLite's +[mptest](https://github.com/sqlite/sqlite/blob/master/mptest/mptest.c). + +### Performance + +Perfomance of the [`database/sql`](https://pkg.go.dev/database/sql) driver is +[competitive](https://github.com/cvilsmeier/go-sqlite-bench) with alternatives. + +The Wasm and VFS layers are also tested by running SQLite's +[speedtest1](https://github.com/sqlite/sqlite/blob/master/test/speedtest1.c). + +### Alternatives + +- [`modernc.org/sqlite`](https://pkg.go.dev/modernc.org/sqlite) +- [`crawshaw.io/sqlite`](https://pkg.go.dev/crawshaw.io/sqlite) +- [`github.com/mattn/go-sqlite3`](https://pkg.go.dev/github.com/mattn/go-sqlite3) +- [`github.com/zombiezen/go-sqlite`](https://pkg.go.dev/github.com/zombiezen/go-sqlite) + +[^1]: anything else you find in `go.mod` is either a test dependency, + or needed by one of the extensions. diff --git a/vendor/github.com/ncruces/go-sqlite3/backup.go b/vendor/github.com/ncruces/go-sqlite3/backup.go new file mode 100644 index 000000000..b16c7511e --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/backup.go @@ -0,0 +1,134 @@ +package sqlite3 + +// Backup is an handle to an ongoing online backup operation. +// +// https://sqlite.org/c3ref/backup.html +type Backup struct { + c *Conn + handle uint32 + otherc uint32 +} + +// Backup backs up srcDB on the src connection to the "main" database in dstURI. +// +// Backup opens the SQLite database file dstURI, +// and blocks until the entire backup is complete. +// Use [Conn.BackupInit] for incremental backup. 
+// +// https://sqlite.org/backup.html +func (src *Conn) Backup(srcDB, dstURI string) error { + b, err := src.BackupInit(srcDB, dstURI) + if err != nil { + return err + } + defer b.Close() + _, err = b.Step(-1) + return err +} + +// Restore restores dstDB on the dst connection from the "main" database in srcURI. +// +// Restore opens the SQLite database file srcURI, +// and blocks until the entire restore is complete. +// +// https://sqlite.org/backup.html +func (dst *Conn) Restore(dstDB, srcURI string) error { + src, err := dst.openDB(srcURI, OPEN_READONLY|OPEN_URI) + if err != nil { + return err + } + b, err := dst.backupInit(dst.handle, dstDB, src, "main") + if err != nil { + return err + } + defer b.Close() + _, err = b.Step(-1) + return err +} + +// BackupInit initializes a backup operation to copy the content of one database into another. +// +// BackupInit opens the SQLite database file dstURI, +// then initializes a backup that copies the contents of srcDB on the src connection +// to the "main" database in dstURI. +// +// https://sqlite.org/c3ref/backup_finish.html#sqlite3backupinit +func (src *Conn) BackupInit(srcDB, dstURI string) (*Backup, error) { + dst, err := src.openDB(dstURI, OPEN_READWRITE|OPEN_CREATE|OPEN_URI) + if err != nil { + return nil, err + } + return src.backupInit(dst, "main", src.handle, srcDB) +} + +func (c *Conn) backupInit(dst uint32, dstName string, src uint32, srcName string) (*Backup, error) { + defer c.arena.mark()() + dstPtr := c.arena.string(dstName) + srcPtr := c.arena.string(srcName) + + other := dst + if c.handle == dst { + other = src + } + + r := c.call("sqlite3_backup_init", + uint64(dst), uint64(dstPtr), + uint64(src), uint64(srcPtr)) + if r == 0 { + defer c.closeDB(other) + r = c.call("sqlite3_errcode", uint64(dst)) + return nil, c.sqlite.error(r, dst) + } + + return &Backup{ + c: c, + otherc: other, + handle: uint32(r), + }, nil +} + +// Close finishes a backup operation. 
+// +// It is safe to close a nil, zero or closed Backup. +// +// https://sqlite.org/c3ref/backup_finish.html#sqlite3backupfinish +func (b *Backup) Close() error { + if b == nil || b.handle == 0 { + return nil + } + + r := b.c.call("sqlite3_backup_finish", uint64(b.handle)) + b.c.closeDB(b.otherc) + b.handle = 0 + return b.c.error(r) +} + +// Step copies up to nPage pages between the source and destination databases. +// If nPage is negative, all remaining source pages are copied. +// +// https://sqlite.org/c3ref/backup_finish.html#sqlite3backupstep +func (b *Backup) Step(nPage int) (done bool, err error) { + r := b.c.call("sqlite3_backup_step", uint64(b.handle), uint64(nPage)) + if r == _DONE { + return true, nil + } + return false, b.c.error(r) +} + +// Remaining returns the number of pages still to be backed up +// at the conclusion of the most recent [Backup.Step]. +// +// https://sqlite.org/c3ref/backup_finish.html#sqlite3backupremaining +func (b *Backup) Remaining() int { + r := b.c.call("sqlite3_backup_remaining", uint64(b.handle)) + return int(int32(r)) +} + +// PageCount returns the total number of pages in the source database +// at the conclusion of the most recent [Backup.Step]. +// +// https://sqlite.org/c3ref/backup_finish.html#sqlite3backuppagecount +func (b *Backup) PageCount() int { + r := b.c.call("sqlite3_backup_pagecount", uint64(b.handle)) + return int(int32(r)) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/blob.go b/vendor/github.com/ncruces/go-sqlite3/blob.go new file mode 100644 index 000000000..bb10c5fa2 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/blob.go @@ -0,0 +1,250 @@ +package sqlite3 + +import ( + "io" + + "github.com/ncruces/go-sqlite3/internal/util" +) + +// ZeroBlob represents a zero-filled, length n BLOB +// that can be used as an argument to +// [database/sql.DB.Exec] and similar methods. +type ZeroBlob int64 + +// Blob is an handle to an open BLOB. 
+// +// It implements [io.ReadWriteSeeker] for incremental BLOB I/O. +// +// https://sqlite.org/c3ref/blob.html +type Blob struct { + c *Conn + bytes int64 + offset int64 + handle uint32 +} + +var _ io.ReadWriteSeeker = &Blob{} + +// OpenBlob opens a BLOB for incremental I/O. +// +// https://sqlite.org/c3ref/blob_open.html +func (c *Conn) OpenBlob(db, table, column string, row int64, write bool) (*Blob, error) { + c.checkInterrupt() + defer c.arena.mark()() + blobPtr := c.arena.new(ptrlen) + dbPtr := c.arena.string(db) + tablePtr := c.arena.string(table) + columnPtr := c.arena.string(column) + + var flags uint64 + if write { + flags = 1 + } + + r := c.call("sqlite3_blob_open", uint64(c.handle), + uint64(dbPtr), uint64(tablePtr), uint64(columnPtr), + uint64(row), flags, uint64(blobPtr)) + + if err := c.error(r); err != nil { + return nil, err + } + + blob := Blob{c: c} + blob.handle = util.ReadUint32(c.mod, blobPtr) + blob.bytes = int64(c.call("sqlite3_blob_bytes", uint64(blob.handle))) + return &blob, nil +} + +// Close closes a BLOB handle. +// +// It is safe to close a nil, zero or closed Blob. +// +// https://sqlite.org/c3ref/blob_close.html +func (b *Blob) Close() error { + if b == nil || b.handle == 0 { + return nil + } + + r := b.c.call("sqlite3_blob_close", uint64(b.handle)) + + b.handle = 0 + return b.c.error(r) +} + +// Size returns the size of the BLOB in bytes. +// +// https://sqlite.org/c3ref/blob_bytes.html +func (b *Blob) Size() int64 { + return b.bytes +} + +// Read implements the [io.Reader] interface. 
+// +// https://sqlite.org/c3ref/blob_read.html +func (b *Blob) Read(p []byte) (n int, err error) { + if b.offset >= b.bytes { + return 0, io.EOF + } + + avail := b.bytes - b.offset + want := int64(len(p)) + if want > avail { + want = avail + } + + defer b.c.arena.mark()() + ptr := b.c.arena.new(uint64(want)) + + r := b.c.call("sqlite3_blob_read", uint64(b.handle), + uint64(ptr), uint64(want), uint64(b.offset)) + err = b.c.error(r) + if err != nil { + return 0, err + } + b.offset += want + if b.offset >= b.bytes { + err = io.EOF + } + + copy(p, util.View(b.c.mod, ptr, uint64(want))) + return int(want), err +} + +// WriteTo implements the [io.WriterTo] interface. +// +// https://sqlite.org/c3ref/blob_read.html +func (b *Blob) WriteTo(w io.Writer) (n int64, err error) { + if b.offset >= b.bytes { + return 0, nil + } + + want := int64(1024 * 1024) + avail := b.bytes - b.offset + if want > avail { + want = avail + } + + defer b.c.arena.mark()() + ptr := b.c.arena.new(uint64(want)) + + for want > 0 { + r := b.c.call("sqlite3_blob_read", uint64(b.handle), + uint64(ptr), uint64(want), uint64(b.offset)) + err = b.c.error(r) + if err != nil { + return n, err + } + + mem := util.View(b.c.mod, ptr, uint64(want)) + m, err := w.Write(mem[:want]) + b.offset += int64(m) + n += int64(m) + if err != nil { + return n, err + } + if int64(m) != want { + return n, io.ErrShortWrite + } + + avail = b.bytes - b.offset + if want > avail { + want = avail + } + } + return n, nil +} + +// Write implements the [io.Writer] interface. +// +// https://sqlite.org/c3ref/blob_write.html +func (b *Blob) Write(p []byte) (n int, err error) { + defer b.c.arena.mark()() + ptr := b.c.arena.bytes(p) + + r := b.c.call("sqlite3_blob_write", uint64(b.handle), + uint64(ptr), uint64(len(p)), uint64(b.offset)) + err = b.c.error(r) + if err != nil { + return 0, err + } + b.offset += int64(len(p)) + return len(p), nil +} + +// ReadFrom implements the [io.ReaderFrom] interface. 
+// +// https://sqlite.org/c3ref/blob_write.html +func (b *Blob) ReadFrom(r io.Reader) (n int64, err error) { + want := int64(1024 * 1024) + avail := b.bytes - b.offset + if l, ok := r.(*io.LimitedReader); ok && want > l.N { + want = l.N + } + if want > avail { + want = avail + } + if want < 1 { + want = 1 + } + + defer b.c.arena.mark()() + ptr := b.c.arena.new(uint64(want)) + + for { + mem := util.View(b.c.mod, ptr, uint64(want)) + m, err := r.Read(mem[:want]) + if m > 0 { + r := b.c.call("sqlite3_blob_write", uint64(b.handle), + uint64(ptr), uint64(m), uint64(b.offset)) + err := b.c.error(r) + if err != nil { + return n, err + } + b.offset += int64(m) + n += int64(m) + } + if err == io.EOF { + return n, nil + } + if err != nil { + return n, err + } + + avail = b.bytes - b.offset + if want > avail { + want = avail + } + if want < 1 { + want = 1 + } + } +} + +// Seek implements the [io.Seeker] interface. +func (b *Blob) Seek(offset int64, whence int) (int64, error) { + switch whence { + default: + return 0, util.WhenceErr + case io.SeekStart: + break + case io.SeekCurrent: + offset += b.offset + case io.SeekEnd: + offset += b.bytes + } + if offset < 0 { + return 0, util.OffsetErr + } + b.offset = offset + return offset, nil +} + +// Reopen moves a BLOB handle to a new row of the same database table. 
+// +// https://sqlite.org/c3ref/blob_reopen.html +func (b *Blob) Reopen(row int64) error { + err := b.c.error(b.c.call("sqlite3_blob_reopen", uint64(b.handle), uint64(row))) + b.bytes = int64(b.c.call("sqlite3_blob_bytes", uint64(b.handle))) + b.offset = 0 + return err +} diff --git a/vendor/github.com/ncruces/go-sqlite3/config.go b/vendor/github.com/ncruces/go-sqlite3/config.go new file mode 100644 index 000000000..0342be7fb --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/config.go @@ -0,0 +1,164 @@ +package sqlite3 + +import ( + "context" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/tetratelabs/wazero/api" +) + +// Config makes configuration changes to a database connection. +// Only boolean configuration options are supported. +// Called with no arg reads the current configuration value, +// called with one arg sets and returns the new value. +// +// https://sqlite.org/c3ref/db_config.html +func (c *Conn) Config(op DBConfig, arg ...bool) (bool, error) { + defer c.arena.mark()() + argsPtr := c.arena.new(2 * ptrlen) + + var flag int + switch { + case len(arg) == 0: + flag = -1 + case arg[0]: + flag = 1 + } + + util.WriteUint32(c.mod, argsPtr+0*ptrlen, uint32(flag)) + util.WriteUint32(c.mod, argsPtr+1*ptrlen, argsPtr) + + r := c.call("sqlite3_db_config", uint64(c.handle), + uint64(op), uint64(argsPtr)) + return util.ReadUint32(c.mod, argsPtr) != 0, c.error(r) +} + +// ConfigLog sets up the error logging callback for the connection. 
+// +// https://sqlite.org/errlog.html +func (c *Conn) ConfigLog(cb func(code ExtendedErrorCode, msg string)) error { + var enable uint64 + if cb != nil { + enable = 1 + } + r := c.call("sqlite3_config_log_go", enable) + if err := c.error(r); err != nil { + return err + } + c.log = cb + return nil +} + +func logCallback(ctx context.Context, mod api.Module, _, iCode, zMsg uint32) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.log != nil { + msg := util.ReadString(mod, zMsg, _MAX_LENGTH) + c.log(xErrorCode(iCode), msg) + } +} + +// Limit allows the size of various constructs to be +// limited on a connection by connection basis. +// +// https://sqlite.org/c3ref/limit.html +func (c *Conn) Limit(id LimitCategory, value int) int { + r := c.call("sqlite3_limit", uint64(c.handle), uint64(id), uint64(value)) + return int(int32(r)) +} + +// SetAuthorizer registers an authorizer callback with the database connection. +// +// https://sqlite.org/c3ref/set_authorizer.html +func (c *Conn) SetAuthorizer(cb func(action AuthorizerActionCode, name3rd, name4th, schema, nameInner string) AuthorizerReturnCode) error { + var enable uint64 + if cb != nil { + enable = 1 + } + r := c.call("sqlite3_set_authorizer_go", uint64(c.handle), enable) + if err := c.error(r); err != nil { + return err + } + c.authorizer = cb + return nil + +} + +func authorizerCallback(ctx context.Context, mod api.Module, pDB uint32, action AuthorizerActionCode, zName3rd, zName4th, zSchema, zNameInner uint32) (rc AuthorizerReturnCode) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.authorizer != nil { + var name3rd, name4th, schema, nameInner string + if zName3rd != 0 { + name3rd = util.ReadString(mod, zName3rd, _MAX_NAME) + } + if zName4th != 0 { + name4th = util.ReadString(mod, zName4th, _MAX_NAME) + } + if zSchema != 0 { + schema = util.ReadString(mod, zSchema, _MAX_NAME) + } + if zNameInner != 0 { + nameInner = util.ReadString(mod, zNameInner, _MAX_NAME) + } + rc = 
c.authorizer(action, name3rd, name4th, schema, nameInner) + } + return rc +} + +// WalCheckpoint checkpoints a WAL database. +// +// https://sqlite.org/c3ref/wal_checkpoint_v2.html +func (c *Conn) WalCheckpoint(schema string, mode CheckpointMode) (nLog, nCkpt int, err error) { + defer c.arena.mark()() + nLogPtr := c.arena.new(ptrlen) + nCkptPtr := c.arena.new(ptrlen) + schemaPtr := c.arena.string(schema) + r := c.call("sqlite3_wal_checkpoint_v2", + uint64(c.handle), uint64(schemaPtr), uint64(mode), + uint64(nLogPtr), uint64(nCkptPtr)) + nLog = int(int32(util.ReadUint32(c.mod, nLogPtr))) + nCkpt = int(int32(util.ReadUint32(c.mod, nCkptPtr))) + return nLog, nCkpt, c.error(r) +} + +// WalAutoCheckpoint configures WAL auto-checkpoints. +// +// https://sqlite.org/c3ref/wal_autocheckpoint.html +func (c *Conn) WalAutoCheckpoint(pages int) error { + r := c.call("sqlite3_wal_autocheckpoint", uint64(c.handle), uint64(pages)) + return c.error(r) +} + +// WalHook registers a callback function to be invoked +// each time data is committed to a database in WAL mode. +// +// https://sqlite.org/c3ref/wal_hook.html +func (c *Conn) WalHook(cb func(db *Conn, schema string, pages int) error) { + var enable uint64 + if cb != nil { + enable = 1 + } + c.call("sqlite3_wal_hook_go", uint64(c.handle), enable) + c.wal = cb +} + +func walCallback(ctx context.Context, mod api.Module, _, pDB, zSchema uint32, pages int32) (rc uint32) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.wal != nil { + schema := util.ReadString(mod, zSchema, _MAX_NAME) + err := c.wal(c, schema, int(pages)) + _, rc = errorCode(err, ERROR) + } + return rc +} + +// AutoVacuumPages registers a autovacuum compaction amount callback. 
+// +// https://sqlite.org/c3ref/autovacuum_pages.html +func (c *Conn) AutoVacuumPages(cb func(schema string, dbPages, freePages, bytesPerPage uint) uint) error { + funcPtr := util.AddHandle(c.ctx, cb) + r := c.call("sqlite3_autovacuum_pages_go", uint64(c.handle), uint64(funcPtr)) + return c.error(r) +} + +func autoVacuumCallback(ctx context.Context, mod api.Module, pApp, zSchema, nDbPage, nFreePage, nBytePerPage uint32) uint32 { + fn := util.GetHandle(ctx, pApp).(func(schema string, dbPages, freePages, bytesPerPage uint) uint) + schema := util.ReadString(mod, zSchema, _MAX_NAME) + return uint32(fn(schema, uint(nDbPage), uint(nFreePage), uint(nBytePerPage))) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/conn.go b/vendor/github.com/ncruces/go-sqlite3/conn.go new file mode 100644 index 000000000..f170ccf57 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/conn.go @@ -0,0 +1,426 @@ +package sqlite3 + +import ( + "context" + "fmt" + "math" + "net/url" + "strings" + "time" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/ncruces/go-sqlite3/vfs" + "github.com/tetratelabs/wazero/api" +) + +// Conn is a database connection handle. +// A Conn is not safe for concurrent use by multiple goroutines. +// +// https://sqlite.org/c3ref/sqlite3.html +type Conn struct { + *sqlite + + interrupt context.Context + pending *Stmt + busy func(int) bool + log func(xErrorCode, string) + collation func(*Conn, string) + authorizer func(AuthorizerActionCode, string, string, string, string) AuthorizerReturnCode + update func(AuthorizerActionCode, string, string, int64) + commit func() bool + rollback func() + wal func(*Conn, string, int) error + arena arena + + handle uint32 +} + +// Open calls [OpenFlags] with [OPEN_READWRITE], [OPEN_CREATE], [OPEN_URI] and [OPEN_NOFOLLOW]. 
+func Open(filename string) (*Conn, error) { + return newConn(filename, OPEN_READWRITE|OPEN_CREATE|OPEN_URI|OPEN_NOFOLLOW) +} + +// OpenFlags opens an SQLite database file as specified by the filename argument. +// +// If none of the required flags is used, a combination of [OPEN_READWRITE] and [OPEN_CREATE] is used. +// If a URI filename is used, PRAGMA statements to execute can be specified using "_pragma": +// +// sqlite3.Open("file:demo.db?_pragma=busy_timeout(10000)") +// +// https://sqlite.org/c3ref/open.html +func OpenFlags(filename string, flags OpenFlag) (*Conn, error) { + if flags&(OPEN_READONLY|OPEN_READWRITE|OPEN_CREATE) == 0 { + flags |= OPEN_READWRITE | OPEN_CREATE + } + return newConn(filename, flags) +} + +type connKey struct{} + +func newConn(filename string, flags OpenFlag) (conn *Conn, err error) { + sqlite, err := instantiateSQLite() + if err != nil { + return nil, err + } + defer func() { + if conn == nil { + sqlite.close() + } + }() + + c := &Conn{sqlite: sqlite} + c.arena = c.newArena(1024) + c.ctx = context.WithValue(c.ctx, connKey{}, c) + c.handle, err = c.openDB(filename, flags) + if err != nil { + return nil, err + } + return c, nil +} + +func (c *Conn) openDB(filename string, flags OpenFlag) (uint32, error) { + defer c.arena.mark()() + connPtr := c.arena.new(ptrlen) + namePtr := c.arena.string(filename) + + flags |= OPEN_EXRESCODE + r := c.call("sqlite3_open_v2", uint64(namePtr), uint64(connPtr), uint64(flags), 0) + + handle := util.ReadUint32(c.mod, connPtr) + if err := c.sqlite.error(r, handle); err != nil { + c.closeDB(handle) + return 0, err + } + + if flags|OPEN_URI != 0 && strings.HasPrefix(filename, "file:") { + var pragmas strings.Builder + if _, after, ok := strings.Cut(filename, "?"); ok { + query, _ := url.ParseQuery(after) + for _, p := range query["_pragma"] { + pragmas.WriteString(`PRAGMA `) + pragmas.WriteString(p) + pragmas.WriteString(`;`) + } + } + if pragmas.Len() != 0 { + pragmaPtr := c.arena.string(pragmas.String()) 
+ r := c.call("sqlite3_exec", uint64(handle), uint64(pragmaPtr), 0, 0, 0) + if err := c.sqlite.error(r, handle, pragmas.String()); err != nil { + err = fmt.Errorf("sqlite3: invalid _pragma: %w", err) + c.closeDB(handle) + return 0, err + } + } + } + c.call("sqlite3_progress_handler_go", uint64(handle), 100) + return handle, nil +} + +func (c *Conn) closeDB(handle uint32) { + r := c.call("sqlite3_close_v2", uint64(handle)) + if err := c.sqlite.error(r, handle); err != nil { + panic(err) + } +} + +// Close closes the database connection. +// +// If the database connection is associated with unfinalized prepared statements, +// open blob handles, and/or unfinished backup objects, +// Close will leave the database connection open and return [BUSY]. +// +// It is safe to close a nil, zero or closed Conn. +// +// https://sqlite.org/c3ref/close.html +func (c *Conn) Close() error { + if c == nil || c.handle == 0 { + return nil + } + + c.pending.Close() + c.pending = nil + + r := c.call("sqlite3_close", uint64(c.handle)) + if err := c.error(r); err != nil { + return err + } + + c.handle = 0 + return c.close() +} + +// Exec is a convenience function that allows an application to run +// multiple statements of SQL without having to use a lot of code. +// +// https://sqlite.org/c3ref/exec.html +func (c *Conn) Exec(sql string) error { + c.checkInterrupt() + defer c.arena.mark()() + sqlPtr := c.arena.string(sql) + + r := c.call("sqlite3_exec", uint64(c.handle), uint64(sqlPtr), 0, 0, 0) + return c.error(r, sql) +} + +// Prepare calls [Conn.PrepareFlags] with no flags. +func (c *Conn) Prepare(sql string) (stmt *Stmt, tail string, err error) { + return c.PrepareFlags(sql, 0) +} + +// PrepareFlags compiles the first SQL statement in sql; +// tail is left pointing to what remains uncompiled. +// If the input text contains no SQL (if the input is an empty string or a comment), +// both stmt and err will be nil. 
+// +// https://sqlite.org/c3ref/prepare.html +func (c *Conn) PrepareFlags(sql string, flags PrepareFlag) (stmt *Stmt, tail string, err error) { + if len(sql) > _MAX_SQL_LENGTH { + return nil, "", TOOBIG + } + + defer c.arena.mark()() + stmtPtr := c.arena.new(ptrlen) + tailPtr := c.arena.new(ptrlen) + sqlPtr := c.arena.string(sql) + + r := c.call("sqlite3_prepare_v3", uint64(c.handle), + uint64(sqlPtr), uint64(len(sql)+1), uint64(flags), + uint64(stmtPtr), uint64(tailPtr)) + + stmt = &Stmt{c: c} + stmt.handle = util.ReadUint32(c.mod, stmtPtr) + if sql := sql[util.ReadUint32(c.mod, tailPtr)-sqlPtr:]; sql != "" { + tail = sql + } + + if err := c.error(r, sql); err != nil { + return nil, "", err + } + if stmt.handle == 0 { + return nil, "", nil + } + return stmt, tail, nil +} + +// DBName returns the schema name for n-th database on the database connection. +// +// https://sqlite.org/c3ref/db_name.html +func (c *Conn) DBName(n int) string { + r := c.call("sqlite3_db_name", uint64(c.handle), uint64(n)) + + ptr := uint32(r) + if ptr == 0 { + return "" + } + return util.ReadString(c.mod, ptr, _MAX_NAME) +} + +// Filename returns the filename for a database. +// +// https://sqlite.org/c3ref/db_filename.html +func (c *Conn) Filename(schema string) *vfs.Filename { + var ptr uint32 + if schema != "" { + defer c.arena.mark()() + ptr = c.arena.string(schema) + } + + r := c.call("sqlite3_db_filename", uint64(c.handle), uint64(ptr)) + return vfs.OpenFilename(c.ctx, c.mod, uint32(r), vfs.OPEN_MAIN_DB) +} + +// ReadOnly determines if a database is read-only. +// +// https://sqlite.org/c3ref/db_readonly.html +func (c *Conn) ReadOnly(schema string) (ro bool, ok bool) { + var ptr uint32 + if schema != "" { + defer c.arena.mark()() + ptr = c.arena.string(schema) + } + r := c.call("sqlite3_db_readonly", uint64(c.handle), uint64(ptr)) + return int32(r) > 0, int32(r) < 0 +} + +// GetAutocommit tests the connection for auto-commit mode. 
+// +// https://sqlite.org/c3ref/get_autocommit.html +func (c *Conn) GetAutocommit() bool { + r := c.call("sqlite3_get_autocommit", uint64(c.handle)) + return r != 0 +} + +// LastInsertRowID returns the rowid of the most recent successful INSERT +// on the database connection. +// +// https://sqlite.org/c3ref/last_insert_rowid.html +func (c *Conn) LastInsertRowID() int64 { + r := c.call("sqlite3_last_insert_rowid", uint64(c.handle)) + return int64(r) +} + +// SetLastInsertRowID allows the application to set the value returned by +// [Conn.LastInsertRowID]. +// +// https://sqlite.org/c3ref/set_last_insert_rowid.html +func (c *Conn) SetLastInsertRowID(id int64) { + c.call("sqlite3_set_last_insert_rowid", uint64(c.handle), uint64(id)) +} + +// Changes returns the number of rows modified, inserted or deleted +// by the most recently completed INSERT, UPDATE or DELETE statement +// on the database connection. +// +// https://sqlite.org/c3ref/changes.html +func (c *Conn) Changes() int64 { + r := c.call("sqlite3_changes64", uint64(c.handle)) + return int64(r) +} + +// TotalChanges returns the number of rows modified, inserted or deleted +// by all INSERT, UPDATE or DELETE statements completed +// since the database connection was opened. +// +// https://sqlite.org/c3ref/total_changes.html +func (c *Conn) TotalChanges() int64 { + r := c.call("sqlite3_total_changes64", uint64(c.handle)) + return int64(r) +} + +// ReleaseMemory frees memory used by a database connection. +// +// https://sqlite.org/c3ref/db_release_memory.html +func (c *Conn) ReleaseMemory() error { + r := c.call("sqlite3_db_release_memory", uint64(c.handle)) + return c.error(r) +} + +// GetInterrupt gets the context set with [Conn.SetInterrupt], +// or nil if none was set. +func (c *Conn) GetInterrupt() context.Context { + return c.interrupt +} + +// SetInterrupt interrupts a long-running query when a context is done. 
+// +// Subsequent uses of the connection will return [INTERRUPT] +// until the context is reset by another call to SetInterrupt. +// +// To associate a timeout with a connection: +// +// ctx, cancel := context.WithTimeout(context.TODO(), 100*time.Millisecond) +// conn.SetInterrupt(ctx) +// defer cancel() +// +// SetInterrupt returns the old context assigned to the connection. +// +// https://sqlite.org/c3ref/interrupt.html +func (c *Conn) SetInterrupt(ctx context.Context) (old context.Context) { + // Is it the same context? + if ctx == c.interrupt { + return ctx + } + + // A busy SQL statement prevents SQLite from ignoring an interrupt + // that comes before any other statements are started. + if c.pending == nil { + c.pending, _, _ = c.Prepare(`WITH RECURSIVE c(x) AS (VALUES(0) UNION ALL SELECT x FROM c) SELECT x FROM c`) + } + + old = c.interrupt + c.interrupt = ctx + + if old != nil && old.Done() != nil && (ctx == nil || ctx.Err() == nil) { + c.pending.Reset() + } + if ctx != nil && ctx.Done() != nil { + c.pending.Step() + } + return old +} + +func (c *Conn) checkInterrupt() { + if c.interrupt != nil && c.interrupt.Err() != nil { + c.call("sqlite3_interrupt", uint64(c.handle)) + } +} + +func progressCallback(ctx context.Context, mod api.Module, pDB uint32) (interrupt uint32) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && + c.interrupt != nil && c.interrupt.Err() != nil { + interrupt = 1 + } + return interrupt +} + +// BusyTimeout sets a busy timeout. 
+// +// https://sqlite.org/c3ref/busy_timeout.html +func (c *Conn) BusyTimeout(timeout time.Duration) error { + ms := min((timeout+time.Millisecond-1)/time.Millisecond, math.MaxInt32) + r := c.call("sqlite3_busy_timeout", uint64(c.handle), uint64(ms)) + return c.error(r) +} + +func timeoutCallback(ctx context.Context, mod api.Module, pDB uint32, count, tmout int32) (retry uint32) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && + (c.interrupt == nil || c.interrupt.Err() == nil) { + const delays = "\x01\x02\x05\x0a\x0f\x14\x19\x19\x19\x32\x32\x64" + const totals = "\x00\x01\x03\x08\x12\x21\x35\x4e\x67\x80\xb2\xe4" + const ndelay = int32(len(delays) - 1) + + var delay, prior int32 + if count <= ndelay { + delay = int32(delays[count]) + prior = int32(totals[count]) + } else { + delay = int32(delays[ndelay]) + prior = int32(totals[ndelay]) + delay*(count-ndelay) + } + + if delay = min(delay, tmout-prior); delay > 0 { + time.Sleep(time.Duration(delay) * time.Millisecond) + retry = 1 + } + } + return retry +} + +// BusyHandler registers a callback to handle [BUSY] errors. +// +// https://sqlite.org/c3ref/busy_handler.html +func (c *Conn) BusyHandler(cb func(count int) (retry bool)) error { + var enable uint64 + if cb != nil { + enable = 1 + } + r := c.call("sqlite3_busy_handler_go", uint64(c.handle), enable) + if err := c.error(r); err != nil { + return err + } + c.busy = cb + return nil +} + +func busyCallback(ctx context.Context, mod api.Module, pDB uint32, count int32) (retry uint32) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.busy != nil && + (c.interrupt == nil || c.interrupt.Err() == nil) { + if c.busy(int(count)) { + retry = 1 + } + } + return retry +} + +func (c *Conn) error(rc uint64, sql ...string) error { + return c.sqlite.error(rc, c.handle, sql...) +} + +// DriverConn is implemented by the SQLite [database/sql] driver connection. +// +// It can be used to access SQLite features like [online backup]. 
+// +// [online backup]: https://sqlite.org/backup.html +type DriverConn interface { + Raw() *Conn +} diff --git a/vendor/github.com/ncruces/go-sqlite3/const.go b/vendor/github.com/ncruces/go-sqlite3/const.go new file mode 100644 index 000000000..2bb53656f --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/const.go @@ -0,0 +1,360 @@ +package sqlite3 + +import "strconv" + +const ( + _OK = 0 /* Successful result */ + _ROW = 100 /* sqlite3_step() has another row ready */ + _DONE = 101 /* sqlite3_step() has finished executing */ + + _UTF8 = 1 + + _MAX_NAME = 1e6 // Self-imposed limit for most NUL terminated strings. + _MAX_LENGTH = 1e9 + _MAX_SQL_LENGTH = 1e9 + _MAX_ALLOCATION_SIZE = 0x7ffffeff + _MAX_FUNCTION_ARG = 100 + + ptrlen = 4 +) + +// ErrorCode is a result code that [Error.Code] might return. +// +// https://sqlite.org/rescode.html +type ErrorCode uint8 + +const ( + ERROR ErrorCode = 1 /* Generic error */ + INTERNAL ErrorCode = 2 /* Internal logic error in SQLite */ + PERM ErrorCode = 3 /* Access permission denied */ + ABORT ErrorCode = 4 /* Callback routine requested an abort */ + BUSY ErrorCode = 5 /* The database file is locked */ + LOCKED ErrorCode = 6 /* A table in the database is locked */ + NOMEM ErrorCode = 7 /* A malloc() failed */ + READONLY ErrorCode = 8 /* Attempt to write a readonly database */ + INTERRUPT ErrorCode = 9 /* Operation terminated by sqlite3_interrupt() */ + IOERR ErrorCode = 10 /* Some kind of disk I/O error occurred */ + CORRUPT ErrorCode = 11 /* The database disk image is malformed */ + NOTFOUND ErrorCode = 12 /* Unknown opcode in sqlite3_file_control() */ + FULL ErrorCode = 13 /* Insertion failed because database is full */ + CANTOPEN ErrorCode = 14 /* Unable to open the database file */ + PROTOCOL ErrorCode = 15 /* Database lock protocol error */ + EMPTY ErrorCode = 16 /* Internal use only */ + SCHEMA ErrorCode = 17 /* The database schema changed */ + TOOBIG ErrorCode = 18 /* String or BLOB exceeds size limit */ + 
CONSTRAINT ErrorCode = 19 /* Abort due to constraint violation */ + MISMATCH ErrorCode = 20 /* Data type mismatch */ + MISUSE ErrorCode = 21 /* Library used incorrectly */ + NOLFS ErrorCode = 22 /* Uses OS features not supported on host */ + AUTH ErrorCode = 23 /* Authorization denied */ + FORMAT ErrorCode = 24 /* Not used */ + RANGE ErrorCode = 25 /* 2nd parameter to sqlite3_bind out of range */ + NOTADB ErrorCode = 26 /* File opened that is not a database file */ + NOTICE ErrorCode = 27 /* Notifications from sqlite3_log() */ + WARNING ErrorCode = 28 /* Warnings from sqlite3_log() */ +) + +// ExtendedErrorCode is a result code that [Error.ExtendedCode] might return. +// +// https://sqlite.org/rescode.html +type ( + ExtendedErrorCode uint16 + xErrorCode = ExtendedErrorCode +) + +const ( + ERROR_MISSING_COLLSEQ ExtendedErrorCode = xErrorCode(ERROR) | (1 << 8) + ERROR_RETRY ExtendedErrorCode = xErrorCode(ERROR) | (2 << 8) + ERROR_SNAPSHOT ExtendedErrorCode = xErrorCode(ERROR) | (3 << 8) + IOERR_READ ExtendedErrorCode = xErrorCode(IOERR) | (1 << 8) + IOERR_SHORT_READ ExtendedErrorCode = xErrorCode(IOERR) | (2 << 8) + IOERR_WRITE ExtendedErrorCode = xErrorCode(IOERR) | (3 << 8) + IOERR_FSYNC ExtendedErrorCode = xErrorCode(IOERR) | (4 << 8) + IOERR_DIR_FSYNC ExtendedErrorCode = xErrorCode(IOERR) | (5 << 8) + IOERR_TRUNCATE ExtendedErrorCode = xErrorCode(IOERR) | (6 << 8) + IOERR_FSTAT ExtendedErrorCode = xErrorCode(IOERR) | (7 << 8) + IOERR_UNLOCK ExtendedErrorCode = xErrorCode(IOERR) | (8 << 8) + IOERR_RDLOCK ExtendedErrorCode = xErrorCode(IOERR) | (9 << 8) + IOERR_DELETE ExtendedErrorCode = xErrorCode(IOERR) | (10 << 8) + IOERR_BLOCKED ExtendedErrorCode = xErrorCode(IOERR) | (11 << 8) + IOERR_NOMEM ExtendedErrorCode = xErrorCode(IOERR) | (12 << 8) + IOERR_ACCESS ExtendedErrorCode = xErrorCode(IOERR) | (13 << 8) + IOERR_CHECKRESERVEDLOCK ExtendedErrorCode = xErrorCode(IOERR) | (14 << 8) + IOERR_LOCK ExtendedErrorCode = xErrorCode(IOERR) | (15 << 8) + IOERR_CLOSE 
ExtendedErrorCode = xErrorCode(IOERR) | (16 << 8) + IOERR_DIR_CLOSE ExtendedErrorCode = xErrorCode(IOERR) | (17 << 8) + IOERR_SHMOPEN ExtendedErrorCode = xErrorCode(IOERR) | (18 << 8) + IOERR_SHMSIZE ExtendedErrorCode = xErrorCode(IOERR) | (19 << 8) + IOERR_SHMLOCK ExtendedErrorCode = xErrorCode(IOERR) | (20 << 8) + IOERR_SHMMAP ExtendedErrorCode = xErrorCode(IOERR) | (21 << 8) + IOERR_SEEK ExtendedErrorCode = xErrorCode(IOERR) | (22 << 8) + IOERR_DELETE_NOENT ExtendedErrorCode = xErrorCode(IOERR) | (23 << 8) + IOERR_MMAP ExtendedErrorCode = xErrorCode(IOERR) | (24 << 8) + IOERR_GETTEMPPATH ExtendedErrorCode = xErrorCode(IOERR) | (25 << 8) + IOERR_CONVPATH ExtendedErrorCode = xErrorCode(IOERR) | (26 << 8) + IOERR_VNODE ExtendedErrorCode = xErrorCode(IOERR) | (27 << 8) + IOERR_AUTH ExtendedErrorCode = xErrorCode(IOERR) | (28 << 8) + IOERR_BEGIN_ATOMIC ExtendedErrorCode = xErrorCode(IOERR) | (29 << 8) + IOERR_COMMIT_ATOMIC ExtendedErrorCode = xErrorCode(IOERR) | (30 << 8) + IOERR_ROLLBACK_ATOMIC ExtendedErrorCode = xErrorCode(IOERR) | (31 << 8) + IOERR_DATA ExtendedErrorCode = xErrorCode(IOERR) | (32 << 8) + IOERR_CORRUPTFS ExtendedErrorCode = xErrorCode(IOERR) | (33 << 8) + IOERR_IN_PAGE ExtendedErrorCode = xErrorCode(IOERR) | (34 << 8) + LOCKED_SHAREDCACHE ExtendedErrorCode = xErrorCode(LOCKED) | (1 << 8) + LOCKED_VTAB ExtendedErrorCode = xErrorCode(LOCKED) | (2 << 8) + BUSY_RECOVERY ExtendedErrorCode = xErrorCode(BUSY) | (1 << 8) + BUSY_SNAPSHOT ExtendedErrorCode = xErrorCode(BUSY) | (2 << 8) + BUSY_TIMEOUT ExtendedErrorCode = xErrorCode(BUSY) | (3 << 8) + CANTOPEN_NOTEMPDIR ExtendedErrorCode = xErrorCode(CANTOPEN) | (1 << 8) + CANTOPEN_ISDIR ExtendedErrorCode = xErrorCode(CANTOPEN) | (2 << 8) + CANTOPEN_FULLPATH ExtendedErrorCode = xErrorCode(CANTOPEN) | (3 << 8) + CANTOPEN_CONVPATH ExtendedErrorCode = xErrorCode(CANTOPEN) | (4 << 8) + CANTOPEN_DIRTYWAL ExtendedErrorCode = xErrorCode(CANTOPEN) | (5 << 8) /* Not Used */ + CANTOPEN_SYMLINK ExtendedErrorCode = 
xErrorCode(CANTOPEN) | (6 << 8) + CORRUPT_VTAB ExtendedErrorCode = xErrorCode(CORRUPT) | (1 << 8) + CORRUPT_SEQUENCE ExtendedErrorCode = xErrorCode(CORRUPT) | (2 << 8) + CORRUPT_INDEX ExtendedErrorCode = xErrorCode(CORRUPT) | (3 << 8) + READONLY_RECOVERY ExtendedErrorCode = xErrorCode(READONLY) | (1 << 8) + READONLY_CANTLOCK ExtendedErrorCode = xErrorCode(READONLY) | (2 << 8) + READONLY_ROLLBACK ExtendedErrorCode = xErrorCode(READONLY) | (3 << 8) + READONLY_DBMOVED ExtendedErrorCode = xErrorCode(READONLY) | (4 << 8) + READONLY_CANTINIT ExtendedErrorCode = xErrorCode(READONLY) | (5 << 8) + READONLY_DIRECTORY ExtendedErrorCode = xErrorCode(READONLY) | (6 << 8) + ABORT_ROLLBACK ExtendedErrorCode = xErrorCode(ABORT) | (2 << 8) + CONSTRAINT_CHECK ExtendedErrorCode = xErrorCode(CONSTRAINT) | (1 << 8) + CONSTRAINT_COMMITHOOK ExtendedErrorCode = xErrorCode(CONSTRAINT) | (2 << 8) + CONSTRAINT_FOREIGNKEY ExtendedErrorCode = xErrorCode(CONSTRAINT) | (3 << 8) + CONSTRAINT_FUNCTION ExtendedErrorCode = xErrorCode(CONSTRAINT) | (4 << 8) + CONSTRAINT_NOTNULL ExtendedErrorCode = xErrorCode(CONSTRAINT) | (5 << 8) + CONSTRAINT_PRIMARYKEY ExtendedErrorCode = xErrorCode(CONSTRAINT) | (6 << 8) + CONSTRAINT_TRIGGER ExtendedErrorCode = xErrorCode(CONSTRAINT) | (7 << 8) + CONSTRAINT_UNIQUE ExtendedErrorCode = xErrorCode(CONSTRAINT) | (8 << 8) + CONSTRAINT_VTAB ExtendedErrorCode = xErrorCode(CONSTRAINT) | (9 << 8) + CONSTRAINT_ROWID ExtendedErrorCode = xErrorCode(CONSTRAINT) | (10 << 8) + CONSTRAINT_PINNED ExtendedErrorCode = xErrorCode(CONSTRAINT) | (11 << 8) + CONSTRAINT_DATATYPE ExtendedErrorCode = xErrorCode(CONSTRAINT) | (12 << 8) + NOTICE_RECOVER_WAL ExtendedErrorCode = xErrorCode(NOTICE) | (1 << 8) + NOTICE_RECOVER_ROLLBACK ExtendedErrorCode = xErrorCode(NOTICE) | (2 << 8) + NOTICE_RBU ExtendedErrorCode = xErrorCode(NOTICE) | (3 << 8) + WARNING_AUTOINDEX ExtendedErrorCode = xErrorCode(WARNING) | (1 << 8) + AUTH_USER ExtendedErrorCode = xErrorCode(AUTH) | (1 << 8) +) + +// OpenFlag is 
a flag for the [OpenFlags] function. +// +// https://sqlite.org/c3ref/c_open_autoproxy.html +type OpenFlag uint32 + +const ( + OPEN_READONLY OpenFlag = 0x00000001 /* Ok for sqlite3_open_v2() */ + OPEN_READWRITE OpenFlag = 0x00000002 /* Ok for sqlite3_open_v2() */ + OPEN_CREATE OpenFlag = 0x00000004 /* Ok for sqlite3_open_v2() */ + OPEN_URI OpenFlag = 0x00000040 /* Ok for sqlite3_open_v2() */ + OPEN_MEMORY OpenFlag = 0x00000080 /* Ok for sqlite3_open_v2() */ + OPEN_NOMUTEX OpenFlag = 0x00008000 /* Ok for sqlite3_open_v2() */ + OPEN_FULLMUTEX OpenFlag = 0x00010000 /* Ok for sqlite3_open_v2() */ + OPEN_SHAREDCACHE OpenFlag = 0x00020000 /* Ok for sqlite3_open_v2() */ + OPEN_PRIVATECACHE OpenFlag = 0x00040000 /* Ok for sqlite3_open_v2() */ + OPEN_NOFOLLOW OpenFlag = 0x01000000 /* Ok for sqlite3_open_v2() */ + OPEN_EXRESCODE OpenFlag = 0x02000000 /* Extended result codes */ +) + +// PrepareFlag is a flag that can be passed to [Conn.PrepareFlags]. +// +// https://sqlite.org/c3ref/c_prepare_normalize.html +type PrepareFlag uint32 + +const ( + PREPARE_PERSISTENT PrepareFlag = 0x01 + PREPARE_NORMALIZE PrepareFlag = 0x02 + PREPARE_NO_VTAB PrepareFlag = 0x04 +) + +// FunctionFlag is a flag that can be passed to +// [Conn.CreateFunction] and [Conn.CreateWindowFunction]. +// +// https://sqlite.org/c3ref/c_deterministic.html +type FunctionFlag uint32 + +const ( + DETERMINISTIC FunctionFlag = 0x000000800 + DIRECTONLY FunctionFlag = 0x000080000 + SUBTYPE FunctionFlag = 0x000100000 + INNOCUOUS FunctionFlag = 0x000200000 + RESULT_SUBTYPE FunctionFlag = 0x001000000 +) + +// StmtStatus name counter values associated with the [Stmt.Status] method. 
+// +// https://sqlite.org/c3ref/c_stmtstatus_counter.html +type StmtStatus uint32 + +const ( + STMTSTATUS_FULLSCAN_STEP StmtStatus = 1 + STMTSTATUS_SORT StmtStatus = 2 + STMTSTATUS_AUTOINDEX StmtStatus = 3 + STMTSTATUS_VM_STEP StmtStatus = 4 + STMTSTATUS_REPREPARE StmtStatus = 5 + STMTSTATUS_RUN StmtStatus = 6 + STMTSTATUS_FILTER_MISS StmtStatus = 7 + STMTSTATUS_FILTER_HIT StmtStatus = 8 + STMTSTATUS_MEMUSED StmtStatus = 99 +) + +// DBConfig are the available database connection configuration options. +// +// https://sqlite.org/c3ref/c_dbconfig_defensive.html +type DBConfig uint32 + +const ( + // DBCONFIG_MAINDBNAME DBConfig = 1000 + // DBCONFIG_LOOKASIDE DBConfig = 1001 + DBCONFIG_ENABLE_FKEY DBConfig = 1002 + DBCONFIG_ENABLE_TRIGGER DBConfig = 1003 + DBCONFIG_ENABLE_FTS3_TOKENIZER DBConfig = 1004 + DBCONFIG_ENABLE_LOAD_EXTENSION DBConfig = 1005 + DBCONFIG_NO_CKPT_ON_CLOSE DBConfig = 1006 + DBCONFIG_ENABLE_QPSG DBConfig = 1007 + DBCONFIG_TRIGGER_EQP DBConfig = 1008 + DBCONFIG_RESET_DATABASE DBConfig = 1009 + DBCONFIG_DEFENSIVE DBConfig = 1010 + DBCONFIG_WRITABLE_SCHEMA DBConfig = 1011 + DBCONFIG_LEGACY_ALTER_TABLE DBConfig = 1012 + DBCONFIG_DQS_DML DBConfig = 1013 + DBCONFIG_DQS_DDL DBConfig = 1014 + DBCONFIG_ENABLE_VIEW DBConfig = 1015 + DBCONFIG_LEGACY_FILE_FORMAT DBConfig = 1016 + DBCONFIG_TRUSTED_SCHEMA DBConfig = 1017 + DBCONFIG_STMT_SCANSTATUS DBConfig = 1018 + DBCONFIG_REVERSE_SCANORDER DBConfig = 1019 +) + +// LimitCategory are the available run-time limit categories. 
+// +// https://sqlite.org/c3ref/c_limit_attached.html +type LimitCategory uint32 + +const ( + LIMIT_LENGTH LimitCategory = 0 + LIMIT_SQL_LENGTH LimitCategory = 1 + LIMIT_COLUMN LimitCategory = 2 + LIMIT_EXPR_DEPTH LimitCategory = 3 + LIMIT_COMPOUND_SELECT LimitCategory = 4 + LIMIT_VDBE_OP LimitCategory = 5 + LIMIT_FUNCTION_ARG LimitCategory = 6 + LIMIT_ATTACHED LimitCategory = 7 + LIMIT_LIKE_PATTERN_LENGTH LimitCategory = 8 + LIMIT_VARIABLE_NUMBER LimitCategory = 9 + LIMIT_TRIGGER_DEPTH LimitCategory = 10 + LIMIT_WORKER_THREADS LimitCategory = 11 +) + +// AuthorizerActionCode are the integer action codes +// that the authorizer callback may be passed. +// +// https://sqlite.org/c3ref/c_alter_table.html +type AuthorizerActionCode uint32 + +const ( + /***************************************************** 3rd ************ 4th ***********/ + AUTH_CREATE_INDEX AuthorizerActionCode = 1 /* Index Name Table Name */ + AUTH_CREATE_TABLE AuthorizerActionCode = 2 /* Table Name NULL */ + AUTH_CREATE_TEMP_INDEX AuthorizerActionCode = 3 /* Index Name Table Name */ + AUTH_CREATE_TEMP_TABLE AuthorizerActionCode = 4 /* Table Name NULL */ + AUTH_CREATE_TEMP_TRIGGER AuthorizerActionCode = 5 /* Trigger Name Table Name */ + AUTH_CREATE_TEMP_VIEW AuthorizerActionCode = 6 /* View Name NULL */ + AUTH_CREATE_TRIGGER AuthorizerActionCode = 7 /* Trigger Name Table Name */ + AUTH_CREATE_VIEW AuthorizerActionCode = 8 /* View Name NULL */ + AUTH_DELETE AuthorizerActionCode = 9 /* Table Name NULL */ + AUTH_DROP_INDEX AuthorizerActionCode = 10 /* Index Name Table Name */ + AUTH_DROP_TABLE AuthorizerActionCode = 11 /* Table Name NULL */ + AUTH_DROP_TEMP_INDEX AuthorizerActionCode = 12 /* Index Name Table Name */ + AUTH_DROP_TEMP_TABLE AuthorizerActionCode = 13 /* Table Name NULL */ + AUTH_DROP_TEMP_TRIGGER AuthorizerActionCode = 14 /* Trigger Name Table Name */ + AUTH_DROP_TEMP_VIEW AuthorizerActionCode = 15 /* View Name NULL */ + AUTH_DROP_TRIGGER AuthorizerActionCode = 16 /* Trigger Name Table 
Name */ + AUTH_DROP_VIEW AuthorizerActionCode = 17 /* View Name NULL */ + AUTH_INSERT AuthorizerActionCode = 18 /* Table Name NULL */ + AUTH_PRAGMA AuthorizerActionCode = 19 /* Pragma Name 1st arg or NULL */ + AUTH_READ AuthorizerActionCode = 20 /* Table Name Column Name */ + AUTH_SELECT AuthorizerActionCode = 21 /* NULL NULL */ + AUTH_TRANSACTION AuthorizerActionCode = 22 /* Operation NULL */ + AUTH_UPDATE AuthorizerActionCode = 23 /* Table Name Column Name */ + AUTH_ATTACH AuthorizerActionCode = 24 /* Filename NULL */ + AUTH_DETACH AuthorizerActionCode = 25 /* Database Name NULL */ + AUTH_ALTER_TABLE AuthorizerActionCode = 26 /* Database Name Table Name */ + AUTH_REINDEX AuthorizerActionCode = 27 /* Index Name NULL */ + AUTH_ANALYZE AuthorizerActionCode = 28 /* Table Name NULL */ + AUTH_CREATE_VTABLE AuthorizerActionCode = 29 /* Table Name Module Name */ + AUTH_DROP_VTABLE AuthorizerActionCode = 30 /* Table Name Module Name */ + AUTH_FUNCTION AuthorizerActionCode = 31 /* NULL Function Name */ + AUTH_SAVEPOINT AuthorizerActionCode = 32 /* Operation Savepoint Name */ + AUTH_COPY AuthorizerActionCode = 0 /* No longer used */ + AUTH_RECURSIVE AuthorizerActionCode = 33 /* NULL NULL */ +) + +// AuthorizerReturnCode are the integer codes +// that the authorizer callback may return. +// +// https://sqlite.org/c3ref/c_deny.html +type AuthorizerReturnCode uint32 + +const ( + AUTH_OK AuthorizerReturnCode = 0 + AUTH_DENY AuthorizerReturnCode = 1 /* Abort the SQL statement with an error */ + AUTH_IGNORE AuthorizerReturnCode = 2 /* Don't allow access, but don't generate an error */ +) + +// CheckpointMode are all the checkpoint mode values. 
+// +// https://sqlite.org/c3ref/c_checkpoint_full.html +type CheckpointMode uint32 + +const ( + CHECKPOINT_PASSIVE CheckpointMode = 0 /* Do as much as possible w/o blocking */ + CHECKPOINT_FULL CheckpointMode = 1 /* Wait for writers, then checkpoint */ + CHECKPOINT_RESTART CheckpointMode = 2 /* Like FULL but wait for readers */ + CHECKPOINT_TRUNCATE CheckpointMode = 3 /* Like RESTART but also truncate WAL */ +) + +// TxnState are the allowed return values from [Conn.TxnState]. +// +// https://sqlite.org/c3ref/c_txn_none.html +type TxnState uint32 + +const ( + TXN_NONE TxnState = 0 + TXN_READ TxnState = 1 + TXN_WRITE TxnState = 2 +) + +// Datatype is a fundamental datatype of SQLite. +// +// https://sqlite.org/c3ref/c_blob.html +type Datatype uint32 + +const ( + INTEGER Datatype = 1 + FLOAT Datatype = 2 + TEXT Datatype = 3 + BLOB Datatype = 4 + NULL Datatype = 5 +) + +// String implements the [fmt.Stringer] interface. +func (t Datatype) String() string { + const name = "INTEGERFLOATEXTBLOBNULL" + switch t { + case INTEGER: + return name[0:7] + case FLOAT: + return name[7:12] + case TEXT: + return name[11:15] + case BLOB: + return name[15:19] + case NULL: + return name[19:23] + } + return strconv.FormatUint(uint64(t), 10) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/context.go b/vendor/github.com/ncruces/go-sqlite3/context.go new file mode 100644 index 000000000..8d7604c66 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/context.go @@ -0,0 +1,229 @@ +package sqlite3 + +import ( + "encoding/json" + "errors" + "math" + "time" + + "github.com/ncruces/go-sqlite3/internal/util" +) + +// Context is the context in which an SQL function executes. +// An SQLite [Context] is in no way related to a Go [context.Context]. 
+// +// https://sqlite.org/c3ref/context.html +type Context struct { + c *Conn + handle uint32 +} + +// Conn returns the database connection of the +// [Conn.CreateFunction] or [Conn.CreateWindowFunction] +// routines that originally registered the application defined function. +// +// https://sqlite.org/c3ref/context_db_handle.html +func (ctx Context) Conn() *Conn { + return ctx.c +} + +// SetAuxData saves metadata for argument n of the function. +// +// https://sqlite.org/c3ref/get_auxdata.html +func (ctx Context) SetAuxData(n int, data any) { + ptr := util.AddHandle(ctx.c.ctx, data) + ctx.c.call("sqlite3_set_auxdata_go", uint64(ctx.handle), uint64(n), uint64(ptr)) +} + +// GetAuxData returns metadata for argument n of the function. +// +// https://sqlite.org/c3ref/get_auxdata.html +func (ctx Context) GetAuxData(n int) any { + ptr := uint32(ctx.c.call("sqlite3_get_auxdata", uint64(ctx.handle), uint64(n))) + return util.GetHandle(ctx.c.ctx, ptr) +} + +// ResultBool sets the result of the function to a bool. +// SQLite does not have a separate boolean storage class. +// Instead, boolean values are stored as integers 0 (false) and 1 (true). +// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultBool(value bool) { + var i int64 + if value { + i = 1 + } + ctx.ResultInt64(i) +} + +// ResultInt sets the result of the function to an int. +// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultInt(value int) { + ctx.ResultInt64(int64(value)) +} + +// ResultInt64 sets the result of the function to an int64. +// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultInt64(value int64) { + ctx.c.call("sqlite3_result_int64", + uint64(ctx.handle), uint64(value)) +} + +// ResultFloat sets the result of the function to a float64. 
+// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultFloat(value float64) { + ctx.c.call("sqlite3_result_double", + uint64(ctx.handle), math.Float64bits(value)) +} + +// ResultText sets the result of the function to a string. +// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultText(value string) { + ptr := ctx.c.newString(value) + ctx.c.call("sqlite3_result_text64", + uint64(ctx.handle), uint64(ptr), uint64(len(value)), + uint64(ctx.c.freer), _UTF8) +} + +// ResultRawText sets the text result of the function to a []byte. +// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultRawText(value []byte) { + ptr := ctx.c.newBytes(value) + ctx.c.call("sqlite3_result_text64", + uint64(ctx.handle), uint64(ptr), uint64(len(value)), + uint64(ctx.c.freer), _UTF8) +} + +// ResultBlob sets the result of the function to a []byte. +// Returning a nil slice is the same as calling [Context.ResultNull]. +// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultBlob(value []byte) { + ptr := ctx.c.newBytes(value) + ctx.c.call("sqlite3_result_blob64", + uint64(ctx.handle), uint64(ptr), uint64(len(value)), + uint64(ctx.c.freer)) +} + +// ResultZeroBlob sets the result of the function to a zero-filled, length n BLOB. +// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultZeroBlob(n int64) { + ctx.c.call("sqlite3_result_zeroblob64", + uint64(ctx.handle), uint64(n)) +} + +// ResultNull sets the result of the function to NULL. +// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultNull() { + ctx.c.call("sqlite3_result_null", + uint64(ctx.handle)) +} + +// ResultTime sets the result of the function to a [time.Time]. 
+//
+// https://sqlite.org/c3ref/result_blob.html
+func (ctx Context) ResultTime(value time.Time, format TimeFormat) {
+	if format == TimeFormatDefault {
+		ctx.resultRFC3339Nano(value)
+		return
+	}
+	switch v := format.Encode(value).(type) {
+	case string:
+		ctx.ResultText(v)
+	case int64:
+		ctx.ResultInt64(v)
+	case float64:
+		ctx.ResultFloat(v)
+	default:
+		panic(util.AssertErr())
+	}
+}
+
+func (ctx Context) resultRFC3339Nano(value time.Time) {
+	const maxlen = uint64(len(time.RFC3339Nano)) + 5
+
+	ptr := ctx.c.new(maxlen)
+	buf := util.View(ctx.c.mod, ptr, maxlen)
+	buf = value.AppendFormat(buf[:0], time.RFC3339Nano)
+
+	ctx.c.call("sqlite3_result_text64",
+		uint64(ctx.handle), uint64(ptr), uint64(len(buf)),
+		uint64(ctx.c.freer), _UTF8)
+}
+
+// ResultPointer sets the result of the function to NULL, just like [Context.ResultNull],
+// except that it also associates ptr with that NULL value such that it can be retrieved
+// within an application-defined SQL function using [Value.Pointer].
+//
+// https://sqlite.org/c3ref/result_blob.html
+func (ctx Context) ResultPointer(ptr any) {
+	valPtr := util.AddHandle(ctx.c.ctx, ptr)
+	ctx.c.call("sqlite3_result_pointer_go", uint64(ctx.handle), uint64(valPtr))
+}
+
+// ResultJSON sets the result of the function to the JSON encoding of value.
+//
+// https://sqlite.org/c3ref/result_blob.html
+func (ctx Context) ResultJSON(value any) {
+	data, err := json.Marshal(value)
+	if err != nil {
+		ctx.ResultError(err)
+		return
+	}
+	ctx.ResultRawText(data)
+}
+
+// ResultValue sets the result of the function to a copy of [Value].
+//
+// https://sqlite.org/c3ref/result_blob.html
+func (ctx Context) ResultValue(value Value) {
+	if value.c != ctx.c {
+		ctx.ResultError(MISUSE)
+		return
+	}
+	ctx.c.call("sqlite3_result_value",
+		uint64(ctx.handle), uint64(value.handle))
+}
+
+// ResultError sets the result of the function an error.
+// +// https://sqlite.org/c3ref/result_blob.html +func (ctx Context) ResultError(err error) { + if errors.Is(err, NOMEM) { + ctx.c.call("sqlite3_result_error_nomem", uint64(ctx.handle)) + return + } + + if errors.Is(err, TOOBIG) { + ctx.c.call("sqlite3_result_error_toobig", uint64(ctx.handle)) + return + } + + msg, code := errorCode(err, _OK) + if msg != "" { + defer ctx.c.arena.mark()() + ptr := ctx.c.arena.string(msg) + ctx.c.call("sqlite3_result_error", + uint64(ctx.handle), uint64(ptr), uint64(len(msg))) + } + if code != _OK { + ctx.c.call("sqlite3_result_error_code", + uint64(ctx.handle), uint64(code)) + } +} + +// VTabNoChange may return true if a column is being fetched as part +// of an update during which the column value will not change. +// +// https://sqlite.org/c3ref/vtab_nochange.html +func (ctx Context) VTabNoChange() bool { + r := ctx.c.call("sqlite3_vtab_nochange", uint64(ctx.handle)) + return r != 0 +} diff --git a/vendor/github.com/ncruces/go-sqlite3/driver/driver.go b/vendor/github.com/ncruces/go-sqlite3/driver/driver.go new file mode 100644 index 000000000..b496f76ec --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/driver/driver.go @@ -0,0 +1,579 @@ +// Package driver provides a database/sql driver for SQLite. +// +// Importing package driver registers a [database/sql] driver named "sqlite3". +// You may also need to import package embed. +// +// import _ "github.com/ncruces/go-sqlite3/driver" +// import _ "github.com/ncruces/go-sqlite3/embed" +// +// The data source name for "sqlite3" databases can be a filename or a "file:" [URI]. +// +// The [TRANSACTION] mode can be specified using "_txlock": +// +// sql.Open("sqlite3", "file:demo.db?_txlock=immediate") +// +// Possible values are: "deferred", "immediate", "exclusive". +// A [read-only] transaction is always "deferred", regardless of "_txlock". 
+// +// The time encoding/decoding format can be specified using "_timefmt": +// +// sql.Open("sqlite3", "file:demo.db?_timefmt=sqlite") +// +// Possible values are: "auto" (the default), "sqlite", "rfc3339"; +// "auto" encodes as RFC 3339 and decodes any [format] supported by SQLite; +// "sqlite" encodes as SQLite and decodes any [format] supported by SQLite; +// "rfc3339" encodes and decodes RFC 3339 only. +// +// [PRAGMA] statements can be specified using "_pragma": +// +// sql.Open("sqlite3", "file:demo.db?_pragma=busy_timeout(10000)") +// +// If no PRAGMAs are specified, a busy timeout of 1 minute is set. +// +// Order matters: +// busy timeout and locking mode should be the first PRAGMAs set, in that order. +// +// [URI]: https://sqlite.org/uri.html +// [PRAGMA]: https://sqlite.org/pragma.html +// [format]: https://sqlite.org/lang_datefunc.html#time_values +// [TRANSACTION]: https://sqlite.org/lang_transaction.html#deferred_immediate_and_exclusive_transactions +// [read-only]: https://pkg.go.dev/database/sql#TxOptions +package driver + +import ( + "context" + "database/sql" + "database/sql/driver" + "errors" + "fmt" + "io" + "net/url" + "strings" + "time" + "unsafe" + + "github.com/ncruces/go-sqlite3" + "github.com/ncruces/go-sqlite3/internal/util" +) + +// This variable can be replaced with -ldflags: +// +// go build -ldflags="-X github.com/ncruces/go-sqlite3/driver.driverName=sqlite" +var driverName = "sqlite3" + +func init() { + if driverName != "" { + sql.Register(driverName, &SQLite{}) + } +} + +// Open opens the SQLite database specified by dataSourceName as a [database/sql.DB]. +// +// The init function is called by the driver on new connections. +// The [sqlite3.Conn] can be used to execute queries, register functions, etc. +// Any error returned closes the connection and is returned to [database/sql]. 
+func Open(dataSourceName string, init func(*sqlite3.Conn) error) (*sql.DB, error) { + c, err := (&SQLite{Init: init}).OpenConnector(dataSourceName) + if err != nil { + return nil, err + } + return sql.OpenDB(c), nil +} + +// SQLite implements [database/sql/driver.Driver]. +type SQLite struct { + // Init function is called by the driver on new connections. + // The [sqlite3.Conn] can be used to execute queries, register functions, etc. + // Any error returned closes the connection and is returned to [database/sql]. + Init func(*sqlite3.Conn) error +} + +// Open implements [database/sql/driver.Driver]. +func (d *SQLite) Open(name string) (driver.Conn, error) { + c, err := d.newConnector(name) + if err != nil { + return nil, err + } + return c.Connect(context.Background()) +} + +// OpenConnector implements [database/sql/driver.DriverContext]. +func (d *SQLite) OpenConnector(name string) (driver.Connector, error) { + return d.newConnector(name) +} + +func (d *SQLite) newConnector(name string) (*connector, error) { + c := connector{driver: d, name: name} + + var txlock, timefmt string + if strings.HasPrefix(name, "file:") { + if _, after, ok := strings.Cut(name, "?"); ok { + query, err := url.ParseQuery(after) + if err != nil { + return nil, err + } + txlock = query.Get("_txlock") + timefmt = query.Get("_timefmt") + c.pragmas = query.Has("_pragma") + } + } + + switch txlock { + case "": + c.txBegin = "BEGIN" + case "deferred", "immediate", "exclusive": + c.txBegin = "BEGIN " + txlock + default: + return nil, fmt.Errorf("sqlite3: invalid _txlock: %s", txlock) + } + + switch timefmt { + case "": + c.tmRead = sqlite3.TimeFormatAuto + c.tmWrite = sqlite3.TimeFormatDefault + case "sqlite": + c.tmRead = sqlite3.TimeFormatAuto + c.tmWrite = sqlite3.TimeFormat3 + case "rfc3339": + c.tmRead = sqlite3.TimeFormatDefault + c.tmWrite = sqlite3.TimeFormatDefault + default: + c.tmRead = sqlite3.TimeFormat(timefmt) + c.tmWrite = sqlite3.TimeFormat(timefmt) + } + return &c, nil +} + 
+type connector struct { + driver *SQLite + name string + txBegin string + tmRead sqlite3.TimeFormat + tmWrite sqlite3.TimeFormat + pragmas bool +} + +func (n *connector) Driver() driver.Driver { + return n.driver +} + +func (n *connector) Connect(ctx context.Context) (_ driver.Conn, err error) { + c := &conn{ + txBegin: n.txBegin, + tmRead: n.tmRead, + tmWrite: n.tmWrite, + } + + c.Conn, err = sqlite3.Open(n.name) + if err != nil { + return nil, err + } + defer func() { + if err != nil { + c.Close() + } + }() + + old := c.Conn.SetInterrupt(ctx) + defer c.Conn.SetInterrupt(old) + + if !n.pragmas { + err = c.Conn.BusyTimeout(60 * time.Second) + if err != nil { + return nil, err + } + } + if n.driver.Init != nil { + err = n.driver.Init(c.Conn) + if err != nil { + return nil, err + } + } + if n.pragmas || n.driver.Init != nil { + s, _, err := c.Conn.Prepare(`PRAGMA query_only`) + if err != nil { + return nil, err + } + if s.Step() && s.ColumnBool(0) { + c.readOnly = '1' + } else { + c.readOnly = '0' + } + err = s.Close() + if err != nil { + return nil, err + } + } + return c, nil +} + +type conn struct { + *sqlite3.Conn + txBegin string + txCommit string + txRollback string + tmRead sqlite3.TimeFormat + tmWrite sqlite3.TimeFormat + readOnly byte +} + +var ( + // Ensure these interfaces are implemented: + _ driver.ConnPrepareContext = &conn{} + _ driver.ExecerContext = &conn{} + _ driver.ConnBeginTx = &conn{} + _ sqlite3.DriverConn = &conn{} +) + +func (c *conn) Raw() *sqlite3.Conn { + return c.Conn +} + +func (c *conn) Begin() (driver.Tx, error) { + return c.BeginTx(context.Background(), driver.TxOptions{}) +} + +func (c *conn) BeginTx(ctx context.Context, opts driver.TxOptions) (driver.Tx, error) { + txBegin := c.txBegin + c.txCommit = `COMMIT` + c.txRollback = `ROLLBACK` + + if opts.ReadOnly { + txBegin = ` + BEGIN deferred; + PRAGMA query_only=on` + c.txRollback = ` + ROLLBACK; + PRAGMA query_only=` + string(c.readOnly) + c.txCommit = c.txRollback + } + + switch 
opts.Isolation { + default: + return nil, util.IsolationErr + case + driver.IsolationLevel(sql.LevelDefault), + driver.IsolationLevel(sql.LevelSerializable): + break + } + + old := c.Conn.SetInterrupt(ctx) + defer c.Conn.SetInterrupt(old) + + err := c.Conn.Exec(txBegin) + if err != nil { + return nil, err + } + return c, nil +} + +func (c *conn) Commit() error { + err := c.Conn.Exec(c.txCommit) + if err != nil && !c.Conn.GetAutocommit() { + c.Rollback() + } + return err +} + +func (c *conn) Rollback() error { + err := c.Conn.Exec(c.txRollback) + if errors.Is(err, sqlite3.INTERRUPT) { + old := c.Conn.SetInterrupt(context.Background()) + defer c.Conn.SetInterrupt(old) + err = c.Conn.Exec(c.txRollback) + } + return err +} + +func (c *conn) Prepare(query string) (driver.Stmt, error) { + return c.PrepareContext(context.Background(), query) +} + +func (c *conn) PrepareContext(ctx context.Context, query string) (driver.Stmt, error) { + old := c.Conn.SetInterrupt(ctx) + defer c.Conn.SetInterrupt(old) + + s, tail, err := c.Conn.Prepare(query) + if err != nil { + return nil, err + } + if tail != "" { + s.Close() + return nil, util.TailErr + } + return &stmt{Stmt: s, tmRead: c.tmRead, tmWrite: c.tmWrite}, nil +} + +func (c *conn) ExecContext(ctx context.Context, query string, args []driver.NamedValue) (driver.Result, error) { + if len(args) != 0 { + // Slow path. + return nil, driver.ErrSkip + } + + if savept, ok := ctx.(*saveptCtx); ok { + // Called from driver.Savepoint. 
+ savept.Savepoint = c.Conn.Savepoint() + return resultRowsAffected(0), nil + } + + old := c.Conn.SetInterrupt(ctx) + defer c.Conn.SetInterrupt(old) + + err := c.Conn.Exec(query) + if err != nil { + return nil, err + } + + return newResult(c.Conn), nil +} + +func (c *conn) CheckNamedValue(arg *driver.NamedValue) error { + return nil +} + +type stmt struct { + *sqlite3.Stmt + tmWrite sqlite3.TimeFormat + tmRead sqlite3.TimeFormat +} + +var ( + // Ensure these interfaces are implemented: + _ driver.StmtExecContext = &stmt{} + _ driver.StmtQueryContext = &stmt{} + _ driver.NamedValueChecker = &stmt{} +) + +func (s *stmt) NumInput() int { + n := s.Stmt.BindCount() + for i := 1; i <= n; i++ { + if s.Stmt.BindName(i) != "" { + return -1 + } + } + return n +} + +// Deprecated: use ExecContext instead. +func (s *stmt) Exec(args []driver.Value) (driver.Result, error) { + return s.ExecContext(context.Background(), namedValues(args)) +} + +// Deprecated: use QueryContext instead. +func (s *stmt) Query(args []driver.Value) (driver.Rows, error) { + return s.QueryContext(context.Background(), namedValues(args)) +} + +func (s *stmt) ExecContext(ctx context.Context, args []driver.NamedValue) (driver.Result, error) { + err := s.setupBindings(args) + if err != nil { + return nil, err + } + + old := s.Stmt.Conn().SetInterrupt(ctx) + defer s.Stmt.Conn().SetInterrupt(old) + + err = s.Stmt.Exec() + if err != nil { + return nil, err + } + + return newResult(s.Stmt.Conn()), nil +} + +func (s *stmt) QueryContext(ctx context.Context, args []driver.NamedValue) (driver.Rows, error) { + err := s.setupBindings(args) + if err != nil { + return nil, err + } + return &rows{ctx: ctx, stmt: s}, nil +} + +func (s *stmt) setupBindings(args []driver.NamedValue) error { + err := s.Stmt.ClearBindings() + if err != nil { + return err + } + + var ids [3]int + for _, arg := range args { + ids := ids[:0] + if arg.Name == "" { + ids = append(ids, arg.Ordinal) + } else { + for _, prefix := range []string{":", 
"@", "$"} { + if id := s.Stmt.BindIndex(prefix + arg.Name); id != 0 { + ids = append(ids, id) + } + } + } + + for _, id := range ids { + switch a := arg.Value.(type) { + case bool: + err = s.Stmt.BindBool(id, a) + case int: + err = s.Stmt.BindInt(id, a) + case int64: + err = s.Stmt.BindInt64(id, a) + case float64: + err = s.Stmt.BindFloat(id, a) + case string: + err = s.Stmt.BindText(id, a) + case []byte: + err = s.Stmt.BindBlob(id, a) + case sqlite3.ZeroBlob: + err = s.Stmt.BindZeroBlob(id, int64(a)) + case time.Time: + err = s.Stmt.BindTime(id, a, s.tmWrite) + case util.JSON: + err = s.Stmt.BindJSON(id, a.Value) + case util.PointerUnwrap: + err = s.Stmt.BindPointer(id, util.UnwrapPointer(a)) + case nil: + err = s.Stmt.BindNull(id) + default: + panic(util.AssertErr()) + } + } + if err != nil { + return err + } + } + return nil +} + +func (s *stmt) CheckNamedValue(arg *driver.NamedValue) error { + switch arg.Value.(type) { + case bool, int, int64, float64, string, []byte, + time.Time, sqlite3.ZeroBlob, + util.JSON, util.PointerUnwrap, + nil: + return nil + default: + return driver.ErrSkip + } +} + +func newResult(c *sqlite3.Conn) driver.Result { + rows := c.Changes() + if rows != 0 { + id := c.LastInsertRowID() + if id != 0 { + return result{id, rows} + } + } + return resultRowsAffected(rows) +} + +type result struct{ lastInsertId, rowsAffected int64 } + +func (r result) LastInsertId() (int64, error) { + return r.lastInsertId, nil +} + +func (r result) RowsAffected() (int64, error) { + return r.rowsAffected, nil +} + +type resultRowsAffected int64 + +func (r resultRowsAffected) LastInsertId() (int64, error) { + return 0, nil +} + +func (r resultRowsAffected) RowsAffected() (int64, error) { + return int64(r), nil +} + +type rows struct { + ctx context.Context + *stmt + names []string + types []string +} + +func (r *rows) Close() error { + r.Stmt.ClearBindings() + return r.Stmt.Reset() +} + +func (r *rows) Columns() []string { + if r.names == nil { + count := 
r.Stmt.ColumnCount() + r.names = make([]string, count) + for i := range r.names { + r.names[i] = r.Stmt.ColumnName(i) + } + } + return r.names +} + +func (r *rows) declType(index int) string { + if r.types == nil { + count := r.Stmt.ColumnCount() + r.types = make([]string, count) + for i := range r.types { + r.types[i] = strings.ToUpper(r.Stmt.ColumnDeclType(i)) + } + } + return r.types[index] +} + +func (r *rows) ColumnTypeDatabaseTypeName(index int) string { + decltype := r.declType(index) + if len := len(decltype); len > 0 && decltype[len-1] == ')' { + if i := strings.LastIndexByte(decltype, '('); i >= 0 { + decltype = decltype[:i] + } + } + return strings.TrimSpace(decltype) +} + +func (r *rows) Next(dest []driver.Value) error { + old := r.Stmt.Conn().SetInterrupt(r.ctx) + defer r.Stmt.Conn().SetInterrupt(old) + + if !r.Stmt.Step() { + if err := r.Stmt.Err(); err != nil { + return err + } + return io.EOF + } + + data := unsafe.Slice((*any)(unsafe.SliceData(dest)), len(dest)) + err := r.Stmt.Columns(data) + for i := range dest { + if t, ok := r.decodeTime(i, dest[i]); ok { + dest[i] = t + continue + } + if s, ok := dest[i].(string); ok { + t, ok := maybeTime(s) + if ok { + dest[i] = t + } + } + } + return err +} + +func (r *rows) decodeTime(i int, v any) (_ time.Time, _ bool) { + if r.tmRead == sqlite3.TimeFormatDefault { + return + } + switch r.declType(i) { + case "DATE", "TIME", "DATETIME", "TIMESTAMP": + // maybe + default: + return + } + switch v.(type) { + case int64, float64, string: + // maybe + default: + return + } + t, err := r.tmRead.Decode(v) + return t, err == nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/driver/savepoint.go b/vendor/github.com/ncruces/go-sqlite3/driver/savepoint.go new file mode 100644 index 000000000..60aa6b991 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/driver/savepoint.go @@ -0,0 +1,27 @@ +package driver + +import ( + "database/sql" + "time" + + "github.com/ncruces/go-sqlite3" +) + +// Savepoint 
establishes a new transaction savepoint. +// +// https://sqlite.org/lang_savepoint.html +func Savepoint(tx *sql.Tx) sqlite3.Savepoint { + var ctx saveptCtx + tx.ExecContext(&ctx, "") + return ctx.Savepoint +} + +type saveptCtx struct{ sqlite3.Savepoint } + +func (*saveptCtx) Deadline() (deadline time.Time, ok bool) { return } + +func (*saveptCtx) Done() <-chan struct{} { return nil } + +func (*saveptCtx) Err() error { return nil } + +func (*saveptCtx) Value(key any) any { return nil } diff --git a/vendor/github.com/ncruces/go-sqlite3/driver/time.go b/vendor/github.com/ncruces/go-sqlite3/driver/time.go new file mode 100644 index 000000000..630a5b10b --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/driver/time.go @@ -0,0 +1,31 @@ +package driver + +import ( + "time" +) + +// Convert a string in [time.RFC3339Nano] format into a [time.Time] +// if it roundtrips back to the same string. +// This way times can be persisted to, and recovered from, the database, +// but if a string is needed, [database/sql] will recover the same string. +func maybeTime(text string) (_ time.Time, _ bool) { + // Weed out (some) values that can't possibly be + // [time.RFC3339Nano] timestamps. + if len(text) < len("2006-01-02T15:04:05Z") { + return + } + if len(text) > len(time.RFC3339Nano) { + return + } + if text[4] != '-' || text[10] != 'T' || text[16] != ':' { + return + } + + // Slow path. 
+ var buf [len(time.RFC3339Nano)]byte + date, err := time.Parse(time.RFC3339Nano, text) + if err == nil && text == string(date.AppendFormat(buf[:0], time.RFC3339Nano)) { + return date, true + } + return +} diff --git a/vendor/github.com/ncruces/go-sqlite3/driver/util.go b/vendor/github.com/ncruces/go-sqlite3/driver/util.go new file mode 100644 index 000000000..033841157 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/driver/util.go @@ -0,0 +1,14 @@ +package driver + +import "database/sql/driver" + +func namedValues(args []driver.Value) []driver.NamedValue { + named := make([]driver.NamedValue, len(args)) + for i, v := range args { + named[i] = driver.NamedValue{ + Ordinal: i + 1, + Value: v, + } + } + return named +} diff --git a/vendor/github.com/ncruces/go-sqlite3/embed/README.md b/vendor/github.com/ncruces/go-sqlite3/embed/README.md new file mode 100644 index 000000000..400fe870a --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/embed/README.md @@ -0,0 +1,27 @@ +# Embeddable Wasm build of SQLite + +This folder includes an embeddable Wasm build of SQLite 3.46.0 for use with +[`github.com/ncruces/go-sqlite3`](https://pkg.go.dev/github.com/ncruces/go-sqlite3). 
+ +The following optional features are compiled in: +- [math functions](https://sqlite.org/lang_mathfunc.html) +- [FTS5](https://sqlite.org/fts5.html) +- [JSON](https://sqlite.org/json1.html) +- [R*Tree](https://sqlite.org/rtree.html) +- [GeoPoly](https://sqlite.org/geopoly.html) +- [soundex](https://sqlite.org/lang_corefunc.html#soundex) +- [stat4](https://sqlite.org/compile.html#enable_stat4) +- [base64](https://github.com/sqlite/sqlite/blob/master/ext/misc/base64.c) +- [decimal](https://github.com/sqlite/sqlite/blob/master/ext/misc/decimal.c) +- [ieee754](https://github.com/sqlite/sqlite/blob/master/ext/misc/ieee754.c) +- [regexp](https://github.com/sqlite/sqlite/blob/master/ext/misc/regexp.c) +- [series](https://github.com/sqlite/sqlite/blob/master/ext/misc/series.c) +- [uint](https://github.com/sqlite/sqlite/blob/master/ext/misc/uint.c) +- [uuid](https://github.com/sqlite/sqlite/blob/master/ext/misc/uuid.c) +- [time](../sqlite3/time.c) + +See the [configuration options](../sqlite3/sqlite_cfg.h), +and [patches](../sqlite3) applied. + +Built using [`wasi-sdk`](https://github.com/WebAssembly/wasi-sdk), +and [`binaryen`](https://github.com/WebAssembly/binaryen). 
\ No newline at end of file diff --git a/vendor/github.com/ncruces/go-sqlite3/embed/build.sh b/vendor/github.com/ncruces/go-sqlite3/embed/build.sh new file mode 100644 index 000000000..abe5e60c4 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/embed/build.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd -P -- "$(dirname -- "$0")" + +ROOT=../ +BINARYEN="$ROOT/tools/binaryen-version_117/bin" +WASI_SDK="$ROOT/tools/wasi-sdk-22.0/bin" + +"$WASI_SDK/clang" --target=wasm32-wasi -std=c17 -flto -g0 -O2 \ + -Wall -Wextra -Wno-unused-parameter -Wno-unused-function \ + -o sqlite3.wasm "$ROOT/sqlite3/main.c" \ + -I"$ROOT/sqlite3" \ + -mexec-model=reactor \ + -msimd128 -mmutable-globals \ + -mbulk-memory -mreference-types \ + -mnontrapping-fptoint -msign-ext \ + -fno-stack-protector -fno-stack-clash-protection \ + -Wl,--initial-memory=327680 \ + -Wl,--stack-first \ + -Wl,--import-undefined \ + -D_HAVE_SQLITE_CONFIG_H \ + -DSQLITE_CUSTOM_INCLUDE=sqlite_opt.h \ + $(awk '{print "-Wl,--export="$0}' exports.txt) + +trap 'rm -f sqlite3.tmp' EXIT +"$BINARYEN/wasm-ctor-eval" -g -c _initialize sqlite3.wasm -o sqlite3.tmp +"$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ + sqlite3.tmp -o sqlite3.wasm \ + --enable-simd --enable-mutable-globals --enable-multivalue \ + --enable-bulk-memory --enable-reference-types \ + --enable-nontrapping-float-to-int --enable-sign-ext \ No newline at end of file diff --git a/vendor/github.com/ncruces/go-sqlite3/embed/exports.txt b/vendor/github.com/ncruces/go-sqlite3/embed/exports.txt new file mode 100644 index 000000000..b3cb1581c --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/embed/exports.txt @@ -0,0 +1,130 @@ +aligned_alloc +free +malloc +malloc_destructor +sqlite3_anycollseq_init +sqlite3_autovacuum_pages_go +sqlite3_backup_finish +sqlite3_backup_init +sqlite3_backup_pagecount +sqlite3_backup_remaining +sqlite3_backup_step +sqlite3_bind_blob64 +sqlite3_bind_double +sqlite3_bind_int64 +sqlite3_bind_null 
+sqlite3_bind_parameter_count +sqlite3_bind_parameter_index +sqlite3_bind_parameter_name +sqlite3_bind_pointer_go +sqlite3_bind_text64 +sqlite3_bind_value +sqlite3_bind_zeroblob64 +sqlite3_blob_bytes +sqlite3_blob_close +sqlite3_blob_open +sqlite3_blob_read +sqlite3_blob_reopen +sqlite3_blob_write +sqlite3_busy_handler_go +sqlite3_busy_timeout +sqlite3_changes64 +sqlite3_clear_bindings +sqlite3_close +sqlite3_close_v2 +sqlite3_collation_needed_go +sqlite3_column_blob +sqlite3_column_bytes +sqlite3_column_count +sqlite3_column_database_name +sqlite3_column_decltype +sqlite3_column_double +sqlite3_column_int64 +sqlite3_column_name +sqlite3_column_origin_name +sqlite3_column_table_name +sqlite3_column_text +sqlite3_column_type +sqlite3_column_value +sqlite3_columns_go +sqlite3_commit_hook_go +sqlite3_config_log_go +sqlite3_create_aggregate_function_go +sqlite3_create_collation_go +sqlite3_create_function_go +sqlite3_create_module_go +sqlite3_create_window_function_go +sqlite3_database_file_object +sqlite3_db_config +sqlite3_db_filename +sqlite3_db_name +sqlite3_db_readonly +sqlite3_db_release_memory +sqlite3_declare_vtab +sqlite3_errcode +sqlite3_errmsg +sqlite3_error_offset +sqlite3_errstr +sqlite3_exec +sqlite3_filename_database +sqlite3_filename_journal +sqlite3_filename_wal +sqlite3_finalize +sqlite3_get_autocommit +sqlite3_get_auxdata +sqlite3_interrupt +sqlite3_last_insert_rowid +sqlite3_limit +sqlite3_open_v2 +sqlite3_overload_function +sqlite3_prepare_v3 +sqlite3_progress_handler_go +sqlite3_reset +sqlite3_result_blob64 +sqlite3_result_double +sqlite3_result_error +sqlite3_result_error_code +sqlite3_result_error_nomem +sqlite3_result_error_toobig +sqlite3_result_int64 +sqlite3_result_null +sqlite3_result_pointer_go +sqlite3_result_text64 +sqlite3_result_value +sqlite3_result_zeroblob64 +sqlite3_rollback_hook_go +sqlite3_set_authorizer_go +sqlite3_set_auxdata_go +sqlite3_set_last_insert_rowid +sqlite3_step +sqlite3_stmt_busy +sqlite3_stmt_readonly 
+sqlite3_stmt_status +sqlite3_total_changes64 +sqlite3_txn_state +sqlite3_update_hook_go +sqlite3_uri_key +sqlite3_uri_parameter +sqlite3_value_blob +sqlite3_value_bytes +sqlite3_value_double +sqlite3_value_dup +sqlite3_value_free +sqlite3_value_int64 +sqlite3_value_nochange +sqlite3_value_numeric_type +sqlite3_value_pointer_go +sqlite3_value_text +sqlite3_value_type +sqlite3_vtab_collation +sqlite3_vtab_config_go +sqlite3_vtab_distinct +sqlite3_vtab_in +sqlite3_vtab_in_first +sqlite3_vtab_in_next +sqlite3_vtab_nochange +sqlite3_vtab_on_conflict +sqlite3_vtab_rhs_value +sqlite3_wal_autocheckpoint +sqlite3_wal_checkpoint_v2 +sqlite3_wal_hook_go \ No newline at end of file diff --git a/vendor/github.com/ncruces/go-sqlite3/embed/init.go b/vendor/github.com/ncruces/go-sqlite3/embed/init.go new file mode 100644 index 000000000..da527abd0 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/embed/init.go @@ -0,0 +1,20 @@ +// Package embed embeds SQLite into your application. +// +// Importing package embed initializes the [sqlite3.Binary] variable +// with an appropriate build of SQLite: +// +// import _ "github.com/ncruces/go-sqlite3/embed" +package embed + +import ( + _ "embed" + + "github.com/ncruces/go-sqlite3" +) + +//go:embed sqlite3.wasm +var binary []byte + +func init() { + sqlite3.Binary = binary +} diff --git a/vendor/github.com/ncruces/go-sqlite3/embed/sqlite3.wasm b/vendor/github.com/ncruces/go-sqlite3/embed/sqlite3.wasm new file mode 100644 index 000000000..2689f773a Binary files /dev/null and b/vendor/github.com/ncruces/go-sqlite3/embed/sqlite3.wasm differ diff --git a/vendor/github.com/ncruces/go-sqlite3/error.go b/vendor/github.com/ncruces/go-sqlite3/error.go new file mode 100644 index 000000000..71238ef12 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/error.go @@ -0,0 +1,162 @@ +package sqlite3 + +import ( + "errors" + "strconv" + "strings" + + "github.com/ncruces/go-sqlite3/internal/util" +) + +// Error wraps an SQLite Error Code. 
+// +// https://sqlite.org/c3ref/errcode.html +type Error struct { + str string + msg string + sql string + code uint64 +} + +// Code returns the primary error code for this error. +// +// https://sqlite.org/rescode.html +func (e *Error) Code() ErrorCode { + return ErrorCode(e.code) +} + +// ExtendedCode returns the extended error code for this error. +// +// https://sqlite.org/rescode.html +func (e *Error) ExtendedCode() ExtendedErrorCode { + return ExtendedErrorCode(e.code) +} + +// Error implements the error interface. +func (e *Error) Error() string { + var b strings.Builder + b.WriteString("sqlite3: ") + + if e.str != "" { + b.WriteString(e.str) + } else { + b.WriteString(strconv.Itoa(int(e.code))) + } + + if e.msg != "" { + b.WriteString(": ") + b.WriteString(e.msg) + } + + return b.String() +} + +// Is tests whether this error matches a given [ErrorCode] or [ExtendedErrorCode]. +// +// It makes it possible to do: +// +// if errors.Is(err, sqlite3.BUSY) { +// // ... handle BUSY +// } +func (e *Error) Is(err error) bool { + switch c := err.(type) { + case ErrorCode: + return c == e.Code() + case ExtendedErrorCode: + return c == e.ExtendedCode() + } + return false +} + +// As converts this error to an [ErrorCode] or [ExtendedErrorCode]. +func (e *Error) As(err any) bool { + switch c := err.(type) { + case *ErrorCode: + *c = e.Code() + return true + case *ExtendedErrorCode: + *c = e.ExtendedCode() + return true + } + return false +} + +// Temporary returns true for [BUSY] errors. +func (e *Error) Temporary() bool { + return e.Code() == BUSY +} + +// Timeout returns true for [BUSY_TIMEOUT] errors. +func (e *Error) Timeout() bool { + return e.ExtendedCode() == BUSY_TIMEOUT +} + +// SQL returns the SQL starting at the token that triggered a syntax error. +func (e *Error) SQL() string { + return e.sql +} + +// Error implements the error interface. 
+func (e ErrorCode) Error() string { + return util.ErrorCodeString(uint32(e)) +} + +// Temporary returns true for [BUSY] errors. +func (e ErrorCode) Temporary() bool { + return e == BUSY +} + +// Error implements the error interface. +func (e ExtendedErrorCode) Error() string { + return util.ErrorCodeString(uint32(e)) +} + +// Is tests whether this error matches a given [ErrorCode]. +func (e ExtendedErrorCode) Is(err error) bool { + c, ok := err.(ErrorCode) + return ok && c == ErrorCode(e) +} + +// As converts this error to an [ErrorCode]. +func (e ExtendedErrorCode) As(err any) bool { + c, ok := err.(*ErrorCode) + if ok { + *c = ErrorCode(e) + } + return ok +} + +// Temporary returns true for [BUSY] errors. +func (e ExtendedErrorCode) Temporary() bool { + return ErrorCode(e) == BUSY +} + +// Timeout returns true for [BUSY_TIMEOUT] errors. +func (e ExtendedErrorCode) Timeout() bool { + return e == BUSY_TIMEOUT +} + +func errorCode(err error, def ErrorCode) (msg string, code uint32) { + switch code := err.(type) { + case nil: + return "", _OK + case ErrorCode: + return "", uint32(code) + case xErrorCode: + return "", uint32(code) + case *Error: + return code.msg, uint32(code.code) + } + + var ecode ErrorCode + var xcode xErrorCode + switch { + case errors.As(err, &xcode): + code = uint32(xcode) + case errors.As(err, &ecode): + code = uint32(ecode) + default: + code = uint32(def) + } + return err.Error(), code +} diff --git a/vendor/github.com/ncruces/go-sqlite3/func.go b/vendor/github.com/ncruces/go-sqlite3/func.go new file mode 100644 index 000000000..255584a43 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/func.go @@ -0,0 +1,214 @@ +package sqlite3 + +import ( + "context" + "sync" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/tetratelabs/wazero/api" +) + +// CollationNeeded registers a callback to be invoked +// whenever an unknown collation sequence is required. 
+// +// https://sqlite.org/c3ref/collation_needed.html +func (c *Conn) CollationNeeded(cb func(db *Conn, name string)) error { + var enable uint64 + if cb != nil { + enable = 1 + } + r := c.call("sqlite3_collation_needed_go", uint64(c.handle), enable) + if err := c.error(r); err != nil { + return err + } + c.collation = cb + return nil +} + +// AnyCollationNeeded uses [Conn.CollationNeeded] to register +// a fake collating function for any unknown collating sequence. +// The fake collating function works like BINARY. +// +// This can be used to load schemas that contain +// one or more unknown collating sequences. +func (c *Conn) AnyCollationNeeded() { + c.call("sqlite3_anycollseq_init", uint64(c.handle), 0, 0) +} + +// CreateCollation defines a new collating sequence. +// +// https://sqlite.org/c3ref/create_collation.html +func (c *Conn) CreateCollation(name string, fn func(a, b []byte) int) error { + defer c.arena.mark()() + namePtr := c.arena.string(name) + funcPtr := util.AddHandle(c.ctx, fn) + r := c.call("sqlite3_create_collation_go", + uint64(c.handle), uint64(namePtr), uint64(funcPtr)) + return c.error(r) +} + +// CreateFunction defines a new scalar SQL function. +// +// https://sqlite.org/c3ref/create_function.html +func (c *Conn) CreateFunction(name string, nArg int, flag FunctionFlag, fn ScalarFunction) error { + defer c.arena.mark()() + namePtr := c.arena.string(name) + funcPtr := util.AddHandle(c.ctx, fn) + r := c.call("sqlite3_create_function_go", + uint64(c.handle), uint64(namePtr), uint64(nArg), + uint64(flag), uint64(funcPtr)) + return c.error(r) +} + +// ScalarFunction is the type of a scalar SQL function. +// Implementations must not retain arg. +type ScalarFunction func(ctx Context, arg ...Value) + +// CreateWindowFunction defines a new aggregate or aggregate window SQL function. +// If fn returns a [WindowFunction], then an aggregate window function is created. +// If fn returns an [io.Closer], it will be called to free resources. 
+// +// https://sqlite.org/c3ref/create_function.html +func (c *Conn) CreateWindowFunction(name string, nArg int, flag FunctionFlag, fn func() AggregateFunction) error { + defer c.arena.mark()() + call := "sqlite3_create_aggregate_function_go" + namePtr := c.arena.string(name) + funcPtr := util.AddHandle(c.ctx, fn) + if _, ok := fn().(WindowFunction); ok { + call = "sqlite3_create_window_function_go" + } + r := c.call(call, + uint64(c.handle), uint64(namePtr), uint64(nArg), + uint64(flag), uint64(funcPtr)) + return c.error(r) +} + +// AggregateFunction is the interface an aggregate function should implement. +// +// https://sqlite.org/appfunc.html +type AggregateFunction interface { + // Step is invoked to add a row to the current window. + // The function arguments, if any, corresponding to the row being added, are passed to Step. + // Implementations must not retain arg. + Step(ctx Context, arg ...Value) + + // Value is invoked to return the current (or final) value of the aggregate. + Value(ctx Context) +} + +// WindowFunction is the interface an aggregate window function should implement. +// +// https://sqlite.org/windowfunctions.html +type WindowFunction interface { + AggregateFunction + + // Inverse is invoked to remove the oldest presently aggregated result of Step from the current window. + // The function arguments, if any, are those passed to Step for the row being removed. + // Implementations must not retain arg. + Inverse(ctx Context, arg ...Value) +} + +// OverloadFunction overloads a function for a virtual table. 
+// +// https://sqlite.org/c3ref/overload_function.html +func (c *Conn) OverloadFunction(name string, nArg int) error { + defer c.arena.mark()() + namePtr := c.arena.string(name) + r := c.call("sqlite3_overload_function", + uint64(c.handle), uint64(namePtr), uint64(nArg)) + return c.error(r) +} + +func destroyCallback(ctx context.Context, mod api.Module, pApp uint32) { + util.DelHandle(ctx, pApp) +} + +func collationCallback(ctx context.Context, mod api.Module, pArg, pDB, eTextRep, zName uint32) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.collation != nil { + name := util.ReadString(mod, zName, _MAX_NAME) + c.collation(c, name) + } +} + +func compareCallback(ctx context.Context, mod api.Module, pApp, nKey1, pKey1, nKey2, pKey2 uint32) uint32 { + fn := util.GetHandle(ctx, pApp).(func(a, b []byte) int) + return uint32(fn(util.View(mod, pKey1, uint64(nKey1)), util.View(mod, pKey2, uint64(nKey2)))) +} + +func funcCallback(ctx context.Context, mod api.Module, pCtx, pApp, nArg, pArg uint32) { + args := getFuncArgs() + defer putFuncArgs(args) + db := ctx.Value(connKey{}).(*Conn) + fn := util.GetHandle(db.ctx, pApp).(ScalarFunction) + callbackArgs(db, args[:nArg], pArg) + fn(Context{db, pCtx}, args[:nArg]...) +} + +func stepCallback(ctx context.Context, mod api.Module, pCtx, pAgg, pApp, nArg, pArg uint32) { + args := getFuncArgs() + defer putFuncArgs(args) + db := ctx.Value(connKey{}).(*Conn) + callbackArgs(db, args[:nArg], pArg) + fn, _ := callbackAggregate(db, pAgg, pApp) + fn.Step(Context{db, pCtx}, args[:nArg]...) 
+} + +func finalCallback(ctx context.Context, mod api.Module, pCtx, pAgg, pApp uint32) { + db := ctx.Value(connKey{}).(*Conn) + fn, handle := callbackAggregate(db, pAgg, pApp) + fn.Value(Context{db, pCtx}) + util.DelHandle(ctx, handle) +} + +func valueCallback(ctx context.Context, mod api.Module, pCtx, pAgg uint32) { + db := ctx.Value(connKey{}).(*Conn) + fn := util.GetHandle(db.ctx, pAgg).(AggregateFunction) + fn.Value(Context{db, pCtx}) +} + +func inverseCallback(ctx context.Context, mod api.Module, pCtx, pAgg, nArg, pArg uint32) { + args := getFuncArgs() + defer putFuncArgs(args) + db := ctx.Value(connKey{}).(*Conn) + callbackArgs(db, args[:nArg], pArg) + fn := util.GetHandle(db.ctx, pAgg).(WindowFunction) + fn.Inverse(Context{db, pCtx}, args[:nArg]...) +} + +func callbackAggregate(db *Conn, pAgg, pApp uint32) (AggregateFunction, uint32) { + if pApp == 0 { + handle := util.ReadUint32(db.mod, pAgg) + return util.GetHandle(db.ctx, handle).(AggregateFunction), handle + } + + // We need to create the aggregate. + fn := util.GetHandle(db.ctx, pApp).(func() AggregateFunction)() + handle := util.AddHandle(db.ctx, fn) + if pAgg != 0 { + util.WriteUint32(db.mod, pAgg, handle) + } + return fn, handle +} + +func callbackArgs(db *Conn, arg []Value, pArg uint32) { + for i := range arg { + arg[i] = Value{ + c: db, + handle: util.ReadUint32(db.mod, pArg+ptrlen*uint32(i)), + } + } +} + +var funcArgsPool sync.Pool + +func putFuncArgs(p *[_MAX_FUNCTION_ARG]Value) { + funcArgsPool.Put(p) +} + +func getFuncArgs() *[_MAX_FUNCTION_ARG]Value { + if p := funcArgsPool.Get(); p == nil { + return new([_MAX_FUNCTION_ARG]Value) + } else { + return p.(*[_MAX_FUNCTION_ARG]Value) + } +} diff --git a/vendor/github.com/ncruces/go-sqlite3/go.work b/vendor/github.com/ncruces/go-sqlite3/go.work new file mode 100644 index 000000000..18e378592 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/go.work @@ -0,0 +1,6 @@ +go 1.21 + +use ( + . 
+ ./gormlite +) diff --git a/vendor/github.com/ncruces/go-sqlite3/go.work.sum b/vendor/github.com/ncruces/go-sqlite3/go.work.sum new file mode 100644 index 000000000..4deb7b7f3 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/go.work.sum @@ -0,0 +1,9 @@ +golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= +golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_other.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_other.go new file mode 100644 index 000000000..ba16efc02 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_other.go @@ -0,0 +1,9 @@ +//go:build !(unix || windows) || sqlite3_nosys + +package util + +import "github.com/tetratelabs/wazero/experimental" + +func virtualAlloc(cap, max uint64) experimental.LinearMemory { + return sliceAlloc(cap, max) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_slice.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_slice.go new file mode 100644 index 000000000..b8cc1453c --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_slice.go @@ -0,0 +1,25 @@ +//go:build !(darwin || linux) || !(amd64 || arm64 || riscv64) || sqlite3_noshm || sqlite3_nosys + +package util + +import "github.com/tetratelabs/wazero/experimental" + +func sliceAlloc(cap, max uint64) 
experimental.LinearMemory { + return &sliceBuffer{make([]byte, cap), max} +} + +type sliceBuffer struct { + buf []byte + max uint64 +} + +func (b *sliceBuffer) Free() {} + +func (b *sliceBuffer) Reallocate(size uint64) []byte { + if cap := uint64(cap(b.buf)); size > cap { + b.buf = append(b.buf[:cap], make([]byte, size-cap)...) + } else { + b.buf = b.buf[:size] + } + return b.buf +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_unix.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_unix.go new file mode 100644 index 000000000..2b1d3916b --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_unix.go @@ -0,0 +1,67 @@ +//go:build unix && !sqlite3_nosys + +package util + +import ( + "math" + + "github.com/tetratelabs/wazero/experimental" + "golang.org/x/sys/unix" +) + +func virtualAlloc(cap, max uint64) experimental.LinearMemory { + // Round up to the page size. + rnd := uint64(unix.Getpagesize() - 1) + max = (max + rnd) &^ rnd + + if max > math.MaxInt { + // This ensures int(max) overflows to a negative value, + // and unix.Mmap returns EINVAL. + max = math.MaxUint64 + } + + // Reserve max bytes of address space, to ensure we won't need to move it. + // A protected, private, anonymous mapping should not commit memory. + b, err := unix.Mmap(-1, 0, int(max), unix.PROT_NONE, unix.MAP_PRIVATE|unix.MAP_ANON) + if err != nil { + panic(err) + } + return &mmappedMemory{buf: b[:0]} +} + +// The slice covers the entire mmapped memory: +// - len(buf) is the already committed memory, +// - cap(buf) is the reserved address space. +type mmappedMemory struct { + buf []byte +} + +func (m *mmappedMemory) Reallocate(size uint64) []byte { + com := uint64(len(m.buf)) + res := uint64(cap(m.buf)) + if com < size && size < res { + // Round up to the page size. + rnd := uint64(unix.Getpagesize() - 1) + new := (size + rnd) &^ rnd + + // Commit additional memory up to new bytes. 
+ err := unix.Mprotect(m.buf[com:new], unix.PROT_READ|unix.PROT_WRITE) + if err != nil { + panic(err) + } + + // Update committed memory. + m.buf = m.buf[:new] + } + // Limit returned capacity because bytes beyond + // len(m.buf) have not yet been committed. + return m.buf[:size:len(m.buf)] +} + +func (m *mmappedMemory) Free() { + err := unix.Munmap(m.buf[:cap(m.buf)]) + if err != nil { + panic(err) + } + m.buf = nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_windows.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_windows.go new file mode 100644 index 000000000..8936173b4 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc_windows.go @@ -0,0 +1,76 @@ +//go:build !sqlite3_nosys + +package util + +import ( + "math" + "reflect" + "unsafe" + + "github.com/tetratelabs/wazero/experimental" + "golang.org/x/sys/windows" +) + +func virtualAlloc(cap, max uint64) experimental.LinearMemory { + // Round up to the page size. + rnd := uint64(windows.Getpagesize() - 1) + max = (max + rnd) &^ rnd + + if max > math.MaxInt { + // This ensures uintptr(max) overflows to a large value, + // and windows.VirtualAlloc returns an error. + max = math.MaxUint64 + } + + // Reserve max bytes of address space, to ensure we won't need to move it. + // This does not commit memory. + r, err := windows.VirtualAlloc(0, uintptr(max), windows.MEM_RESERVE, windows.PAGE_READWRITE) + if err != nil { + panic(err) + } + + mem := virtualMemory{addr: r} + // SliceHeader, although deprecated, avoids a go vet warning. + sh := (*reflect.SliceHeader)(unsafe.Pointer(&mem.buf)) + sh.Cap = int(max) // Not a bug. + sh.Data = r + return &mem +} + +// The slice covers the entire mmapped memory: +// - len(buf) is the already committed memory, +// - cap(buf) is the reserved address space. 
+type virtualMemory struct { + buf []byte + addr uintptr +} + +func (m *virtualMemory) Reallocate(size uint64) []byte { + com := uint64(len(m.buf)) + res := uint64(cap(m.buf)) + if com < size && size < res { + // Round up to the page size. + rnd := uint64(windows.Getpagesize() - 1) + new := (size + rnd) &^ rnd + + // Commit additional memory up to new bytes. + _, err := windows.VirtualAlloc(m.addr, uintptr(new), windows.MEM_COMMIT, windows.PAGE_READWRITE) + if err != nil { + panic(err) + } + + // Update committed memory. + m.buf = m.buf[:new] + } + // Limit returned capacity because bytes beyond + // len(m.buf) have not yet been committed. + return m.buf[:size:len(m.buf)] +} + +func (m *virtualMemory) Free() { + err := windows.VirtualFree(m.addr, 0, windows.MEM_RELEASE) + if err != nil { + panic(err) + } + m.addr = 0 +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/bool.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/bool.go new file mode 100644 index 000000000..8427f3085 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/bool.go @@ -0,0 +1,22 @@ +package util + +import "strings" + +func ParseBool(s string) (b, ok bool) { + if len(s) == 0 { + return false, false + } + if s[0] == '0' { + return false, true + } + if '1' <= s[0] && s[0] <= '9' { + return true, true + } + switch strings.ToLower(s) { + case "true", "yes", "on": + return true, true + case "false", "no", "off": + return false, true + } + return false, false +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/const.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/const.go new file mode 100644 index 000000000..86bb9749d --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/const.go @@ -0,0 +1,117 @@ +package util + +// https://sqlite.com/matrix/rescode.html +const ( + OK = 0 /* Successful result */ + + ERROR = 1 /* Generic error */ + INTERNAL = 2 /* Internal logic error in SQLite */ + PERM = 3 /* Access permission denied */ 
+ ABORT = 4 /* Callback routine requested an abort */ + BUSY = 5 /* The database file is locked */ + LOCKED = 6 /* A table in the database is locked */ + NOMEM = 7 /* A malloc() failed */ + READONLY = 8 /* Attempt to write a readonly database */ + INTERRUPT = 9 /* Operation terminated by sqlite3_interrupt() */ + IOERR = 10 /* Some kind of disk I/O error occurred */ + CORRUPT = 11 /* The database disk image is malformed */ + NOTFOUND = 12 /* Unknown opcode in sqlite3_file_control() */ + FULL = 13 /* Insertion failed because database is full */ + CANTOPEN = 14 /* Unable to open the database file */ + PROTOCOL = 15 /* Database lock protocol error */ + EMPTY = 16 /* Internal use only */ + SCHEMA = 17 /* The database schema changed */ + TOOBIG = 18 /* String or BLOB exceeds size limit */ + CONSTRAINT = 19 /* Abort due to constraint violation */ + MISMATCH = 20 /* Data type mismatch */ + MISUSE = 21 /* Library used incorrectly */ + NOLFS = 22 /* Uses OS features not supported on host */ + AUTH = 23 /* Authorization denied */ + FORMAT = 24 /* Not used */ + RANGE = 25 /* 2nd parameter to sqlite3_bind out of range */ + NOTADB = 26 /* File opened that is not a database file */ + NOTICE = 27 /* Notifications from sqlite3_log() */ + WARNING = 28 /* Warnings from sqlite3_log() */ + + ROW = 100 /* sqlite3_step() has another row ready */ + DONE = 101 /* sqlite3_step() has finished executing */ + + ERROR_MISSING_COLLSEQ = ERROR | (1 << 8) + ERROR_RETRY = ERROR | (2 << 8) + ERROR_SNAPSHOT = ERROR | (3 << 8) + IOERR_READ = IOERR | (1 << 8) + IOERR_SHORT_READ = IOERR | (2 << 8) + IOERR_WRITE = IOERR | (3 << 8) + IOERR_FSYNC = IOERR | (4 << 8) + IOERR_DIR_FSYNC = IOERR | (5 << 8) + IOERR_TRUNCATE = IOERR | (6 << 8) + IOERR_FSTAT = IOERR | (7 << 8) + IOERR_UNLOCK = IOERR | (8 << 8) + IOERR_RDLOCK = IOERR | (9 << 8) + IOERR_DELETE = IOERR | (10 << 8) + IOERR_BLOCKED = IOERR | (11 << 8) + IOERR_NOMEM = IOERR | (12 << 8) + IOERR_ACCESS = IOERR | (13 << 8) + IOERR_CHECKRESERVEDLOCK = IOERR 
| (14 << 8) + IOERR_LOCK = IOERR | (15 << 8) + IOERR_CLOSE = IOERR | (16 << 8) + IOERR_DIR_CLOSE = IOERR | (17 << 8) + IOERR_SHMOPEN = IOERR | (18 << 8) + IOERR_SHMSIZE = IOERR | (19 << 8) + IOERR_SHMLOCK = IOERR | (20 << 8) + IOERR_SHMMAP = IOERR | (21 << 8) + IOERR_SEEK = IOERR | (22 << 8) + IOERR_DELETE_NOENT = IOERR | (23 << 8) + IOERR_MMAP = IOERR | (24 << 8) + IOERR_GETTEMPPATH = IOERR | (25 << 8) + IOERR_CONVPATH = IOERR | (26 << 8) + IOERR_VNODE = IOERR | (27 << 8) + IOERR_AUTH = IOERR | (28 << 8) + IOERR_BEGIN_ATOMIC = IOERR | (29 << 8) + IOERR_COMMIT_ATOMIC = IOERR | (30 << 8) + IOERR_ROLLBACK_ATOMIC = IOERR | (31 << 8) + IOERR_DATA = IOERR | (32 << 8) + IOERR_CORRUPTFS = IOERR | (33 << 8) + IOERR_IN_PAGE = IOERR | (34 << 8) + LOCKED_SHAREDCACHE = LOCKED | (1 << 8) + LOCKED_VTAB = LOCKED | (2 << 8) + BUSY_RECOVERY = BUSY | (1 << 8) + BUSY_SNAPSHOT = BUSY | (2 << 8) + BUSY_TIMEOUT = BUSY | (3 << 8) + CANTOPEN_NOTEMPDIR = CANTOPEN | (1 << 8) + CANTOPEN_ISDIR = CANTOPEN | (2 << 8) + CANTOPEN_FULLPATH = CANTOPEN | (3 << 8) + CANTOPEN_CONVPATH = CANTOPEN | (4 << 8) + CANTOPEN_DIRTYWAL = CANTOPEN | (5 << 8) /* Not Used */ + CANTOPEN_SYMLINK = CANTOPEN | (6 << 8) + CORRUPT_VTAB = CORRUPT | (1 << 8) + CORRUPT_SEQUENCE = CORRUPT | (2 << 8) + CORRUPT_INDEX = CORRUPT | (3 << 8) + READONLY_RECOVERY = READONLY | (1 << 8) + READONLY_CANTLOCK = READONLY | (2 << 8) + READONLY_ROLLBACK = READONLY | (3 << 8) + READONLY_DBMOVED = READONLY | (4 << 8) + READONLY_CANTINIT = READONLY | (5 << 8) + READONLY_DIRECTORY = READONLY | (6 << 8) + ABORT_ROLLBACK = ABORT | (2 << 8) + CONSTRAINT_CHECK = CONSTRAINT | (1 << 8) + CONSTRAINT_COMMITHOOK = CONSTRAINT | (2 << 8) + CONSTRAINT_FOREIGNKEY = CONSTRAINT | (3 << 8) + CONSTRAINT_FUNCTION = CONSTRAINT | (4 << 8) + CONSTRAINT_NOTNULL = CONSTRAINT | (5 << 8) + CONSTRAINT_PRIMARYKEY = CONSTRAINT | (6 << 8) + CONSTRAINT_TRIGGER = CONSTRAINT | (7 << 8) + CONSTRAINT_UNIQUE = CONSTRAINT | (8 << 8) + CONSTRAINT_VTAB = CONSTRAINT | (9 << 8) + 
CONSTRAINT_ROWID = CONSTRAINT | (10 << 8) + CONSTRAINT_PINNED = CONSTRAINT | (11 << 8) + CONSTRAINT_DATATYPE = CONSTRAINT | (12 << 8) + NOTICE_RECOVER_WAL = NOTICE | (1 << 8) + NOTICE_RECOVER_ROLLBACK = NOTICE | (2 << 8) + NOTICE_RBU = NOTICE | (3 << 8) + WARNING_AUTOINDEX = WARNING | (1 << 8) + AUTH_USER = AUTH | (1 << 8) + + OK_LOAD_PERMANENTLY = OK | (1 << 8) + OK_SYMLINK = OK | (2 << 8) /* internal use only */ +) diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/error.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/error.go new file mode 100644 index 000000000..1f5555fd3 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/error.go @@ -0,0 +1,106 @@ +package util + +import ( + "runtime" + "strconv" +) + +type ErrorString string + +func (e ErrorString) Error() string { return string(e) } + +const ( + NilErr = ErrorString("sqlite3: invalid memory address or null pointer dereference") + OOMErr = ErrorString("sqlite3: out of memory") + RangeErr = ErrorString("sqlite3: index out of range") + NoNulErr = ErrorString("sqlite3: missing NUL terminator") + NoBinaryErr = ErrorString("sqlite3: no SQLite binary embed/set/loaded") + BadBinaryErr = ErrorString("sqlite3: invalid SQLite binary embed/set/loaded") + TimeErr = ErrorString("sqlite3: invalid time value") + WhenceErr = ErrorString("sqlite3: invalid whence") + OffsetErr = ErrorString("sqlite3: invalid offset") + TailErr = ErrorString("sqlite3: multiple statements") + IsolationErr = ErrorString("sqlite3: unsupported isolation level") + ValueErr = ErrorString("sqlite3: unsupported value") + NoVFSErr = ErrorString("sqlite3: no such vfs: ") +) + +func AssertErr() ErrorString { + msg := "sqlite3: assertion failed" + if _, file, line, ok := runtime.Caller(1); ok { + msg += " (" + file + ":" + strconv.Itoa(line) + ")" + } + return ErrorString(msg) +} + +func ErrorCodeString(rc uint32) string { + switch rc { + case ABORT_ROLLBACK: + return "sqlite3: abort due to ROLLBACK" + case ROW: + 
return "sqlite3: another row available" + case DONE: + return "sqlite3: no more rows available" + } + switch rc & 0xff { + case OK: + return "sqlite3: not an error" + case ERROR: + return "sqlite3: SQL logic error" + case INTERNAL: + break + case PERM: + return "sqlite3: access permission denied" + case ABORT: + return "sqlite3: query aborted" + case BUSY: + return "sqlite3: database is locked" + case LOCKED: + return "sqlite3: database table is locked" + case NOMEM: + return "sqlite3: out of memory" + case READONLY: + return "sqlite3: attempt to write a readonly database" + case INTERRUPT: + return "sqlite3: interrupted" + case IOERR: + return "sqlite3: disk I/O error" + case CORRUPT: + return "sqlite3: database disk image is malformed" + case NOTFOUND: + return "sqlite3: unknown operation" + case FULL: + return "sqlite3: database or disk is full" + case CANTOPEN: + return "sqlite3: unable to open database file" + case PROTOCOL: + return "sqlite3: locking protocol" + case FORMAT: + break + case SCHEMA: + return "sqlite3: database schema has changed" + case TOOBIG: + return "sqlite3: string or blob too big" + case CONSTRAINT: + return "sqlite3: constraint failed" + case MISMATCH: + return "sqlite3: datatype mismatch" + case MISUSE: + return "sqlite3: bad parameter or other API misuse" + case NOLFS: + break + case AUTH: + return "sqlite3: authorization denied" + case EMPTY: + break + case RANGE: + return "sqlite3: column index out of range" + case NOTADB: + return "sqlite3: file is not a database" + case NOTICE: + return "sqlite3: notification message" + case WARNING: + return "sqlite3: warning message" + } + return "sqlite3: unknown error" +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/func.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/func.go new file mode 100644 index 000000000..be7a47c2f --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/func.go @@ -0,0 +1,193 @@ +package util + +import ( + "context" + + 
"github.com/tetratelabs/wazero" + "github.com/tetratelabs/wazero/api" +) + +type i32 interface{ ~int32 | ~uint32 } +type i64 interface{ ~int64 | ~uint64 } + +type funcVI[T0 i32] func(context.Context, api.Module, T0) + +func (fn funcVI[T0]) Call(ctx context.Context, mod api.Module, stack []uint64) { + fn(ctx, mod, T0(stack[0])) +} + +func ExportFuncVI[T0 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0)) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcVI[T0](fn), + []api.ValueType{api.ValueTypeI32}, nil). + Export(name) +} + +type funcVII[T0, T1 i32] func(context.Context, api.Module, T0, T1) + +func (fn funcVII[T0, T1]) Call(ctx context.Context, mod api.Module, stack []uint64) { + fn(ctx, mod, T0(stack[0]), T1(stack[1])) +} + +func ExportFuncVII[T0, T1 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1)) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcVII[T0, T1](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32}, nil). + Export(name) +} + +type funcVIII[T0, T1, T2 i32] func(context.Context, api.Module, T0, T1, T2) + +func (fn funcVIII[T0, T1, T2]) Call(ctx context.Context, mod api.Module, stack []uint64) { + fn(ctx, mod, T0(stack[0]), T1(stack[1]), T2(stack[2])) +} + +func ExportFuncVIII[T0, T1, T2 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1, T2)) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcVIII[T0, T1, T2](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32}, nil). 
+ Export(name) +} + +type funcVIIII[T0, T1, T2, T3 i32] func(context.Context, api.Module, T0, T1, T2, T3) + +func (fn funcVIIII[T0, T1, T2, T3]) Call(ctx context.Context, mod api.Module, stack []uint64) { + fn(ctx, mod, T0(stack[0]), T1(stack[1]), T2(stack[2]), T3(stack[3])) +} + +func ExportFuncVIIII[T0, T1, T2, T3 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1, T2, T3)) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcVIIII[T0, T1, T2, T3](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32}, nil). + Export(name) +} + +type funcVIIIII[T0, T1, T2, T3, T4 i32] func(context.Context, api.Module, T0, T1, T2, T3, T4) + +func (fn funcVIIIII[T0, T1, T2, T3, T4]) Call(ctx context.Context, mod api.Module, stack []uint64) { + fn(ctx, mod, T0(stack[0]), T1(stack[1]), T2(stack[2]), T3(stack[3]), T4(stack[4])) +} + +func ExportFuncVIIIII[T0, T1, T2, T3, T4 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1, T2, T3, T4)) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcVIIIII[T0, T1, T2, T3, T4](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32}, nil). + Export(name) +} + +type funcVIIIIJ[T0, T1, T2, T3 i32, T4 i64] func(context.Context, api.Module, T0, T1, T2, T3, T4) + +func (fn funcVIIIIJ[T0, T1, T2, T3, T4]) Call(ctx context.Context, mod api.Module, stack []uint64) { + fn(ctx, mod, T0(stack[0]), T1(stack[1]), T2(stack[2]), T3(stack[3]), T4(stack[4])) +} + +func ExportFuncVIIIIJ[T0, T1, T2, T3 i32, T4 i64](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1, T2, T3, T4)) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcVIIIIJ[T0, T1, T2, T3, T4](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI64}, nil). 
+ Export(name) +} + +type funcII[TR, T0 i32] func(context.Context, api.Module, T0) TR + +func (fn funcII[TR, T0]) Call(ctx context.Context, mod api.Module, stack []uint64) { + stack[0] = uint64(fn(ctx, mod, T0(stack[0]))) +} + +func ExportFuncII[TR, T0 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0) TR) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcII[TR, T0](fn), + []api.ValueType{api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32}). + Export(name) +} + +type funcIII[TR, T0, T1 i32] func(context.Context, api.Module, T0, T1) TR + +func (fn funcIII[TR, T0, T1]) Call(ctx context.Context, mod api.Module, stack []uint64) { + stack[0] = uint64(fn(ctx, mod, T0(stack[0]), T1(stack[1]))) +} + +func ExportFuncIII[TR, T0, T1 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1) TR) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcIII[TR, T0, T1](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32}). + Export(name) +} + +type funcIIII[TR, T0, T1, T2 i32] func(context.Context, api.Module, T0, T1, T2) TR + +func (fn funcIIII[TR, T0, T1, T2]) Call(ctx context.Context, mod api.Module, stack []uint64) { + stack[0] = uint64(fn(ctx, mod, T0(stack[0]), T1(stack[1]), T2(stack[2]))) +} + +func ExportFuncIIII[TR, T0, T1, T2 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1, T2) TR) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcIIII[TR, T0, T1, T2](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32}). 
+ Export(name) +} + +type funcIIIII[TR, T0, T1, T2, T3 i32] func(context.Context, api.Module, T0, T1, T2, T3) TR + +func (fn funcIIIII[TR, T0, T1, T2, T3]) Call(ctx context.Context, mod api.Module, stack []uint64) { + stack[0] = uint64(fn(ctx, mod, T0(stack[0]), T1(stack[1]), T2(stack[2]), T3(stack[3]))) +} + +func ExportFuncIIIII[TR, T0, T1, T2, T3 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1, T2, T3) TR) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcIIIII[TR, T0, T1, T2, T3](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32}). + Export(name) +} + +type funcIIIIII[TR, T0, T1, T2, T3, T4 i32] func(context.Context, api.Module, T0, T1, T2, T3, T4) TR + +func (fn funcIIIIII[TR, T0, T1, T2, T3, T4]) Call(ctx context.Context, mod api.Module, stack []uint64) { + stack[0] = uint64(fn(ctx, mod, T0(stack[0]), T1(stack[1]), T2(stack[2]), T3(stack[3]), T4(stack[4]))) +} + +func ExportFuncIIIIII[TR, T0, T1, T2, T3, T4 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1, T2, T3, T4) TR) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcIIIIII[TR, T0, T1, T2, T3, T4](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32}). + Export(name) +} + +type funcIIIIIII[TR, T0, T1, T2, T3, T4, T5 i32] func(context.Context, api.Module, T0, T1, T2, T3, T4, T5) TR + +func (fn funcIIIIIII[TR, T0, T1, T2, T3, T4, T5]) Call(ctx context.Context, mod api.Module, stack []uint64) { + stack[0] = uint64(fn(ctx, mod, T0(stack[0]), T1(stack[1]), T2(stack[2]), T3(stack[3]), T4(stack[4]), T5(stack[5]))) +} + +func ExportFuncIIIIIII[TR, T0, T1, T2, T3, T4, T5 i32](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1, T2, T3, T4, T5) TR) { + mod.NewFunctionBuilder(). 
+ WithGoModuleFunction(funcIIIIIII[TR, T0, T1, T2, T3, T4, T5](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32}). + Export(name) +} + +type funcIIIIJ[TR, T0, T1, T2 i32, T3 i64] func(context.Context, api.Module, T0, T1, T2, T3) TR + +func (fn funcIIIIJ[TR, T0, T1, T2, T3]) Call(ctx context.Context, mod api.Module, stack []uint64) { + stack[0] = uint64(fn(ctx, mod, T0(stack[0]), T1(stack[1]), T2(stack[2]), T3(stack[3]))) +} + +func ExportFuncIIIIJ[TR, T0, T1, T2 i32, T3 i64](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1, T2, T3) TR) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcIIIIJ[TR, T0, T1, T2, T3](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI32, api.ValueTypeI64}, []api.ValueType{api.ValueTypeI32}). + Export(name) +} + +type funcIIJ[TR, T0 i32, T1 i64] func(context.Context, api.Module, T0, T1) TR + +func (fn funcIIJ[TR, T0, T1]) Call(ctx context.Context, mod api.Module, stack []uint64) { + stack[0] = uint64(fn(ctx, mod, T0(stack[0]), T1(stack[1]))) +} + +func ExportFuncIIJ[TR, T0 i32, T1 i64](mod wazero.HostModuleBuilder, name string, fn func(context.Context, api.Module, T0, T1) TR) { + mod.NewFunctionBuilder(). + WithGoModuleFunction(funcIIJ[TR, T0, T1](fn), + []api.ValueType{api.ValueTypeI32, api.ValueTypeI64}, []api.ValueType{api.ValueTypeI32}). 
+ Export(name) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/handle.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/handle.go new file mode 100644 index 000000000..4584324c1 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/handle.go @@ -0,0 +1,65 @@ +package util + +import ( + "context" + "io" +) + +type handleState struct { + handles []any + holes int +} + +func (s *handleState) CloseNotify(ctx context.Context, exitCode uint32) { + for _, h := range s.handles { + if c, ok := h.(io.Closer); ok { + c.Close() + } + } + s.handles = nil + s.holes = 0 +} + +func GetHandle(ctx context.Context, id uint32) any { + if id == 0 { + return nil + } + s := ctx.Value(moduleKey{}).(*moduleState) + return s.handles[^id] +} + +func DelHandle(ctx context.Context, id uint32) error { + if id == 0 { + return nil + } + s := ctx.Value(moduleKey{}).(*moduleState) + a := s.handles[^id] + s.handles[^id] = nil + s.holes++ + if c, ok := a.(io.Closer); ok { + return c.Close() + } + return nil +} + +func AddHandle(ctx context.Context, a any) (id uint32) { + if a == nil { + panic(NilErr) + } + s := ctx.Value(moduleKey{}).(*moduleState) + + // Find an empty slot. + if s.holes > cap(s.handles)-len(s.handles) { + for id, h := range s.handles { + if h == nil { + s.holes-- + s.handles[id] = a + return ^uint32(id) + } + } + } + + // Add a new slot. 
+ s.handles = append(s.handles, a) + return -uint32(len(s.handles)) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go new file mode 100644 index 000000000..c0ba38cf0 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go @@ -0,0 +1,35 @@ +package util + +import ( + "encoding/json" + "strconv" + "time" + "unsafe" +) + +type JSON struct{ Value any } + +func (j JSON) Scan(value any) error { + var buf []byte + + switch v := value.(type) { + case []byte: + buf = v + case string: + buf = unsafe.Slice(unsafe.StringData(v), len(v)) + case int64: + buf = strconv.AppendInt(nil, v, 10) + case float64: + buf = strconv.AppendFloat(nil, v, 'g', -1, 64) + case time.Time: + buf = append(buf, '"') + buf = v.AppendFormat(buf, time.RFC3339Nano) + buf = append(buf, '"') + case nil: + buf = append(buf, "null"...) + default: + panic(AssertErr()) + } + + return json.Unmarshal(buf, j.Value) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/mem.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/mem.go new file mode 100644 index 000000000..a09523fd1 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/mem.go @@ -0,0 +1,134 @@ +package util + +import ( + "bytes" + "math" + + "github.com/tetratelabs/wazero/api" +) + +func View(mod api.Module, ptr uint32, size uint64) []byte { + if ptr == 0 { + panic(NilErr) + } + if size > math.MaxUint32 { + panic(RangeErr) + } + if size == 0 { + return nil + } + buf, ok := mod.Memory().Read(ptr, uint32(size)) + if !ok { + panic(RangeErr) + } + return buf +} + +func ReadUint8(mod api.Module, ptr uint32) uint8 { + if ptr == 0 { + panic(NilErr) + } + v, ok := mod.Memory().ReadByte(ptr) + if !ok { + panic(RangeErr) + } + return v +} + +func ReadUint32(mod api.Module, ptr uint32) uint32 { + if ptr == 0 { + panic(NilErr) + } + v, ok := mod.Memory().ReadUint32Le(ptr) + if !ok { + panic(RangeErr) + } + return v +} + 
+func WriteUint8(mod api.Module, ptr uint32, v uint8) { + if ptr == 0 { + panic(NilErr) + } + ok := mod.Memory().WriteByte(ptr, v) + if !ok { + panic(RangeErr) + } +} + +func WriteUint32(mod api.Module, ptr uint32, v uint32) { + if ptr == 0 { + panic(NilErr) + } + ok := mod.Memory().WriteUint32Le(ptr, v) + if !ok { + panic(RangeErr) + } +} + +func ReadUint64(mod api.Module, ptr uint32) uint64 { + if ptr == 0 { + panic(NilErr) + } + v, ok := mod.Memory().ReadUint64Le(ptr) + if !ok { + panic(RangeErr) + } + return v +} + +func WriteUint64(mod api.Module, ptr uint32, v uint64) { + if ptr == 0 { + panic(NilErr) + } + ok := mod.Memory().WriteUint64Le(ptr, v) + if !ok { + panic(RangeErr) + } +} + +func ReadFloat64(mod api.Module, ptr uint32) float64 { + return math.Float64frombits(ReadUint64(mod, ptr)) +} + +func WriteFloat64(mod api.Module, ptr uint32, v float64) { + WriteUint64(mod, ptr, math.Float64bits(v)) +} + +func ReadString(mod api.Module, ptr, maxlen uint32) string { + if ptr == 0 { + panic(NilErr) + } + switch maxlen { + case 0: + return "" + case math.MaxUint32: + // avoid overflow + default: + maxlen = maxlen + 1 + } + mem := mod.Memory() + buf, ok := mem.Read(ptr, maxlen) + if !ok { + buf, ok = mem.Read(ptr, mem.Size()-ptr) + if !ok { + panic(RangeErr) + } + } + if i := bytes.IndexByte(buf, 0); i < 0 { + panic(NoNulErr) + } else { + return string(buf[:i]) + } +} + +func WriteBytes(mod api.Module, ptr uint32, b []byte) { + buf := View(mod, ptr, uint64(len(b))) + copy(buf, b) +} + +func WriteString(mod api.Module, ptr uint32, s string) { + buf := View(mod, ptr, uint64(len(s)+1)) + buf[len(s)] = 0 + copy(buf, s) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap.go new file mode 100644 index 000000000..6783c9612 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap.go @@ -0,0 +1,97 @@ +//go:build (darwin || linux) && (amd64 || arm64 || riscv64) && 
!(sqlite3_noshm || sqlite3_nosys) + +package util + +import ( + "context" + "os" + "unsafe" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" + "golang.org/x/sys/unix" +) + +func withAllocator(ctx context.Context) context.Context { + return experimental.WithMemoryAllocator(ctx, + experimental.MemoryAllocatorFunc(virtualAlloc)) +} + +type mmapState struct { + regions []*MappedRegion +} + +func (s *mmapState) new(ctx context.Context, mod api.Module, size int32) *MappedRegion { + // Find unused region. + for _, r := range s.regions { + if !r.used && r.size == size { + return r + } + } + + // Allocate page aligned memmory. + alloc := mod.ExportedFunction("aligned_alloc") + stack := [2]uint64{ + uint64(unix.Getpagesize()), + uint64(size), + } + if err := alloc.CallWithStack(ctx, stack[:]); err != nil { + panic(err) + } + if stack[0] == 0 { + panic(OOMErr) + } + + // Save the newly allocated region. + ptr := uint32(stack[0]) + buf := View(mod, ptr, uint64(size)) + addr := uintptr(unsafe.Pointer(&buf[0])) + s.regions = append(s.regions, &MappedRegion{ + Ptr: ptr, + addr: addr, + size: size, + }) + return s.regions[len(s.regions)-1] +} + +type MappedRegion struct { + addr uintptr + Ptr uint32 + size int32 + used bool +} + +func MapRegion(ctx context.Context, mod api.Module, f *os.File, offset int64, size int32, prot int) (*MappedRegion, error) { + s := ctx.Value(moduleKey{}).(*moduleState) + r := s.new(ctx, mod, size) + err := r.mmap(f, offset, prot) + if err != nil { + return nil, err + } + return r, nil +} + +func (r *MappedRegion) Unmap() error { + // We can't munmap the region, otherwise it could be remaped. + // Instead, convert it to a protected, private, anonymous mapping. + // If successful, it can be reused for a subsequent mmap. 
+ _, err := mmap(r.addr, uintptr(r.size), + unix.PROT_NONE, unix.MAP_PRIVATE|unix.MAP_ANON|unix.MAP_FIXED, + -1, 0) + r.used = err != nil + return err +} + +func (r *MappedRegion) mmap(f *os.File, offset int64, prot int) error { + _, err := mmap(r.addr, uintptr(r.size), + prot, unix.MAP_SHARED|unix.MAP_FIXED, + int(f.Fd()), offset) + r.used = err == nil + return err +} + +// We need the low level mmap for MAP_FIXED to work. +// Bind the syscall version hoping that it is more stable. + +//go:linkname mmap syscall.mmap +func mmap(addr, length uintptr, prot, flag, fd int, pos int64) (*byte, error) diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap_other.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap_other.go new file mode 100644 index 000000000..1e81c9fd3 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap_other.go @@ -0,0 +1,21 @@ +//go:build !(darwin || linux) || !(amd64 || arm64 || riscv64) || sqlite3_noshm || sqlite3_nosys + +package util + +import ( + "context" + + "github.com/tetratelabs/wazero/experimental" +) + +type mmapState struct{} + +func withAllocator(ctx context.Context) context.Context { + return experimental.WithMemoryAllocator(ctx, + experimental.MemoryAllocatorFunc(func(cap, max uint64) experimental.LinearMemory { + if cap == max { + return virtualAlloc(cap, max) + } + return sliceAlloc(cap, max) + })) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/module.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/module.go new file mode 100644 index 000000000..22793e972 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/module.go @@ -0,0 +1,21 @@ +package util + +import ( + "context" + + "github.com/tetratelabs/wazero/experimental" +) + +type moduleKey struct{} +type moduleState struct { + mmapState + handleState +} + +func NewContext(ctx context.Context) context.Context { + state := new(moduleState) + ctx = withAllocator(ctx) + ctx = 
experimental.WithCloseNotifier(ctx, state) + ctx = context.WithValue(ctx, moduleKey{}, state) + return ctx +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/pointer.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/pointer.go new file mode 100644 index 000000000..eae4dae17 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/pointer.go @@ -0,0 +1,11 @@ +package util + +type Pointer[T any] struct{ Value T } + +func (p Pointer[T]) unwrap() any { return p.Value } + +type PointerUnwrap interface{ unwrap() any } + +func UnwrapPointer(p PointerUnwrap) any { + return p.unwrap() +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/reflect.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/reflect.go new file mode 100644 index 000000000..3104a7cf3 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/reflect.go @@ -0,0 +1,10 @@ +package util + +import "reflect" + +func ReflectType(v reflect.Value) reflect.Type { + if v.Kind() != reflect.Invalid { + return v.Type() + } + return nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/json.go b/vendor/github.com/ncruces/go-sqlite3/json.go new file mode 100644 index 000000000..9b2565e87 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/json.go @@ -0,0 +1,11 @@ +package sqlite3 + +import "github.com/ncruces/go-sqlite3/internal/util" + +// JSON returns a value that can be used as an argument to +// [database/sql.DB.Exec], [database/sql.Row.Scan] and similar methods to +// store value as JSON, or decode JSON into value. +// JSON should NOT be used with [BindJSON] or [ResultJSON]. 
+func JSON(value any) any { + return util.JSON{Value: value} +} diff --git a/vendor/github.com/ncruces/go-sqlite3/pointer.go b/vendor/github.com/ncruces/go-sqlite3/pointer.go new file mode 100644 index 000000000..611c1528c --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/pointer.go @@ -0,0 +1,12 @@ +package sqlite3 + +import "github.com/ncruces/go-sqlite3/internal/util" + +// Pointer returns a pointer to a value that can be used as an argument to +// [database/sql.DB.Exec] and similar methods. +// Pointer should NOT be used with [BindPointer] or [ResultPointer]. +// +// https://sqlite.org/bindptr.html +func Pointer[T any](value T) any { + return util.Pointer[T]{Value: value} +} diff --git a/vendor/github.com/ncruces/go-sqlite3/quote.go b/vendor/github.com/ncruces/go-sqlite3/quote.go new file mode 100644 index 000000000..d1cd6fa87 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/quote.go @@ -0,0 +1,112 @@ +package sqlite3 + +import ( + "bytes" + "math" + "strconv" + "strings" + "time" + "unsafe" + + "github.com/ncruces/go-sqlite3/internal/util" +) + +// Quote escapes and quotes a value +// making it safe to embed in SQL text. 
+func Quote(value any) string { + switch v := value.(type) { + case nil: + return "NULL" + case bool: + if v { + return "1" + } else { + return "0" + } + + case int: + return strconv.Itoa(v) + case int64: + return strconv.FormatInt(v, 10) + case float64: + switch { + case math.IsNaN(v): + return "NULL" + case math.IsInf(v, 1): + return "9.0e999" + case math.IsInf(v, -1): + return "-9.0e999" + } + return strconv.FormatFloat(v, 'g', -1, 64) + case time.Time: + return "'" + v.Format(time.RFC3339Nano) + "'" + + case string: + if strings.IndexByte(v, 0) >= 0 { + break + } + + buf := make([]byte, 2+len(v)+strings.Count(v, "'")) + buf[0] = '\'' + i := 1 + for _, b := range []byte(v) { + if b == '\'' { + buf[i] = b + i += 1 + } + buf[i] = b + i += 1 + } + buf[i] = '\'' + return unsafe.String(&buf[0], len(buf)) + + case []byte: + buf := make([]byte, 3+2*len(v)) + buf[0] = 'x' + buf[1] = '\'' + i := 2 + for _, b := range v { + const hex = "0123456789ABCDEF" + buf[i+0] = hex[b/16] + buf[i+1] = hex[b%16] + i += 2 + } + buf[i] = '\'' + return unsafe.String(&buf[0], len(buf)) + + case ZeroBlob: + if v > ZeroBlob(1e9-3)/2 { + break + } + + buf := bytes.Repeat([]byte("0"), int(3+2*int64(v))) + buf[0] = 'x' + buf[1] = '\'' + buf[len(buf)-1] = '\'' + return unsafe.String(&buf[0], len(buf)) + } + + panic(util.ValueErr) +} + +// QuoteIdentifier escapes and quotes an identifier +// making it safe to embed in SQL text. 
+func QuoteIdentifier(id string) string { + if strings.IndexByte(id, 0) >= 0 { + panic(util.ValueErr) + } + + buf := make([]byte, 2+len(id)+strings.Count(id, `"`)) + buf[0] = '"' + i := 1 + for _, b := range []byte(id) { + if b == '"' { + buf[i] = b + i += 1 + } + buf[i] = b + i += 1 + } + buf[i] = '"' + return unsafe.String(&buf[0], len(buf)) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/sqlite.go b/vendor/github.com/ncruces/go-sqlite3/sqlite.go new file mode 100644 index 000000000..61a03652f --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/sqlite.go @@ -0,0 +1,341 @@ +// Package sqlite3 wraps the C SQLite API. +package sqlite3 + +import ( + "context" + "math" + "math/bits" + "os" + "sync" + "unsafe" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/ncruces/go-sqlite3/vfs" + "github.com/tetratelabs/wazero" + "github.com/tetratelabs/wazero/api" +) + +// Configure SQLite Wasm. +// +// Importing package embed initializes [Binary] +// with an appropriate build of SQLite: +// +// import _ "github.com/ncruces/go-sqlite3/embed" +var ( + Binary []byte // Wasm binary to load. + Path string // Path to load the binary from. + + RuntimeConfig wazero.RuntimeConfig +) + +// Initialize decodes and compiles the SQLite Wasm binary. +// This is called implicitly when the first connection is openned, +// but is potentially slow, so you may want to call it at a more convenient time. 
+func Initialize() error { + instance.once.Do(compileSQLite) + return instance.err +} + +var instance struct { + runtime wazero.Runtime + compiled wazero.CompiledModule + err error + once sync.Once +} + +func compileSQLite() { + if RuntimeConfig == nil { + RuntimeConfig = wazero.NewRuntimeConfig() + } + + ctx := context.Background() + instance.runtime = wazero.NewRuntimeWithConfig(ctx, RuntimeConfig) + + env := instance.runtime.NewHostModuleBuilder("env") + env = vfs.ExportHostFunctions(env) + env = exportCallbacks(env) + _, instance.err = env.Instantiate(ctx) + if instance.err != nil { + return + } + + bin := Binary + if bin == nil && Path != "" { + bin, instance.err = os.ReadFile(Path) + if instance.err != nil { + return + } + } + if bin == nil { + instance.err = util.NoBinaryErr + return + } + + instance.compiled, instance.err = instance.runtime.CompileModule(ctx, bin) +} + +type sqlite struct { + ctx context.Context + mod api.Module + funcs struct { + fn [32]api.Function + id [32]*byte + mask uint32 + } + stack [8]uint64 + freer uint32 +} + +func instantiateSQLite() (sqlt *sqlite, err error) { + if err := Initialize(); err != nil { + return nil, err + } + + sqlt = new(sqlite) + sqlt.ctx = util.NewContext(context.Background()) + + sqlt.mod, err = instance.runtime.InstantiateModule(sqlt.ctx, + instance.compiled, wazero.NewModuleConfig().WithName("")) + if err != nil { + return nil, err + } + + global := sqlt.mod.ExportedGlobal("malloc_destructor") + if global == nil { + return nil, util.BadBinaryErr + } + + sqlt.freer = util.ReadUint32(sqlt.mod, uint32(global.Get())) + if sqlt.freer == 0 { + return nil, util.BadBinaryErr + } + return sqlt, nil +} + +func (sqlt *sqlite) close() error { + return sqlt.mod.Close(sqlt.ctx) +} + +func (sqlt *sqlite) error(rc uint64, handle uint32, sql ...string) error { + if rc == _OK { + return nil + } + + err := Error{code: rc} + + if err.Code() == NOMEM || err.ExtendedCode() == IOERR_NOMEM { + panic(util.OOMErr) + } + + if r := 
sqlt.call("sqlite3_errstr", rc); r != 0 { + err.str = util.ReadString(sqlt.mod, uint32(r), _MAX_NAME) + } + + if handle != 0 { + if r := sqlt.call("sqlite3_errmsg", uint64(handle)); r != 0 { + err.msg = util.ReadString(sqlt.mod, uint32(r), _MAX_LENGTH) + } + + if sql != nil { + if r := sqlt.call("sqlite3_error_offset", uint64(handle)); r != math.MaxUint32 { + err.sql = sql[0][r:] + } + } + } + + switch err.msg { + case err.str, "not an error": + err.msg = "" + } + return &err +} + +func (sqlt *sqlite) getfn(name string) api.Function { + c := &sqlt.funcs + p := unsafe.StringData(name) + for i := range c.id { + if c.id[i] == p { + c.id[i] = nil + c.mask &^= uint32(1) << i + return c.fn[i] + } + } + return sqlt.mod.ExportedFunction(name) +} + +func (sqlt *sqlite) putfn(name string, fn api.Function) { + c := &sqlt.funcs + p := unsafe.StringData(name) + i := bits.TrailingZeros32(^c.mask) + if i < 32 { + c.id[i] = p + c.fn[i] = fn + c.mask |= uint32(1) << i + } else { + c.id[0] = p + c.fn[0] = fn + c.mask = uint32(1) + } +} + +func (sqlt *sqlite) call(name string, params ...uint64) uint64 { + copy(sqlt.stack[:], params) + fn := sqlt.getfn(name) + err := fn.CallWithStack(sqlt.ctx, sqlt.stack[:]) + if err != nil { + panic(err) + } + sqlt.putfn(name, fn) + return sqlt.stack[0] +} + +func (sqlt *sqlite) free(ptr uint32) { + if ptr == 0 { + return + } + sqlt.call("free", uint64(ptr)) +} + +func (sqlt *sqlite) new(size uint64) uint32 { + if size > _MAX_ALLOCATION_SIZE { + panic(util.OOMErr) + } + ptr := uint32(sqlt.call("malloc", size)) + if ptr == 0 && size != 0 { + panic(util.OOMErr) + } + return ptr +} + +func (sqlt *sqlite) newBytes(b []byte) uint32 { + if (*[0]byte)(b) == nil { + return 0 + } + ptr := sqlt.new(uint64(len(b))) + util.WriteBytes(sqlt.mod, ptr, b) + return ptr +} + +func (sqlt *sqlite) newString(s string) uint32 { + ptr := sqlt.new(uint64(len(s) + 1)) + util.WriteString(sqlt.mod, ptr, s) + return ptr +} + +func (sqlt *sqlite) newArena(size uint64) arena { + 
// Ensure the arena's size is a multiple of 8. + size = (size + 7) &^ 7 + return arena{ + sqlt: sqlt, + size: uint32(size), + base: sqlt.new(size), + } +} + +type arena struct { + sqlt *sqlite + ptrs []uint32 + base uint32 + next uint32 + size uint32 +} + +func (a *arena) free() { + if a.sqlt == nil { + return + } + for _, ptr := range a.ptrs { + a.sqlt.free(ptr) + } + a.sqlt.free(a.base) + a.sqlt = nil +} + +func (a *arena) mark() (reset func()) { + ptrs := len(a.ptrs) + next := a.next + return func() { + for _, ptr := range a.ptrs[ptrs:] { + a.sqlt.free(ptr) + } + a.ptrs = a.ptrs[:ptrs] + a.next = next + } +} + +func (a *arena) new(size uint64) uint32 { + // Align the next address, to 4 or 8 bytes. + if size&7 != 0 { + a.next = (a.next + 3) &^ 3 + } else { + a.next = (a.next + 7) &^ 7 + } + if size <= uint64(a.size-a.next) { + ptr := a.base + a.next + a.next += uint32(size) + return ptr + } + ptr := a.sqlt.new(size) + a.ptrs = append(a.ptrs, ptr) + return ptr +} + +func (a *arena) bytes(b []byte) uint32 { + if (*[0]byte)(b) == nil { + return 0 + } + ptr := a.new(uint64(len(b))) + util.WriteBytes(a.sqlt.mod, ptr, b) + return ptr +} + +func (a *arena) string(s string) uint32 { + ptr := a.new(uint64(len(s) + 1)) + util.WriteString(a.sqlt.mod, ptr, s) + return ptr +} + +func exportCallbacks(env wazero.HostModuleBuilder) wazero.HostModuleBuilder { + util.ExportFuncII(env, "go_progress_handler", progressCallback) + util.ExportFuncIIII(env, "go_busy_timeout", timeoutCallback) + util.ExportFuncIII(env, "go_busy_handler", busyCallback) + util.ExportFuncII(env, "go_commit_hook", commitCallback) + util.ExportFuncVI(env, "go_rollback_hook", rollbackCallback) + util.ExportFuncVIIIIJ(env, "go_update_hook", updateCallback) + util.ExportFuncIIIII(env, "go_wal_hook", walCallback) + util.ExportFuncIIIIII(env, "go_autovacuum_pages", autoVacuumCallback) + util.ExportFuncIIIIIII(env, "go_authorizer", authorizerCallback) + util.ExportFuncVIII(env, "go_log", logCallback) + 
util.ExportFuncVI(env, "go_destroy", destroyCallback) + util.ExportFuncVIIII(env, "go_func", funcCallback) + util.ExportFuncVIIIII(env, "go_step", stepCallback) + util.ExportFuncVIII(env, "go_final", finalCallback) + util.ExportFuncVII(env, "go_value", valueCallback) + util.ExportFuncVIIII(env, "go_inverse", inverseCallback) + util.ExportFuncVIIII(env, "go_collation_needed", collationCallback) + util.ExportFuncIIIIII(env, "go_compare", compareCallback) + util.ExportFuncIIIIII(env, "go_vtab_create", vtabModuleCallback(xCreate)) + util.ExportFuncIIIIII(env, "go_vtab_connect", vtabModuleCallback(xConnect)) + util.ExportFuncII(env, "go_vtab_disconnect", vtabDisconnectCallback) + util.ExportFuncII(env, "go_vtab_destroy", vtabDestroyCallback) + util.ExportFuncIII(env, "go_vtab_best_index", vtabBestIndexCallback) + util.ExportFuncIIIII(env, "go_vtab_update", vtabUpdateCallback) + util.ExportFuncIII(env, "go_vtab_rename", vtabRenameCallback) + util.ExportFuncIIIII(env, "go_vtab_find_function", vtabFindFuncCallback) + util.ExportFuncII(env, "go_vtab_begin", vtabBeginCallback) + util.ExportFuncII(env, "go_vtab_sync", vtabSyncCallback) + util.ExportFuncII(env, "go_vtab_commit", vtabCommitCallback) + util.ExportFuncII(env, "go_vtab_rollback", vtabRollbackCallback) + util.ExportFuncIII(env, "go_vtab_savepoint", vtabSavepointCallback) + util.ExportFuncIII(env, "go_vtab_release", vtabReleaseCallback) + util.ExportFuncIII(env, "go_vtab_rollback_to", vtabRollbackToCallback) + util.ExportFuncIIIIII(env, "go_vtab_integrity", vtabIntegrityCallback) + util.ExportFuncIII(env, "go_cur_open", cursorOpenCallback) + util.ExportFuncII(env, "go_cur_close", cursorCloseCallback) + util.ExportFuncIIIIII(env, "go_cur_filter", cursorFilterCallback) + util.ExportFuncII(env, "go_cur_next", cursorNextCallback) + util.ExportFuncII(env, "go_cur_eof", cursorEOFCallback) + util.ExportFuncIIII(env, "go_cur_column", cursorColumnCallback) + util.ExportFuncIII(env, "go_cur_rowid", cursorRowIDCallback) + 
return env +} diff --git a/vendor/github.com/ncruces/go-sqlite3/stmt.go b/vendor/github.com/ncruces/go-sqlite3/stmt.go new file mode 100644 index 000000000..63c2085d0 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/stmt.go @@ -0,0 +1,639 @@ +package sqlite3 + +import ( + "encoding/json" + "math" + "strconv" + "time" + + "github.com/ncruces/go-sqlite3/internal/util" +) + +// Stmt is a prepared statement object. +// +// https://sqlite.org/c3ref/stmt.html +type Stmt struct { + c *Conn + err error + handle uint32 +} + +// Close destroys the prepared statement object. +// +// It is safe to close a nil, zero or closed Stmt. +// +// https://sqlite.org/c3ref/finalize.html +func (s *Stmt) Close() error { + if s == nil || s.handle == 0 { + return nil + } + + r := s.c.call("sqlite3_finalize", uint64(s.handle)) + + s.handle = 0 + return s.c.error(r) +} + +// Conn returns the database connection to which the prepared statement belongs. +// +// https://sqlite.org/c3ref/db_handle.html +func (s *Stmt) Conn() *Conn { + return s.c +} + +// ReadOnly returns true if and only if the statement +// makes no direct changes to the content of the database file. +// +// https://sqlite.org/c3ref/stmt_readonly.html +func (s *Stmt) ReadOnly() bool { + r := s.c.call("sqlite3_stmt_readonly", uint64(s.handle)) + return r != 0 +} + +// Reset resets the prepared statement object. +// +// https://sqlite.org/c3ref/reset.html +func (s *Stmt) Reset() error { + r := s.c.call("sqlite3_reset", uint64(s.handle)) + s.err = nil + return s.c.error(r) +} + +// Busy determines if a prepared statement has been reset. +// +// https://sqlite.org/c3ref/stmt_busy.html +func (s *Stmt) Busy() bool { + r := s.c.call("sqlite3_stmt_busy", uint64(s.handle)) + return r != 0 +} + +// Step evaluates the SQL statement. +// If the SQL statement being executed returns any data, +// then true is returned each time a new row of data is ready for processing by the caller. 
+// The values may be accessed using the Column access functions. +// Step is called again to retrieve the next row of data. +// If an error has occurred, Step returns false; +// call [Stmt.Err] or [Stmt.Reset] to get the error. +// +// https://sqlite.org/c3ref/step.html +func (s *Stmt) Step() bool { + s.c.checkInterrupt() + r := s.c.call("sqlite3_step", uint64(s.handle)) + switch r { + case _ROW: + s.err = nil + return true + case _DONE: + s.err = nil + default: + s.err = s.c.error(r) + } + return false +} + +// Err gets the last error occurred during [Stmt.Step]. +// Err returns nil after [Stmt.Reset] is called. +// +// https://sqlite.org/c3ref/step.html +func (s *Stmt) Err() error { + return s.err +} + +// Exec is a convenience function that repeatedly calls [Stmt.Step] until it returns false, +// then calls [Stmt.Reset] to reset the statement and get any error that occurred. +func (s *Stmt) Exec() error { + for s.Step() { + } + return s.Reset() +} + +// Status monitors the performance characteristics of prepared statements. +// +// https://sqlite.org/c3ref/stmt_status.html +func (s *Stmt) Status(op StmtStatus, reset bool) int { + if op > STMTSTATUS_FILTER_HIT && op != STMTSTATUS_MEMUSED { + return 0 + } + var i uint64 + if reset { + i = 1 + } + r := s.c.call("sqlite3_stmt_status", uint64(s.handle), + uint64(op), i) + return int(int32(r)) +} + +// ClearBindings resets all bindings on the prepared statement. +// +// https://sqlite.org/c3ref/clear_bindings.html +func (s *Stmt) ClearBindings() error { + r := s.c.call("sqlite3_clear_bindings", uint64(s.handle)) + return s.c.error(r) +} + +// BindCount returns the number of SQL parameters in the prepared statement. +// +// https://sqlite.org/c3ref/bind_parameter_count.html +func (s *Stmt) BindCount() int { + r := s.c.call("sqlite3_bind_parameter_count", + uint64(s.handle)) + return int(int32(r)) +} + +// BindIndex returns the index of a parameter in the prepared statement +// given its name. 
+// +// https://sqlite.org/c3ref/bind_parameter_index.html +func (s *Stmt) BindIndex(name string) int { + defer s.c.arena.mark()() + namePtr := s.c.arena.string(name) + r := s.c.call("sqlite3_bind_parameter_index", + uint64(s.handle), uint64(namePtr)) + return int(int32(r)) +} + +// BindName returns the name of a parameter in the prepared statement. +// The leftmost SQL parameter has an index of 1. +// +// https://sqlite.org/c3ref/bind_parameter_name.html +func (s *Stmt) BindName(param int) string { + r := s.c.call("sqlite3_bind_parameter_name", + uint64(s.handle), uint64(param)) + + ptr := uint32(r) + if ptr == 0 { + return "" + } + return util.ReadString(s.c.mod, ptr, _MAX_NAME) +} + +// BindBool binds a bool to the prepared statement. +// The leftmost SQL parameter has an index of 1. +// SQLite does not have a separate boolean storage class. +// Instead, boolean values are stored as integers 0 (false) and 1 (true). +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindBool(param int, value bool) error { + var i int64 + if value { + i = 1 + } + return s.BindInt64(param, i) +} + +// BindInt binds an int to the prepared statement. +// The leftmost SQL parameter has an index of 1. +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindInt(param int, value int) error { + return s.BindInt64(param, int64(value)) +} + +// BindInt64 binds an int64 to the prepared statement. +// The leftmost SQL parameter has an index of 1. +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindInt64(param int, value int64) error { + r := s.c.call("sqlite3_bind_int64", + uint64(s.handle), uint64(param), uint64(value)) + return s.c.error(r) +} + +// BindFloat binds a float64 to the prepared statement. +// The leftmost SQL parameter has an index of 1. 
+// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindFloat(param int, value float64) error { + r := s.c.call("sqlite3_bind_double", + uint64(s.handle), uint64(param), math.Float64bits(value)) + return s.c.error(r) +} + +// BindText binds a string to the prepared statement. +// The leftmost SQL parameter has an index of 1. +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindText(param int, value string) error { + if len(value) > _MAX_LENGTH { + return TOOBIG + } + ptr := s.c.newString(value) + r := s.c.call("sqlite3_bind_text64", + uint64(s.handle), uint64(param), + uint64(ptr), uint64(len(value)), + uint64(s.c.freer), _UTF8) + return s.c.error(r) +} + +// BindRawText binds a []byte to the prepared statement as text. +// The leftmost SQL parameter has an index of 1. +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindRawText(param int, value []byte) error { + if len(value) > _MAX_LENGTH { + return TOOBIG + } + ptr := s.c.newBytes(value) + r := s.c.call("sqlite3_bind_text64", + uint64(s.handle), uint64(param), + uint64(ptr), uint64(len(value)), + uint64(s.c.freer), _UTF8) + return s.c.error(r) +} + +// BindBlob binds a []byte to the prepared statement. +// The leftmost SQL parameter has an index of 1. +// Binding a nil slice is the same as calling [Stmt.BindNull]. +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindBlob(param int, value []byte) error { + if len(value) > _MAX_LENGTH { + return TOOBIG + } + ptr := s.c.newBytes(value) + r := s.c.call("sqlite3_bind_blob64", + uint64(s.handle), uint64(param), + uint64(ptr), uint64(len(value)), + uint64(s.c.freer)) + return s.c.error(r) +} + +// BindZeroBlob binds a zero-filled, length n BLOB to the prepared statement. +// The leftmost SQL parameter has an index of 1. 
+// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindZeroBlob(param int, n int64) error { + r := s.c.call("sqlite3_bind_zeroblob64", + uint64(s.handle), uint64(param), uint64(n)) + return s.c.error(r) +} + +// BindNull binds a NULL to the prepared statement. +// The leftmost SQL parameter has an index of 1. +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindNull(param int) error { + r := s.c.call("sqlite3_bind_null", + uint64(s.handle), uint64(param)) + return s.c.error(r) +} + +// BindTime binds a [time.Time] to the prepared statement. +// The leftmost SQL parameter has an index of 1. +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindTime(param int, value time.Time, format TimeFormat) error { + if format == TimeFormatDefault { + return s.bindRFC3339Nano(param, value) + } + switch v := format.Encode(value).(type) { + case string: + s.BindText(param, v) + case int64: + s.BindInt64(param, v) + case float64: + s.BindFloat(param, v) + default: + panic(util.AssertErr()) + } + return nil +} + +func (s *Stmt) bindRFC3339Nano(param int, value time.Time) error { + const maxlen = uint64(len(time.RFC3339Nano)) + 5 + + ptr := s.c.new(maxlen) + buf := util.View(s.c.mod, ptr, maxlen) + buf = value.AppendFormat(buf[:0], time.RFC3339Nano) + + r := s.c.call("sqlite3_bind_text64", + uint64(s.handle), uint64(param), + uint64(ptr), uint64(len(buf)), + uint64(s.c.freer), _UTF8) + return s.c.error(r) +} + +// BindPointer binds a NULL to the prepared statement, just like [Stmt.BindNull], +// but it also associates ptr with that NULL value such that it can be retrieved +// within an application-defined SQL function using [Value.Pointer]. +// The leftmost SQL parameter has an index of 1. 
+// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindPointer(param int, ptr any) error { + valPtr := util.AddHandle(s.c.ctx, ptr) + r := s.c.call("sqlite3_bind_pointer_go", + uint64(s.handle), uint64(param), uint64(valPtr)) + return s.c.error(r) +} + +// BindJSON binds the JSON encoding of value to the prepared statement. +// The leftmost SQL parameter has an index of 1. +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindJSON(param int, value any) error { + data, err := json.Marshal(value) + if err != nil { + return err + } + return s.BindRawText(param, data) +} + +// BindValue binds a copy of value to the prepared statement. +// The leftmost SQL parameter has an index of 1. +// +// https://sqlite.org/c3ref/bind_blob.html +func (s *Stmt) BindValue(param int, value Value) error { + if value.c != s.c { + return MISUSE + } + r := s.c.call("sqlite3_bind_value", + uint64(s.handle), uint64(param), uint64(value.handle)) + return s.c.error(r) +} + +// ColumnCount returns the number of columns in a result set. +// +// https://sqlite.org/c3ref/column_count.html +func (s *Stmt) ColumnCount() int { + r := s.c.call("sqlite3_column_count", + uint64(s.handle)) + return int(int32(r)) +} + +// ColumnName returns the name of the result column. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_name.html +func (s *Stmt) ColumnName(col int) string { + r := s.c.call("sqlite3_column_name", + uint64(s.handle), uint64(col)) + if r == 0 { + panic(util.OOMErr) + } + return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) +} + +// ColumnType returns the initial [Datatype] of the result column. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnType(col int) Datatype { + r := s.c.call("sqlite3_column_type", + uint64(s.handle), uint64(col)) + return Datatype(r) +} + +// ColumnDeclType returns the declared datatype of the result column. 
+// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_decltype.html +func (s *Stmt) ColumnDeclType(col int) string { + r := s.c.call("sqlite3_column_decltype", + uint64(s.handle), uint64(col)) + if r == 0 { + return "" + } + return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) +} + +// ColumnDatabaseName returns the name of the database +// that is the origin of a particular result column. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_database_name.html +func (s *Stmt) ColumnDatabaseName(col int) string { + r := s.c.call("sqlite3_column_database_name", + uint64(s.handle), uint64(col)) + if r == 0 { + return "" + } + return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) +} + +// ColumnTableName returns the name of the table +// that is the origin of a particular result column. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_database_name.html +func (s *Stmt) ColumnTableName(col int) string { + r := s.c.call("sqlite3_column_table_name", + uint64(s.handle), uint64(col)) + if r == 0 { + return "" + } + return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) +} + +// ColumnOriginName returns the name of the table column +// that is the origin of a particular result column. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_database_name.html +func (s *Stmt) ColumnOriginName(col int) string { + r := s.c.call("sqlite3_column_origin_name", + uint64(s.handle), uint64(col)) + if r == 0 { + return "" + } + return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) +} + +// ColumnBool returns the value of the result column as a bool. +// The leftmost column of the result set has the index 0. +// SQLite does not have a separate boolean storage class. +// Instead, boolean values are retrieved as integers, +// with 0 converted to false and any other value to true. 
+// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnBool(col int) bool { + return s.ColumnInt64(col) != 0 +} + +// ColumnInt returns the value of the result column as an int. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnInt(col int) int { + return int(s.ColumnInt64(col)) +} + +// ColumnInt64 returns the value of the result column as an int64. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnInt64(col int) int64 { + r := s.c.call("sqlite3_column_int64", + uint64(s.handle), uint64(col)) + return int64(r) +} + +// ColumnFloat returns the value of the result column as a float64. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnFloat(col int) float64 { + r := s.c.call("sqlite3_column_double", + uint64(s.handle), uint64(col)) + return math.Float64frombits(r) +} + +// ColumnTime returns the value of the result column as a [time.Time]. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnTime(col int, format TimeFormat) time.Time { + var v any + switch s.ColumnType(col) { + case INTEGER: + v = s.ColumnInt64(col) + case FLOAT: + v = s.ColumnFloat(col) + case TEXT, BLOB: + v = s.ColumnText(col) + case NULL: + return time.Time{} + default: + panic(util.AssertErr()) + } + t, err := format.Decode(v) + if err != nil { + s.err = err + } + return t +} + +// ColumnText returns the value of the result column as a string. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnText(col int) string { + return string(s.ColumnRawText(col)) +} + +// ColumnBlob appends to buf and returns +// the value of the result column as a []byte. 
+// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnBlob(col int, buf []byte) []byte { + return append(buf, s.ColumnRawBlob(col)...) +} + +// ColumnRawText returns the value of the result column as a []byte. +// The []byte is owned by SQLite and may be invalidated by +// subsequent calls to [Stmt] methods. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnRawText(col int) []byte { + r := s.c.call("sqlite3_column_text", + uint64(s.handle), uint64(col)) + return s.columnRawBytes(col, uint32(r)) +} + +// ColumnRawBlob returns the value of the result column as a []byte. +// The []byte is owned by SQLite and may be invalidated by +// subsequent calls to [Stmt] methods. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnRawBlob(col int) []byte { + r := s.c.call("sqlite3_column_blob", + uint64(s.handle), uint64(col)) + return s.columnRawBytes(col, uint32(r)) +} + +func (s *Stmt) columnRawBytes(col int, ptr uint32) []byte { + if ptr == 0 { + r := s.c.call("sqlite3_errcode", uint64(s.c.handle)) + s.err = s.c.error(r) + return nil + } + + r := s.c.call("sqlite3_column_bytes", + uint64(s.handle), uint64(col)) + return util.View(s.c.mod, ptr, r) +} + +// ColumnJSON parses the JSON-encoded value of the result column +// and stores it in the value pointed to by ptr. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnJSON(col int, ptr any) error { + var data []byte + switch s.ColumnType(col) { + case NULL: + data = append(data, "null"...) 
+ case TEXT: + data = s.ColumnRawText(col) + case BLOB: + data = s.ColumnRawBlob(col) + case INTEGER: + data = strconv.AppendInt(nil, s.ColumnInt64(col), 10) + case FLOAT: + data = strconv.AppendFloat(nil, s.ColumnFloat(col), 'g', -1, 64) + default: + panic(util.AssertErr()) + } + return json.Unmarshal(data, ptr) +} + +// ColumnValue returns the unprotected value of the result column. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_blob.html +func (s *Stmt) ColumnValue(col int) Value { + r := s.c.call("sqlite3_column_value", + uint64(s.handle), uint64(col)) + return Value{ + c: s.c, + unprot: true, + handle: uint32(r), + } +} + +// Columns populates result columns into the provided slice. +// The slice must have [Stmt.ColumnCount] length. +// +// [INTEGER] columns will be retrieved as int64 values, +// [FLOAT] as float64, [NULL] as nil, +// [TEXT] as string, and [BLOB] as []byte. +// Any []byte are owned by SQLite and may be invalidated by +// subsequent calls to [Stmt] methods. 
+func (s *Stmt) Columns(dest []any) error { + defer s.c.arena.mark()() + count := uint64(len(dest)) + typePtr := s.c.arena.new(count) + dataPtr := s.c.arena.new(8 * count) + + r := s.c.call("sqlite3_columns_go", + uint64(s.handle), count, uint64(typePtr), uint64(dataPtr)) + if err := s.c.error(r); err != nil { + return err + } + + types := util.View(s.c.mod, typePtr, count) + for i := range dest { + switch types[i] { + case byte(INTEGER): + dest[i] = int64(util.ReadUint64(s.c.mod, dataPtr+8*uint32(i))) + continue + case byte(FLOAT): + dest[i] = util.ReadFloat64(s.c.mod, dataPtr+8*uint32(i)) + continue + case byte(NULL): + dest[i] = nil + continue + } + ptr := util.ReadUint32(s.c.mod, dataPtr+8*uint32(i)+0) + len := util.ReadUint32(s.c.mod, dataPtr+8*uint32(i)+4) + buf := util.View(s.c.mod, ptr, uint64(len)) + if types[i] == byte(TEXT) { + dest[i] = string(buf) + } else { + dest[i] = buf + } + } + return nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/time.go b/vendor/github.com/ncruces/go-sqlite3/time.go new file mode 100644 index 000000000..0164a307b --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/time.go @@ -0,0 +1,354 @@ +package sqlite3 + +import ( + "math" + "strconv" + "strings" + "time" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/ncruces/julianday" +) + +// TimeFormat specifies how to encode/decode time values. +// +// See the documentation for the [TimeFormatDefault] constant +// for formats recognized by SQLite. +// +// https://sqlite.org/lang_datefunc.html +type TimeFormat string + +// TimeFormats recognized by SQLite to encode/decode time values. 
+// +// https://sqlite.org/lang_datefunc.html#time_values +const ( + TimeFormatDefault TimeFormat = "" // time.RFC3339Nano + + // Text formats + TimeFormat1 TimeFormat = "2006-01-02" + TimeFormat2 TimeFormat = "2006-01-02 15:04" + TimeFormat3 TimeFormat = "2006-01-02 15:04:05" + TimeFormat4 TimeFormat = "2006-01-02 15:04:05.000" + TimeFormat5 TimeFormat = "2006-01-02T15:04" + TimeFormat6 TimeFormat = "2006-01-02T15:04:05" + TimeFormat7 TimeFormat = "2006-01-02T15:04:05.000" + TimeFormat8 TimeFormat = "15:04" + TimeFormat9 TimeFormat = "15:04:05" + TimeFormat10 TimeFormat = "15:04:05.000" + + TimeFormat2TZ = TimeFormat2 + "Z07:00" + TimeFormat3TZ = TimeFormat3 + "Z07:00" + TimeFormat4TZ = TimeFormat4 + "Z07:00" + TimeFormat5TZ = TimeFormat5 + "Z07:00" + TimeFormat6TZ = TimeFormat6 + "Z07:00" + TimeFormat7TZ = TimeFormat7 + "Z07:00" + TimeFormat8TZ = TimeFormat8 + "Z07:00" + TimeFormat9TZ = TimeFormat9 + "Z07:00" + TimeFormat10TZ = TimeFormat10 + "Z07:00" + + // Numeric formats + TimeFormatJulianDay TimeFormat = "julianday" + TimeFormatUnix TimeFormat = "unixepoch" + TimeFormatUnixFrac TimeFormat = "unixepoch_frac" + TimeFormatUnixMilli TimeFormat = "unixepoch_milli" // not an SQLite format + TimeFormatUnixMicro TimeFormat = "unixepoch_micro" // not an SQLite format + TimeFormatUnixNano TimeFormat = "unixepoch_nano" // not an SQLite format + + // Auto + TimeFormatAuto TimeFormat = "auto" +) + +// Encode encodes a time value using this format. +// +// [TimeFormatDefault] and [TimeFormatAuto] encode using [time.RFC3339Nano], +// with nanosecond accuracy, and preserving any timezone offset. +// +// This is the format used by the [database/sql] driver: +// [database/sql.Row.Scan] will decode as [time.Time] +// values encoded with [time.RFC3339Nano]. +// +// Time values encoded with [time.RFC3339Nano] cannot be sorted as strings +// to produce a time-ordered sequence. 
+// +// Assuming that the time zones of the time values are the same (e.g., all in UTC), +// and expressed using the same string (e.g., all "Z" or all "+00:00"), +// use the TIME [collating sequence] to produce a time-ordered sequence. +// +// Otherwise, use [TimeFormat7] for time-ordered encoding. +// +// Formats [TimeFormat1] through [TimeFormat10] +// convert time values to UTC before encoding. +// +// Returns a string for the text formats, +// a float64 for [TimeFormatJulianDay] and [TimeFormatUnixFrac], +// or an int64 for the other numeric formats. +// +// https://sqlite.org/lang_datefunc.html +// +// [collating sequence]: https://sqlite.org/datatype3.html#collating_sequences +func (f TimeFormat) Encode(t time.Time) any { + switch f { + // Numeric formats + case TimeFormatJulianDay: + return julianday.Float(t) + case TimeFormatUnix: + return t.Unix() + case TimeFormatUnixFrac: + return float64(t.Unix()) + float64(t.Nanosecond())*1e-9 + case TimeFormatUnixMilli: + return t.UnixMilli() + case TimeFormatUnixMicro: + return t.UnixMicro() + case TimeFormatUnixNano: + return t.UnixNano() + // Special formats. + case TimeFormatDefault, TimeFormatAuto: + f = time.RFC3339Nano + // SQLite assumes UTC if unspecified. + case + TimeFormat1, TimeFormat2, + TimeFormat3, TimeFormat4, + TimeFormat5, TimeFormat6, + TimeFormat7, TimeFormat8, + TimeFormat9, TimeFormat10: + t = t.UTC() + } + return t.Format(string(f)) +} + +// Decode decodes a time value using this format. +// +// The time value can be a string, an int64, or a float64. +// +// Formats [TimeFormat8] through [TimeFormat10] +// (and [TimeFormat8TZ] through [TimeFormat10TZ]) +// assume a date of 2000-01-01. +// +// The timezone indicator and fractional seconds are always optional +// for formats [TimeFormat2] through [TimeFormat10] +// (and [TimeFormat2TZ] through [TimeFormat10TZ]). +// +// [TimeFormatAuto] implements (and extends) the SQLite auto modifier. 
+// Julian day numbers are safe to use for historical dates, +// from 4712BC through 9999AD. +// Unix timestamps (expressed in seconds, milliseconds, microseconds, or nanoseconds) +// are safe to use for current events, from at least 1980 through at least 2260. +// Unix timestamps before 1980 and after 9999 may be misinterpreted as julian day numbers, +// or have the wrong time unit. +// +// https://sqlite.org/lang_datefunc.html +func (f TimeFormat) Decode(v any) (time.Time, error) { + switch f { + // Numeric formats. + case TimeFormatJulianDay: + switch v := v.(type) { + case string: + return julianday.Parse(v) + case float64: + return julianday.FloatTime(v), nil + case int64: + return julianday.Time(v, 0), nil + default: + return time.Time{}, util.TimeErr + } + + case TimeFormatUnix, TimeFormatUnixFrac: + if s, ok := v.(string); ok { + f, err := strconv.ParseFloat(s, 64) + if err != nil { + return time.Time{}, err + } + v = f + } + switch v := v.(type) { + case float64: + sec, frac := math.Modf(v) + nsec := math.Floor(frac * 1e9) + return time.Unix(int64(sec), int64(nsec)).UTC(), nil + case int64: + return time.Unix(v, 0).UTC(), nil + default: + return time.Time{}, util.TimeErr + } + + case TimeFormatUnixMilli: + if s, ok := v.(string); ok { + i, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return time.Time{}, err + } + v = i + } + switch v := v.(type) { + case float64: + return time.UnixMilli(int64(math.Floor(v))).UTC(), nil + case int64: + return time.UnixMilli(v).UTC(), nil + default: + return time.Time{}, util.TimeErr + } + + case TimeFormatUnixMicro: + if s, ok := v.(string); ok { + i, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return time.Time{}, err + } + v = i + } + switch v := v.(type) { + case float64: + return time.UnixMicro(int64(math.Floor(v))).UTC(), nil + case int64: + return time.UnixMicro(v).UTC(), nil + default: + return time.Time{}, util.TimeErr + } + + case TimeFormatUnixNano: + if s, ok := v.(string); ok { + i, err := 
strconv.ParseInt(s, 10, 64) + if err != nil { + return time.Time{}, util.TimeErr + } + v = i + } + switch v := v.(type) { + case float64: + return time.Unix(0, int64(math.Floor(v))).UTC(), nil + case int64: + return time.Unix(0, v).UTC(), nil + default: + return time.Time{}, util.TimeErr + } + + // Special formats. + case TimeFormatAuto: + switch s := v.(type) { + case string: + i, err := strconv.ParseInt(s, 10, 64) + if err == nil { + v = i + break + } + f, err := strconv.ParseFloat(s, 64) + if err == nil { + v = f + break + } + + dates := []TimeFormat{ + TimeFormat9, TimeFormat8, + TimeFormat6, TimeFormat5, + TimeFormat3, TimeFormat2, TimeFormat1, + } + for _, f := range dates { + t, err := f.Decode(s) + if err == nil { + return t, nil + } + } + } + switch v := v.(type) { + case float64: + if 0 <= v && v < 5373484.5 { + return TimeFormatJulianDay.Decode(v) + } + if v < 253402300800 { + return TimeFormatUnixFrac.Decode(v) + } + if v < 253402300800_000 { + return TimeFormatUnixMilli.Decode(v) + } + if v < 253402300800_000000 { + return TimeFormatUnixMicro.Decode(v) + } + return TimeFormatUnixNano.Decode(v) + case int64: + if 0 <= v && v < 5373485 { + return TimeFormatJulianDay.Decode(v) + } + if v < 253402300800 { + return TimeFormatUnixFrac.Decode(v) + } + if v < 253402300800_000 { + return TimeFormatUnixMilli.Decode(v) + } + if v < 253402300800_000000 { + return TimeFormatUnixMicro.Decode(v) + } + return TimeFormatUnixNano.Decode(v) + default: + return time.Time{}, util.TimeErr + } + + case + TimeFormat2, TimeFormat2TZ, + TimeFormat3, TimeFormat3TZ, + TimeFormat4, TimeFormat4TZ, + TimeFormat5, TimeFormat5TZ, + TimeFormat6, TimeFormat6TZ, + TimeFormat7, TimeFormat7TZ: + s, ok := v.(string) + if !ok { + return time.Time{}, util.TimeErr + } + return f.parseRelaxed(s) + + case + TimeFormat8, TimeFormat8TZ, + TimeFormat9, TimeFormat9TZ, + TimeFormat10, TimeFormat10TZ: + s, ok := v.(string) + if !ok { + return time.Time{}, util.TimeErr + } + t, err := f.parseRelaxed(s) 
+ if err != nil { + return time.Time{}, err + } + return t.AddDate(2000, 0, 0), nil + + default: + s, ok := v.(string) + if !ok { + return time.Time{}, util.TimeErr + } + if f == "" { + f = time.RFC3339Nano + } + return time.Parse(string(f), s) + } +} + +func (f TimeFormat) parseRelaxed(s string) (time.Time, error) { + fs := string(f) + fs = strings.TrimSuffix(fs, "Z07:00") + fs = strings.TrimSuffix(fs, ".000") + t, err := time.Parse(fs+"Z07:00", s) + if err != nil { + return time.Parse(fs, s) + } + return t, nil +} + +// Scanner returns a [database/sql.Scanner] that can be used as an argument to +// [database/sql.Row.Scan] and similar methods to +// decode a time value into dest using this format. +func (f TimeFormat) Scanner(dest *time.Time) interface{ Scan(any) error } { + return timeScanner{dest, f} +} + +type timeScanner struct { + *time.Time + TimeFormat +} + +func (s timeScanner) Scan(src any) error { + var ok bool + var err error + if *s.Time, ok = src.(time.Time); !ok { + *s.Time, err = s.Decode(src) + } + return err +} diff --git a/vendor/github.com/ncruces/go-sqlite3/txn.go b/vendor/github.com/ncruces/go-sqlite3/txn.go new file mode 100644 index 000000000..0efbc2d80 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/txn.go @@ -0,0 +1,294 @@ +package sqlite3 + +import ( + "context" + "errors" + "fmt" + "math/rand" + "runtime" + "strconv" + "strings" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/tetratelabs/wazero/api" +) + +// Txn is an in-progress database transaction. +// +// https://sqlite.org/lang_transaction.html +type Txn struct { + c *Conn +} + +// Begin starts a deferred transaction. +// +// https://sqlite.org/lang_transaction.html +func (c *Conn) Begin() Txn { + // BEGIN even if interrupted. + err := c.txnExecInterrupted(`BEGIN DEFERRED`) + if err != nil { + panic(err) + } + return Txn{c} +} + +// BeginImmediate starts an immediate transaction. 
+// +// https://sqlite.org/lang_transaction.html +func (c *Conn) BeginImmediate() (Txn, error) { + err := c.Exec(`BEGIN IMMEDIATE`) + if err != nil { + return Txn{}, err + } + return Txn{c}, nil +} + +// BeginExclusive starts an exclusive transaction. +// +// https://sqlite.org/lang_transaction.html +func (c *Conn) BeginExclusive() (Txn, error) { + err := c.Exec(`BEGIN EXCLUSIVE`) + if err != nil { + return Txn{}, err + } + return Txn{c}, nil +} + +// End calls either [Txn.Commit] or [Txn.Rollback] +// depending on whether *error points to a nil or non-nil error. +// +// This is meant to be deferred: +// +// func doWork(db *sqlite3.Conn) (err error) { +// tx := db.Begin() +// defer tx.End(&err) +// +// // ... do work in the transaction +// } +// +// https://sqlite.org/lang_transaction.html +func (tx Txn) End(errp *error) { + recovered := recover() + if recovered != nil { + defer panic(recovered) + } + + if *errp == nil && recovered == nil { + // Success path. + if tx.c.GetAutocommit() { // There is nothing to commit. + return + } + *errp = tx.Commit() + if *errp == nil { + return + } + // Fall through to the error path. + } + + // Error path. + if tx.c.GetAutocommit() { // There is nothing to rollback. + return + } + err := tx.Rollback() + if err != nil { + panic(err) + } +} + +// Commit commits the transaction. +// +// https://sqlite.org/lang_transaction.html +func (tx Txn) Commit() error { + return tx.c.Exec(`COMMIT`) +} + +// Rollback rolls back the transaction, +// even if the connection has been interrupted. +// +// https://sqlite.org/lang_transaction.html +func (tx Txn) Rollback() error { + return tx.c.txnExecInterrupted(`ROLLBACK`) +} + +// Savepoint is a marker within a transaction +// that allows for partial rollback. +// +// https://sqlite.org/lang_savepoint.html +type Savepoint struct { + c *Conn + name string +} + +// Savepoint establishes a new transaction savepoint. 
+// +// https://sqlite.org/lang_savepoint.html +func (c *Conn) Savepoint() Savepoint { + // Names can be reused; this makes catching bugs more likely. + name := saveptName() + "_" + strconv.Itoa(int(rand.Int31())) + + err := c.txnExecInterrupted(fmt.Sprintf("SAVEPOINT %q;", name)) + if err != nil { + panic(err) + } + return Savepoint{c: c, name: name} +} + +func saveptName() (name string) { + defer func() { + if name == "" { + name = "sqlite3.Savepoint" + } + }() + + var pc [8]uintptr + n := runtime.Callers(3, pc[:]) + if n <= 0 { + return "" + } + frames := runtime.CallersFrames(pc[:n]) + frame, more := frames.Next() + for more && (strings.HasPrefix(frame.Function, "database/sql.") || + strings.HasPrefix(frame.Function, "github.com/ncruces/go-sqlite3/driver.")) { + frame, more = frames.Next() + } + return frame.Function +} + +// Release releases the savepoint rolling back any changes +// if *error points to a non-nil error. +// +// This is meant to be deferred: +// +// func doWork(db *sqlite3.Conn) (err error) { +// savept := db.Savepoint() +// defer savept.Release(&err) +// +// // ... do work in the transaction +// } +func (s Savepoint) Release(errp *error) { + recovered := recover() + if recovered != nil { + defer panic(recovered) + } + + if *errp == nil && recovered == nil { + // Success path. + if s.c.GetAutocommit() { // There is nothing to commit. + return + } + *errp = s.c.Exec(fmt.Sprintf("RELEASE %q;", s.name)) + if *errp == nil { + return + } + // Fall through to the error path. + } + + // Error path. + if s.c.GetAutocommit() { // There is nothing to rollback. + return + } + // ROLLBACK and RELEASE even if interrupted. + err := s.c.txnExecInterrupted(fmt.Sprintf(` + ROLLBACK TO %[1]q; + RELEASE %[1]q; + `, s.name)) + if err != nil { + panic(err) + } +} + +// Rollback rolls the transaction back to the savepoint, +// even if the connection has been interrupted. +// Rollback does not release the savepoint. 
+// +// https://sqlite.org/lang_transaction.html +func (s Savepoint) Rollback() error { + // ROLLBACK even if interrupted. + return s.c.txnExecInterrupted(fmt.Sprintf("ROLLBACK TO %q;", s.name)) +} + +func (c *Conn) txnExecInterrupted(sql string) error { + err := c.Exec(sql) + if errors.Is(err, INTERRUPT) { + old := c.SetInterrupt(context.Background()) + defer c.SetInterrupt(old) + err = c.Exec(sql) + } + return err +} + +// TxnState starts a deferred transaction. +// +// https://sqlite.org/c3ref/txn_state.html +func (c *Conn) TxnState(schema string) TxnState { + var ptr uint32 + if schema != "" { + defer c.arena.mark()() + ptr = c.arena.string(schema) + } + r := c.call("sqlite3_txn_state", uint64(c.handle), uint64(ptr)) + return TxnState(r) +} + +// CommitHook registers a callback function to be invoked +// whenever a transaction is committed. +// Return true to allow the commit operation to continue normally. +// +// https://sqlite.org/c3ref/commit_hook.html +func (c *Conn) CommitHook(cb func() (ok bool)) { + var enable uint64 + if cb != nil { + enable = 1 + } + c.call("sqlite3_commit_hook_go", uint64(c.handle), enable) + c.commit = cb +} + +// RollbackHook registers a callback function to be invoked +// whenever a transaction is rolled back. +// +// https://sqlite.org/c3ref/commit_hook.html +func (c *Conn) RollbackHook(cb func()) { + var enable uint64 + if cb != nil { + enable = 1 + } + c.call("sqlite3_rollback_hook_go", uint64(c.handle), enable) + c.rollback = cb +} + +// UpdateHook registers a callback function to be invoked +// whenever a row is updated, inserted or deleted in a rowid table. 
+// +// https://sqlite.org/c3ref/update_hook.html +func (c *Conn) UpdateHook(cb func(action AuthorizerActionCode, schema, table string, rowid int64)) { + var enable uint64 + if cb != nil { + enable = 1 + } + c.call("sqlite3_update_hook_go", uint64(c.handle), enable) + c.update = cb +} + +func commitCallback(ctx context.Context, mod api.Module, pDB uint32) (rollback uint32) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.commit != nil { + if !c.commit() { + rollback = 1 + } + } + return rollback +} + +func rollbackCallback(ctx context.Context, mod api.Module, pDB uint32) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.rollback != nil { + c.rollback() + } +} + +func updateCallback(ctx context.Context, mod api.Module, pDB uint32, action AuthorizerActionCode, zSchema, zTabName uint32, rowid uint64) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.update != nil { + schema := util.ReadString(mod, zSchema, _MAX_NAME) + table := util.ReadString(mod, zTabName, _MAX_NAME) + c.update(action, schema, table, int64(rowid)) + } +} diff --git a/vendor/github.com/ncruces/go-sqlite3/util/osutil/open.go b/vendor/github.com/ncruces/go-sqlite3/util/osutil/open.go new file mode 100644 index 000000000..0242ad032 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/util/osutil/open.go @@ -0,0 +1,16 @@ +//go:build !windows + +package osutil + +import ( + "io/fs" + "os" +) + +// OpenFile behaves the same as [os.OpenFile], +// except on Windows it sets [syscall.FILE_SHARE_DELETE]. 
+// +// See: https://go.dev/issue/32088#issuecomment-502850674 +func OpenFile(name string, flag int, perm fs.FileMode) (*os.File, error) { + return os.OpenFile(name, flag, perm) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/util/osutil/open_windows.go b/vendor/github.com/ncruces/go-sqlite3/util/osutil/open_windows.go new file mode 100644 index 000000000..277f58bc3 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/util/osutil/open_windows.go @@ -0,0 +1,112 @@ +package osutil + +import ( + "io/fs" + "os" + . "syscall" + "unsafe" +) + +// OpenFile behaves the same as [os.OpenFile], +// except on Windows it sets [syscall.FILE_SHARE_DELETE]. +// +// See: https://go.dev/issue/32088#issuecomment-502850674 +func OpenFile(name string, flag int, perm fs.FileMode) (*os.File, error) { + if name == "" { + return nil, &os.PathError{Op: "open", Path: name, Err: ENOENT} + } + r, e := syscallOpen(name, flag, uint32(perm.Perm())) + if e != nil { + return nil, &os.PathError{Op: "open", Path: name, Err: e} + } + return os.NewFile(uintptr(r), name), nil +} + +// syscallOpen is a copy of [syscall.Open] +// that uses [syscall.FILE_SHARE_DELETE]. 
+// +// https://go.dev/src/syscall/syscall_windows.go +func syscallOpen(path string, mode int, perm uint32) (fd Handle, err error) { + if len(path) == 0 { + return InvalidHandle, ERROR_FILE_NOT_FOUND + } + pathp, err := UTF16PtrFromString(path) + if err != nil { + return InvalidHandle, err + } + var access uint32 + switch mode & (O_RDONLY | O_WRONLY | O_RDWR) { + case O_RDONLY: + access = GENERIC_READ + case O_WRONLY: + access = GENERIC_WRITE + case O_RDWR: + access = GENERIC_READ | GENERIC_WRITE + } + if mode&O_CREAT != 0 { + access |= GENERIC_WRITE + } + if mode&O_APPEND != 0 { + access &^= GENERIC_WRITE + access |= FILE_APPEND_DATA + } + sharemode := uint32(FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE) + var sa *SecurityAttributes + if mode&O_CLOEXEC == 0 { + sa = makeInheritSa() + } + var createmode uint32 + switch { + case mode&(O_CREAT|O_EXCL) == (O_CREAT | O_EXCL): + createmode = CREATE_NEW + case mode&(O_CREAT|O_TRUNC) == (O_CREAT | O_TRUNC): + createmode = CREATE_ALWAYS + case mode&O_CREAT == O_CREAT: + createmode = OPEN_ALWAYS + case mode&O_TRUNC == O_TRUNC: + createmode = TRUNCATE_EXISTING + default: + createmode = OPEN_EXISTING + } + var attrs uint32 = FILE_ATTRIBUTE_NORMAL + if perm&S_IWRITE == 0 { + attrs = FILE_ATTRIBUTE_READONLY + if createmode == CREATE_ALWAYS { + const _ERROR_BAD_NETPATH = Errno(53) + // We have been asked to create a read-only file. + // If the file already exists, the semantics of + // the Unix open system call is to preserve the + // existing permissions. If we pass CREATE_ALWAYS + // and FILE_ATTRIBUTE_READONLY to CreateFile, + // and the file already exists, CreateFile will + // change the file permissions. + // Avoid that to preserve the Unix semantics. + h, e := CreateFile(pathp, access, sharemode, sa, TRUNCATE_EXISTING, FILE_ATTRIBUTE_NORMAL, 0) + switch e { + case ERROR_FILE_NOT_FOUND, _ERROR_BAD_NETPATH, ERROR_PATH_NOT_FOUND: + // File does not exist. 
These are the same + // errors as Errno.Is checks for ErrNotExist. + // Carry on to create the file. + default: + // Success or some different error. + return h, e + } + } + } + if createmode == OPEN_EXISTING && access == GENERIC_READ { + // Necessary for opening directory handles. + attrs |= FILE_FLAG_BACKUP_SEMANTICS + } + if mode&O_SYNC != 0 { + const _FILE_FLAG_WRITE_THROUGH = 0x80000000 + attrs |= _FILE_FLAG_WRITE_THROUGH + } + return CreateFile(pathp, access, sharemode, sa, createmode, attrs, 0) +} + +func makeInheritSa() *SecurityAttributes { + var sa SecurityAttributes + sa.Length = uint32(unsafe.Sizeof(sa)) + sa.InheritHandle = 1 + return &sa +} diff --git a/vendor/github.com/ncruces/go-sqlite3/util/osutil/osfs.go b/vendor/github.com/ncruces/go-sqlite3/util/osutil/osfs.go new file mode 100644 index 000000000..2e1195934 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/util/osutil/osfs.go @@ -0,0 +1,33 @@ +package osutil + +import ( + "io/fs" + "os" +) + +// FS implements [fs.FS], [fs.StatFS], and [fs.ReadFileFS] +// using package [os]. +// +// This filesystem does not respect [fs.ValidPath] rules, +// and fails [testing/fstest.TestFS]! +// +// Still, it can be a useful tool to unify implementations +// that can access either the [os] filesystem or an [fs.FS]. +// It's OK to use this to open files, but you should avoid +// opening directories, resolving paths, or walking the file system. +type FS struct{} + +// Open implements [fs.FS]. +func (FS) Open(name string) (fs.File, error) { + return OpenFile(name, os.O_RDONLY, 0) +} + +// ReadFileFS implements [fs.StatFS]. +func (FS) Stat(name string) (fs.FileInfo, error) { + return os.Stat(name) +} + +// ReadFile implements [fs.ReadFileFS]. 
+func (FS) ReadFile(name string) ([]byte, error) { + return os.ReadFile(name) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/util/osutil/osutil.go b/vendor/github.com/ncruces/go-sqlite3/util/osutil/osutil.go new file mode 100644 index 000000000..7fbd04787 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/util/osutil/osutil.go @@ -0,0 +1,2 @@ +// Package osutil implements operating system utility functions. +package osutil diff --git a/vendor/github.com/ncruces/go-sqlite3/value.go b/vendor/github.com/ncruces/go-sqlite3/value.go new file mode 100644 index 000000000..61d3cbf70 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/value.go @@ -0,0 +1,236 @@ +package sqlite3 + +import ( + "encoding/json" + "math" + "strconv" + "time" + + "github.com/ncruces/go-sqlite3/internal/util" +) + +// Value is any value that can be stored in a database table. +// +// https://sqlite.org/c3ref/value.html +type Value struct { + c *Conn + handle uint32 + unprot bool + copied bool +} + +func (v Value) protected() uint64 { + if v.unprot { + panic(util.ValueErr) + } + return uint64(v.handle) +} + +// Dup makes a copy of the SQL value and returns a pointer to that copy. +// +// https://sqlite.org/c3ref/value_dup.html +func (v Value) Dup() *Value { + r := v.c.call("sqlite3_value_dup", uint64(v.handle)) + return &Value{ + c: v.c, + copied: true, + handle: uint32(r), + } +} + +// Close frees an SQL value previously obtained by [Value.Dup]. +// +// https://sqlite.org/c3ref/value_dup.html +func (dup *Value) Close() error { + if !dup.copied { + panic(util.ValueErr) + } + dup.c.call("sqlite3_value_free", uint64(dup.handle)) + dup.handle = 0 + return nil +} + +// Type returns the initial datatype of the value. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) Type() Datatype { + r := v.c.call("sqlite3_value_type", v.protected()) + return Datatype(r) +} + +// Type returns the numeric datatype of the value. 
+// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) NumericType() Datatype { + r := v.c.call("sqlite3_value_numeric_type", v.protected()) + return Datatype(r) +} + +// Bool returns the value as a bool. +// SQLite does not have a separate boolean storage class. +// Instead, boolean values are retrieved as integers, +// with 0 converted to false and any other value to true. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) Bool() bool { + return v.Int64() != 0 +} + +// Int returns the value as an int. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) Int() int { + return int(v.Int64()) +} + +// Int64 returns the value as an int64. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) Int64() int64 { + r := v.c.call("sqlite3_value_int64", v.protected()) + return int64(r) +} + +// Float returns the value as a float64. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) Float() float64 { + r := v.c.call("sqlite3_value_double", v.protected()) + return math.Float64frombits(r) +} + +// Time returns the value as a [time.Time]. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) Time(format TimeFormat) time.Time { + var a any + switch v.Type() { + case INTEGER: + a = v.Int64() + case FLOAT: + a = v.Float() + case TEXT, BLOB: + a = v.Text() + case NULL: + return time.Time{} + default: + panic(util.AssertErr()) + } + t, _ := format.Decode(a) + return t +} + +// Text returns the value as a string. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) Text() string { + return string(v.RawText()) +} + +// Blob appends to buf and returns +// the value as a []byte. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) Blob(buf []byte) []byte { + return append(buf, v.RawBlob()...) +} + +// RawText returns the value as a []byte. +// The []byte is owned by SQLite and may be invalidated by +// subsequent calls to [Value] methods. 
+// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) RawText() []byte { + r := v.c.call("sqlite3_value_text", v.protected()) + return v.rawBytes(uint32(r)) +} + +// RawBlob returns the value as a []byte. +// The []byte is owned by SQLite and may be invalidated by +// subsequent calls to [Value] methods. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) RawBlob() []byte { + r := v.c.call("sqlite3_value_blob", v.protected()) + return v.rawBytes(uint32(r)) +} + +func (v Value) rawBytes(ptr uint32) []byte { + if ptr == 0 { + return nil + } + + r := v.c.call("sqlite3_value_bytes", v.protected()) + return util.View(v.c.mod, ptr, r) +} + +// Pointer gets the pointer associated with this value, +// or nil if it has no associated pointer. +func (v Value) Pointer() any { + r := v.c.call("sqlite3_value_pointer_go", v.protected()) + return util.GetHandle(v.c.ctx, uint32(r)) +} + +// JSON parses a JSON-encoded value +// and stores the result in the value pointed to by ptr. +func (v Value) JSON(ptr any) error { + var data []byte + switch v.Type() { + case NULL: + data = append(data, "null"...) + case TEXT: + data = v.RawText() + case BLOB: + data = v.RawBlob() + case INTEGER: + data = strconv.AppendInt(nil, v.Int64(), 10) + case FLOAT: + data = strconv.AppendFloat(nil, v.Float(), 'g', -1, 64) + default: + panic(util.AssertErr()) + } + return json.Unmarshal(data, ptr) +} + +// NoChange returns true if and only if the value is unchanged +// in a virtual table update operatiom. +// +// https://sqlite.org/c3ref/value_blob.html +func (v Value) NoChange() bool { + r := v.c.call("sqlite3_value_nochange", v.protected()) + return r != 0 +} + +// InFirst returns the first element +// on the right-hand side of an IN constraint. 
+// +// https://sqlite.org/c3ref/vtab_in_first.html +func (v Value) InFirst() (Value, error) { + defer v.c.arena.mark()() + valPtr := v.c.arena.new(ptrlen) + r := v.c.call("sqlite3_vtab_in_first", uint64(v.handle), uint64(valPtr)) + if err := v.c.error(r); err != nil { + return Value{}, err + } + return Value{ + c: v.c, + handle: util.ReadUint32(v.c.mod, valPtr), + }, nil +} + +// InNext returns the next element +// on the right-hand side of an IN constraint. +// +// https://sqlite.org/c3ref/vtab_in_first.html +func (v Value) InNext() (Value, error) { + defer v.c.arena.mark()() + valPtr := v.c.arena.new(ptrlen) + r := v.c.call("sqlite3_vtab_in_next", uint64(v.handle), uint64(valPtr)) + if err := v.c.error(r); err != nil { + return Value{}, err + } + return Value{ + c: v.c, + handle: util.ReadUint32(v.c.mod, valPtr), + }, nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/README.md b/vendor/github.com/ncruces/go-sqlite3/vfs/README.md new file mode 100644 index 000000000..88059a41b --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/README.md @@ -0,0 +1,86 @@ +# Go SQLite VFS API + +This package implements the SQLite [OS Interface](https://sqlite.org/vfs.html) (aka VFS). + +It replaces the default SQLite VFS with a **pure Go** implementation, +and exposes [interfaces](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs#VFS) +that should allow you to implement your own custom VFSes. + +Since it is a from scratch reimplementation, +there are naturally some ways it deviates from the original. + +The main differences are [file locking](#file-locking) and [WAL mode](#write-ahead-logging) support. + +### File Locking + +POSIX advisory locks, which SQLite uses on Unix, are +[broken by design](https://github.com/sqlite/sqlite/blob/b74eb0/src/os_unix.c#L1073-L1161). + +On Linux and macOS, this module uses +[OFD locks](https://www.gnu.org/software/libc/manual/html_node/Open-File-Description-Locks.html) +to synchronize access to database files. 
+OFD locks are fully compatible with POSIX advisory locks. + +This module can also use +[BSD locks](https://man.freebsd.org/cgi/man.cgi?query=flock&sektion=2), +albeit with reduced concurrency (`BEGIN IMMEDIATE` behaves like `BEGIN EXCLUSIVE`). +On BSD, macOS, and illumos, BSD locks are fully compatible with POSIX advisory locks; +on Linux and z/OS, they are fully functional, but incompatible; +elsewhere, they are very likely broken. +BSD locks are the default on BSD and illumos, +but you can opt into them with the `sqlite3_flock` build tag. + +On Windows, this module uses `LockFileEx` and `UnlockFileEx`, +like SQLite. + +Otherwise, file locking is not supported, and you must use +[`nolock=1`](https://sqlite.org/uri.html#urinolock) +(or [`immutable=1`](https://sqlite.org/uri.html#uriimmutable)) +to open database files. +To use the [`database/sql`](https://pkg.go.dev/database/sql) driver +with `nolock=1` you must disable connection pooling by calling +[`db.SetMaxOpenConns(1)`](https://pkg.go.dev/database/sql#DB.SetMaxOpenConns). + +You can use [`vfs.SupportsFileLocking`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs#SupportsFileLocking) +to check if your build supports file locking. + +### Write-Ahead Logging + +On 64-bit Linux and macOS, this module uses `mmap` to implement +[shared-memory for the WAL-index](https://sqlite.org/wal.html#implementation_of_shared_memory_for_the_wal_index), +like SQLite. + +To allow `mmap` to work, each connection needs to reserve up to 4GB of address space. +To limit the address space each connection reserves, +use [`WithMemoryLimitPages`](../tests/testcfg/testcfg.go). + +Otherwise, [WAL support is limited](https://sqlite.org/wal.html#noshm), +and `EXCLUSIVE` locking mode must be set to create, read, and write WAL databases. 
+To use `EXCLUSIVE` locking mode with the +[`database/sql`](https://pkg.go.dev/database/sql) driver +you must disable connection pooling by calling +[`db.SetMaxOpenConns(1)`](https://pkg.go.dev/database/sql#DB.SetMaxOpenConns). + +You can use [`vfs.SupportsSharedMemory`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs#SupportsSharedMemory) +to check if your build supports shared memory. + +### Batch-Atomic Write + +On 64-bit Linux, this module supports [batch-atomic writes](https://sqlite.org/cgi/src/technote/714) +on the F2FS filesystem. + +### Build Tags + +The VFS can be customized with a few build tags: +- `sqlite3_flock` forces the use of BSD locks; it can be used on z/OS to enable locking, + and elsewhere to test BSD locks. +- `sqlite3_nosys` prevents importing [`x/sys`](https://pkg.go.dev/golang.org/x/sys); + disables locking _and_ shared memory on all platforms. +- `sqlite3_noshm` disables shared memory on all platforms. + +> [!IMPORTANT] +> The default configuration of this package is compatible with +> the standard [Unix and Windows SQLite VFSes](https://sqlite.org/vfs.html#multiple_vfses); +> `sqlite3_flock` is compatible with the [`unix-flock` VFS](https://sqlite.org/compile.html#enable_locking_style). +> If incompatible file locking is used, accessing databases concurrently with _other_ SQLite libraries +> will eventually corrupt data. diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/api.go b/vendor/github.com/ncruces/go-sqlite3/vfs/api.go new file mode 100644 index 000000000..19c22ae8f --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/api.go @@ -0,0 +1,175 @@ +// Package vfs wraps the C SQLite VFS API. +package vfs + +import ( + "context" + "io" + + "github.com/tetratelabs/wazero/api" +) + +// A VFS defines the interface between the SQLite core and the underlying operating system. +// +// Use sqlite3.ErrorCode or sqlite3.ExtendedErrorCode to return specific error codes to SQLite. 
+// +// https://sqlite.org/c3ref/vfs.html +type VFS interface { + Open(name string, flags OpenFlag) (File, OpenFlag, error) + Delete(name string, syncDir bool) error + Access(name string, flags AccessFlag) (bool, error) + FullPathname(name string) (string, error) +} + +// VFSFilename extends VFS with the ability to use Filename +// objects for opening files. +// +// https://sqlite.org/c3ref/filename.html +type VFSFilename interface { + VFS + OpenFilename(name *Filename, flags OpenFlag) (File, OpenFlag, error) +} + +// A File represents an open file in the OS interface layer. +// +// Use sqlite3.ErrorCode or sqlite3.ExtendedErrorCode to return specific error codes to SQLite. +// In particular, sqlite3.BUSY is necessary to correctly implement lock methods. +// +// https://sqlite.org/c3ref/io_methods.html +type File interface { + Close() error + ReadAt(p []byte, off int64) (n int, err error) + WriteAt(p []byte, off int64) (n int, err error) + Truncate(size int64) error + Sync(flags SyncFlag) error + Size() (int64, error) + Lock(lock LockLevel) error + Unlock(lock LockLevel) error + CheckReservedLock() (bool, error) + SectorSize() int + DeviceCharacteristics() DeviceCharacteristic +} + +// FileLockState extends File to implement the +// SQLITE_FCNTL_LOCKSTATE file control opcode. +// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntllockstate +type FileLockState interface { + File + LockState() LockLevel +} + +// FileChunkSize extends File to implement the +// SQLITE_FCNTL_CHUNK_SIZE file control opcode. +// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlchunksize +type FileChunkSize interface { + File + ChunkSize(size int) +} + +// FileSizeHint extends File to implement the +// SQLITE_FCNTL_SIZE_HINT file control opcode. 
+// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlsizehint +type FileSizeHint interface { + File + SizeHint(size int64) error +} + +// FileHasMoved extends File to implement the +// SQLITE_FCNTL_HAS_MOVED file control opcode. +// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlhasmoved +type FileHasMoved interface { + File + HasMoved() (bool, error) +} + +// FileOverwrite extends File to implement the +// SQLITE_FCNTL_OVERWRITE file control opcode. +// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntloverwrite +type FileOverwrite interface { + File + Overwrite() error +} + +// FilePersistentWAL extends File to implement the +// SQLITE_FCNTL_PERSIST_WAL file control opcode. +// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlpersistwal +type FilePersistentWAL interface { + File + PersistentWAL() bool + SetPersistentWAL(bool) +} + +// FilePowersafeOverwrite extends File to implement the +// SQLITE_FCNTL_POWERSAFE_OVERWRITE file control opcode. +// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlpowersafeoverwrite +type FilePowersafeOverwrite interface { + File + PowersafeOverwrite() bool + SetPowersafeOverwrite(bool) +} + +// FileCommitPhaseTwo extends File to implement the +// SQLITE_FCNTL_COMMIT_PHASETWO file control opcode. +// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlcommitphasetwo +type FileCommitPhaseTwo interface { + File + CommitPhaseTwo() error +} + +// FileBatchAtomicWrite extends File to implement the +// SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE +// and SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE file control opcodes. 
+// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlbeginatomicwrite +type FileBatchAtomicWrite interface { + File + BeginAtomicWrite() error + CommitAtomicWrite() error + RollbackAtomicWrite() error +} + +// FilePragma extends File to implement the +// SQLITE_FCNTL_PRAGMA file control opcode. +// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlpragma +type FilePragma interface { + File + Pragma(name, value string) (string, error) +} + +// FileCheckpoint extends File to implement the +// SQLITE_FCNTL_CKPT_START and SQLITE_FCNTL_CKPT_DONE +// file control opcodes. +// +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html#sqlitefcntlckptstart +type FileCheckpoint interface { + File + CheckpointDone() error + CheckpointStart() error +} + +// FileSharedMemory extends File to possibly implement +// shared-memory for the WAL-index. +// The same shared-memory instance must be returned +// for the entire life of the file. +// It's OK for SharedMemory to return nil. +type FileSharedMemory interface { + File + SharedMemory() SharedMemory +} + +// SharedMemory is a shared-memory WAL-index implementation. +// Use [NewSharedMemory] to create a shared-memory. +type SharedMemory interface { + shmMap(context.Context, api.Module, int32, int32, bool) (uint32, error) + shmLock(int32, int32, _ShmFlag) error + shmUnmap(bool) + io.Closer +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/const.go b/vendor/github.com/ncruces/go-sqlite3/vfs/const.go new file mode 100644 index 000000000..7f409f35f --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/const.go @@ -0,0 +1,234 @@ +package vfs + +import "github.com/ncruces/go-sqlite3/internal/util" + +const ( + _MAX_NAME = 1e6 // Self-imposed limit for most NUL terminated strings. 
+ _MAX_SQL_LENGTH = 1e9 + _MAX_PATHNAME = 1024 + _DEFAULT_SECTOR_SIZE = 4096 + + ptrlen = 4 +) + +// https://sqlite.org/rescode.html +type _ErrorCode uint32 + +func (e _ErrorCode) Error() string { + return util.ErrorCodeString(uint32(e)) +} + +const ( + _OK _ErrorCode = util.OK + _ERROR _ErrorCode = util.ERROR + _PERM _ErrorCode = util.PERM + _BUSY _ErrorCode = util.BUSY + _READONLY _ErrorCode = util.READONLY + _IOERR _ErrorCode = util.IOERR + _NOTFOUND _ErrorCode = util.NOTFOUND + _CANTOPEN _ErrorCode = util.CANTOPEN + _IOERR_READ _ErrorCode = util.IOERR_READ + _IOERR_SHORT_READ _ErrorCode = util.IOERR_SHORT_READ + _IOERR_WRITE _ErrorCode = util.IOERR_WRITE + _IOERR_FSYNC _ErrorCode = util.IOERR_FSYNC + _IOERR_DIR_FSYNC _ErrorCode = util.IOERR_DIR_FSYNC + _IOERR_TRUNCATE _ErrorCode = util.IOERR_TRUNCATE + _IOERR_FSTAT _ErrorCode = util.IOERR_FSTAT + _IOERR_UNLOCK _ErrorCode = util.IOERR_UNLOCK + _IOERR_RDLOCK _ErrorCode = util.IOERR_RDLOCK + _IOERR_DELETE _ErrorCode = util.IOERR_DELETE + _IOERR_ACCESS _ErrorCode = util.IOERR_ACCESS + _IOERR_CHECKRESERVEDLOCK _ErrorCode = util.IOERR_CHECKRESERVEDLOCK + _IOERR_LOCK _ErrorCode = util.IOERR_LOCK + _IOERR_CLOSE _ErrorCode = util.IOERR_CLOSE + _IOERR_SHMOPEN _ErrorCode = util.IOERR_SHMOPEN + _IOERR_SHMSIZE _ErrorCode = util.IOERR_SHMSIZE + _IOERR_SHMLOCK _ErrorCode = util.IOERR_SHMLOCK + _IOERR_SHMMAP _ErrorCode = util.IOERR_SHMMAP + _IOERR_SEEK _ErrorCode = util.IOERR_SEEK + _IOERR_DELETE_NOENT _ErrorCode = util.IOERR_DELETE_NOENT + _IOERR_BEGIN_ATOMIC _ErrorCode = util.IOERR_BEGIN_ATOMIC + _IOERR_COMMIT_ATOMIC _ErrorCode = util.IOERR_COMMIT_ATOMIC + _IOERR_ROLLBACK_ATOMIC _ErrorCode = util.IOERR_ROLLBACK_ATOMIC + _CANTOPEN_FULLPATH _ErrorCode = util.CANTOPEN_FULLPATH + _CANTOPEN_ISDIR _ErrorCode = util.CANTOPEN_ISDIR + _READONLY_CANTINIT _ErrorCode = util.READONLY_CANTINIT + _OK_SYMLINK _ErrorCode = util.OK_SYMLINK +) + +// OpenFlag is a flag for the [VFS] Open method. 
+// +// https://sqlite.org/c3ref/c_open_autoproxy.html +type OpenFlag uint32 + +const ( + OPEN_READONLY OpenFlag = 0x00000001 /* Ok for sqlite3_open_v2() */ + OPEN_READWRITE OpenFlag = 0x00000002 /* Ok for sqlite3_open_v2() */ + OPEN_CREATE OpenFlag = 0x00000004 /* Ok for sqlite3_open_v2() */ + OPEN_DELETEONCLOSE OpenFlag = 0x00000008 /* VFS only */ + OPEN_EXCLUSIVE OpenFlag = 0x00000010 /* VFS only */ + OPEN_AUTOPROXY OpenFlag = 0x00000020 /* VFS only */ + OPEN_URI OpenFlag = 0x00000040 /* Ok for sqlite3_open_v2() */ + OPEN_MEMORY OpenFlag = 0x00000080 /* Ok for sqlite3_open_v2() */ + OPEN_MAIN_DB OpenFlag = 0x00000100 /* VFS only */ + OPEN_TEMP_DB OpenFlag = 0x00000200 /* VFS only */ + OPEN_TRANSIENT_DB OpenFlag = 0x00000400 /* VFS only */ + OPEN_MAIN_JOURNAL OpenFlag = 0x00000800 /* VFS only */ + OPEN_TEMP_JOURNAL OpenFlag = 0x00001000 /* VFS only */ + OPEN_SUBJOURNAL OpenFlag = 0x00002000 /* VFS only */ + OPEN_SUPER_JOURNAL OpenFlag = 0x00004000 /* VFS only */ + OPEN_NOMUTEX OpenFlag = 0x00008000 /* Ok for sqlite3_open_v2() */ + OPEN_FULLMUTEX OpenFlag = 0x00010000 /* Ok for sqlite3_open_v2() */ + OPEN_SHAREDCACHE OpenFlag = 0x00020000 /* Ok for sqlite3_open_v2() */ + OPEN_PRIVATECACHE OpenFlag = 0x00040000 /* Ok for sqlite3_open_v2() */ + OPEN_WAL OpenFlag = 0x00080000 /* VFS only */ + OPEN_NOFOLLOW OpenFlag = 0x01000000 /* Ok for sqlite3_open_v2() */ +) + +// AccessFlag is a flag for the [VFS] Access method. +// +// https://sqlite.org/c3ref/c_access_exists.html +type AccessFlag uint32 + +const ( + ACCESS_EXISTS AccessFlag = 0 + ACCESS_READWRITE AccessFlag = 1 /* Used by PRAGMA temp_store_directory */ + ACCESS_READ AccessFlag = 2 /* Unused */ +) + +// SyncFlag is a flag for the [File] Sync method. +// +// https://sqlite.org/c3ref/c_sync_dataonly.html +type SyncFlag uint32 + +const ( + SYNC_NORMAL SyncFlag = 0x00002 + SYNC_FULL SyncFlag = 0x00003 + SYNC_DATAONLY SyncFlag = 0x00010 +) + +// LockLevel is a value used with [File] Lock and Unlock methods. 
+// +// https://sqlite.org/c3ref/c_lock_exclusive.html +type LockLevel uint32 + +const ( + // No locks are held on the database. + // The database may be neither read nor written. + // Any internally cached data is considered suspect and subject to + // verification against the database file before being used. + // Other processes can read or write the database as their own locking + // states permit. + // This is the default state. + LOCK_NONE LockLevel = 0 /* xUnlock() only */ + + // The database may be read but not written. + // Any number of processes can hold SHARED locks at the same time, + // hence there can be many simultaneous readers. + // But no other thread or process is allowed to write to the database file + // while one or more SHARED locks are active. + LOCK_SHARED LockLevel = 1 /* xLock() or xUnlock() */ + + // A RESERVED lock means that the process is planning on writing to the + // database file at some point in the future but that it is currently just + // reading from the file. + // Only a single RESERVED lock may be active at one time, + // though multiple SHARED locks can coexist with a single RESERVED lock. + // RESERVED differs from PENDING in that new SHARED locks can be acquired + // while there is a RESERVED lock. + LOCK_RESERVED LockLevel = 2 /* xLock() only */ + + // A PENDING lock means that the process holding the lock wants to write to + // the database as soon as possible and is just waiting on all current + // SHARED locks to clear so that it can get an EXCLUSIVE lock. + // No new SHARED locks are permitted against the database if a PENDING lock + // is active, though existing SHARED locks are allowed to continue. + LOCK_PENDING LockLevel = 3 /* internal use only */ + + // An EXCLUSIVE lock is needed in order to write to the database file. + // Only one EXCLUSIVE lock is allowed on the file and no other locks of any + // kind are allowed to coexist with an EXCLUSIVE lock. 
+ // In order to maximize concurrency, SQLite works to minimize the amount of + // time that EXCLUSIVE locks are held. + LOCK_EXCLUSIVE LockLevel = 4 /* xLock() only */ +) + +// DeviceCharacteristic is a flag retuned by the [File] DeviceCharacteristics method. +// +// https://sqlite.org/c3ref/c_iocap_atomic.html +type DeviceCharacteristic uint32 + +const ( + IOCAP_ATOMIC DeviceCharacteristic = 0x00000001 + IOCAP_ATOMIC512 DeviceCharacteristic = 0x00000002 + IOCAP_ATOMIC1K DeviceCharacteristic = 0x00000004 + IOCAP_ATOMIC2K DeviceCharacteristic = 0x00000008 + IOCAP_ATOMIC4K DeviceCharacteristic = 0x00000010 + IOCAP_ATOMIC8K DeviceCharacteristic = 0x00000020 + IOCAP_ATOMIC16K DeviceCharacteristic = 0x00000040 + IOCAP_ATOMIC32K DeviceCharacteristic = 0x00000080 + IOCAP_ATOMIC64K DeviceCharacteristic = 0x00000100 + IOCAP_SAFE_APPEND DeviceCharacteristic = 0x00000200 + IOCAP_SEQUENTIAL DeviceCharacteristic = 0x00000400 + IOCAP_UNDELETABLE_WHEN_OPEN DeviceCharacteristic = 0x00000800 + IOCAP_POWERSAFE_OVERWRITE DeviceCharacteristic = 0x00001000 + IOCAP_IMMUTABLE DeviceCharacteristic = 0x00002000 + IOCAP_BATCH_ATOMIC DeviceCharacteristic = 0x00004000 +) + +// https://sqlite.org/c3ref/c_fcntl_begin_atomic_write.html +type _FcntlOpcode uint32 + +const ( + _FCNTL_LOCKSTATE _FcntlOpcode = 1 + _FCNTL_GET_LOCKPROXYFILE _FcntlOpcode = 2 + _FCNTL_SET_LOCKPROXYFILE _FcntlOpcode = 3 + _FCNTL_LAST_ERRNO _FcntlOpcode = 4 + _FCNTL_SIZE_HINT _FcntlOpcode = 5 + _FCNTL_CHUNK_SIZE _FcntlOpcode = 6 + _FCNTL_FILE_POINTER _FcntlOpcode = 7 + _FCNTL_SYNC_OMITTED _FcntlOpcode = 8 + _FCNTL_WIN32_AV_RETRY _FcntlOpcode = 9 + _FCNTL_PERSIST_WAL _FcntlOpcode = 10 + _FCNTL_OVERWRITE _FcntlOpcode = 11 + _FCNTL_VFSNAME _FcntlOpcode = 12 + _FCNTL_POWERSAFE_OVERWRITE _FcntlOpcode = 13 + _FCNTL_PRAGMA _FcntlOpcode = 14 + _FCNTL_BUSYHANDLER _FcntlOpcode = 15 + _FCNTL_TEMPFILENAME _FcntlOpcode = 16 + _FCNTL_MMAP_SIZE _FcntlOpcode = 18 + _FCNTL_TRACE _FcntlOpcode = 19 + _FCNTL_HAS_MOVED _FcntlOpcode = 20 + 
_FCNTL_SYNC _FcntlOpcode = 21 + _FCNTL_COMMIT_PHASETWO _FcntlOpcode = 22 + _FCNTL_WIN32_SET_HANDLE _FcntlOpcode = 23 + _FCNTL_WAL_BLOCK _FcntlOpcode = 24 + _FCNTL_ZIPVFS _FcntlOpcode = 25 + _FCNTL_RBU _FcntlOpcode = 26 + _FCNTL_VFS_POINTER _FcntlOpcode = 27 + _FCNTL_JOURNAL_POINTER _FcntlOpcode = 28 + _FCNTL_WIN32_GET_HANDLE _FcntlOpcode = 29 + _FCNTL_PDB _FcntlOpcode = 30 + _FCNTL_BEGIN_ATOMIC_WRITE _FcntlOpcode = 31 + _FCNTL_COMMIT_ATOMIC_WRITE _FcntlOpcode = 32 + _FCNTL_ROLLBACK_ATOMIC_WRITE _FcntlOpcode = 33 + _FCNTL_LOCK_TIMEOUT _FcntlOpcode = 34 + _FCNTL_DATA_VERSION _FcntlOpcode = 35 + _FCNTL_SIZE_LIMIT _FcntlOpcode = 36 + _FCNTL_CKPT_DONE _FcntlOpcode = 37 + _FCNTL_RESERVE_BYTES _FcntlOpcode = 38 + _FCNTL_CKPT_START _FcntlOpcode = 39 + _FCNTL_EXTERNAL_READER _FcntlOpcode = 40 + _FCNTL_CKSM_FILE _FcntlOpcode = 41 + _FCNTL_RESET_CACHE _FcntlOpcode = 42 +) + +// https://sqlite.org/c3ref/c_shm_exclusive.html +type _ShmFlag uint32 + +const ( + _SHM_UNLOCK _ShmFlag = 1 + _SHM_LOCK _ShmFlag = 2 + _SHM_SHARED _ShmFlag = 4 + _SHM_EXCLUSIVE _ShmFlag = 8 +) diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/file.go b/vendor/github.com/ncruces/go-sqlite3/vfs/file.go new file mode 100644 index 000000000..ca8cf84f3 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/file.go @@ -0,0 +1,217 @@ +package vfs + +import ( + "errors" + "io" + "io/fs" + "os" + "path/filepath" + "runtime" + "syscall" + + "github.com/ncruces/go-sqlite3/util/osutil" +) + +type vfsOS struct{} + +func (vfsOS) FullPathname(path string) (string, error) { + path, err := filepath.Abs(path) + if err != nil { + return "", err + } + fi, err := os.Lstat(path) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return path, nil + } + return "", err + } + if fi.Mode()&fs.ModeSymlink != 0 { + err = _OK_SYMLINK + } + return path, err +} + +func (vfsOS) Delete(path string, syncDir bool) error { + err := os.Remove(path) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return 
_IOERR_DELETE_NOENT + } + return err + } + if runtime.GOOS != "windows" && syncDir { + f, err := os.Open(filepath.Dir(path)) + if err != nil { + return _OK + } + defer f.Close() + err = osSync(f, false, false) + if err != nil { + return _IOERR_DIR_FSYNC + } + } + return nil +} + +func (vfsOS) Access(name string, flags AccessFlag) (bool, error) { + err := osAccess(name, flags) + if flags == ACCESS_EXISTS { + if errors.Is(err, fs.ErrNotExist) { + return false, nil + } + } else { + if errors.Is(err, fs.ErrPermission) { + return false, nil + } + } + return err == nil, err +} + +func (vfsOS) Open(name string, flags OpenFlag) (File, OpenFlag, error) { + return nil, 0, _CANTOPEN +} + +func (vfsOS) OpenFilename(name *Filename, flags OpenFlag) (File, OpenFlag, error) { + var oflags int + if flags&OPEN_EXCLUSIVE != 0 { + oflags |= os.O_EXCL + } + if flags&OPEN_CREATE != 0 { + oflags |= os.O_CREATE + } + if flags&OPEN_READONLY != 0 { + oflags |= os.O_RDONLY + } + if flags&OPEN_READWRITE != 0 { + oflags |= os.O_RDWR + } + + var err error + var f *os.File + if name == nil { + f, err = os.CreateTemp("", "*.db") + } else { + f, err = osutil.OpenFile(name.String(), oflags, 0666) + } + if err != nil { + if errors.Is(err, syscall.EISDIR) { + return nil, flags, _CANTOPEN_ISDIR + } + return nil, flags, err + } + + if modeof := name.URIParameter("modeof"); modeof != "" { + if err = osSetMode(f, modeof); err != nil { + f.Close() + return nil, flags, _IOERR_FSTAT + } + } + if flags&OPEN_DELETEONCLOSE != 0 { + os.Remove(f.Name()) + } + + file := vfsFile{ + File: f, + psow: true, + readOnly: flags&OPEN_READONLY != 0, + syncDir: runtime.GOOS != "windows" && + flags&(OPEN_CREATE) != 0 && + flags&(OPEN_MAIN_JOURNAL|OPEN_SUPER_JOURNAL|OPEN_WAL) != 0, + shm: NewSharedMemory(name.String()+"-shm", flags), + } + return &file, flags, nil +} + +type vfsFile struct { + *os.File + shm SharedMemory + lock LockLevel + readOnly bool + keepWAL bool + syncDir bool + psow bool +} + +var ( + // Ensure these 
interfaces are implemented: + _ FileLockState = &vfsFile{} + _ FileHasMoved = &vfsFile{} + _ FileSizeHint = &vfsFile{} + _ FilePersistentWAL = &vfsFile{} + _ FilePowersafeOverwrite = &vfsFile{} +) + +func (f *vfsFile) Close() error { + if f.shm != nil { + f.shm.Close() + } + return f.File.Close() +} + +func (f *vfsFile) Sync(flags SyncFlag) error { + dataonly := (flags & SYNC_DATAONLY) != 0 + fullsync := (flags & 0x0f) == SYNC_FULL + + err := osSync(f.File, fullsync, dataonly) + if err != nil { + return err + } + if runtime.GOOS != "windows" && f.syncDir { + f.syncDir = false + d, err := os.Open(filepath.Dir(f.File.Name())) + if err != nil { + return nil + } + defer d.Close() + err = osSync(d, false, false) + if err != nil { + return _IOERR_DIR_FSYNC + } + } + return nil +} + +func (f *vfsFile) Size() (int64, error) { + return f.Seek(0, io.SeekEnd) +} + +func (f *vfsFile) SectorSize() int { + return _DEFAULT_SECTOR_SIZE +} + +func (f *vfsFile) DeviceCharacteristics() DeviceCharacteristic { + var res DeviceCharacteristic + if osBatchAtomic(f.File) { + res |= IOCAP_BATCH_ATOMIC + } + if f.psow { + res |= IOCAP_POWERSAFE_OVERWRITE + } + return res +} + +func (f *vfsFile) SizeHint(size int64) error { + return osAllocate(f.File, size) +} + +func (f *vfsFile) HasMoved() (bool, error) { + fi, err := f.Stat() + if err != nil { + return false, err + } + pi, err := os.Stat(f.Name()) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return true, nil + } + return false, err + } + return !os.SameFile(fi, pi), nil +} + +func (f *vfsFile) LockState() LockLevel { return f.lock } +func (f *vfsFile) PowersafeOverwrite() bool { return f.psow } +func (f *vfsFile) PersistentWAL() bool { return f.keepWAL } +func (f *vfsFile) SetPowersafeOverwrite(psow bool) { f.psow = psow } +func (f *vfsFile) SetPersistentWAL(keepWAL bool) { f.keepWAL = keepWAL } diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/filename.go b/vendor/github.com/ncruces/go-sqlite3/vfs/filename.go new file 
mode 100644 index 000000000..e23575bbb --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/filename.go @@ -0,0 +1,174 @@ +package vfs + +import ( + "context" + "net/url" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/tetratelabs/wazero/api" +) + +// Filename is used by SQLite to pass filenames +// to the Open method of a VFS. +// +// https://sqlite.org/c3ref/filename.html +type Filename struct { + ctx context.Context + mod api.Module + zPath uint32 + flags OpenFlag + stack [2]uint64 +} + +// OpenFilename is an internal API users should not call directly. +func OpenFilename(ctx context.Context, mod api.Module, id uint32, flags OpenFlag) *Filename { + if id == 0 { + return nil + } + return &Filename{ + ctx: ctx, + mod: mod, + zPath: id, + flags: flags, + } +} + +// String returns this filename as a string. +func (n *Filename) String() string { + if n == nil || n.zPath == 0 { + return "" + } + return util.ReadString(n.mod, n.zPath, _MAX_PATHNAME) +} + +// Database returns the name of the corresponding database file. +// +// https://sqlite.org/c3ref/filename_database.html +func (n *Filename) Database() string { + return n.path("sqlite3_filename_database") +} + +// Journal returns the name of the corresponding rollback journal file. +// +// https://sqlite.org/c3ref/filename_database.html +func (n *Filename) Journal() string { + return n.path("sqlite3_filename_journal") +} + +// Journal returns the name of the corresponding WAL file. 
+// +// https://sqlite.org/c3ref/filename_database.html +func (n *Filename) WAL() string { + return n.path("sqlite3_filename_wal") +} + +func (n *Filename) path(method string) string { + if n == nil || n.zPath == 0 { + return "" + } + n.stack[0] = uint64(n.zPath) + fn := n.mod.ExportedFunction(method) + if err := fn.CallWithStack(n.ctx, n.stack[:]); err != nil { + panic(err) + } + return util.ReadString(n.mod, uint32(n.stack[0]), _MAX_PATHNAME) +} + +// DatabaseFile returns the main database [File] corresponding to a journal. +// +// https://sqlite.org/c3ref/database_file_object.html +func (n *Filename) DatabaseFile() File { + if n == nil || n.zPath == 0 { + return nil + } + if n.flags&(OPEN_MAIN_DB|OPEN_MAIN_JOURNAL|OPEN_WAL) == 0 { + return nil + } + + n.stack[0] = uint64(n.zPath) + fn := n.mod.ExportedFunction("sqlite3_database_file_object") + if err := fn.CallWithStack(n.ctx, n.stack[:]); err != nil { + panic(err) + } + file, _ := vfsFileGet(n.ctx, n.mod, uint32(n.stack[0])).(File) + return file +} + +// URIParameter returns the value of a URI parameter. +// +// https://sqlite.org/c3ref/uri_boolean.html +func (n *Filename) URIParameter(key string) string { + if n == nil || n.zPath == 0 { + return "" + } + + uriKey := n.mod.ExportedFunction("sqlite3_uri_key") + n.stack[0] = uint64(n.zPath) + n.stack[1] = uint64(0) + if err := uriKey.CallWithStack(n.ctx, n.stack[:]); err != nil { + panic(err) + } + + ptr := uint32(n.stack[0]) + if ptr == 0 { + return "" + } + + // Parse the format from: + // https://github.com/sqlite/sqlite/blob/b74eb0/src/pager.c#L4797-L4840 + // This avoids having to alloc/free the key just to find a value. + for { + k := util.ReadString(n.mod, ptr, _MAX_NAME) + if k == "" { + return "" + } + ptr += uint32(len(k)) + 1 + + v := util.ReadString(n.mod, ptr, _MAX_NAME) + if k == key { + return v + } + ptr += uint32(len(v)) + 1 + } +} + +// URIParameters obtains values for URI parameters. 
+// +// https://sqlite.org/c3ref/uri_boolean.html +func (n *Filename) URIParameters() url.Values { + if n == nil || n.zPath == 0 { + return nil + } + + uriKey := n.mod.ExportedFunction("sqlite3_uri_key") + n.stack[0] = uint64(n.zPath) + n.stack[1] = uint64(0) + if err := uriKey.CallWithStack(n.ctx, n.stack[:]); err != nil { + panic(err) + } + + ptr := uint32(n.stack[0]) + if ptr == 0 { + return nil + } + + var params url.Values + + // Parse the format from: + // https://github.com/sqlite/sqlite/blob/b74eb0/src/pager.c#L4797-L4840 + // This is the only way to support multiple valued keys. + for { + k := util.ReadString(n.mod, ptr, _MAX_NAME) + if k == "" { + return params + } + ptr += uint32(len(k)) + 1 + + v := util.ReadString(n.mod, ptr, _MAX_NAME) + if params == nil { + params = url.Values{} + } + params.Add(k, v) + ptr += uint32(len(v)) + 1 + } +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/lock.go b/vendor/github.com/ncruces/go-sqlite3/vfs/lock.go new file mode 100644 index 000000000..86a988ae8 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/lock.go @@ -0,0 +1,144 @@ +//go:build (linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys + +package vfs + +import "github.com/ncruces/go-sqlite3/internal/util" + +// SupportsFileLocking is false on platforms that do not support file locking. +// To open a database file on those platforms, +// you need to use the [nolock] or [immutable] URI parameters. +// +// [nolock]: https://sqlite.org/uri.html#urinolock +// [immutable]: https://sqlite.org/uri.html#uriimmutable +const SupportsFileLocking = true + +const ( + _PENDING_BYTE = 0x40000000 + _RESERVED_BYTE = (_PENDING_BYTE + 1) + _SHARED_FIRST = (_PENDING_BYTE + 2) + _SHARED_SIZE = 510 +) + +func (f *vfsFile) Lock(lock LockLevel) error { + // Argument check. SQLite never explicitly requests a pending lock. 
+ if lock != LOCK_SHARED && lock != LOCK_RESERVED && lock != LOCK_EXCLUSIVE { + panic(util.AssertErr()) + } + + switch { + case f.lock < LOCK_NONE || f.lock > LOCK_EXCLUSIVE: + // Connection state check. + panic(util.AssertErr()) + case f.lock == LOCK_NONE && lock > LOCK_SHARED: + // We never move from unlocked to anything higher than a shared lock. + panic(util.AssertErr()) + case f.lock != LOCK_SHARED && lock == LOCK_RESERVED: + // A shared lock is always held when a reserved lock is requested. + panic(util.AssertErr()) + } + + // If we already have an equal or more restrictive lock, do nothing. + if f.lock >= lock { + return nil + } + + // Do not allow any kind of write-lock on a read-only database. + if f.readOnly && lock >= LOCK_RESERVED { + return _IOERR_LOCK + } + + switch lock { + case LOCK_SHARED: + // Must be unlocked to get SHARED. + if f.lock != LOCK_NONE { + panic(util.AssertErr()) + } + if rc := osGetSharedLock(f.File); rc != _OK { + return rc + } + f.lock = LOCK_SHARED + return nil + + case LOCK_RESERVED: + // Must be SHARED to get RESERVED. + if f.lock != LOCK_SHARED { + panic(util.AssertErr()) + } + if rc := osGetReservedLock(f.File); rc != _OK { + return rc + } + f.lock = LOCK_RESERVED + return nil + + case LOCK_EXCLUSIVE: + // Must be SHARED, RESERVED or PENDING to get EXCLUSIVE. + if f.lock <= LOCK_NONE || f.lock >= LOCK_EXCLUSIVE { + panic(util.AssertErr()) + } + reserved := f.lock == LOCK_RESERVED + // A PENDING lock is needed before acquiring an EXCLUSIVE lock. + if f.lock < LOCK_PENDING { + // If we're already RESERVED, we can block indefinitely, + // since only new readers may briefly hold the PENDING lock. + if rc := osGetPendingLock(f.File, reserved /* block */); rc != _OK { + return rc + } + f.lock = LOCK_PENDING + } + // We already have PENDING, so we're just waiting for readers to leave. + // If we were RESERVED, we can wait for a little while, before invoking + // the busy handler; we will only do this once. 
+ if rc := osGetExclusiveLock(f.File, reserved /* wait */); rc != _OK { + return rc + } + f.lock = LOCK_EXCLUSIVE + return nil + + default: + panic(util.AssertErr()) + } +} + +func (f *vfsFile) Unlock(lock LockLevel) error { + // Argument check. + if lock != LOCK_NONE && lock != LOCK_SHARED { + panic(util.AssertErr()) + } + + // Connection state check. + if f.lock < LOCK_NONE || f.lock > LOCK_EXCLUSIVE { + panic(util.AssertErr()) + } + + // If we don't have a more restrictive lock, do nothing. + if f.lock <= lock { + return nil + } + + switch lock { + case LOCK_SHARED: + rc := osDowngradeLock(f.File, f.lock) + f.lock = LOCK_SHARED + return rc + + case LOCK_NONE: + rc := osReleaseLock(f.File, f.lock) + f.lock = LOCK_NONE + return rc + + default: + panic(util.AssertErr()) + } +} + +func (f *vfsFile) CheckReservedLock() (bool, error) { + // Connection state check. + if f.lock < LOCK_NONE || f.lock > LOCK_EXCLUSIVE { + panic(util.AssertErr()) + } + + if f.lock >= LOCK_RESERVED { + return true, nil + } + return osCheckReservedLock(f.File) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/lock_other.go b/vendor/github.com/ncruces/go-sqlite3/vfs/lock_other.go new file mode 100644 index 000000000..c395f34a7 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/lock_other.go @@ -0,0 +1,23 @@ +//go:build !(linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) || sqlite3_nosys + +package vfs + +// SupportsFileLocking is false on platforms that do not support file locking. +// To open a database file on those platforms, +// you need to use the [nolock] or [immutable] URI parameters. 
+// +// [nolock]: https://sqlite.org/uri.html#urinolock +// [immutable]: https://sqlite.org/uri.html#uriimmutable +const SupportsFileLocking = false + +func (f *vfsFile) Lock(LockLevel) error { + return _IOERR_LOCK +} + +func (f *vfsFile) Unlock(LockLevel) error { + return _IOERR_UNLOCK +} + +func (f *vfsFile) CheckReservedLock() (bool, error) { + return false, _IOERR_CHECKRESERVEDLOCK +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/README.md b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/README.md new file mode 100644 index 000000000..193e29d98 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/README.md @@ -0,0 +1,9 @@ +# Go `"memdb"` SQLite VFS + +This package implements the [`"memdb"`](https://sqlite.org/src/doc/tip/src/memdb.c) +SQLite VFS in pure Go. + +It has some benefits over the C version: +- the memory backing the database needs not be contiguous, +- the database can grow/shrink incrementally without copying, +- reader-writer concurrency is slightly improved. \ No newline at end of file diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/api.go b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/api.go new file mode 100644 index 000000000..5a2b84c71 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/api.go @@ -0,0 +1,68 @@ +// Package memdb implements the "memdb" SQLite VFS. +// +// The "memdb" [vfs.VFS] allows the same in-memory database to be shared +// among multiple database connections in the same process, +// as long as the database name begins with "/". +// +// Importing package memdb registers the VFS: +// +// import _ "github.com/ncruces/go-sqlite3/vfs/memdb" +package memdb + +import ( + "sync" + + "github.com/ncruces/go-sqlite3/vfs" +) + +func init() { + vfs.Register("memdb", memVFS{}) +} + +var ( + memoryMtx sync.Mutex + // +checklocks:memoryMtx + memoryDBs = map[string]*memDB{} +) + +// Create creates a shared memory database, +// using data as its initial contents. 
+// The new database takes ownership of data, +// and the caller should not use data after this call. +func Create(name string, data []byte) { + memoryMtx.Lock() + defer memoryMtx.Unlock() + + db := &memDB{ + refs: 1, + name: name, + size: int64(len(data)), + } + + // Convert data from WAL to rollback journal. + if len(data) >= 20 && data[18] == 2 && data[19] == 2 { + data[18] = 1 + data[19] = 1 + } + + sectors := divRoundUp(db.size, sectorSize) + db.data = make([]*[sectorSize]byte, sectors) + for i := range db.data { + sector := data[i*sectorSize:] + if len(sector) >= sectorSize { + db.data[i] = (*[sectorSize]byte)(sector) + } else { + db.data[i] = new([sectorSize]byte) + copy((*db.data[i])[:], sector) + } + } + + memoryDBs[name] = db +} + +// Delete deletes a shared memory database. +func Delete(name string) { + memoryMtx.Lock() + defer memoryMtx.Unlock() + delete(memoryDBs, name) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go new file mode 100644 index 000000000..8dc57ab9c --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go @@ -0,0 +1,311 @@ +package memdb + +import ( + "io" + "runtime" + "sync" + "time" + + "github.com/ncruces/go-sqlite3" + "github.com/ncruces/go-sqlite3/vfs" +) + +// Must be a multiple of 64K (the largest page size). +const sectorSize = 65536 + +type memVFS struct{} + +func (memVFS) Open(name string, flags vfs.OpenFlag) (vfs.File, vfs.OpenFlag, error) { + // For simplicity, we do not support reading or writing data + // across "sector" boundaries. + // + // This is not a problem for most SQLite file types: + // - databases, which only do page aligned reads/writes; + // - temp journals, as used by the sorter, which does the same: + // https://github.com/sqlite/sqlite/blob/b74eb0/src/vdbesort.c#L409-L412 + // + // We refuse to open all other file types, + // but returning OPEN_MEMORY means SQLite won't ask us to. 
+ const types = vfs.OPEN_MAIN_DB | + vfs.OPEN_TEMP_DB | + vfs.OPEN_TEMP_JOURNAL + if flags&types == 0 { + return nil, flags, sqlite3.CANTOPEN + } + + // A shared database has a name that begins with "/". + shared := len(name) > 1 && name[0] == '/' + + var db *memDB + if shared { + name = name[1:] + memoryMtx.Lock() + defer memoryMtx.Unlock() + db = memoryDBs[name] + } + if db == nil { + if flags&vfs.OPEN_CREATE == 0 { + return nil, flags, sqlite3.CANTOPEN + } + db = &memDB{name: name} + } + if shared { + db.refs++ // +checklocksforce: memoryMtx is held + memoryDBs[name] = db + } + + return &memFile{ + memDB: db, + readOnly: flags&vfs.OPEN_READONLY != 0, + }, flags | vfs.OPEN_MEMORY, nil +} + +func (memVFS) Delete(name string, dirSync bool) error { + return sqlite3.IOERR_DELETE +} + +func (memVFS) Access(name string, flag vfs.AccessFlag) (bool, error) { + return false, nil +} + +func (memVFS) FullPathname(name string) (string, error) { + return name, nil +} + +type memDB struct { + name string + + // +checklocks:lockMtx + pending *memFile + // +checklocks:lockMtx + reserved *memFile + + // +checklocks:dataMtx + data []*[sectorSize]byte + + // +checklocks:dataMtx + size int64 + + // +checklocks:lockMtx + shared int + + // +checklocks:memoryMtx + refs int + + lockMtx sync.Mutex + dataMtx sync.RWMutex +} + +func (m *memDB) release() { + memoryMtx.Lock() + defer memoryMtx.Unlock() + if m.refs--; m.refs == 0 && m == memoryDBs[m.name] { + delete(memoryDBs, m.name) + } +} + +type memFile struct { + *memDB + lock vfs.LockLevel + readOnly bool +} + +var ( + // Ensure these interfaces are implemented: + _ vfs.FileLockState = &memFile{} + _ vfs.FileSizeHint = &memFile{} +) + +func (m *memFile) Close() error { + m.release() + return m.Unlock(vfs.LOCK_NONE) +} + +func (m *memFile) ReadAt(b []byte, off int64) (n int, err error) { + m.dataMtx.RLock() + defer m.dataMtx.RUnlock() + + if off >= m.size { + return 0, io.EOF + } + + base := off / sectorSize + rest := off % sectorSize + 
have := int64(sectorSize) + if base == int64(len(m.data))-1 { + have = modRoundUp(m.size, sectorSize) + } + n = copy(b, (*m.data[base])[rest:have]) + if n < len(b) { + // Assume reads are page aligned. + return 0, io.ErrNoProgress + } + return n, nil +} + +func (m *memFile) WriteAt(b []byte, off int64) (n int, err error) { + m.dataMtx.Lock() + defer m.dataMtx.Unlock() + + base := off / sectorSize + rest := off % sectorSize + for base >= int64(len(m.data)) { + m.data = append(m.data, new([sectorSize]byte)) + } + n = copy((*m.data[base])[rest:], b) + if n < len(b) { + // Assume writes are page aligned. + return n, io.ErrShortWrite + } + if size := off + int64(len(b)); size > m.size { + m.size = size + } + return n, nil +} + +func (m *memFile) Truncate(size int64) error { + m.dataMtx.Lock() + defer m.dataMtx.Unlock() + return m.truncate(size) +} + +// +checklocks:m.dataMtx +func (m *memFile) truncate(size int64) error { + if size < m.size { + base := size / sectorSize + rest := size % sectorSize + if rest != 0 { + clear((*m.data[base])[rest:]) + } + } + sectors := divRoundUp(size, sectorSize) + for sectors > int64(len(m.data)) { + m.data = append(m.data, new([sectorSize]byte)) + } + clear(m.data[sectors:]) + m.data = m.data[:sectors] + m.size = size + return nil +} + +func (m *memFile) Sync(flag vfs.SyncFlag) error { + return nil +} + +func (m *memFile) Size() (int64, error) { + m.dataMtx.RLock() + defer m.dataMtx.RUnlock() + return m.size, nil +} + +const spinWait = 25 * time.Microsecond + +func (m *memFile) Lock(lock vfs.LockLevel) error { + if m.lock >= lock { + return nil + } + + if m.readOnly && lock >= vfs.LOCK_RESERVED { + return sqlite3.IOERR_LOCK + } + + m.lockMtx.Lock() + defer m.lockMtx.Unlock() + + switch lock { + case vfs.LOCK_SHARED: + if m.pending != nil { + return sqlite3.BUSY + } + m.shared++ + + case vfs.LOCK_RESERVED: + if m.reserved != nil { + return sqlite3.BUSY + } + m.reserved = m + + case vfs.LOCK_EXCLUSIVE: + if m.lock < vfs.LOCK_PENDING { + 
if m.pending != nil { + return sqlite3.BUSY + } + m.lock = vfs.LOCK_PENDING + m.pending = m + } + + for before := time.Now(); m.shared > 1; { + if time.Since(before) > spinWait { + return sqlite3.BUSY + } + m.lockMtx.Unlock() + runtime.Gosched() + m.lockMtx.Lock() + } + } + + m.lock = lock + return nil +} + +func (m *memFile) Unlock(lock vfs.LockLevel) error { + if m.lock <= lock { + return nil + } + + m.lockMtx.Lock() + defer m.lockMtx.Unlock() + + if m.pending == m { + m.pending = nil + } + if m.reserved == m { + m.reserved = nil + } + if lock < vfs.LOCK_SHARED { + m.shared-- + } + m.lock = lock + return nil +} + +func (m *memFile) CheckReservedLock() (bool, error) { + if m.lock >= vfs.LOCK_RESERVED { + return true, nil + } + m.lockMtx.Lock() + defer m.lockMtx.Unlock() + return m.reserved != nil, nil +} + +func (m *memFile) SectorSize() int { + return sectorSize +} + +func (m *memFile) DeviceCharacteristics() vfs.DeviceCharacteristic { + return vfs.IOCAP_ATOMIC | + vfs.IOCAP_SEQUENTIAL | + vfs.IOCAP_SAFE_APPEND | + vfs.IOCAP_POWERSAFE_OVERWRITE +} + +func (m *memFile) SizeHint(size int64) error { + m.dataMtx.Lock() + defer m.dataMtx.Unlock() + if size > m.size { + return m.truncate(size) + } + return nil +} + +func (m *memFile) LockState() vfs.LockLevel { + return m.lock +} + +func divRoundUp(a, b int64) int64 { + return (a + b - 1) / b +} + +func modRoundUp(a, b int64) int64 { + return b - (b-a%b)%b +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_bsd.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_bsd.go new file mode 100644 index 000000000..48ac5c9c9 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_bsd.go @@ -0,0 +1,33 @@ +//go:build (freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys + +package vfs + +import ( + "os" + "time" + + "golang.org/x/sys/unix" +) + +func osUnlock(file *os.File, start, len int64) _ErrorCode { + if start == 0 && len == 0 { + err := unix.Flock(int(file.Fd()), 
unix.LOCK_UN) + if err != nil { + return _IOERR_UNLOCK + } + } + return _OK +} + +func osLock(file *os.File, how int, def _ErrorCode) _ErrorCode { + err := unix.Flock(int(file.Fd()), how) + return osLockErrorCode(err, def) +} + +func osReadLock(file *os.File, _ /*start*/, _ /*len*/ int64, _ /*timeout*/ time.Duration) _ErrorCode { + return osLock(file, unix.LOCK_SH|unix.LOCK_NB, _IOERR_RDLOCK) +} + +func osWriteLock(file *os.File, _ /*start*/, _ /*len*/ int64, _ /*timeout*/ time.Duration) _ErrorCode { + return osLock(file, unix.LOCK_EX|unix.LOCK_NB, _IOERR_LOCK) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_darwin.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_darwin.go new file mode 100644 index 000000000..8bfe96bb1 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_darwin.go @@ -0,0 +1,95 @@ +//go:build !(sqlite3_flock || sqlite3_nosys) + +package vfs + +import ( + "io" + "os" + "time" + + "golang.org/x/sys/unix" +) + +const ( + // https://github.com/apple/darwin-xnu/blob/main/bsd/sys/fcntl.h + _F_OFD_SETLK = 90 + _F_OFD_SETLKW = 91 + _F_OFD_SETLKWTIMEOUT = 93 +) + +type flocktimeout_t struct { + fl unix.Flock_t + timeout unix.Timespec +} + +func osSync(file *os.File, fullsync, _ /*dataonly*/ bool) error { + if fullsync { + return file.Sync() + } + return unix.Fsync(int(file.Fd())) +} + +func osAllocate(file *os.File, size int64) error { + off, err := file.Seek(0, io.SeekEnd) + if err != nil { + return err + } + if size <= off { + return nil + } + + store := unix.Fstore_t{ + Flags: unix.F_ALLOCATEALL | unix.F_ALLOCATECONTIG, + Posmode: unix.F_PEOFPOSMODE, + Offset: 0, + Length: size - off, + } + + // Try to get a continuous chunk of disk space. + err = unix.FcntlFstore(file.Fd(), unix.F_PREALLOCATE, &store) + if err != nil { + // OK, perhaps we are too fragmented, allocate non-continuous. 
+ store.Flags = unix.F_ALLOCATEALL + unix.FcntlFstore(file.Fd(), unix.F_PREALLOCATE, &store) + } + return file.Truncate(size) +} + +func osUnlock(file *os.File, start, len int64) _ErrorCode { + err := unix.FcntlFlock(file.Fd(), _F_OFD_SETLK, &unix.Flock_t{ + Type: unix.F_UNLCK, + Start: start, + Len: len, + }) + if err != nil { + return _IOERR_UNLOCK + } + return _OK +} + +func osLock(file *os.File, typ int16, start, len int64, timeout time.Duration, def _ErrorCode) _ErrorCode { + lock := flocktimeout_t{fl: unix.Flock_t{ + Type: typ, + Start: start, + Len: len, + }} + var err error + switch { + case timeout == 0: + err = unix.FcntlFlock(file.Fd(), _F_OFD_SETLK, &lock.fl) + case timeout < 0: + err = unix.FcntlFlock(file.Fd(), _F_OFD_SETLKW, &lock.fl) + default: + lock.timeout = unix.NsecToTimespec(int64(timeout / time.Nanosecond)) + err = unix.FcntlFlock(file.Fd(), _F_OFD_SETLKWTIMEOUT, &lock.fl) + } + return osLockErrorCode(err, def) +} + +func osReadLock(file *os.File, start, len int64, timeout time.Duration) _ErrorCode { + return osLock(file, unix.F_RDLCK, start, len, timeout, _IOERR_RDLOCK) +} + +func osWriteLock(file *os.File, start, len int64, timeout time.Duration) _ErrorCode { + return osLock(file, unix.F_WRLCK, start, len, timeout, _IOERR_LOCK) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_f2fs_linux.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_f2fs_linux.go new file mode 100644 index 000000000..a9f0e333c --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_f2fs_linux.go @@ -0,0 +1,34 @@ +//go:build (amd64 || arm64 || riscv64) && !sqlite3_nosys + +package vfs + +import ( + "os" + + "golang.org/x/sys/unix" +) + +const ( + _F2FS_IOC_START_ATOMIC_WRITE = 62721 + _F2FS_IOC_COMMIT_ATOMIC_WRITE = 62722 + _F2FS_IOC_ABORT_ATOMIC_WRITE = 62725 + _F2FS_IOC_GET_FEATURES = 2147808524 + _F2FS_FEATURE_ATOMIC_WRITE = 4 +) + +func osBatchAtomic(file *os.File) bool { + flags, err := unix.IoctlGetInt(int(file.Fd()), _F2FS_IOC_GET_FEATURES) + 
return err == nil && flags&_F2FS_FEATURE_ATOMIC_WRITE != 0 +} + +func (f *vfsFile) BeginAtomicWrite() error { + return unix.IoctlSetInt(int(f.Fd()), _F2FS_IOC_START_ATOMIC_WRITE, 0) +} + +func (f *vfsFile) CommitAtomicWrite() error { + return unix.IoctlSetInt(int(f.Fd()), _F2FS_IOC_COMMIT_ATOMIC_WRITE, 0) +} + +func (f *vfsFile) RollbackAtomicWrite() error { + return unix.IoctlSetInt(int(f.Fd()), _F2FS_IOC_ABORT_ATOMIC_WRITE, 0) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_linux.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_linux.go new file mode 100644 index 000000000..11e683a04 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_linux.go @@ -0,0 +1,71 @@ +//go:build !(sqlite3_flock || sqlite3_nosys) + +package vfs + +import ( + "math/rand" + "os" + "time" + + "golang.org/x/sys/unix" +) + +func osSync(file *os.File, _ /*fullsync*/, _ /*dataonly*/ bool) error { + // SQLite trusts Linux's fdatasync for all fsync's. + return unix.Fdatasync(int(file.Fd())) +} + +func osAllocate(file *os.File, size int64) error { + if size == 0 { + return nil + } + return unix.Fallocate(int(file.Fd()), 0, 0, size) +} + +func osUnlock(file *os.File, start, len int64) _ErrorCode { + err := unix.FcntlFlock(file.Fd(), unix.F_OFD_SETLK, &unix.Flock_t{ + Type: unix.F_UNLCK, + Start: start, + Len: len, + }) + if err != nil { + return _IOERR_UNLOCK + } + return _OK +} + +func osLock(file *os.File, typ int16, start, len int64, timeout time.Duration, def _ErrorCode) _ErrorCode { + lock := unix.Flock_t{ + Type: typ, + Start: start, + Len: len, + } + var err error + switch { + case timeout == 0: + err = unix.FcntlFlock(file.Fd(), unix.F_OFD_SETLK, &lock) + case timeout < 0: + err = unix.FcntlFlock(file.Fd(), unix.F_OFD_SETLKW, &lock) + default: + before := time.Now() + for { + err = unix.FcntlFlock(file.Fd(), unix.F_OFD_SETLK, &lock) + if errno, _ := err.(unix.Errno); errno != unix.EAGAIN { + break + } + if timeout < time.Since(before) { + break + } + 
osSleep(time.Duration(rand.Int63n(int64(time.Millisecond)))) + } + } + return osLockErrorCode(err, def) +} + +func osReadLock(file *os.File, start, len int64, timeout time.Duration) _ErrorCode { + return osLock(file, unix.F_RDLCK, start, len, timeout, _IOERR_RDLOCK) +} + +func osWriteLock(file *os.File, start, len int64, timeout time.Duration) _ErrorCode { + return osLock(file, unix.F_WRLCK, start, len, timeout, _IOERR_LOCK) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_access.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_access.go new file mode 100644 index 000000000..1621c0998 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_access.go @@ -0,0 +1,36 @@ +//go:build !unix || sqlite3_nosys + +package vfs + +import ( + "io/fs" + "os" +) + +func osAccess(path string, flags AccessFlag) error { + fi, err := os.Stat(path) + if err != nil { + return err + } + if flags == ACCESS_EXISTS { + return nil + } + + const ( + S_IREAD = 0400 + S_IWRITE = 0200 + S_IEXEC = 0100 + ) + + var want fs.FileMode = S_IREAD + if flags == ACCESS_READWRITE { + want |= S_IWRITE + } + if fi.IsDir() { + want |= S_IEXEC + } + if fi.Mode()&want != want { + return fs.ErrPermission + } + return nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_alloc.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_alloc.go new file mode 100644 index 000000000..60c92182c --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_alloc.go @@ -0,0 +1,19 @@ +//go:build !(linux || darwin) || sqlite3_flock || sqlite3_nosys + +package vfs + +import ( + "io" + "os" +) + +func osAllocate(file *os.File, size int64) error { + off, err := file.Seek(0, io.SeekEnd) + if err != nil { + return err + } + if size <= off { + return nil + } + return file.Truncate(size) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_atomic.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_atomic.go new file mode 100644 index 000000000..ecaff0245 --- /dev/null 
+++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_atomic.go @@ -0,0 +1,9 @@ +//go:build !linux || !(amd64 || arm64 || riscv64) || sqlite3_nosys + +package vfs + +import "os" + +func osBatchAtomic(*os.File) bool { + return false +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_mode.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_mode.go new file mode 100644 index 000000000..ac4904773 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_mode.go @@ -0,0 +1,14 @@ +//go:build !unix || sqlite3_nosys + +package vfs + +import "os" + +func osSetMode(file *os.File, modeof string) error { + fi, err := os.Stat(modeof) + if err != nil { + return err + } + file.Chmod(fi.Mode()) + return nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_sleep.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_sleep.go new file mode 100644 index 000000000..c6bc40769 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_sleep.go @@ -0,0 +1,9 @@ +//go:build !windows || sqlite3_nosys + +package vfs + +import "time" + +func osSleep(d time.Duration) { + time.Sleep(d) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_sync.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_sync.go new file mode 100644 index 000000000..84dbd23bc --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_sync.go @@ -0,0 +1,9 @@ +//go:build !(linux || darwin) || sqlite3_flock || sqlite3_nosys + +package vfs + +import "os" + +func osSync(file *os.File, _ /*fullsync*/, _ /*dataonly*/ bool) error { + return file.Sync() +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix.go new file mode 100644 index 000000000..bf4b44efd --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix.go @@ -0,0 +1,33 @@ +//go:build unix && !sqlite3_nosys + +package vfs + +import ( + "os" + "syscall" + + "golang.org/x/sys/unix" +) + +func osAccess(path string, flags AccessFlag) 
error { + var access uint32 // unix.F_OK + switch flags { + case ACCESS_READWRITE: + access = unix.R_OK | unix.W_OK + case ACCESS_READ: + access = unix.R_OK + } + return unix.Access(path, access) +} + +func osSetMode(file *os.File, modeof string) error { + fi, err := os.Stat(modeof) + if err != nil { + return err + } + file.Chmod(fi.Mode()) + if sys, ok := fi.Sys().(*syscall.Stat_t); ok { + file.Chown(int(sys.Uid), int(sys.Gid)) + } + return nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix_lock.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix_lock.go new file mode 100644 index 000000000..d04c1f6a0 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix_lock.go @@ -0,0 +1,106 @@ +//go:build (linux || darwin || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys + +package vfs + +import ( + "os" + "time" + + "golang.org/x/sys/unix" +) + +func osGetSharedLock(file *os.File) _ErrorCode { + // Test the PENDING lock before acquiring a new SHARED lock. + if lock, _ := osGetLock(file, _PENDING_BYTE, 1); lock == unix.F_WRLCK { + return _BUSY + } + // Acquire the SHARED lock. + return osReadLock(file, _SHARED_FIRST, _SHARED_SIZE, 0) +} + +func osGetReservedLock(file *os.File) _ErrorCode { + // Acquire the RESERVED lock. + return osWriteLock(file, _RESERVED_BYTE, 1, 0) +} + +func osGetPendingLock(file *os.File, block bool) _ErrorCode { + var timeout time.Duration + if block { + timeout = -1 + } + // Acquire the PENDING lock. + return osWriteLock(file, _PENDING_BYTE, 1, timeout) +} + +func osGetExclusiveLock(file *os.File, wait bool) _ErrorCode { + var timeout time.Duration + if wait { + timeout = time.Millisecond + } + // Acquire the EXCLUSIVE lock. + return osWriteLock(file, _SHARED_FIRST, _SHARED_SIZE, timeout) +} + +func osDowngradeLock(file *os.File, state LockLevel) _ErrorCode { + if state >= LOCK_EXCLUSIVE { + // Downgrade to a SHARED lock. 
+ if rc := osReadLock(file, _SHARED_FIRST, _SHARED_SIZE, 0); rc != _OK { + // In theory, the downgrade to a SHARED cannot fail because another + // process is holding an incompatible lock. If it does, this + // indicates that the other process is not following the locking + // protocol. If this happens, return _IOERR_RDLOCK. Returning + // BUSY would confuse the upper layer. + return _IOERR_RDLOCK + } + } + // Release the PENDING and RESERVED locks. + return osUnlock(file, _PENDING_BYTE, 2) +} + +func osReleaseLock(file *os.File, _ LockLevel) _ErrorCode { + // Release all locks. + return osUnlock(file, 0, 0) +} + +func osCheckReservedLock(file *os.File) (bool, _ErrorCode) { + // Test the RESERVED lock. + lock, rc := osGetLock(file, _RESERVED_BYTE, 1) + return lock == unix.F_WRLCK, rc +} + +func osGetLock(file *os.File, start, len int64) (int16, _ErrorCode) { + lock := unix.Flock_t{ + Type: unix.F_WRLCK, + Start: start, + Len: len, + } + if unix.FcntlFlock(file.Fd(), unix.F_GETLK, &lock) != nil { + return 0, _IOERR_CHECKRESERVEDLOCK + } + return lock.Type, _OK +} + +func osLockErrorCode(err error, def _ErrorCode) _ErrorCode { + if err == nil { + return _OK + } + if errno, ok := err.(unix.Errno); ok { + switch errno { + case + unix.EACCES, + unix.EAGAIN, + unix.EBUSY, + unix.EINTR, + unix.ENOLCK, + unix.EDEADLK, + unix.ETIMEDOUT: + return _BUSY + case unix.EPERM: + return _PERM + } + if errno == unix.EWOULDBLOCK && unix.EWOULDBLOCK != unix.EAGAIN { + return _BUSY + } + } + return def +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_windows.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_windows.go new file mode 100644 index 000000000..5c68754f8 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_windows.go @@ -0,0 +1,186 @@ +//go:build !sqlite3_nosys + +package vfs + +import ( + "math/rand" + "os" + "time" + + "golang.org/x/sys/windows" +) + +func osGetSharedLock(file *os.File) _ErrorCode { + // Acquire the PENDING lock temporarily before 
acquiring a new SHARED lock. + rc := osReadLock(file, _PENDING_BYTE, 1, 0) + if rc == _OK { + // Acquire the SHARED lock. + rc = osReadLock(file, _SHARED_FIRST, _SHARED_SIZE, 0) + + // Release the PENDING lock. + osUnlock(file, _PENDING_BYTE, 1) + } + return rc +} + +func osGetReservedLock(file *os.File) _ErrorCode { + // Acquire the RESERVED lock. + return osWriteLock(file, _RESERVED_BYTE, 1, 0) +} + +func osGetPendingLock(file *os.File, block bool) _ErrorCode { + var timeout time.Duration + if block { + timeout = -1 + } + + // Acquire the PENDING lock. + return osWriteLock(file, _PENDING_BYTE, 1, timeout) +} + +func osGetExclusiveLock(file *os.File, wait bool) _ErrorCode { + var timeout time.Duration + if wait { + timeout = time.Millisecond + } + + // Release the SHARED lock. + osUnlock(file, _SHARED_FIRST, _SHARED_SIZE) + + // Acquire the EXCLUSIVE lock. + rc := osWriteLock(file, _SHARED_FIRST, _SHARED_SIZE, timeout) + + if rc != _OK { + // Reacquire the SHARED lock. + osReadLock(file, _SHARED_FIRST, _SHARED_SIZE, 0) + } + return rc +} + +func osDowngradeLock(file *os.File, state LockLevel) _ErrorCode { + if state >= LOCK_EXCLUSIVE { + // Release the EXCLUSIVE lock. + osUnlock(file, _SHARED_FIRST, _SHARED_SIZE) + + // Reacquire the SHARED lock. + if rc := osReadLock(file, _SHARED_FIRST, _SHARED_SIZE, 0); rc != _OK { + // This should never happen. + // We should always be able to reacquire the read lock. + return _IOERR_RDLOCK + } + } + + // Release the PENDING and RESERVED locks. + if state >= LOCK_RESERVED { + osUnlock(file, _RESERVED_BYTE, 1) + } + if state >= LOCK_PENDING { + osUnlock(file, _PENDING_BYTE, 1) + } + return _OK +} + +func osReleaseLock(file *os.File, state LockLevel) _ErrorCode { + // Release all locks. 
+ if state >= LOCK_RESERVED { + osUnlock(file, _RESERVED_BYTE, 1) + } + if state >= LOCK_SHARED { + osUnlock(file, _SHARED_FIRST, _SHARED_SIZE) + } + if state >= LOCK_PENDING { + osUnlock(file, _PENDING_BYTE, 1) + } + return _OK +} + +func osCheckReservedLock(file *os.File) (bool, _ErrorCode) { + // Test the RESERVED lock. + rc := osLock(file, 0, _RESERVED_BYTE, 1, 0, _IOERR_CHECKRESERVEDLOCK) + if rc == _BUSY { + return true, _OK + } + if rc == _OK { + // Release the RESERVED lock. + osUnlock(file, _RESERVED_BYTE, 1) + } + return false, rc +} + +func osUnlock(file *os.File, start, len uint32) _ErrorCode { + err := windows.UnlockFileEx(windows.Handle(file.Fd()), + 0, len, 0, &windows.Overlapped{Offset: start}) + if err == windows.ERROR_NOT_LOCKED { + return _OK + } + if err != nil { + return _IOERR_UNLOCK + } + return _OK +} + +func osLock(file *os.File, flags, start, len uint32, timeout time.Duration, def _ErrorCode) _ErrorCode { + var err error + switch { + case timeout == 0: + err = osLockEx(file, flags|windows.LOCKFILE_FAIL_IMMEDIATELY, start, len) + case timeout < 0: + err = osLockEx(file, flags, start, len) + default: + before := time.Now() + for { + err = osLockEx(file, flags|windows.LOCKFILE_FAIL_IMMEDIATELY, start, len) + if errno, _ := err.(windows.Errno); errno != windows.ERROR_LOCK_VIOLATION { + break + } + if timeout < time.Since(before) { + break + } + osSleep(time.Duration(rand.Int63n(int64(time.Millisecond)))) + } + } + return osLockErrorCode(err, def) +} + +func osLockEx(file *os.File, flags, start, len uint32) error { + return windows.LockFileEx(windows.Handle(file.Fd()), flags, + 0, len, 0, &windows.Overlapped{Offset: start}) +} + +func osReadLock(file *os.File, start, len uint32, timeout time.Duration) _ErrorCode { + return osLock(file, 0, start, len, timeout, _IOERR_RDLOCK) +} + +func osWriteLock(file *os.File, start, len uint32, timeout time.Duration) _ErrorCode { + return osLock(file, windows.LOCKFILE_EXCLUSIVE_LOCK, start, len, timeout, 
_IOERR_LOCK) +} + +func osLockErrorCode(err error, def _ErrorCode) _ErrorCode { + if err == nil { + return _OK + } + if errno, ok := err.(windows.Errno); ok { + // https://devblogs.microsoft.com/oldnewthing/20140905-00/?p=63 + switch errno { + case + windows.ERROR_LOCK_VIOLATION, + windows.ERROR_IO_PENDING, + windows.ERROR_OPERATION_ABORTED: + return _BUSY + } + } + return def +} + +func osSleep(d time.Duration) { + if d > 0 { + period := max(1, d/(5*time.Millisecond)) + if period < 16 { + windows.TimeBeginPeriod(uint32(period)) + } + time.Sleep(d) + if period < 16 { + windows.TimeEndPeriod(uint32(period)) + } + } +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/registry.go b/vendor/github.com/ncruces/go-sqlite3/vfs/registry.go new file mode 100644 index 000000000..42a2106fb --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/registry.go @@ -0,0 +1,48 @@ +package vfs + +import "sync" + +var ( + // +checklocks:vfsRegistryMtx + vfsRegistry map[string]VFS + vfsRegistryMtx sync.RWMutex +) + +// Find returns a VFS given its name. +// If there is no match, nil is returned. +// If name is empty, the default VFS is returned. +// +// https://sqlite.org/c3ref/vfs_find.html +func Find(name string) VFS { + if name == "" || name == "os" { + return vfsOS{} + } + vfsRegistryMtx.RLock() + defer vfsRegistryMtx.RUnlock() + return vfsRegistry[name] +} + +// Register registers a VFS. +// Empty and "os" are reserved names. +// +// https://sqlite.org/c3ref/vfs_find.html +func Register(name string, vfs VFS) { + if name == "" || name == "os" { + return + } + vfsRegistryMtx.Lock() + defer vfsRegistryMtx.Unlock() + if vfsRegistry == nil { + vfsRegistry = map[string]VFS{} + } + vfsRegistry[name] = vfs +} + +// Unregister unregisters a VFS. 
+// +// https://sqlite.org/c3ref/vfs_find.html +func Unregister(name string) { + vfsRegistryMtx.Lock() + defer vfsRegistryMtx.Unlock() + delete(vfsRegistry, name) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go new file mode 100644 index 000000000..2b76dd5dc --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go @@ -0,0 +1,173 @@ +//go:build (darwin || linux) && (amd64 || arm64 || riscv64) && !(sqlite3_flock || sqlite3_noshm || sqlite3_nosys) + +package vfs + +import ( + "context" + "io" + "os" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/tetratelabs/wazero/api" + "golang.org/x/sys/unix" +) + +// SupportsSharedMemory is false on platforms that do not support shared memory. +// To use [WAL without shared-memory], you need to set [EXCLUSIVE locking mode]. +// +// [WAL without shared-memory]: https://sqlite.org/wal.html#noshm +// [EXCLUSIVE locking mode]: https://sqlite.org/pragma.html#pragma_locking_mode +const SupportsSharedMemory = true + +const ( + _SHM_NLOCK = 8 + _SHM_BASE = 120 + _SHM_DMS = _SHM_BASE + _SHM_NLOCK +) + +func (f *vfsFile) SharedMemory() SharedMemory { return f.shm } + +// NewSharedMemory returns a shared-memory WAL-index +// backed by a file with the given path. +// It will return nil if shared-memory is not supported, +// or not appropriate for the given flags. +// Only [OPEN_MAIN_DB] databases may need a WAL-index. +// You must ensure all concurrent accesses to a database +// use shared-memory instances created with the same path. 
+func NewSharedMemory(path string, flags OpenFlag) SharedMemory { + if flags&OPEN_MAIN_DB == 0 || flags&(OPEN_DELETEONCLOSE|OPEN_MEMORY) != 0 { + return nil + } + return &vfsShm{ + path: path, + readOnly: flags&OPEN_READONLY != 0, + } +} + +type vfsShm struct { + *os.File + path string + regions []*util.MappedRegion + readOnly bool +} + +func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, extend bool) (uint32, error) { + // Ensure size is a multiple of the OS page size. + if int(size)&(unix.Getpagesize()-1) != 0 { + return 0, _IOERR_SHMMAP + } + + if s.File == nil { + var flag int + if s.readOnly { + flag = unix.O_RDONLY + } else { + flag = unix.O_RDWR + } + f, err := os.OpenFile(s.path, + flag|unix.O_CREAT|unix.O_NOFOLLOW, 0666) + if err != nil { + return 0, _CANTOPEN + } + s.File = f + } + + // Dead man's switch. + if lock, rc := osGetLock(s.File, _SHM_DMS, 1); rc != _OK { + return 0, _IOERR_LOCK + } else if lock == unix.F_WRLCK { + return 0, _BUSY + } else if lock == unix.F_UNLCK { + if s.readOnly { + return 0, _READONLY_CANTINIT + } + if rc := osWriteLock(s.File, _SHM_DMS, 1, 0); rc != _OK { + return 0, rc + } + if err := s.Truncate(0); err != nil { + return 0, _IOERR_SHMOPEN + } + } + if rc := osReadLock(s.File, _SHM_DMS, 1, 0); rc != _OK { + return 0, rc + } + + // Check if file is big enough. + o, err := s.Seek(0, io.SeekEnd) + if err != nil { + return 0, _IOERR_SHMSIZE + } + if n := (int64(id) + 1) * int64(size); n > o { + if !extend { + return 0, nil + } + err := osAllocate(s.File, n) + if err != nil { + return 0, _IOERR_SHMSIZE + } + } + + var prot int + if s.readOnly { + prot = unix.PROT_READ + } else { + prot = unix.PROT_READ | unix.PROT_WRITE + } + r, err := util.MapRegion(ctx, mod, s.File, int64(id)*int64(size), size, prot) + if err != nil { + return 0, err + } + s.regions = append(s.regions, r) + return r.Ptr, nil +} + +func (s *vfsShm) shmLock(offset, n int32, flags _ShmFlag) error { + // Argument check. 
+ if n <= 0 || offset < 0 || offset+n > _SHM_NLOCK { + panic(util.AssertErr()) + } + switch flags { + case + _SHM_LOCK | _SHM_SHARED, + _SHM_LOCK | _SHM_EXCLUSIVE, + _SHM_UNLOCK | _SHM_SHARED, + _SHM_UNLOCK | _SHM_EXCLUSIVE: + // + default: + panic(util.AssertErr()) + } + if n != 1 && flags&_SHM_EXCLUSIVE == 0 { + panic(util.AssertErr()) + } + + switch { + case flags&_SHM_UNLOCK != 0: + return osUnlock(s.File, _SHM_BASE+int64(offset), int64(n)) + case flags&_SHM_SHARED != 0: + return osReadLock(s.File, _SHM_BASE+int64(offset), int64(n), 0) + case flags&_SHM_EXCLUSIVE != 0: + return osWriteLock(s.File, _SHM_BASE+int64(offset), int64(n), 0) + default: + panic(util.AssertErr()) + } +} + +func (s *vfsShm) shmUnmap(delete bool) { + if s.File == nil { + return + } + + // Unmap regions. + for _, r := range s.regions { + r.Unmap() + } + clear(s.regions) + s.regions = s.regions[:0] + + // Close the file. + defer s.Close() + if delete { + os.Remove(s.Name()) + } + s.File = nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_other.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_other.go new file mode 100644 index 000000000..21191979e --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_other.go @@ -0,0 +1,21 @@ +//go:build !(darwin || linux) || !(amd64 || arm64 || riscv64) || sqlite3_flock || sqlite3_noshm || sqlite3_nosys + +package vfs + +// SupportsSharedMemory is false on platforms that do not support shared memory. +// To use [WAL without shared-memory], you need to set [EXCLUSIVE locking mode]. +// +// [WAL without shared-memory]: https://sqlite.org/wal.html#noshm +// [EXCLUSIVE locking mode]: https://sqlite.org/pragma.html#pragma_locking_mode +const SupportsSharedMemory = false + +// NewSharedMemory returns a shared-memory WAL-index +// backed by a file with the given path. +// It will return nil if shared-memory is not supported, +// or not appropriate for the given flags. +// Only [OPEN_MAIN_DB] databases may need a WAL-index. 
+// You must ensure all concurrent accesses to a database +// use shared-memory instances created with the same path. +func NewSharedMemory(path string, flags OpenFlag) SharedMemory { + return nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/vfs.go b/vendor/github.com/ncruces/go-sqlite3/vfs/vfs.go new file mode 100644 index 000000000..1887e9f22 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/vfs.go @@ -0,0 +1,459 @@ +package vfs + +import ( + "context" + "crypto/rand" + "io" + "reflect" + "sync" + "time" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/ncruces/julianday" + "github.com/tetratelabs/wazero" + "github.com/tetratelabs/wazero/api" +) + +// ExportHostFunctions is an internal API users need not call directly. +// +// ExportHostFunctions registers the required VFS host functions +// with the provided env module. +func ExportHostFunctions(env wazero.HostModuleBuilder) wazero.HostModuleBuilder { + util.ExportFuncII(env, "go_vfs_find", vfsFind) + util.ExportFuncIIJ(env, "go_localtime", vfsLocaltime) + util.ExportFuncIIII(env, "go_randomness", vfsRandomness) + util.ExportFuncIII(env, "go_sleep", vfsSleep) + util.ExportFuncIII(env, "go_current_time_64", vfsCurrentTime64) + util.ExportFuncIIIII(env, "go_full_pathname", vfsFullPathname) + util.ExportFuncIIII(env, "go_delete", vfsDelete) + util.ExportFuncIIIII(env, "go_access", vfsAccess) + util.ExportFuncIIIIIII(env, "go_open", vfsOpen) + util.ExportFuncII(env, "go_close", vfsClose) + util.ExportFuncIIIIJ(env, "go_read", vfsRead) + util.ExportFuncIIIIJ(env, "go_write", vfsWrite) + util.ExportFuncIIJ(env, "go_truncate", vfsTruncate) + util.ExportFuncIII(env, "go_sync", vfsSync) + util.ExportFuncIII(env, "go_file_size", vfsFileSize) + util.ExportFuncIIII(env, "go_file_control", vfsFileControl) + util.ExportFuncII(env, "go_sector_size", vfsSectorSize) + util.ExportFuncII(env, "go_device_characteristics", vfsDeviceCharacteristics) + util.ExportFuncIII(env, "go_lock", vfsLock) + 
util.ExportFuncIII(env, "go_unlock", vfsUnlock) + util.ExportFuncIII(env, "go_check_reserved_lock", vfsCheckReservedLock) + util.ExportFuncIIIIII(env, "go_shm_map", vfsShmMap) + util.ExportFuncIIIII(env, "go_shm_lock", vfsShmLock) + util.ExportFuncIII(env, "go_shm_unmap", vfsShmUnmap) + util.ExportFuncVI(env, "go_shm_barrier", vfsShmBarrier) + return env +} + +func vfsFind(ctx context.Context, mod api.Module, zVfsName uint32) uint32 { + name := util.ReadString(mod, zVfsName, _MAX_NAME) + if vfs := Find(name); vfs != nil && vfs != (vfsOS{}) { + return 1 + } + return 0 +} + +func vfsLocaltime(ctx context.Context, mod api.Module, pTm uint32, t int64) _ErrorCode { + tm := time.Unix(t, 0) + var isdst int + if tm.IsDST() { + isdst = 1 + } + + const size = 32 / 8 + // https://pubs.opengroup.org/onlinepubs/7908799/xsh/time.h.html + util.WriteUint32(mod, pTm+0*size, uint32(tm.Second())) + util.WriteUint32(mod, pTm+1*size, uint32(tm.Minute())) + util.WriteUint32(mod, pTm+2*size, uint32(tm.Hour())) + util.WriteUint32(mod, pTm+3*size, uint32(tm.Day())) + util.WriteUint32(mod, pTm+4*size, uint32(tm.Month()-time.January)) + util.WriteUint32(mod, pTm+5*size, uint32(tm.Year()-1900)) + util.WriteUint32(mod, pTm+6*size, uint32(tm.Weekday()-time.Sunday)) + util.WriteUint32(mod, pTm+7*size, uint32(tm.YearDay()-1)) + util.WriteUint32(mod, pTm+8*size, uint32(isdst)) + return _OK +} + +func vfsRandomness(ctx context.Context, mod api.Module, pVfs uint32, nByte int32, zByte uint32) uint32 { + mem := util.View(mod, zByte, uint64(nByte)) + n, _ := rand.Reader.Read(mem) + return uint32(n) +} + +func vfsSleep(ctx context.Context, mod api.Module, pVfs uint32, nMicro int32) _ErrorCode { + osSleep(time.Duration(nMicro) * time.Microsecond) + return _OK +} + +func vfsCurrentTime64(ctx context.Context, mod api.Module, pVfs, piNow uint32) _ErrorCode { + day, nsec := julianday.Date(time.Now()) + msec := day*86_400_000 + nsec/1_000_000 + util.WriteUint64(mod, piNow, uint64(msec)) + return _OK +} + 
+func vfsFullPathname(ctx context.Context, mod api.Module, pVfs, zRelative uint32, nFull int32, zFull uint32) _ErrorCode { + vfs := vfsGet(mod, pVfs) + path := util.ReadString(mod, zRelative, _MAX_PATHNAME) + + path, err := vfs.FullPathname(path) + + if len(path) >= int(nFull) { + return _CANTOPEN_FULLPATH + } + util.WriteString(mod, zFull, path) + + return vfsErrorCode(err, _CANTOPEN_FULLPATH) +} + +func vfsDelete(ctx context.Context, mod api.Module, pVfs, zPath, syncDir uint32) _ErrorCode { + vfs := vfsGet(mod, pVfs) + path := util.ReadString(mod, zPath, _MAX_PATHNAME) + + err := vfs.Delete(path, syncDir != 0) + return vfsErrorCode(err, _IOERR_DELETE) +} + +func vfsAccess(ctx context.Context, mod api.Module, pVfs, zPath uint32, flags AccessFlag, pResOut uint32) _ErrorCode { + vfs := vfsGet(mod, pVfs) + path := util.ReadString(mod, zPath, _MAX_PATHNAME) + + ok, err := vfs.Access(path, flags) + var res uint32 + if ok { + res = 1 + } + + util.WriteUint32(mod, pResOut, res) + return vfsErrorCode(err, _IOERR_ACCESS) +} + +func vfsOpen(ctx context.Context, mod api.Module, pVfs, zPath, pFile uint32, flags OpenFlag, pOutFlags, pOutVFS uint32) _ErrorCode { + vfs := vfsGet(mod, pVfs) + + var path string + if zPath != 0 { + path = util.ReadString(mod, zPath, _MAX_PATHNAME) + } + + var file File + var err error + if ffs, ok := vfs.(VFSFilename); ok { + name := OpenFilename(ctx, mod, zPath, flags) + file, flags, err = ffs.OpenFilename(name, flags) + } else { + file, flags, err = vfs.Open(path, flags) + } + if err != nil { + return vfsErrorCode(err, _CANTOPEN) + } + + if file, ok := file.(FilePowersafeOverwrite); ok { + name := OpenFilename(ctx, mod, zPath, flags) + if b, ok := util.ParseBool(name.URIParameter("psow")); ok { + file.SetPowersafeOverwrite(b) + } + } + if file, ok := file.(FileSharedMemory); ok && + pOutVFS != 0 && file.SharedMemory() != nil { + util.WriteUint32(mod, pOutVFS, 1) + } + if pOutFlags != 0 { + util.WriteUint32(mod, pOutFlags, uint32(flags)) + } + 
vfsFileRegister(ctx, mod, pFile, file) + return _OK +} + +func vfsClose(ctx context.Context, mod api.Module, pFile uint32) _ErrorCode { + err := vfsFileClose(ctx, mod, pFile) + if err != nil { + return vfsErrorCode(err, _IOERR_CLOSE) + } + return _OK +} + +func vfsRead(ctx context.Context, mod api.Module, pFile, zBuf uint32, iAmt int32, iOfst int64) _ErrorCode { + file := vfsFileGet(ctx, mod, pFile).(File) + buf := util.View(mod, zBuf, uint64(iAmt)) + + n, err := file.ReadAt(buf, iOfst) + if n == int(iAmt) { + return _OK + } + if err != io.EOF { + return vfsErrorCode(err, _IOERR_READ) + } + clear(buf[n:]) + return _IOERR_SHORT_READ +} + +func vfsWrite(ctx context.Context, mod api.Module, pFile, zBuf uint32, iAmt int32, iOfst int64) _ErrorCode { + file := vfsFileGet(ctx, mod, pFile).(File) + buf := util.View(mod, zBuf, uint64(iAmt)) + + _, err := file.WriteAt(buf, iOfst) + if err != nil { + return vfsErrorCode(err, _IOERR_WRITE) + } + return _OK +} + +func vfsTruncate(ctx context.Context, mod api.Module, pFile uint32, nByte int64) _ErrorCode { + file := vfsFileGet(ctx, mod, pFile).(File) + err := file.Truncate(nByte) + return vfsErrorCode(err, _IOERR_TRUNCATE) +} + +func vfsSync(ctx context.Context, mod api.Module, pFile uint32, flags SyncFlag) _ErrorCode { + file := vfsFileGet(ctx, mod, pFile).(File) + err := file.Sync(flags) + return vfsErrorCode(err, _IOERR_FSYNC) +} + +func vfsFileSize(ctx context.Context, mod api.Module, pFile, pSize uint32) _ErrorCode { + file := vfsFileGet(ctx, mod, pFile).(File) + size, err := file.Size() + util.WriteUint64(mod, pSize, uint64(size)) + return vfsErrorCode(err, _IOERR_SEEK) +} + +func vfsLock(ctx context.Context, mod api.Module, pFile uint32, eLock LockLevel) _ErrorCode { + file := vfsFileGet(ctx, mod, pFile).(File) + err := file.Lock(eLock) + return vfsErrorCode(err, _IOERR_LOCK) +} + +func vfsUnlock(ctx context.Context, mod api.Module, pFile uint32, eLock LockLevel) _ErrorCode { + file := vfsFileGet(ctx, mod, pFile).(File) + 
err := file.Unlock(eLock) + return vfsErrorCode(err, _IOERR_UNLOCK) +} + +func vfsCheckReservedLock(ctx context.Context, mod api.Module, pFile, pResOut uint32) _ErrorCode { + file := vfsFileGet(ctx, mod, pFile).(File) + locked, err := file.CheckReservedLock() + + var res uint32 + if locked { + res = 1 + } + + util.WriteUint32(mod, pResOut, res) + return vfsErrorCode(err, _IOERR_CHECKRESERVEDLOCK) +} + +func vfsFileControl(ctx context.Context, mod api.Module, pFile uint32, op _FcntlOpcode, pArg uint32) _ErrorCode { + file := vfsFileGet(ctx, mod, pFile).(File) + + switch op { + case _FCNTL_LOCKSTATE: + if file, ok := file.(FileLockState); ok { + util.WriteUint32(mod, pArg, uint32(file.LockState())) + return _OK + } + + case _FCNTL_PERSIST_WAL: + if file, ok := file.(FilePersistentWAL); ok { + if i := util.ReadUint32(mod, pArg); int32(i) >= 0 { + file.SetPersistentWAL(i != 0) + } else if file.PersistentWAL() { + util.WriteUint32(mod, pArg, 1) + } else { + util.WriteUint32(mod, pArg, 0) + } + return _OK + } + + case _FCNTL_POWERSAFE_OVERWRITE: + if file, ok := file.(FilePowersafeOverwrite); ok { + if i := util.ReadUint32(mod, pArg); int32(i) >= 0 { + file.SetPowersafeOverwrite(i != 0) + } else if file.PowersafeOverwrite() { + util.WriteUint32(mod, pArg, 1) + } else { + util.WriteUint32(mod, pArg, 0) + } + return _OK + } + + case _FCNTL_CHUNK_SIZE: + if file, ok := file.(FileChunkSize); ok { + size := util.ReadUint32(mod, pArg) + file.ChunkSize(int(size)) + return _OK + } + + case _FCNTL_SIZE_HINT: + if file, ok := file.(FileSizeHint); ok { + size := util.ReadUint64(mod, pArg) + err := file.SizeHint(int64(size)) + return vfsErrorCode(err, _IOERR_TRUNCATE) + } + + case _FCNTL_HAS_MOVED: + if file, ok := file.(FileHasMoved); ok { + moved, err := file.HasMoved() + var res uint32 + if moved { + res = 1 + } + util.WriteUint32(mod, pArg, res) + return vfsErrorCode(err, _IOERR_FSTAT) + } + + case _FCNTL_OVERWRITE: + if file, ok := file.(FileOverwrite); ok { + err := 
file.Overwrite() + return vfsErrorCode(err, _IOERR) + } + + case _FCNTL_COMMIT_PHASETWO: + if file, ok := file.(FileCommitPhaseTwo); ok { + err := file.CommitPhaseTwo() + return vfsErrorCode(err, _IOERR) + } + + case _FCNTL_BEGIN_ATOMIC_WRITE: + if file, ok := file.(FileBatchAtomicWrite); ok { + err := file.BeginAtomicWrite() + return vfsErrorCode(err, _IOERR_BEGIN_ATOMIC) + } + case _FCNTL_COMMIT_ATOMIC_WRITE: + if file, ok := file.(FileBatchAtomicWrite); ok { + err := file.CommitAtomicWrite() + return vfsErrorCode(err, _IOERR_COMMIT_ATOMIC) + } + case _FCNTL_ROLLBACK_ATOMIC_WRITE: + if file, ok := file.(FileBatchAtomicWrite); ok { + err := file.RollbackAtomicWrite() + return vfsErrorCode(err, _IOERR_ROLLBACK_ATOMIC) + } + + case _FCNTL_CKPT_DONE: + if file, ok := file.(FileCheckpoint); ok { + err := file.CheckpointDone() + return vfsErrorCode(err, _IOERR) + } + case _FCNTL_CKPT_START: + if file, ok := file.(FileCheckpoint); ok { + err := file.CheckpointStart() + return vfsErrorCode(err, _IOERR) + } + + case _FCNTL_PRAGMA: + if file, ok := file.(FilePragma); ok { + ptr := util.ReadUint32(mod, pArg+1*ptrlen) + name := util.ReadString(mod, ptr, _MAX_SQL_LENGTH) + var value string + if ptr := util.ReadUint32(mod, pArg+2*ptrlen); ptr != 0 { + value = util.ReadString(mod, ptr, _MAX_SQL_LENGTH) + } + + out, err := file.Pragma(name, value) + + ret := vfsErrorCode(err, _ERROR) + if ret == _ERROR { + out = err.Error() + } + if out != "" { + fn := mod.ExportedFunction("malloc") + stack := [...]uint64{uint64(len(out) + 1)} + if err := fn.CallWithStack(ctx, stack[:]); err != nil { + panic(err) + } + util.WriteUint32(mod, pArg, uint32(stack[0])) + util.WriteString(mod, uint32(stack[0]), out) + } + return ret + } + } + + // Consider also implementing these opcodes (in use by SQLite): + // _FCNTL_BUSYHANDLER + // _FCNTL_LAST_ERRNO + // _FCNTL_SYNC + return _NOTFOUND +} + +func vfsSectorSize(ctx context.Context, mod api.Module, pFile uint32) uint32 { + file := vfsFileGet(ctx, 
mod, pFile).(File) + return uint32(file.SectorSize()) +} + +func vfsDeviceCharacteristics(ctx context.Context, mod api.Module, pFile uint32) DeviceCharacteristic { + file := vfsFileGet(ctx, mod, pFile).(File) + return file.DeviceCharacteristics() +} + +var shmBarrier sync.Mutex + +func vfsShmBarrier(ctx context.Context, mod api.Module, pFile uint32) { + shmBarrier.Lock() + defer shmBarrier.Unlock() +} + +func vfsShmMap(ctx context.Context, mod api.Module, pFile uint32, iRegion, szRegion int32, bExtend, pp uint32) _ErrorCode { + shm := vfsFileGet(ctx, mod, pFile).(FileSharedMemory).SharedMemory() + p, err := shm.shmMap(ctx, mod, iRegion, szRegion, bExtend != 0) + if err != nil { + return vfsErrorCode(err, _IOERR_SHMMAP) + } + util.WriteUint32(mod, pp, p) + return _OK +} + +func vfsShmLock(ctx context.Context, mod api.Module, pFile uint32, offset, n int32, flags _ShmFlag) _ErrorCode { + shm := vfsFileGet(ctx, mod, pFile).(FileSharedMemory).SharedMemory() + err := shm.shmLock(offset, n, flags) + return vfsErrorCode(err, _IOERR_SHMLOCK) +} + +func vfsShmUnmap(ctx context.Context, mod api.Module, pFile, bDelete uint32) _ErrorCode { + shm := vfsFileGet(ctx, mod, pFile).(FileSharedMemory).SharedMemory() + shm.shmUnmap(bDelete != 0) + return _OK +} + +func vfsGet(mod api.Module, pVfs uint32) VFS { + var name string + if pVfs != 0 { + const zNameOffset = 16 + name = util.ReadString(mod, util.ReadUint32(mod, pVfs+zNameOffset), _MAX_NAME) + } + if vfs := Find(name); vfs != nil { + return vfs + } + panic(util.NoVFSErr + util.ErrorString(name)) +} + +func vfsFileRegister(ctx context.Context, mod api.Module, pFile uint32, file File) { + const fileHandleOffset = 4 + id := util.AddHandle(ctx, file) + util.WriteUint32(mod, pFile+fileHandleOffset, id) +} + +func vfsFileGet(ctx context.Context, mod api.Module, pFile uint32) any { + const fileHandleOffset = 4 + id := util.ReadUint32(mod, pFile+fileHandleOffset) + return util.GetHandle(ctx, id) +} + +func vfsFileClose(ctx 
context.Context, mod api.Module, pFile uint32) error { + const fileHandleOffset = 4 + id := util.ReadUint32(mod, pFile+fileHandleOffset) + return util.DelHandle(ctx, id) +} + +func vfsErrorCode(err error, def _ErrorCode) _ErrorCode { + if err == nil { + return _OK + } + switch v := reflect.ValueOf(err); v.Kind() { + case reflect.Uint8, reflect.Uint16, reflect.Uint32: + return _ErrorCode(v.Uint()) + } + return def +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vtab.go b/vendor/github.com/ncruces/go-sqlite3/vtab.go new file mode 100644 index 000000000..a330c98ff --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vtab.go @@ -0,0 +1,663 @@ +package sqlite3 + +import ( + "context" + "reflect" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/tetratelabs/wazero/api" +) + +// CreateModule registers a new virtual table module name. +// If create is nil, the virtual table is eponymous. +// +// https://sqlite.org/c3ref/create_module.html +func CreateModule[T VTab](db *Conn, name string, create, connect VTabConstructor[T]) error { + var flags int + + const ( + VTAB_CREATOR = 0x01 + VTAB_DESTROYER = 0x02 + VTAB_UPDATER = 0x04 + VTAB_RENAMER = 0x08 + VTAB_OVERLOADER = 0x10 + VTAB_CHECKER = 0x20 + VTAB_TXN = 0x40 + VTAB_SAVEPOINTER = 0x80 + ) + + if create != nil { + flags |= VTAB_CREATOR + } + + vtab := reflect.TypeOf(connect).Out(0) + if implements[VTabDestroyer](vtab) { + flags |= VTAB_DESTROYER + } + if implements[VTabUpdater](vtab) { + flags |= VTAB_UPDATER + } + if implements[VTabRenamer](vtab) { + flags |= VTAB_RENAMER + } + if implements[VTabOverloader](vtab) { + flags |= VTAB_OVERLOADER + } + if implements[VTabChecker](vtab) { + flags |= VTAB_CHECKER + } + if implements[VTabTxn](vtab) { + flags |= VTAB_TXN + } + if implements[VTabSavepointer](vtab) { + flags |= VTAB_SAVEPOINTER + } + + defer db.arena.mark()() + namePtr := db.arena.string(name) + modulePtr := util.AddHandle(db.ctx, module[T]{create, connect}) + r := 
db.call("sqlite3_create_module_go", uint64(db.handle), + uint64(namePtr), uint64(flags), uint64(modulePtr)) + return db.error(r) +} + +func implements[T any](typ reflect.Type) bool { + var ptr *T + return typ.Implements(reflect.TypeOf(ptr).Elem()) +} + +// DeclareVTab declares the schema of a virtual table. +// +// https://sqlite.org/c3ref/declare_vtab.html +func (c *Conn) DeclareVTab(sql string) error { + defer c.arena.mark()() + sqlPtr := c.arena.string(sql) + r := c.call("sqlite3_declare_vtab", uint64(c.handle), uint64(sqlPtr)) + return c.error(r) +} + +// VTabConflictMode is a virtual table conflict resolution mode. +// +// https://sqlite.org/c3ref/c_fail.html +type VTabConflictMode uint8 + +const ( + VTAB_ROLLBACK VTabConflictMode = 1 + VTAB_IGNORE VTabConflictMode = 2 + VTAB_FAIL VTabConflictMode = 3 + VTAB_ABORT VTabConflictMode = 4 + VTAB_REPLACE VTabConflictMode = 5 +) + +// VTabOnConflict determines the virtual table conflict policy. +// +// https://sqlite.org/c3ref/vtab_on_conflict.html +func (c *Conn) VTabOnConflict() VTabConflictMode { + r := c.call("sqlite3_vtab_on_conflict", uint64(c.handle)) + return VTabConflictMode(r) +} + +// VTabConfigOption is a virtual table configuration option. +// +// https://sqlite.org/c3ref/c_vtab_constraint_support.html +type VTabConfigOption uint8 + +const ( + VTAB_CONSTRAINT_SUPPORT VTabConfigOption = 1 + VTAB_INNOCUOUS VTabConfigOption = 2 + VTAB_DIRECTONLY VTabConfigOption = 3 + VTAB_USES_ALL_SCHEMAS VTabConfigOption = 4 +) + +// VTabConfig configures various facets of the virtual table interface. +// +// https://sqlite.org/c3ref/vtab_config.html +func (c *Conn) VTabConfig(op VTabConfigOption, args ...any) error { + var i uint64 + if op == VTAB_CONSTRAINT_SUPPORT && len(args) > 0 { + if b, ok := args[0].(bool); ok && b { + i = 1 + } + } + r := c.call("sqlite3_vtab_config_go", uint64(c.handle), uint64(op), i) + return c.error(r) +} + +// VTabConstructor is a virtual table constructor function. 
+type VTabConstructor[T VTab] func(db *Conn, module, schema, table string, arg ...string) (T, error) + +type module[T VTab] [2]VTabConstructor[T] + +type vtabConstructor int + +const ( + xCreate vtabConstructor = 0 + xConnect vtabConstructor = 1 +) + +// A VTab describes a particular instance of the virtual table. +// A VTab may optionally implement [io.Closer] to free resources. +// +// https://sqlite.org/c3ref/vtab.html +type VTab interface { + // https://sqlite.org/vtab.html#xbestindex + BestIndex(*IndexInfo) error + // https://sqlite.org/vtab.html#xopen + Open() (VTabCursor, error) +} + +// A VTabDestroyer allows a virtual table to drop persistent state. +type VTabDestroyer interface { + VTab + // https://sqlite.org/vtab.html#sqlite3_module.xDestroy + Destroy() error +} + +// A VTabUpdater allows a virtual table to be updated. +type VTabUpdater interface { + VTab + // https://sqlite.org/vtab.html#xupdate + Update(arg ...Value) (rowid int64, err error) +} + +// A VTabRenamer allows a virtual table to be renamed. +type VTabRenamer interface { + VTab + // https://sqlite.org/vtab.html#xrename + Rename(new string) error +} + +// A VTabOverloader allows a virtual table to overload SQL functions. +type VTabOverloader interface { + VTab + // https://sqlite.org/vtab.html#xfindfunction + FindFunction(arg int, name string) (ScalarFunction, IndexConstraintOp) +} + +// A VTabChecker allows a virtual table to report errors +// to the PRAGMA integrity_check and PRAGMA quick_check commands. +// +// Integrity should return an error if it finds problems in the content of the virtual table, +// but should avoid returning a (wrapped) [Error], [ErrorCode] or [ExtendedErrorCode], +// as those indicate the Integrity method itself encountered problems +// while trying to evaluate the virtual table content. 
+type VTabChecker interface { + VTab + // https://sqlite.org/vtab.html#xintegrity + Integrity(schema, table string, flags int) error +} + +// A VTabTxn allows a virtual table to implement +// transactions with two-phase commit. +// +// Anything that is required as part of a commit that may fail +// should be performed in the Sync() callback. +// Current versions of SQLite ignore any errors +// returned by Commit() and Rollback(). +type VTabTxn interface { + VTab + // https://sqlite.org/vtab.html#xBegin + Begin() error + // https://sqlite.org/vtab.html#xsync + Sync() error + // https://sqlite.org/vtab.html#xcommit + Commit() error + // https://sqlite.org/vtab.html#xrollback + Rollback() error +} + +// A VTabSavepointer allows a virtual table to implement +// nested transactions. +// +// https://sqlite.org/vtab.html#xsavepoint +type VTabSavepointer interface { + VTabTxn + Savepoint(id int) error + Release(id int) error + RollbackTo(id int) error +} + +// A VTabCursor describes cursors that point +// into the virtual table and are used +// to loop through the virtual table. +// A VTabCursor may optionally implement +// [io.Closer] to free resources. +// +// http://sqlite.org/c3ref/vtab_cursor.html +type VTabCursor interface { + // https://sqlite.org/vtab.html#xfilter + Filter(idxNum int, idxStr string, arg ...Value) error + // https://sqlite.org/vtab.html#xnext + Next() error + // https://sqlite.org/vtab.html#xeof + EOF() bool + // https://sqlite.org/vtab.html#xcolumn + Column(ctx *Context, n int) error + // https://sqlite.org/vtab.html#xrowid + RowID() (int64, error) +} + +// An IndexInfo describes virtual table indexing information. 
+// +// https://sqlite.org/c3ref/index_info.html +type IndexInfo struct { + // Inputs + Constraint []IndexConstraint + OrderBy []IndexOrderBy + ColumnsUsed int64 + // Outputs + ConstraintUsage []IndexConstraintUsage + IdxNum int + IdxStr string + IdxFlags IndexScanFlag + OrderByConsumed bool + EstimatedCost float64 + EstimatedRows int64 + // Internal + c *Conn + handle uint32 +} + +// An IndexConstraint describes virtual table indexing constraint information. +// +// https://sqlite.org/c3ref/index_info.html +type IndexConstraint struct { + Column int + Op IndexConstraintOp + Usable bool +} + +// An IndexOrderBy describes virtual table indexing order by information. +// +// https://sqlite.org/c3ref/index_info.html +type IndexOrderBy struct { + Column int + Desc bool +} + +// An IndexConstraintUsage describes how virtual table indexing constraints will be used. +// +// https://sqlite.org/c3ref/index_info.html +type IndexConstraintUsage struct { + ArgvIndex int + Omit bool +} + +// RHSValue returns the value of the right-hand operand of a constraint +// if the right-hand operand is known. +// +// https://sqlite.org/c3ref/vtab_rhs_value.html +func (idx *IndexInfo) RHSValue(column int) (Value, error) { + defer idx.c.arena.mark()() + valPtr := idx.c.arena.new(ptrlen) + r := idx.c.call("sqlite3_vtab_rhs_value", uint64(idx.handle), + uint64(column), uint64(valPtr)) + if err := idx.c.error(r); err != nil { + return Value{}, err + } + return Value{ + c: idx.c, + handle: util.ReadUint32(idx.c.mod, valPtr), + }, nil +} + +// Collation returns the name of the collation for a virtual table constraint. +// +// https://sqlite.org/c3ref/vtab_collation.html +func (idx *IndexInfo) Collation(column int) string { + r := idx.c.call("sqlite3_vtab_collation", uint64(idx.handle), + uint64(column)) + return util.ReadString(idx.c.mod, uint32(r), _MAX_NAME) +} + +// Distinct determines if a virtual table query is DISTINCT. 
+// +// https://sqlite.org/c3ref/vtab_distinct.html +func (idx *IndexInfo) Distinct() int { + r := idx.c.call("sqlite3_vtab_distinct", uint64(idx.handle)) + return int(r) +} + +// In identifies and handles IN constraints. +// +// https://sqlite.org/c3ref/vtab_in.html +func (idx *IndexInfo) In(column, handle int) bool { + r := idx.c.call("sqlite3_vtab_in", uint64(idx.handle), + uint64(column), uint64(handle)) + return r != 0 +} + +func (idx *IndexInfo) load() { + // https://sqlite.org/c3ref/index_info.html + mod := idx.c.mod + ptr := idx.handle + + idx.Constraint = make([]IndexConstraint, util.ReadUint32(mod, ptr+0)) + idx.ConstraintUsage = make([]IndexConstraintUsage, util.ReadUint32(mod, ptr+0)) + idx.OrderBy = make([]IndexOrderBy, util.ReadUint32(mod, ptr+8)) + + constraintPtr := util.ReadUint32(mod, ptr+4) + for i := range idx.Constraint { + idx.Constraint[i] = IndexConstraint{ + Column: int(int32(util.ReadUint32(mod, constraintPtr+0))), + Op: IndexConstraintOp(util.ReadUint8(mod, constraintPtr+4)), + Usable: util.ReadUint8(mod, constraintPtr+5) != 0, + } + constraintPtr += 12 + } + + orderByPtr := util.ReadUint32(mod, ptr+12) + for i := range idx.OrderBy { + idx.OrderBy[i] = IndexOrderBy{ + Column: int(int32(util.ReadUint32(mod, orderByPtr+0))), + Desc: util.ReadUint8(mod, orderByPtr+4) != 0, + } + orderByPtr += 8 + } + + idx.EstimatedCost = util.ReadFloat64(mod, ptr+40) + idx.EstimatedRows = int64(util.ReadUint64(mod, ptr+48)) + idx.ColumnsUsed = int64(util.ReadUint64(mod, ptr+64)) +} + +func (idx *IndexInfo) save() { + // https://sqlite.org/c3ref/index_info.html + mod := idx.c.mod + ptr := idx.handle + + usagePtr := util.ReadUint32(mod, ptr+16) + for _, usage := range idx.ConstraintUsage { + util.WriteUint32(mod, usagePtr+0, uint32(usage.ArgvIndex)) + if usage.Omit { + util.WriteUint8(mod, usagePtr+4, 1) + } + usagePtr += 8 + } + + util.WriteUint32(mod, ptr+20, uint32(idx.IdxNum)) + if idx.IdxStr != "" { + util.WriteUint32(mod, ptr+24, 
idx.c.newString(idx.IdxStr)) + util.WriteUint32(mod, ptr+28, 1) // needToFreeIdxStr + } + if idx.OrderByConsumed { + util.WriteUint32(mod, ptr+32, 1) + } + util.WriteFloat64(mod, ptr+40, idx.EstimatedCost) + util.WriteUint64(mod, ptr+48, uint64(idx.EstimatedRows)) + util.WriteUint32(mod, ptr+56, uint32(idx.IdxFlags)) +} + +// IndexConstraintOp is a virtual table constraint operator code. +// +// https://sqlite.org/c3ref/c_index_constraint_eq.html +type IndexConstraintOp uint8 + +const ( + INDEX_CONSTRAINT_EQ IndexConstraintOp = 2 + INDEX_CONSTRAINT_GT IndexConstraintOp = 4 + INDEX_CONSTRAINT_LE IndexConstraintOp = 8 + INDEX_CONSTRAINT_LT IndexConstraintOp = 16 + INDEX_CONSTRAINT_GE IndexConstraintOp = 32 + INDEX_CONSTRAINT_MATCH IndexConstraintOp = 64 + INDEX_CONSTRAINT_LIKE IndexConstraintOp = 65 + INDEX_CONSTRAINT_GLOB IndexConstraintOp = 66 + INDEX_CONSTRAINT_REGEXP IndexConstraintOp = 67 + INDEX_CONSTRAINT_NE IndexConstraintOp = 68 + INDEX_CONSTRAINT_ISNOT IndexConstraintOp = 69 + INDEX_CONSTRAINT_ISNOTNULL IndexConstraintOp = 70 + INDEX_CONSTRAINT_ISNULL IndexConstraintOp = 71 + INDEX_CONSTRAINT_IS IndexConstraintOp = 72 + INDEX_CONSTRAINT_LIMIT IndexConstraintOp = 73 + INDEX_CONSTRAINT_OFFSET IndexConstraintOp = 74 + INDEX_CONSTRAINT_FUNCTION IndexConstraintOp = 150 +) + +// IndexScanFlag is a virtual table scan flag. 
+// +// https://sqlite.org/c3ref/c_index_scan_unique.html +type IndexScanFlag uint32 + +const ( + INDEX_SCAN_UNIQUE IndexScanFlag = 1 +) + +func vtabModuleCallback(i vtabConstructor) func(_ context.Context, _ api.Module, _, _, _, _, _ uint32) uint32 { + return func(ctx context.Context, mod api.Module, pMod, nArg, pArg, ppVTab, pzErr uint32) uint32 { + arg := make([]reflect.Value, 1+nArg) + arg[0] = reflect.ValueOf(ctx.Value(connKey{})) + + for i := uint32(0); i < nArg; i++ { + ptr := util.ReadUint32(mod, pArg+i*ptrlen) + arg[i+1] = reflect.ValueOf(util.ReadString(mod, ptr, _MAX_SQL_LENGTH)) + } + + module := vtabGetHandle(ctx, mod, pMod) + res := reflect.ValueOf(module).Index(int(i)).Call(arg) + err, _ := res[1].Interface().(error) + if err == nil { + vtabPutHandle(ctx, mod, ppVTab, res[0].Interface()) + } + + return vtabError(ctx, mod, pzErr, _PTR_ERROR, err) + } +} + +func vtabDisconnectCallback(ctx context.Context, mod api.Module, pVTab uint32) uint32 { + err := vtabDelHandle(ctx, mod, pVTab) + return vtabError(ctx, mod, 0, _PTR_ERROR, err) +} + +func vtabDestroyCallback(ctx context.Context, mod api.Module, pVTab uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabDestroyer) + err := vtab.Destroy() + if cerr := vtabDelHandle(ctx, mod, pVTab); err == nil { + err = cerr + } + return vtabError(ctx, mod, 0, _PTR_ERROR, err) +} + +func vtabBestIndexCallback(ctx context.Context, mod api.Module, pVTab, pIdxInfo uint32) uint32 { + var info IndexInfo + info.handle = pIdxInfo + info.c = ctx.Value(connKey{}).(*Conn) + info.load() + + vtab := vtabGetHandle(ctx, mod, pVTab).(VTab) + err := vtab.BestIndex(&info) + + info.save() + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func vtabUpdateCallback(ctx context.Context, mod api.Module, pVTab, nArg, pArg, pRowID uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabUpdater) + + db := ctx.Value(connKey{}).(*Conn) + args := make([]Value, nArg) + callbackArgs(db, args, pArg) + rowID, err := 
vtab.Update(args...) + if err == nil { + util.WriteUint64(mod, pRowID, uint64(rowID)) + } + + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func vtabRenameCallback(ctx context.Context, mod api.Module, pVTab, zNew uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabRenamer) + err := vtab.Rename(util.ReadString(mod, zNew, _MAX_NAME)) + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func vtabFindFuncCallback(ctx context.Context, mod api.Module, pVTab uint32, nArg int32, zName, pxFunc uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabOverloader) + f, op := vtab.FindFunction(int(nArg), util.ReadString(mod, zName, _MAX_NAME)) + if op != 0 { + var wrapper uint32 + wrapper = util.AddHandle(ctx, func(c Context, arg ...Value) { + defer util.DelHandle(ctx, wrapper) + f(c, arg...) + }) + util.WriteUint32(mod, pxFunc, wrapper) + } + return uint32(op) +} + +func vtabIntegrityCallback(ctx context.Context, mod api.Module, pVTab, zSchema, zTabName, mFlags, pzErr uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabChecker) + schema := util.ReadString(mod, zSchema, _MAX_NAME) + table := util.ReadString(mod, zTabName, _MAX_NAME) + err := vtab.Integrity(schema, table, int(mFlags)) + // xIntegrity should return OK - even if it finds problems in the content of the virtual table. 
+ // https://sqlite.org/vtab.html#xintegrity + vtabError(ctx, mod, pzErr, _PTR_ERROR, err) + _, code := errorCode(err, _OK) + return code +} + +func vtabBeginCallback(ctx context.Context, mod api.Module, pVTab uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabTxn) + err := vtab.Begin() + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func vtabSyncCallback(ctx context.Context, mod api.Module, pVTab uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabTxn) + err := vtab.Sync() + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func vtabCommitCallback(ctx context.Context, mod api.Module, pVTab uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabTxn) + err := vtab.Commit() + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func vtabRollbackCallback(ctx context.Context, mod api.Module, pVTab uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabTxn) + err := vtab.Rollback() + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func vtabSavepointCallback(ctx context.Context, mod api.Module, pVTab uint32, id int32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabSavepointer) + err := vtab.Savepoint(int(id)) + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func vtabReleaseCallback(ctx context.Context, mod api.Module, pVTab uint32, id int32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabSavepointer) + err := vtab.Release(int(id)) + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func vtabRollbackToCallback(ctx context.Context, mod api.Module, pVTab uint32, id int32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTabSavepointer) + err := vtab.RollbackTo(int(id)) + return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func cursorOpenCallback(ctx context.Context, mod api.Module, pVTab, ppCur uint32) uint32 { + vtab := vtabGetHandle(ctx, mod, pVTab).(VTab) + + cursor, err := vtab.Open() + if err == nil { + vtabPutHandle(ctx, mod, ppCur, cursor) + } + + 
return vtabError(ctx, mod, pVTab, _VTAB_ERROR, err) +} + +func cursorCloseCallback(ctx context.Context, mod api.Module, pCur uint32) uint32 { + err := vtabDelHandle(ctx, mod, pCur) + return vtabError(ctx, mod, 0, _VTAB_ERROR, err) +} + +func cursorFilterCallback(ctx context.Context, mod api.Module, pCur uint32, idxNum int32, idxStr, nArg, pArg uint32) uint32 { + cursor := vtabGetHandle(ctx, mod, pCur).(VTabCursor) + db := ctx.Value(connKey{}).(*Conn) + args := make([]Value, nArg) + callbackArgs(db, args, pArg) + var idxName string + if idxStr != 0 { + idxName = util.ReadString(mod, idxStr, _MAX_LENGTH) + } + err := cursor.Filter(int(idxNum), idxName, args...) + return vtabError(ctx, mod, pCur, _CURSOR_ERROR, err) +} + +func cursorEOFCallback(ctx context.Context, mod api.Module, pCur uint32) uint32 { + cursor := vtabGetHandle(ctx, mod, pCur).(VTabCursor) + if cursor.EOF() { + return 1 + } + return 0 +} + +func cursorNextCallback(ctx context.Context, mod api.Module, pCur uint32) uint32 { + cursor := vtabGetHandle(ctx, mod, pCur).(VTabCursor) + err := cursor.Next() + return vtabError(ctx, mod, pCur, _CURSOR_ERROR, err) +} + +func cursorColumnCallback(ctx context.Context, mod api.Module, pCur, pCtx uint32, n int32) uint32 { + cursor := vtabGetHandle(ctx, mod, pCur).(VTabCursor) + db := ctx.Value(connKey{}).(*Conn) + err := cursor.Column(&Context{db, pCtx}, int(n)) + return vtabError(ctx, mod, pCur, _CURSOR_ERROR, err) +} + +func cursorRowIDCallback(ctx context.Context, mod api.Module, pCur, pRowID uint32) uint32 { + cursor := vtabGetHandle(ctx, mod, pCur).(VTabCursor) + + rowID, err := cursor.RowID() + if err == nil { + util.WriteUint64(mod, pRowID, uint64(rowID)) + } + + return vtabError(ctx, mod, pCur, _CURSOR_ERROR, err) +} + +const ( + _PTR_ERROR = iota + _VTAB_ERROR + _CURSOR_ERROR +) + +func vtabError(ctx context.Context, mod api.Module, ptr, kind uint32, err error) uint32 { + const zErrMsgOffset = 8 + msg, code := errorCode(err, ERROR) + if msg != "" && ptr != 0 
{ + switch kind { + case _VTAB_ERROR: + ptr = ptr + zErrMsgOffset // zErrMsg + case _CURSOR_ERROR: + ptr = util.ReadUint32(mod, ptr) + zErrMsgOffset // pVTab->zErrMsg + } + db := ctx.Value(connKey{}).(*Conn) + if ptr := util.ReadUint32(mod, ptr); ptr != 0 { + db.free(ptr) + } + util.WriteUint32(mod, ptr, db.newString(msg)) + } + return code +} + +func vtabGetHandle(ctx context.Context, mod api.Module, ptr uint32) any { + const handleOffset = 4 + handle := util.ReadUint32(mod, ptr-handleOffset) + return util.GetHandle(ctx, handle) +} + +func vtabDelHandle(ctx context.Context, mod api.Module, ptr uint32) error { + const handleOffset = 4 + handle := util.ReadUint32(mod, ptr-handleOffset) + return util.DelHandle(ctx, handle) +} + +func vtabPutHandle(ctx context.Context, mod api.Module, pptr uint32, val any) { + const handleOffset = 4 + handle := util.AddHandle(ctx, val) + ptr := util.ReadUint32(mod, pptr) + util.WriteUint32(mod, ptr-handleOffset, handle) +} diff --git a/vendor/github.com/ncruces/julianday/.gitignore b/vendor/github.com/ncruces/julianday/.gitignore new file mode 100644 index 000000000..66fd13c90 --- /dev/null +++ b/vendor/github.com/ncruces/julianday/.gitignore @@ -0,0 +1,15 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ diff --git a/vendor/github.com/ncruces/julianday/LICENSE b/vendor/github.com/ncruces/julianday/LICENSE new file mode 100644 index 000000000..7f0f5534c --- /dev/null +++ b/vendor/github.com/ncruces/julianday/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Nuno Cruces + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation 
the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/ncruces/julianday/README.md b/vendor/github.com/ncruces/julianday/README.md new file mode 100644 index 000000000..828ae5749 --- /dev/null +++ b/vendor/github.com/ncruces/julianday/README.md @@ -0,0 +1,9 @@ +# Julian Day calculator + +[![Go Reference](https://pkg.go.dev/badge/image)](https://pkg.go.dev/github.com/ncruces/julianday) +[![Go Report](https://goreportcard.com/badge/github.com/ncruces/julianday)](https://goreportcard.com/report/github.com/ncruces/julianday) +[![Go Coverage](https://github.com/ncruces/julianday/wiki/coverage.svg)](https://raw.githack.com/wiki/ncruces/julianday/coverage.html) + +https://en.wikipedia.org/wiki/Julian_day + +Compatible with [SQLite](https://www.sqlite.org/lang_datefunc.html). diff --git a/vendor/github.com/ncruces/julianday/julianday.go b/vendor/github.com/ncruces/julianday/julianday.go new file mode 100644 index 000000000..d7d0e1960 --- /dev/null +++ b/vendor/github.com/ncruces/julianday/julianday.go @@ -0,0 +1,124 @@ +// Package julianday provides Time to Julian day conversions. 
+package julianday + +import ( + "bytes" + "errors" + "math" + "strconv" + "time" +) + +const secs_per_day = 86_400 +const nsec_per_sec = 1_000_000_000 +const nsec_per_day = nsec_per_sec * secs_per_day +const epoch_days = 2_440_587 +const epoch_secs = secs_per_day / 2 + +func jd(t time.Time) (day, nsec int64) { + sec := t.Unix() + // guaranteed not to overflow + day, sec = sec/secs_per_day+epoch_days, sec%secs_per_day+epoch_secs + return day, sec*nsec_per_sec + int64(t.Nanosecond()) +} + +// Date returns the Julian day number for t, +// and the nanosecond offset within that day, +// in the range [0, 86399999999999]. +func Date(t time.Time) (day, nsec int64) { + day, nsec = jd(t) + switch { + case nsec < 0: + day -= 1 + nsec += nsec_per_day + case nsec >= nsec_per_day: + day += 1 + nsec -= nsec_per_day + } + return day, nsec +} + +// Float returns the Julian date for t as a float64. +// +// In the XXI century, this has submillisecond precision. +func Float(t time.Time) float64 { + day, nsec := jd(t) + // converting day and nsec to float64 is exact + return float64(day) + float64(nsec)/nsec_per_day +} + +// Format returns the Julian date for t as a string. +// +// This has nanosecond precision. +func Format(t time.Time) string { + var buf [32]byte + return string(AppendFormat(buf[:0], t)) +} + +// AppendFormat is like Format but appends the textual representation to dst +// and returns the extended buffer. +func AppendFormat(dst []byte, t time.Time) []byte { + day, nsec := Date(t) + if day < 0 && nsec != 0 { + dst = append(dst, '-') + day = ^day + nsec = nsec_per_day - nsec + } + var buf [20]byte + dst = strconv.AppendInt(dst, day, 10) + frac := strconv.AppendFloat(buf[:0], float64(nsec)/nsec_per_day, 'f', 15, 64) + return append(dst, bytes.TrimRight(frac[1:], ".0")...) +} + +// Time returns the UTC Time corresponding to the Julian day number +// and nanosecond offset within that day. +// Not all day values have a corresponding time value. 
+func Time(day, nsec int64) time.Time { + return time.Unix((day-epoch_days)*secs_per_day-epoch_secs, nsec).UTC() +} + +// FloatTime returns the UTC Time corresponding to a Julian date. +// Not all date values have a corresponding time value. +// +// In the XXI century, this has submillisecond precision. +func FloatTime(date float64) time.Time { + day, frac := math.Modf(date) + nsec := math.Floor(frac * nsec_per_day) + return Time(int64(day), int64(nsec)) +} + +// Parse parses a formatted Julian date and returns the UTC Time it represents. +// +// This has nanosecond precision. +func Parse(s string) (time.Time, error) { + digits := 0 + dot := len(s) + for i, b := range []byte(s) { + if '0' <= b && b <= '9' { + digits++ + continue + } + if b == '.' && i < dot { + dot = i + continue + } + if (b == '+' || b == '-') && i == 0 { + continue + } + return time.Time{}, errors.New("julianday: invalid syntax") + } + if digits == 0 { + return time.Time{}, errors.New("julianday: invalid syntax") + } + + day, err := strconv.ParseInt(s[:dot], 10, 64) + if err != nil && dot > 0 { + return time.Time{}, errors.New("julianday: value out of range") + } + frac, _ := strconv.ParseFloat(s[dot:], 64) + nsec := int64(math.Round(frac * nsec_per_day)) + if s[0] == '-' { + nsec = -nsec + } + return Time(day, nsec), nil +} diff --git a/vendor/github.com/tetratelabs/wazero/.editorconfig b/vendor/github.com/tetratelabs/wazero/.editorconfig new file mode 100644 index 000000000..f999431de --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/.editorconfig @@ -0,0 +1,7 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true diff --git a/vendor/github.com/tetratelabs/wazero/.gitattributes b/vendor/github.com/tetratelabs/wazero/.gitattributes new file mode 100644 index 000000000..3a08bc389 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/.gitattributes @@ -0,0 +1,2 @@ +# Improves experience of commands like `make format` on 
Windows +* text=auto eol=lf diff --git a/vendor/github.com/tetratelabs/wazero/.gitignore b/vendor/github.com/tetratelabs/wazero/.gitignore new file mode 100644 index 000000000..6a14146d4 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/.gitignore @@ -0,0 +1,45 @@ +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib +/wazero +build +dist + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work + +# Goland +.idea + +# AssemblyScript +node_modules +package-lock.json + +# codecov.io +/coverage.txt + +.vagrant + +zig-cache/ +zig-out/ + +.DS_Store + +# Ignore compiled stdlib test cases. +/internal/integration_test/stdlibs/testdata +/internal/integration_test/libsodium/testdata diff --git a/vendor/github.com/tetratelabs/wazero/.gitmodules b/vendor/github.com/tetratelabs/wazero/.gitmodules new file mode 100644 index 000000000..410c91f44 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/.gitmodules @@ -0,0 +1,3 @@ +[submodule "site/themes/hello-friend"] + path = site/themes/hello-friend + url = https://github.com/panr/hugo-theme-hello-friend.git diff --git a/vendor/github.com/tetratelabs/wazero/CONTRIBUTING.md b/vendor/github.com/tetratelabs/wazero/CONTRIBUTING.md new file mode 100644 index 000000000..8ab866f0e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/CONTRIBUTING.md @@ -0,0 +1,75 @@ +# Contributing + +We welcome contributions from the community. Please read the following guidelines carefully to maximize the chances of your PR being merged. + +## Coding Style + +- To ensure your change passes format checks, run `make check`. 
To format your files, you can run `make format`. +- We follow standard Go table-driven tests and use an internal [testing library](./internal/testing/require) to assert correctness. To verify all tests pass, you can run `make test`. + +## DCO + +We require DCO signoff line in every commit to this repo. + +The sign-off is a simple line at the end of the explanation for the +patch, which certifies that you wrote it or otherwise have the right to +pass it on as an open-source patch. The rules are pretty simple: if you +can certify the below (from +[developercertificate.org](https://developercertificate.org/)): + +``` +Developer Certificate of Origin +Version 1.1 +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. +Developer's Certificate of Origin 1.1 +By making a contribution to this project, I certify that: +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. 
+(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +then you just add a line to every git commit message: + + Signed-off-by: Joe Smith + +using your real name (sorry, no pseudonyms or anonymous contributions.) + +You can add the sign off when creating the git commit via `git commit -s`. + +## Code Reviews + +* The pull request title should describe what the change does and not embed issue numbers. +The pull request should only be blank when the change is minor. Any feature should include +a description of the change and what motivated it. If the change or design changes through +review, please keep the title and description updated accordingly. +* A single approval is sufficient to merge. If a reviewer asks for +changes in a PR they should be addressed before the PR is merged, +even if another reviewer has already approved the PR. +* During the review, address the comments and commit the changes +_without_ squashing the commits. This facilitates incremental reviews +since the reviewer does not go through all the code again to find out +what has changed since the last review. When a change goes out of sync with main, +please rebase and force push, keeping the original commits where practical. +* Commits are squashed prior to merging a pull request, using the title +as commit message by default. Maintainers may request contributors to +edit the pull request tite to ensure that it remains descriptive as a +commit message. Alternatively, maintainers may change the commit message directly. 
diff --git a/vendor/github.com/tetratelabs/wazero/LICENSE b/vendor/github.com/tetratelabs/wazero/LICENSE new file mode 100644 index 000000000..e21d69958 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020-2023 wazero authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/tetratelabs/wazero/Makefile b/vendor/github.com/tetratelabs/wazero/Makefile new file mode 100644 index 000000000..e5ae8a261 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/Makefile @@ -0,0 +1,381 @@ + +gofumpt := mvdan.cc/gofumpt@v0.5.0 +gosimports := github.com/rinchsan/gosimports/cmd/gosimports@v0.3.8 +golangci_lint := github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2 +asmfmt := github.com/klauspost/asmfmt/cmd/asmfmt@v1.3.2 +# sync this with netlify.toml! +hugo := github.com/gohugoio/hugo@v0.115.2 + +# Make 3.81 doesn't support '**' globbing: Set explicitly instead of recursion. 
+all_sources := $(wildcard *.go */*.go */*/*.go */*/*/*.go */*/*/*.go */*/*/*/*.go) +all_testdata := $(wildcard testdata/* */testdata/* */*/testdata/* */*/testdata/*/* */*/*/testdata/*) +all_testing := $(wildcard internal/testing/* internal/testing/*/* internal/testing/*/*/*) +all_examples := $(wildcard examples/* examples/*/* examples/*/*/* */*/example/* */*/example/*/* */*/example/*/*/*) +all_it := $(wildcard internal/integration_test/* internal/integration_test/*/* internal/integration_test/*/*/*) +# main_sources exclude any test or example related code +main_sources := $(wildcard $(filter-out %_test.go $(all_testdata) $(all_testing) $(all_examples) $(all_it), $(all_sources))) +# main_packages collect the unique main source directories (sort will dedupe). +# Paths need to all start with ./, so we do that manually vs foreach which strips it. +main_packages := $(sort $(foreach f,$(dir $(main_sources)),$(if $(findstring ./,$(f)),./,./$(f)))) + +go_test_options ?= -timeout 300s + +ensureCompilerFastest := -ldflags '-X github.com/tetratelabs/wazero/internal/integration_test/vs.ensureCompilerFastest=true' +.PHONY: bench +bench: + @go build ./internal/integration_test/bench/... + @# Don't use -test.benchmem as it isn't accurate when comparing against CGO libs + @for d in vs/time vs/wasmedge vs/wasmtime ; do \ + cd ./internal/integration_test/$$d ; \ + go test -bench=. . -tags='wasmedge' $(ensureCompilerFastest) ; \ + cd - ;\ + done + +bench_testdata_dir := internal/integration_test/bench/testdata +.PHONY: build.bench +build.bench: + @tinygo build -o $(bench_testdata_dir)/case.wasm -scheduler=none --no-debug -target=wasi $(bench_testdata_dir)/case.go + +.PHONY: test.examples +test.examples: + @go test $(go_test_options) ./examples/... ./imports/assemblyscript/example/... ./imports/emscripten/... ./imports/wasi_snapshot_preview1/example/... 
+ +.PHONY: build.examples.as +build.examples.as: + @cd ./imports/assemblyscript/example/testdata && npm install && npm run build + +%.wasm: %.zig + @(cd $(@D); zig build -Doptimize=ReleaseSmall) + @mv $(@D)/zig-out/*/$(@F) $(@D) + +.PHONY: build.examples.zig +build.examples.zig: examples/allocation/zig/testdata/greet.wasm imports/wasi_snapshot_preview1/example/testdata/zig/cat.wasm imports/wasi_snapshot_preview1/testdata/zig/wasi.wasm + @cd internal/testing/dwarftestdata/testdata/zig; zig build; mv zig-out/*/main.wasm ./ # Need DWARF custom sections. + +tinygo_sources := examples/basic/testdata/add.go examples/allocation/tinygo/testdata/greet.go examples/cli/testdata/cli.go imports/wasi_snapshot_preview1/example/testdata/tinygo/cat.go imports/wasi_snapshot_preview1/testdata/tinygo/wasi.go cmd/wazero/testdata/cat/cat.go +.PHONY: build.examples.tinygo +build.examples.tinygo: $(tinygo_sources) + @for f in $^; do \ + tinygo build -o $$(echo $$f | sed -e 's/\.go/\.wasm/') -scheduler=none --no-debug --target=wasi $$f; \ + done + @mv cmd/wazero/testdata/cat/cat.wasm cmd/wazero/testdata/cat/cat-tinygo.wasm + +# We use zig to build C as it is easy to install and embeds a copy of zig-cc. +# Note: Don't use "-Oz" as that breaks our wasi sock example. +c_sources := imports/wasi_snapshot_preview1/example/testdata/zig-cc/cat.c imports/wasi_snapshot_preview1/testdata/zig-cc/wasi.c internal/testing/dwarftestdata/testdata/zig-cc/main.c +.PHONY: build.examples.zig-cc +build.examples.zig-cc: $(c_sources) + @for f in $^; do \ + zig cc --target=wasm32-wasi -o $$(echo $$f | sed -e 's/\.c/\.wasm/') $$f; \ + done + +# Here are the emcc args we use: +# +# * `-Oz` - most optimization for code size. +# * `--profiling` - adds the name section. +# * `-s STANDALONE_WASM` - ensures wasm is built for a non-js runtime. +# * `-s EXPORTED_FUNCTIONS=_malloc,_free` - export allocation functions so that +# they can be used externally as "malloc" and "free". 
+# * `-s WARN_ON_UNDEFINED_SYMBOLS=0` - imports not defined in JavaScript error +# otherwise. See https://github.com/emscripten-core/emscripten/issues/13641 +# * `-s TOTAL_STACK=8KB -s TOTAL_MEMORY=64KB` - reduce memory default from 16MB +# to one page (64KB). To do this, we have to reduce the stack size. +# * `-s ALLOW_MEMORY_GROWTH` - allows "memory.grow" instructions to succeed, but +# requires a function import "emscripten_notify_memory_growth". +emscripten_sources := $(wildcard imports/emscripten/testdata/*.cc) +.PHONY: build.examples.emscripten +build.examples.emscripten: $(emscripten_sources) + @for f in $^; do \ + em++ -Oz --profiling \ + -s STANDALONE_WASM \ + -s EXPORTED_FUNCTIONS=_malloc,_free \ + -s WARN_ON_UNDEFINED_SYMBOLS=0 \ + -s TOTAL_STACK=8KB -s TOTAL_MEMORY=64KB \ + -s ALLOW_MEMORY_GROWTH \ + --std=c++17 -o $$(echo $$f | sed -e 's/\.cc/\.wasm/') $$f; \ + done + +%/greet.wasm : cargo_target := wasm32-unknown-unknown +%/cat.wasm : cargo_target := wasm32-wasi +%/wasi.wasm : cargo_target := wasm32-wasi + +.PHONY: build.examples.rust +build.examples.rust: examples/allocation/rust/testdata/greet.wasm imports/wasi_snapshot_preview1/example/testdata/cargo-wasi/cat.wasm imports/wasi_snapshot_preview1/testdata/cargo-wasi/wasi.wasm internal/testing/dwarftestdata/testdata/rust/main.wasm.xz + +# Normally, we build release because it is smaller. Testing dwarf requires the debug build. +internal/testing/dwarftestdata/testdata/rust/main.wasm.xz: + cd $(@D) && cargo wasi build + mv $(@D)/target/wasm32-wasi/debug/main.wasm $(@D) + cd $(@D) && xz -k -f ./main.wasm # Rust's DWARF section is huge, so compress it. + +# Builds rust using cargo normally, or cargo-wasi. 
+%.wasm: %.rs + @(cd $(@D); cargo $(if $(findstring wasi,$(cargo_target)),wasi build,build --target $(cargo_target)) --release) + @mv $(@D)/target/$(cargo_target)/release/$(@F) $(@D) + +spectest_base_dir := internal/integration_test/spectest +spectest_v1_dir := $(spectest_base_dir)/v1 +spectest_v1_testdata_dir := $(spectest_v1_dir)/testdata +spec_version_v1 := wg-1.0 +spectest_v2_dir := $(spectest_base_dir)/v2 +spectest_v2_testdata_dir := $(spectest_v2_dir)/testdata +# Latest draft state as of March 12, 2024. +spec_version_v2 := 1c5e5d178bd75c79b7a12881c529098beaee2a05 +spectest_threads_dir := $(spectest_base_dir)/threads +spectest_threads_testdata_dir := $(spectest_threads_dir)/testdata +# From https://github.com/WebAssembly/threads/tree/upstream-rebuild which has not been merged to main yet. +# It will likely be renamed to main in the future - https://github.com/WebAssembly/threads/issues/216. +spec_version_threads := 3635ca51a17e57e106988846c5b0e0cc48ac04fc + +.PHONY: build.spectest +build.spectest: + @$(MAKE) build.spectest.v1 + @$(MAKE) build.spectest.v2 + +.PHONY: build.spectest.v1 +build.spectest.v1: # Note: wabt by default uses >1.0 features, so wast2json flags might drift as they include more. See WebAssembly/wabt#1878 + @rm -rf $(spectest_v1_testdata_dir) + @mkdir -p $(spectest_v1_testdata_dir) + @cd $(spectest_v1_testdata_dir) \ + && curl -sSL 'https://api.github.com/repos/WebAssembly/spec/contents/test/core?ref=$(spec_version_v1)' | jq -r '.[]| .download_url' | grep -E ".wast" | xargs -Iurl curl -sJL url -O + @cd $(spectest_v1_testdata_dir) && for f in `find . 
-name '*.wast'`; do \ + perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"f32.demote_f64"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \(f32.const nan:canonical\)\)/g' $$f; \ + perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"f32.demote_f64"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \(f32.const nan:arithmetic\)\)/g' $$f; \ + perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"f64\.promote_f32"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \(f64.const nan:canonical\)\)/g' $$f; \ + perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"f64\.promote_f32"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \(f64.const nan:arithmetic\)\)/g' $$f; \ + perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \($$2.const nan:canonical\)\)/g' $$f; \ + perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \($$2.const nan:arithmetic\)\)/g' $$f; \ + perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\s\([a-z0-9.\s+-:]+\)\))\)/\(assert_return $$1 \($$2.const nan:canonical\)\)/g' $$f; \ + perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\s\([a-z0-9.\s+-:]+\)\))\)/\(assert_return $$1 \($$2.const nan:arithmetic\)\)/g' $$f; \ + perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \($$2.const nan:canonical\)\)/g' $$f; \ + perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \($$2.const nan:arithmetic\)\)/g' $$f; \ + wast2json \ + --disable-saturating-float-to-int \ + --disable-sign-extension \ + --disable-simd \ + --disable-multi-value \ + 
--disable-bulk-memory \ + --disable-reference-types \ + --debug-names $$f; \ + done + +.PHONY: build.spectest.v2 +build.spectest.v2: # Note: SIMD cases are placed in the "simd" subdirectory. + @mkdir -p $(spectest_v2_testdata_dir) + @cd $(spectest_v2_testdata_dir) \ + && curl -sSL 'https://api.github.com/repos/WebAssembly/spec/contents/test/core?ref=$(spec_version_v2)' | jq -r '.[]| .download_url' | grep -E ".wast" | xargs -Iurl curl -sJL url -O + @cd $(spectest_v2_testdata_dir) \ + && curl -sSL 'https://api.github.com/repos/WebAssembly/spec/contents/test/core/simd?ref=$(spec_version_v2)' | jq -r '.[]| .download_url' | grep -E ".wast" | xargs -Iurl curl -sJL url -O + @cd $(spectest_v2_testdata_dir) && for f in `find . -name '*.wast'`; do \ + wast2json --debug-names --no-check $$f || true; \ + done # Ignore the error here as some tests (e.g. comments.wast right now) are not supported by wast2json yet. + +# Note: We currently cannot build the "threads" subdirectory that spawns threads due to missing support in wast2json. +# https://github.com/WebAssembly/wabt/issues/2348#issuecomment-1878003959 +.PHONY: build.spectest.threads +build.spectest.threads: + @mkdir -p $(spectest_threads_testdata_dir) + @cd $(spectest_threads_testdata_dir) \ + && curl -sSL 'https://api.github.com/repos/WebAssembly/threads/contents/test/core?ref=$(spec_version_threads)' | jq -r '.[]| .download_url' | grep -E "atomic.wast" | xargs -Iurl curl -sJL url -O + @cd $(spectest_threads_testdata_dir) && for f in `find . -name '*.wast'`; do \ + wast2json --enable-threads --debug-names $$f; \ + done + +.PHONY: test +test: + @go test $(go_test_options) $$(go list ./... | grep -vE '$(spectest_v1_dir)|$(spectest_v2_dir)') + @cd internal/version/testdata && go test $(go_test_options) ./... + @cd internal/integration_test/fuzz/wazerolib && CGO_ENABLED=0 WASM_BINARY_PATH=testdata/test.wasm go test ./... 
+ +.PHONY: coverage +# replace spaces with commas +coverpkg = $(shell echo $(main_packages) | tr ' ' ',') +coverage: ## Generate test coverage + @go test -coverprofile=coverage.txt -covermode=atomic --coverpkg=$(coverpkg) $(main_packages) + @go tool cover -func coverage.txt + +.PHONY: spectest +spectest: + @$(MAKE) spectest.v1 + @$(MAKE) spectest.v2 + +spectest.v1: + @go test $(go_test_options) $$(go list ./... | grep $(spectest_v1_dir)) + +spectest.v2: + @go test $(go_test_options) $$(go list ./... | grep $(spectest_v2_dir)) + +golangci_lint_path := $(shell go env GOPATH)/bin/golangci-lint + +$(golangci_lint_path): + @go install $(golangci_lint) + +golangci_lint_goarch ?= $(shell go env GOARCH) + +.PHONY: lint +lint: $(golangci_lint_path) + @GOARCH=$(golangci_lint_goarch) CGO_ENABLED=0 $(golangci_lint_path) run --timeout 5m + +.PHONY: format +format: + @go run $(gofumpt) -l -w . + @go run $(gosimports) -local github.com/tetratelabs/ -w $(shell find . -name '*.go' -type f) + @go run $(asmfmt) -w $(shell find . -name '*.s' -type f) + +.PHONY: check # Pre-flight check for pull requests +check: +# The following checks help ensure our platform-specific code used for system +# calls safely falls back on a platform unsupported by the compiler engine. +# This makes sure the interpreter can be used. Most often the package that can +# drift here is "platform" or "sysfs": +# +# Ensure we build on plan9. See #1578 + @GOARCH=amd64 GOOS=plan9 go build ./... +# Ensure we build on gojs. See #1526. + @GOARCH=wasm GOOS=js go build ./... +# Ensure we build on wasip1. See #1526. + @GOARCH=wasm GOOS=wasip1 go build ./... +# Ensure we build on aix. See #1723 + @GOARCH=ppc64 GOOS=aix go build ./... +# Ensure we build on windows: + @GOARCH=amd64 GOOS=windows go build ./... +# Ensure we build on an arbitrary operating system: + @GOARCH=amd64 GOOS=dragonfly go build ./... +# Ensure we build on solaris/illumos: + @GOARCH=amd64 GOOS=illumos go build ./... 
+ @GOARCH=amd64 GOOS=solaris go build ./... +# Ensure we build on linux arm for Dapr: +# gh release view -R dapr/dapr --json assets --jq 'first(.assets[] | select(.name = "daprd_linux_arm.tar.gz") | {url, downloadCount})' + @GOARCH=arm GOOS=linux go build ./... +# Ensure we build on linux 386 for Trivy: +# gh release view -R aquasecurity/trivy --json assets --jq 'first(.assets[] | select(.name| test("Linux-32bit.*tar.gz")) | {url, downloadCount})' + @GOARCH=386 GOOS=linux go build ./... +# Ensure we build on FreeBSD amd64 for Trivy: +# gh release view -R aquasecurity/trivy --json assets --jq 'first(.assets[] | select(.name| test("FreeBSD-64bit.*tar.gz")) | {url, downloadCount})' + @GOARCH=amd64 GOOS=freebsd go build ./... + @$(MAKE) lint golangci_lint_goarch=arm64 + @$(MAKE) lint golangci_lint_goarch=amd64 + @$(MAKE) format + @go mod tidy + @if [ ! -z "`git status -s`" ]; then \ + echo "The following differences will fail CI until committed:"; \ + git diff --exit-code; \ + fi + +.PHONY: site +site: ## Serve website content + @git submodule update --init + @cd site && go run $(hugo) server --minify --disableFastRender --baseURL localhost:1313 --cleanDestinationDir -D + +.PHONY: clean +clean: ## Ensure a clean build + @rm -rf dist build coverage.txt + @go clean -testcache + +fuzz_default_flags := --no-trace-compares --sanitizer=none -- -rss_limit_mb=8192 + +fuzz_timeout_seconds ?= 10 +.PHONY: fuzz +fuzz: + @cd internal/integration_test/fuzz && cargo test + @cd internal/integration_test/fuzz && cargo fuzz run logging_no_diff $(fuzz_default_flags) -max_total_time=$(fuzz_timeout_seconds) + @cd internal/integration_test/fuzz && cargo fuzz run no_diff $(fuzz_default_flags) -max_total_time=$(fuzz_timeout_seconds) + @cd internal/integration_test/fuzz && cargo fuzz run memory_no_diff $(fuzz_default_flags) -max_total_time=$(fuzz_timeout_seconds) + @cd internal/integration_test/fuzz && cargo fuzz run validation $(fuzz_default_flags) -max_total_time=$(fuzz_timeout_seconds) + 
+libsodium: + cd ./internal/integration_test/libsodium/testdata && \ + curl -s "https://api.github.com/repos/jedisct1/webassembly-benchmarks/contents/2022-12/wasm?ref=7e86d68e99e60130899fbe3b3ab6e9dce9187a7c" \ + | jq -r '.[] | .download_url' | xargs -n 1 curl -LO + +#### CLI release related #### + +VERSION ?= dev +# Default to a dummy version 0.0.1.1, which is always lower than a real release. +# Legal version values should look like 'x.x.x.x' where x is an integer from 0 to 65534. +# https://learn.microsoft.com/en-us/windows/win32/msi/productversion?redirectedfrom=MSDN +# https://stackoverflow.com/questions/9312221/msi-version-numbers +MSI_VERSION ?= 0.0.1.1 +non_windows_platforms := darwin_amd64 darwin_arm64 linux_amd64 linux_arm64 +non_windows_archives := $(non_windows_platforms:%=dist/wazero_$(VERSION)_%.tar.gz) +windows_platforms := windows_amd64 # TODO: add arm64 windows once we start testing on it. +windows_archives := $(windows_platforms:%=dist/wazero_$(VERSION)_%.zip) $(windows_platforms:%=dist/wazero_$(VERSION)_%.msi) +checksum_txt := dist/wazero_$(VERSION)_checksums.txt + +# define macros for multi-platform builds. these parse the filename being built +go-arch = $(if $(findstring amd64,$1),amd64,arm64) +go-os = $(if $(findstring .exe,$1),windows,$(if $(findstring linux,$1),linux,darwin)) +# msi-arch is a macro so we can detect it based on the file naming convention +msi-arch = $(if $(findstring amd64,$1),x64,arm64) + +build/wazero_%/wazero: + $(call go-build,$@,$<) + +build/wazero_%/wazero.exe: + $(call go-build,$@,$<) + +dist/wazero_$(VERSION)_%.tar.gz: build/wazero_%/wazero + @echo tar.gz "tarring $@" + @mkdir -p $(@D) +# On Windows, we pass the special flag `--mode='+rx' to ensure that we set the executable flag. +# This is only supported by GNU Tar, so we set it conditionally. 
+ @tar -C $(> $(@F) + +dist: $(non_windows_archives) $(if $(findstring Windows_NT,$(OS)),$(windows_archives),) $(checksum_txt) diff --git a/vendor/github.com/tetratelabs/wazero/NOTICE b/vendor/github.com/tetratelabs/wazero/NOTICE new file mode 100644 index 000000000..2f5ea8ebf --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/NOTICE @@ -0,0 +1,2 @@ +wazero +Copyright 2020-2023 wazero authors diff --git a/vendor/github.com/tetratelabs/wazero/RATIONALE.md b/vendor/github.com/tetratelabs/wazero/RATIONALE.md new file mode 100644 index 000000000..8d783cb44 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/RATIONALE.md @@ -0,0 +1,1587 @@ +# Notable rationale of wazero + +## Zero dependencies + +Wazero has zero dependencies to differentiate itself from other runtimes which +have heavy impact usually due to CGO. By avoiding CGO, wazero avoids +prerequisites such as shared libraries or libc, and lets users keep features +like cross compilation. + +Avoiding go.mod dependencies reduces interference on Go version support, and +size of a statically compiled binary. However, doing so brings some +responsibility into the project. + +Go's native platform support is good: We don't need platform-specific code to +get monotonic time, nor do we need much work to implement certain features +needed by our compiler such as `mmap`. That said, Go does not support all +common operating systems to the same degree. For example, Go 1.18 includes +`Mprotect` on Linux and Darwin, but not FreeBSD. + +The general tradeoff the project takes from a zero dependency policy is more +explicit support of platforms (in the compiler runtime), as well a larger and +more technically difficult codebase. + +At some point, we may allow extensions to supply their own platform-specific +hooks. Until then, one end user impact/tradeoff is some glitches trying +untested platforms (with the Compiler runtime). + +### Why do we use CGO to implement system calls on darwin? 
+ +wazero is dependency and CGO free by design. In some cases, we have code that +can optionally use CGO, but retain a fallback for when that's disabled. The only +operating system (`GOOS`) we use CGO by default in is `darwin`. + +Unlike other operating systems, regardless of `CGO_ENABLED`, Go always uses +"CGO" mechanisms in the runtime layer of `darwin`. This is explained in +[Statically linked binaries on Mac OS X](https://developer.apple.com/library/archive/qa/qa1118/_index.html#//apple_ref/doc/uid/DTS10001666): + +> Apple does not support statically linked binaries on Mac OS X. A statically +> linked binary assumes binary compatibility at the kernel system call +> interface, and we do not make any guarantees on that front. Rather, we strive +> to ensure binary compatibility in each dynamically linked system library and +> framework. + +This plays to our advantage for system calls that aren't yet exposed in the Go +standard library, notably `futimens` for nanosecond-precision timestamp +manipulation. + +### Why not x/sys + +Going beyond Go's SDK limitations can be accomplished with their [x/sys library](https://pkg.go.dev/golang.org/x/sys/unix). +For example, this includes `zsyscall_freebsd_amd64.go` missing from the Go SDK. + +However, like all dependencies, x/sys is a source of conflict. For example, +x/sys had to be updated in order to upgrade to Go 1.18. + +If we depended on x/sys, we could get more precise functionality needed for +features such as clocks or more platform support for the compiler runtime. + +That said, formally supporting an operating system may still require testing as +even use of x/sys can require platform-specifics. For example, [mmap-go](https://github.com/edsrzf/mmap-go) +uses x/sys, but also mentions limitations, some not surmountable with x/sys +alone. + +Regardless, we may at some point introduce a separate go.mod for users to use +x/sys as a platform plugin without forcing all users to maintain that +dependency. 
+ +## Project structure + +wazero uses internal packages extensively to balance API compatibility desires for end users with the need to safely +share internals between compilers. + +End-user packages include `wazero`, with `Config` structs, `api`, with shared types, and the built-in `wasi` library. +Everything else is internal. + +We put the main program for wazero into a directory of the same name to match conventions used in `go install`, +notably the name of the folder becomes the binary name. We chose to use `cmd/wazero` as it is common practice +and less surprising than `wazero/wazero`. + +### Internal packages + +Most code in wazero is internal, and it is acknowledged that this prevents external implementation of facets such as +compilers or decoding. It also prevents splitting this code into separate repositories, resulting in a larger monorepo. +This also adds work as more code needs to be centrally reviewed. + +However, the alternative is neither secure nor viable. To allow external implementation would require exporting symbols +public, such as the `CodeSection`, which can easily create bugs. Moreover, there's a high drift risk for any attempt at +external implementations, compounded not just by wazero's code organization, but also the fast moving Wasm and WASI +specifications. + +For example, implementing a compiler correctly requires expertise in Wasm, Golang and assembly. This requires deep +insight into how internals are meant to be structured and the various tiers of testing required for `wazero` to result +in a high quality experience. Even if someone had these skills, supporting external code would introduce variables which +are constants in the central one. Supporting an external codebase is harder on the project team, and could starve time +from the already large burden on the central codebase. + +The tradeoffs of internal packages are a larger codebase and responsibility to implement all standard features. 
It also +implies thinking about extension more as forking is not viable for reasons above also. The primary mitigation of these +realities are friendly OSS licensing, high rigor and a collaborative spirit which aim to make contribution in the shared +codebase productive. + +### Avoiding cyclic dependencies + +wazero shares constants and interfaces with internal code by a sharing pattern described below: +* shared interfaces and constants go in one package under root: `api`. +* user APIs and structs depend on `api` and go into the root package `wazero`. + * e.g. `InstantiateModule` -> `/wasm.go` depends on the type `api.Module`. +* implementation code can also depend on `api` in a corresponding package under `/internal`. + * Ex package `wasm` -> `/internal/wasm/*.go` and can depend on the type `api.Module`. + +The above guarantees no cyclic dependencies at the cost of having to re-define symbols that exist in both packages. +For example, if `wasm.Store` is a type the user needs access to, it is narrowed by a cover type in the `wazero`: + +```go +type runtime struct { + s *wasm.Store +} +``` + +This is not as bad as it sounds as mutations are only available via configuration. This means exported functions are +limited to only a few functions. + +### Avoiding security bugs + +In order to avoid security flaws such as code insertion, nothing in the public API is permitted to write directly to any +mutable symbol in the internal package. For example, the package `api` is shared with internal code. To ensure +immutability, the `api` package cannot contain any mutable public symbol, such as a slice or a struct with an exported +field. + +In practice, this means shared functionality like memory mutation need to be implemented by interfaces. + +Here are some examples: +* `api.Memory` protects access by exposing functions like `WriteFloat64Le` instead of exporting a buffer (`[]byte`). 
+* There is no exported symbol for the `[]byte` representing the `CodeSection` + +Besides security, this practice prevents other bugs and allows centralization of validation logic such as decoding Wasm. + +## API Design + +### Why is `context.Context` inconsistent? + +It may seem strange that only certain API have an initial `context.Context` +parameter. We originally had a `context.Context` for anything that might be +traced, but it turned out to be only useful for lifecycle and host functions. + +For instruction-scoped aspects like memory updates, a context parameter is too +fine-grained and also invisible in practice. For example, most users will use +the compiler engine, and its memory, global or table access will never use go's +context. + +### Why does `api.ValueType` map to uint64? + +WebAssembly allows functions to be defined either by the guest or the host, +with signatures expressed as WebAssembly types. For example, `i32` is a 32-bit +type which might be interpreted as signed. Function signatures can have zero or +more parameters or results even if WebAssembly 1.0 allows up to one result. + +The guest can export functions, so that the host can call it. In the case of +wazero, the host is Go and an exported function can be called via +`api.Function`. `api.Function` allows users to supply parameters and read +results as a slice of uint64. For example, if there are no results, an empty +slice is returned. The user can learn the signature via `FunctionDescription`, +which returns the `api.ValueType` corresponding to each parameter or result. +`api.ValueType` defines the mapping of WebAssembly types to `uint64` values for +reason described in this section. The special case of `v128` is also mentioned +below. + +wazero maps each value type to a uint64 values because it holds the largest +type in WebAssembly 1.0 (i64). A slice allows you to express empty (e.g. a +nullary signature), for example a start function. 
+ +Here's an example of calling a function, noting this syntax works for both a +signature `(param i32 i32) (result i32)` and `(param i64 i64) (result i64)` +```go +x, y := uint64(1), uint64(2) +results, err := mod.ExportedFunction("add").Call(ctx, x, y) +if err != nil { + log.Panicln(err) +} +fmt.Printf("%d + %d = %d\n", x, y, results[0]) +``` + +WebAssembly does not define an encoding strategy for host defined parameters or +results. This means the encoding rules above are defined by wazero instead. To +address this, we clarified mapping both in `api.ValueType` and added helper +functions like `api.EncodeF64`. This allows users conversions typical in Go +programming, and utilities to avoid ambiguity and edge cases around casting. + +Alternatively, we could have defined a byte buffer based approach and a binary +encoding of value types in and out. For example, an empty byte slice would mean +no values, while a non-empty could use a binary encoding for supported values. +This could work, but it is more difficult for the normal case of i32 and i64. +It also shares a struggle with the current approach, which is that value types +were added after WebAssembly 1.0 and not all of them have an encoding. More on +this below. + +In summary, wazero chose an approach for signature mapping because there was +none, and the one we chose biases towards simplicity with integers and handles +the rest with documentation and utilities. + +#### Post 1.0 value types + +Value types added after WebAssembly 1.0 stressed the current model, as some +have no encoding or are larger than 64 bits. While problematic, these value +types are not commonly used in exported (extern) functions. However, some +decisions were made and detailed below. + +For example `externref` has no guest representation. wazero chose to map +references to uint64 as that's the largest value needed to encode a pointer on +supported platforms. 
While there are two reference types, `externref` and +`functype`, the latter is an internal detail of function tables, and the former +is rarely if ever used in function signatures as of the end of 2022. + +The only value larger than 64 bits is used for SIMD (`v128`). Vectorizing via +host functions is not used as of the end of 2022. Even if it were, it would be +inefficient vs guest vectorization due to host function overhead. In other +words, the `v128` value type is unlikely to be in an exported function +signature. That it requires two uint64 values to encode is an internal detail +and not worth changing the exported function interface `api.Function`, as doing +so would break all users. + +### Interfaces, not structs + +All exported types in public packages, regardless of configuration vs runtime, are interfaces. The primary benefits are +internal flexibility and avoiding people accidentally mis-initializing by instantiating the types on their own vs using +the `NewXxx` constructor functions. In other words, there's less support load when things can't be done incorrectly. + +Here's an example: +```go +rt := &RuntimeConfig{} // not initialized properly (fields are nil which shouldn't be) +rt := RuntimeConfig{} // not initialized properly (should be a pointer) +rt := wazero.NewRuntimeConfig() // initialized properly +``` + +There are a few drawbacks to this, notably some work for maintainers. +* Interfaces are decoupled from the structs implementing them, which means the signature has to be repeated twice. +* Interfaces have to be documented and guarded at time of use, that 3rd party implementations aren't supported. +* As of Golang 1.21, interfaces are still [not well supported](https://github.com/golang/go/issues/5860) in godoc. + +## Config + +wazero configures scopes such as Runtime and Module using `XxxConfig` types. For example, `RuntimeConfig` configures +`Runtime` and `ModuleConfig` configure `Module` (instantiation). 
In all cases, config types begin with defaults and can be +customized by a user, e.g., selecting features or a module name override. + +### Why don't we make each configuration setting return an error? +No config types create resources that would need to be closed, nor do they return errors on use. This helps reduce +resource leaks, and makes chaining easier. It makes it possible to parse configuration (ex by parsing yaml) independent +of validating it. + +Instead of: +``` +cfg, err = cfg.WithFS(fs) +if err != nil { + return err +} +cfg, err = cfg.WithName(name) +if err != nil { + return err +} +mod, err = rt.InstantiateModuleWithConfig(ctx, code, cfg) +if err != nil { + return err +} +``` + +There's only one call site to handle errors: +``` +cfg = cfg.WithFS(fs).WithName(name) +mod, err = rt.InstantiateModuleWithConfig(ctx, code, cfg) +if err != nil { + return err +} +``` + +This allows users one place to look for errors, and also the benefit that if anything internally opens a resource, but +errs, there's nothing they need to close. In other words, users don't need to track which resources need closing on +partial error, as that is handled internally by the only code that can read configuration fields. + +### Why is configuration immutable? +While it seems certain scopes like `Runtime` won't repeat within a process, they do, possibly in different goroutines. +For example, some users create a new runtime for each module, and some re-use the same base module configuration with +only small updates (ex the name) for each instantiation. Making configuration immutable allows them to be safely used in +any goroutine. + +Since configs are immutable, changes apply via return val, similar to `append` in a slice. + +For example, both of these are the same sort of error: +```go +append(slice, element) // bug as only the return value has the updated slice. +cfg.WithName(next) // bug as only the return value has the updated name. 
+``` + +Here's an example of correct use: re-assigning explicitly or via chaining. +```go +cfg = cfg.WithName(name) // explicit + +mod, err = rt.InstantiateModuleWithConfig(ctx, code, cfg.WithName(name)) // implicit +if err != nil { + return err +} +``` + +### Why aren't configuration assigned with option types? +The option pattern is a familiar one in Go. For example, someone defines a type `func (x X) err` and uses it to update +the target. For example, you could imagine wazero could choose to make `ModuleConfig` from options vs chaining fields. + +Ex instead of: +```go +type ModuleConfig interface { + WithName(string) ModuleConfig + WithFS(fs.FS) ModuleConfig +} + +struct moduleConfig { + name string + fs fs.FS +} + +func (c *moduleConfig) WithName(name string) ModuleConfig { + ret := *c // copy + ret.name = name + return &ret +} + +func (c *moduleConfig) WithFS(fs fs.FS) ModuleConfig { + ret := *c // copy + ret.setFS("/", fs) + return &ret +} + +config := r.NewModuleConfig().WithFS(fs) +configDerived := config.WithName("name") +``` + +An option function could be defined, then refactor each config method into an name prefixed option function: +```go +type ModuleConfig interface { +} +struct moduleConfig { + name string + fs fs.FS +} + +type ModuleConfigOption func(c *moduleConfig) + +func ModuleConfigName(name string) ModuleConfigOption { + return func(c *moduleConfig) { + c.name = name + } +} + +func ModuleConfigFS(fs fs.FS) ModuleConfigOption { + return func(c *moduleConfig) { + c.fs = fs + } +} + +func (r *runtime) NewModuleConfig(opts ...ModuleConfigOption) ModuleConfig { + ret := newModuleConfig() // defaults + for _, opt := range opts { + opt(&ret.config) + } + return ret +} + +func (c *moduleConfig) WithOptions(opts ...ModuleConfigOption) ModuleConfig { + ret := *c // copy base config + for _, opt := range opts { + opt(&ret.config) + } + return ret +} + +config := r.NewModuleConfig(ModuleConfigFS(fs)) +configDerived := 
config.WithOptions(ModuleConfigName("name")) +``` + +wazero took the path of the former design primarily due to: +* interfaces provide natural namespaces for their methods, which is more direct than functions with name prefixes. +* parsing config into function callbacks is more direct vs parsing config into a slice of functions to do the same. +* in either case derived config is needed and the options pattern is more awkward to achieve that. + +There are other reasons such as test and debug being simpler without options: the above list is constrained to conserve +space. It is accepted that the options pattern is common in Go, which is the main reason for documenting this decision. + +### Why aren't config types deeply structured? +wazero's configuration types cover the two main scopes of WebAssembly use: +* `RuntimeConfig`: This is the broadest scope, so applies also to compilation + and instantiation. e.g. This controls the WebAssembly Specification Version. +* `ModuleConfig`: This affects modules instantiated after compilation and what + resources are allowed. e.g. This defines how or if STDOUT is captured. This + also allows sub-configuration of `FSConfig`. + +These default to a flat definition each, with lazy sub-configuration only after +proven to be necessary. A flat structure is easier to work with and is also +easy to discover. Unlike the option pattern described earlier, more +configuration in the interface doesn't taint the package namespace, only +`ModuleConfig`. + +We default to a flat structure to encourage simplicity. If we eagerly broke out +all possible configurations into sub-types (e.g. ClockConfig), it would be hard +to notice configuration sprawl. By keeping the config flat, it is easy to see +the cognitive load we may be adding to our users. + +In other words, discomfort adding more configuration is a feature, not a bug. +We should only add new configuration rarely, and before doing so, ensure it +will be used. 
In fact, this is why we support using context fields for +experimental configuration. By letting users practice, we can find out if a +configuration was a good idea or not before committing to it, and potentially +sprawling our types. + +In reflection, this approach worked well for the nearly 1.5 year period leading +to version 1.0. We've only had to create a single sub-configuration, `FSConfig`, +and it was well understood why when it occurred. + +## Why does `ModuleConfig.WithStartFunctions` default to `_start`? + +We formerly had functions like `StartWASICommand` that would verify +preconditions and start WASI's `_start` command. However, this caused confusion +because both many languages compiled a WASI dependency, and many did so +inconsistently. + +The conflict is that exported functions need to use features the language +runtime provides, such as garbage collection. There's a "chicken-egg problem" +where `_start` needs to complete in order for exported behavior to work. + +For example, unlike `GOOS=wasip1` in Go 1.21, TinyGo's "wasi" target supports +function exports. So, the only way to use FFI style is via the "wasi" target. +Not explicitly calling `_start` before an ABI such as wapc-go, would crash, due +to setup not happening (e.g. to implement `panic`). Other embedders such as +Envoy also called `_start` for the same reason. To avoid a common problem for +users unaware of WASI, and also to simplify normal use of WASI (e.g. `main`), +we added `_start` to `ModuleConfig.WithStartFunctions`. + +In cases of multiple initializers, such as in wapc-go, users can override this +to add the others *after* `_start`. Users who want to explicitly control +`_start`, such as some of our unit tests, can clear the start functions and +remove it. + +This decision was made in 2022, and holds true in 2023, even with the +introduction of "wasix". It holds because "wasix" is backwards compatible with +"wasip1". 
In the future, there will be other ways to start applications, and +may not be backwards compatible with "wasip1". + +Most notably WASI "Preview 2" is not implemented in a way compatible with +wasip1. Its start function is likely to be different, and defined in the +wasi-cli "world". When the design settles, and it is implemented by compilers, +wazero will attempt to support "wasip2". However, it won't do so in a way that +breaks existing compilers. + +In other words, we won't remove `_start` if "wasip2" continues a path of an +alternate function name. If we did, we'd break existing users despite our +compatibility promise saying we don't. The most likely case is that when we +build-in something incompatible with "wasip1", that start function will be +added to the start functions list in addition to `_start`. + +See http://wasix.org +See https://github.com/WebAssembly/wasi-cli + +## Runtime == Engine+Store +wazero defines a single user-type which combines the specification concept of `Store` with the unspecified `Engine` +which manages them. + +### Why not multi-store? +Multi-store isn't supported as the extra tier complicates lifecycle and locking. Moreover, in practice it is unusual for +there to be an engine that has multiple stores which have multiple modules. More often, it is the case that there is +either 1 engine with 1 store and multiple modules, or 1 engine with many stores, each having 1 non-host module. In worst +case, a user can use multiple runtimes until "multi-store" is better understood. + +If later, we have demand for multiple stores, that can be accomplished by overload. e.g. `Runtime.InstantiateInStore` or +`Runtime.Store(name) Store`. + +## Exit + +### Why do we only return a `sys.ExitError` on a non-zero exit code? + +It is reasonable to think an exit error should be returned, even if the code is +success (zero). Even on success, the module is no longer functional. For +example, function exports would error later. However, wazero does not. 
The only +time `sys.ExitError` is returned is on error (non-zero). + +This decision was to improve performance and ergonomics for guests that both +use WASI (have a `_start` function), and also allow custom exports. +Specifically, Rust, TinyGo and normal wasi-libc, don't exit the module during +`_start`. If they did, it would invalidate their function exports. This means +it is unlikely most compilers will change this behavior. + +`GOOS=wasip1` from Go 1.21 does exit during `_start`. However, it doesn't +support other exports besides `_start`, and `_start` is not defined to be +called multiple times anyway. + +Since `sys.ExitError` is not always returned, we added `Module.IsClosed` for +defensive checks. This helps integrators avoid calling functions which will +always fail. + +### Why panic with `sys.ExitError` after a host function exits? + +Currently, the only portable way to stop processing code is via panic. For +example, WebAssembly "trap" instructions, such as divide by zero, are +implemented via panic. This ensures code isn't executed after it. + +When code reaches the WASI `proc_exit` instruction, we need to stop processing. +Regardless of the exit code, any code invoked after exit would be in an +inconsistent state. This is likely why unreachable instructions are sometimes +inserted after exit: https://github.com/emscripten-core/emscripten/issues/12322 + +## WASI + +Unfortunately, [WASI Snapshot Preview 1](https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md) is not formally defined enough, and has APIs with ambiguous semantics. +This section describes how Wazero interprets and implements the semantics of several WASI APIs that may be interpreted differently by different wasm runtimes. +Those APIs may affect the portability of a WASI application. + +### Why don't we attempt to pass wasi-testsuite on user-defined `fs.FS`? 
+ +While most cases work fine on an `os.File` based implementation, we won't +promise wasi-testsuite compatibility on user defined wrappers of `os.DirFS`. +The only option for real systems is to use our `sysfs.FS`. + +There are a lot of areas where windows behaves differently, despite the +`os.File` abstraction. This goes well beyond file locking concerns (e.g. +`EBUSY` errors on open files). For example, errors like `ACCESS_DENIED` aren't +properly mapped to `EPERM`. There are trickier parts too. `FileInfo.Sys()` +doesn't return enough information to build inodes needed for WASI. To rebuild +them requires the full path to the underlying file, not just its directory +name, and there's no way for us to get that information. At one point we tried, +but in practice things became tangled and functionality such as read-only +wrappers became untenable. Finally, there are version-specific behaviors which +are difficult to maintain even in our own code. For example, go 1.20 opens +files in a different way than versions before it. + +### Why aren't WASI rules enforced? + +The [snapshot-01](https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md) version of WASI has a +number of rules for a "command module", but only the memory export rule is enforced. If a "_start" function exists, it +is enforced to be the correct signature and succeed, but the export itself isn't enforced. It follows that this means +exports are not required to be contained to a "_start" function invocation. Finally, the "__indirect_function_table" +export is also not enforced. + +The reason for the exceptions are that implementations aren't following the rules. For example, TinyGo doesn't export +"__indirect_function_table", so crashing on this would make wazero unable to run TinyGo modules. Similarly, modules +loaded by wapc-go don't always define a "_start" function. 
Since "snapshot-01" is not a proper version, and certainly +not a W3C recommendation, there's no sense in breaking users over matters like this. + +### Why is I/O configuration not coupled to WASI? + +WebAssembly System Interfaces (WASI) is a formalization of a practice that can be done anyway: Define a host function to +access a system interface, such as writing to STDOUT. WASI stalled at snapshot-01 and as of early 2023, is being +rewritten entirely. + +This instability implies a need to transition between WASI specs, which places wazero in a position that requires +decoupling. For example, if code uses two different functions to call `fd_write`, the underlying configuration must be +centralized and decoupled. Otherwise, calls using the same file descriptor number will end up writing to different +places. + +In short, wazero defined system configuration in `ModuleConfig`, not a WASI type. This allows end-users to switch from +one spec to another with minimal impact. This has other helpful benefits, as centralized resources are simpler to close +coherently (ex via `Module.Close`). + +In reflection, this worked well as more ABI became usable in wazero. + +### Background on `ModuleConfig` design + +WebAssembly 1.0 (20191205) specifies some aspects to control isolation between modules ([sandboxing](https://en.wikipedia.org/wiki/Sandbox_(computer_security))). +For example, `wasm.Memory` has size constraints and each instance of it is isolated from each other. While `wasm.Memory` +can be shared, by exporting it, it is not exported by default. In fact a WebAssembly Module (Wasm) has no memory by +default. + +While memory is defined in WebAssembly 1.0 (20191205), many aspects are not. Let's use an example of `exec.Cmd` as for +example, a WebAssembly System Interfaces (WASI) command is implemented as a module with a `_start` function, and in many +ways acts similar to a process with a `main` function. + +To capture "hello world" written to the console (stdout a.k.a. 
file descriptor 1) in `exec.Cmd`, you would set the +`Stdout` field accordingly, perhaps to a buffer. In WebAssembly 1.0 (20191205), the only way to perform something like +this is via a host function (ex `HostModuleFunctionBuilder`) and internally copy memory corresponding to that string +to a buffer. + +WASI implements system interfaces with host functions. Concretely, to write to console, a WASI command `Module` imports +"fd_write" from "wasi_snapshot_preview1" and calls it with the `fd` parameter set to 1 (STDOUT). + +The [snapshot-01](https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md) version of WASI has no +means to declare configuration, although its function definitions imply configuration for example if fd 1 should exist, +and if so where should it write. Moreover, snapshot-01 was last updated in late 2020 and the specification is being +completely rewritten as of early 2022. This means WASI as defined by "snapshot-01" will not clarify aspects like which +file descriptors are required. While it is possible a subsequent version may, it is too early to tell as no version of +WASI has reached a stage near W3C recommendation. Even if it did, module authors are not required to only use WASI to +write to console, as they can define their own host functions, such as they did before WASI existed. + +wazero aims to serve Go developers as a primary function, and help them transition between WASI specifications. In +order to do this, we have to allow top-level configuration. To ensure isolation by default, `ModuleConfig` has WithXXX +that override defaults to no-op or empty. One `ModuleConfig` instance is used regardless of how many times the same WASI +functions are imported. The nil defaults allow safe concurrency in these situations, as well lower the cost when they +are never used. Finally, a one-to-one mapping with `Module` allows the module to close the `ModuleConfig` instead of +confusing users with another API to close. 
+ +Naming, defaults and validation rules of aspects like `STDIN` and `Environ` are intentionally similar to other Go +libraries such as `exec.Cmd` or `syscall.SetEnv`, and differences called out where helpful. For example, there's no goal +to emulate any operating system primitive specific to Windows (such as a 'c:\' drive). Moreover, certain defaults +working with real system calls are neither relevant nor safe to inherit: For example, `exec.Cmd` defaults to read STDIN +from a real file descriptor ("/dev/null"). Defaulting to this, vs reading `io.EOF`, would be unsafe as it can exhaust +file descriptors if resources aren't managed properly. In other words, blind copying of defaults isn't wise as it can +violate isolation or endanger the embedding process. In summary, we try to be similar to normal Go code, but often need +act differently and document `ModuleConfig` is more about emulating, not necessarily performing real system calls. + +## File systems + +### Motivation on `sys.FS` + +The `sys.FS` abstraction in wazero was created because of limitations in +`fs.FS`, and `fs.File` in Go. Compilers targeting `wasip1` may access +functionality that writes new files. The ability to overcome this was requested +even before wazero was named this, via issue #21 in March 2021. + +A month later, golang/go#45757 was raised by someone else on the same topic. As +of July 2023, this has not resolved to a writeable file system abstraction. + +Over the next year more use cases accumulated, consolidated in March 2022 into +#390. This closed in January 2023 with a milestone of providing more +functionality, limited to users giving a real directory. This didn't yet expose +a file abstraction for general purpose use. Internally, this used `os.File`. +However, a wasm module instance is a virtual machine. Only supporting `os.File` +breaks sand-boxing use cases. Moreover, `os.File` is not an interface. Even +though this abstracts functionality, it does allow interception use cases. 
+ +Hence, a few days later in January 2023, we had more issues asking to expose an +abstraction, #1013 and later #1532, on use cases like masking access to files. +In other words, the use case requests never stopped, and aren't solved by +exposing only real files. + +In summary, the primary motivation for exposing a replacement for `fs.FS` and +`fs.File` was around repetitive use case requests for years, around +interception and the ability to create new files, both virtual and real files. +While some use cases are solved with real files, not all are. Regardless, an +interface approach is necessary to ensure users can intercept I/O operations. + +### Why doesn't `sys.File` have a `Fd()` method? + +There are many features we could expose. We could make File expose underlying +file descriptors in case they are supported, for integration of system calls +that accept multiple ones, namely `poll` for multiplexing. This special case is +described in a subsequent section. + +As noted above, users have been asking for a file abstraction for over two +years, and a common answer was to wait. Making users wait is a problem, +especially so long. Good reasons to make people wait are stabilization. Edge +case features are not a great reason to hold abstractions from users. + +Another reason is implementation difficulty. Go did not attempt to abstract +file descriptors. For example, unlike `fs.ReadFile` there is no `fs.FdFile` +interface. Most likely, this is because file descriptors are an implementation +detail of common features. Programming languages, including Go, do not require +end users to know about file descriptors. Types such as `fs.File` can be used +without any knowledge of them. Implementations may or may not have file +descriptors. For example, in Go, `os.DirFS` has underlying file descriptors +while `embed.FS` does not. + +Despite this, some may want to expose a non-standard interface because +`os.File` has `Fd() uintptr` to return a file descriptor. 
Mainly, this is +handy to integrate with `syscall` package functions (on `GOOS` values that +declare them). Notice, though that `uintptr` is unsafe and not an abstraction. +Close inspection will find some `os.File` types internally use `poll.FD` +instead, yet this is not possible to use abstractly because that type is not +exposed. For example, `plan9` uses a different type than `poll.FD`. In other +words, even in real files, `Fd()` is not wholly portable, despite it being +useful on many operating systems with the `syscall` package. + +The reasons above, why Go doesn't abstract `FdFile` interface are a subset of +reasons why `sys.File` does not. If we exposed `File.Fd()` we not only would +have to declare all the edge cases that Go describes including impact of +finalizers, we would have to describe these in terms of virtualized files. +Then, we would have to reason with this value vs our existing virtualized +`sys.FileTable`, mapping whatever type we return to keys in that table, also +in consideration of garbage collection impact. The combination of issues like +this could lead down a path of not implementing a file system abstraction at +all, and instead a weak key mapped abstraction of the `syscall` package. Once +we finished with all the edge cases, we would have lost context of the original +reason why we started.. simply to allow file write access! + +When wazero attempts to do more than what the Go programming language team, it +has to be carefully evaluated, to: +* Be possible to implement at least for `os.File` backed files +* Not be confusing or cognitively hard for virtual file systems and normal use. +* Affordable: custom code is solely the responsible by the core team, a much + smaller group of individuals than who maintain the Go programming language. 
+ +Due to problems well known in Go, consideration of the end users who constantly +ask for basic file system functionality, and the difficulty virtualizing file +descriptors at multiple levels, we don't expose `Fd()` and likely won't ever +expose `Fd()` on `sys.File`. + +### Why does `sys.File` have a `Poll()` method, while `sys.FS` does not? + +wazero exposes `File.Poll` which allows one-at-a-time poll use cases, +requested by multiple users. This not only includes abstract tests such as +Go 1.21 `GOOS=wasip1`, but real use cases including python and container2wasm +repls, as well listen sockets. The main use cases is non-blocking poll on a +single file. Being a single file, this has no risk of problems such as +head-of-line blocking, even when emulated. + +The main use case of multi-poll are bidirectional network services, something +not used in `GOOS=wasip1` standard libraries, but could be in the future. +Moving forward without a multi-poller allows wazero to expose its file system +abstraction instead of continuing to hold back it back for edge cases. We'll +continue discussion below regardless, as rationale was requested. + +You can loop through multiple `sys.File`, using `File.Poll` to see if an event +is ready, but there is a head-of-line blocking problem. If a long timeout is +used, bad luck could have a file that has nothing to read or write before one +that does. This could cause more blocking than necessary, even if you could +poll the others just after with a zero timeout. What's worse than this is if +unlimited blocking was used (`timeout=-1`). The host implementations could use +goroutines to avoid this, but interrupting a "forever" poll is problematic. All +of these are reasons to consider a multi-poll API, but do not require exporting +`File.Fd()`. 
+ +Should multi-poll becomes critical, `sys.FS` could expose a `Poll` function +like below, despite it being the non-portable, complicated if possible to +implement on all platforms and virtual file systems. +```go +ready, errno := fs.Poll([]sys.PollFile{{f1, sys.POLLIN}, {f2, sys.POLLOUT}}, timeoutMillis) +``` + +A real filesystem could handle this by using an approach like the internal +`unix.Poll` function in Go, passing file descriptors on unix platforms, or +returning `sys.ENOSYS` for unsupported operating systems. Implementation for +virtual files could have a strategy around timeout to avoid the worst case of +head-of-line blocking (unlimited timeout). + +Let's remember that when designing abstractions, it is not best to add an +interface for everything. Certainly, Go doesn't, as evidenced by them not +exposing `poll.FD` in `os.File`! Such a multi-poll could be limited to +built-in filesystems in the wazero repository, avoiding complexity of trying to +support and test this abstractly. This would still permit multiplexing for CLI +users, and also permit single file polling as exists now. + +### Why doesn't wazero implement the working directory? + +An early design of wazero's API included a `WithWorkDirFS` which allowed +control over which file a relative path such as "./config.yml" resolved to, +independent of the root file system. This intended to help separate concerns +like mutability of files, but it didn't work and was removed. + +Compilers that target wasm act differently with regard to the working +directory. For example, wasi-libc, used by TinyGo, +tracks working directory changes in compiled wasm instead: initially "/" until +code calls `chdir`. Zig assumes the first pre-opened file descriptor is the +working directory. + +The only place wazero can standardize a layered concern is via a host function. +Since WASI doesn't use host functions to track the working directory, we can't +standardize the storage and initial value of it. 
+ +Meanwhile, code may be able to affect the working directory by compiling +`chdir` into their main function, using an argument or ENV for the initial +value (possibly `PWD`). Those unable to control the compiled code should only +use absolute paths in configuration. + +See +* https://github.com/golang/go/blob/go1.20/src/syscall/fs_js.go#L324 +* https://github.com/WebAssembly/wasi-libc/pull/214#issue-673090117 +* https://github.com/ziglang/zig/blob/53a9ee699a35a3d245ab6d1dac1f0687a4dcb42c/src/main.zig#L32 + +### Why ignore the error returned by io.Reader when n > 1? + +Per https://pkg.go.dev/io#Reader, if we receive an error, any bytes read should +be processed first. At the syscall abstraction (`fd_read`), the caller is the +processor, so we can't process the bytes inline and also return the error (as +`EIO`). + +Let's assume we want to return the bytes read on error to the caller. This +implies we at least temporarily ignore the error alongside them. The choice +remaining is whether to persist the error returned with the read until a +possible next call, or ignore the error. + +If we persist an error returned, it would be coupled to a file descriptor, but +effectively it is boolean as this case coerces to `EIO`. If we track a "last +error" on a file descriptor, it could be complicated for a couple reasons +including whether the error is transient or permanent, or if the error would +apply to any FD operation, or just read. Finally, there may never be a +subsequent read as perhaps the bytes leading up to the error are enough to +satisfy the processor. + +This decision boils down to whether or not to track an error bit per file +descriptor or not. If not, the assumption is that a subsequent operation would +also error, this time without reading any bytes. + +The current opinion is to go with the simplest path, which is to return the +bytes read and ignore the error the there were any. Assume a subsequent +operation will err if it needs to. 
This helps reduce the complexity of the code +in wazero and also accommodates the scenario where the bytes read are enough to +satisfy its processor. + +### File descriptor allocation strategy + +File descriptor allocation currently uses a strategy similar the one implemented +by unix systems: when opening a file, the lowest unused number is picked. + +The WASI standard documents that programs cannot expect that file descriptor +numbers will be allocated with a lowest-first strategy, and they should instead +assume the values will be random. Since _random_ is a very imprecise concept in +computers, we technically satisfying the implementation with the descriptor +allocation strategy we use in Wazero. We could imagine adding more _randomness_ +to the descriptor selection process, however this should never be used as a +security measure to prevent applications from guessing the next file number so +there are no strong incentives to complicate the logic. + +### Why does `FSConfig.WithDirMount` not match behaviour with `os.DirFS`? + +It may seem that we should require any feature that seems like a standard +library in Go, to behave the same way as the standard library. Doing so would +present least surprise to Go developers. In the case of how we handle +filesystems, we break from that as it is incompatible with the expectations of +WASI, the most commonly implemented filesystem ABI. + +The main reason is that `os.DirFS` is a virtual filesystem abstraction while +WASI is an abstraction over syscalls. For example, the signature of `fs.Open` +does not permit use of flags. This creates conflict on what default behaviors +to take when Go implemented `os.DirFS`. On the other hand, `path_open` can pass +flags, and in fact tests require them to be honored in specific ways. + +This conflict requires us to choose what to be more compatible with, and which +type of user to surprise the least. 
We assume there will be more developers +compiling code to wasm than developers of custom filesystem plugins, and those +compiling code to wasm will be better served if we are compatible with WASI. +Hence on conflict, we prefer WASI behavior vs the behavior of `os.DirFS`. + +See https://github.com/WebAssembly/wasi-testsuite +See https://github.com/golang/go/issues/58141 + +## Why is our `Readdir` function more like Go's `os.File` than POSIX `readdir`? + +At one point we attempted to move from a bulk `Readdir` function to something +more like the POSIX `DIR` struct, exposing functions like `telldir`, `seekdir` +and `readdir`. However, we chose the design more like `os.File.Readdir`, +because it performs and fits wasip1 better. + +### wasip1/wasix + +`fd_readdir` in wasip1 (and so also wasix) is like `getdents` in Linux, not +`readdir` in POSIX. `getdents` is more like Go's `os.File.Readdir`. + +We currently have an internal type `sys.DirentCache` which only is used by +wasip1 or wasix. When `HostModuleBuilder` adds support for instantiation state, +we could move this to the `wasi_snapshot_preview1` package. Meanwhile, all +filesystem code is internal anyway, so this special-case is acceptable. + +### wasip2 + +`directory-entry-stream` in wasi-filesystem preview2 is defined in component +model, not an ABI, but in wasmtime it is a consuming iterator. A consuming +iterator is easy to support with anything (like `Readdir(1)`), even if it is +inefficient as you can neither bulk read nor skip. The implementation of the +preview1 adapter (uses preview2) confirms this. They use a dirent cache similar +in some ways to our `sysfs.DirentCache`. As there is no seek concept in +preview2, they interpret the cookie as numeric and read on repeat entries when +a cache wasn't available. Note: we currently do not skip-read like this as it +risks buffering large directories, and no user has requested entries before the +cache, yet. 
+ +Regardless, wasip2 is not complete until the end of 2023. We can defer design +discussion until after it is stable and after the reference impl wasmtime +implements it. + +See + * https://github.com/WebAssembly/wasi-filesystem/blob/ef9fc87c07323a6827632edeb6a7388b31266c8e/example-world.md#directory_entry_stream + * https://github.com/bytecodealliance/wasmtime/blob/b741f7c79d72492d17ab8a29c8ffe4687715938e/crates/wasi/src/preview2/preview2/filesystem.rs#L286-L296 + * https://github.com/bytecodealliance/preview2-prototyping/blob/e4c04bcfbd11c42c27c28984948d501a3e168121/crates/wasi-preview1-component-adapter/src/lib.rs#L2131-L2137 + * https://github.com/bytecodealliance/preview2-prototyping/blob/e4c04bcfbd11c42c27c28984948d501a3e168121/crates/wasi-preview1-component-adapter/src/lib.rs#L936 + +### wasip3 + +`directory-entry-stream` is documented to change significantly in wasip3 moving +from synchronous to synchronous streams. This is dramatically different than +POSIX `readdir` which is synchronous. + +Regardless, wasip3 is not complete until after wasip2, which means 2024 or +later. We can defer design discussion until after it is stable and after the +reference impl wasmtime implements it. + +See + * https://github.com/WebAssembly/WASI/blob/ddfe3d1dda5d1473f37ecebc552ae20ce5fd319a/docs/WitInWasi.md#Streams + * https://docs.google.com/presentation/d/1MNVOZ8hdofO3tI0szg_i-Yoy0N2QPU2C--LzVuoGSlE/edit#slide=id.g1270ef7d5b6_0_662 + +### How do we implement `Pread` with an `fs.File`? + +`ReadAt` is the Go equivalent to `pread`: it does not affect, and is not +affected by, the underlying file offset. Unfortunately, `io.ReaderAt` is not +implemented by all `fs.File`. For example, as of Go 1.19, `embed.openFile` does +not. + +The initial implementation of `fd_pread` instead used `Seek`. To avoid a +regression, we fall back to `io.Seeker` when `io.ReaderAt` is not supported. 
+ +This requires obtaining the initial file offset, seeking to the intended read +offset, and resetting the file offset the initial state. If this final seek +fails, the file offset is left in an undefined state. This is not thread-safe. + +While seeking per read seems expensive, the common case of `embed.openFile` is +only accessing a single int64 field, which is cheap. + +### Pre-opened files + +WASI includes `fd_prestat_get` and `fd_prestat_dir_name` functions used to +learn any directory paths for file descriptors open at initialization time. + +For example, `__wasilibc_register_preopened_fd` scans any file descriptors past +STDERR (1) and invokes `fd_prestat_dir_name` to learn any path prefixes they +correspond to. Zig's `preopensAlloc` does similar. These pre-open functions are +not used again after initialization. + +wazero supports stdio pre-opens followed by any mounts e.g `.:/`. The guest +path is a directory and its name, e.g. "/" is returned by `fd_prestat_dir_name` +for file descriptor 3 (STDERR+1). The first longest match wins on multiple +pre-opens, which allows a path like "/tmp" to match regardless of order vs "/". + +See + * https://github.com/WebAssembly/wasi-libc/blob/a02298043ff551ce1157bc2ee7ab74c3bffe7144/libc-bottom-half/sources/preopens.c + * https://github.com/ziglang/zig/blob/9cb06f3b8bf9ea6b5e5307711bc97328762d6a1d/lib/std/fs/wasi.zig#L50-L53 + +### fd_prestat_dir_name + +`fd_prestat_dir_name` is a WASI function to return the path of the pre-opened +directory of a file descriptor. It has the following three parameters, and the +third `path_len` has ambiguous semantics. + +* `fd`: a file descriptor +* `path`: the offset for the result path +* `path_len`: In wazero, `FdPrestatDirName` writes the result path string to + `path` offset for the exact length of `path_len`. + +Wasmer considers `path_len` to be the maximum length instead of the exact +length that should be written. 
+See https://github.com/wasmerio/wasmer/blob/3463c51268ed551933392a4063bd4f8e7498b0f6/lib/wasi/src/syscalls/mod.rs#L764 + +The semantics in wazero follows that of wasmtime. +See https://github.com/bytecodealliance/wasmtime/blob/2ca01ae9478f199337cf743a6ab543e8c3f3b238/crates/wasi-common/src/snapshots/preview_1.rs#L578-L582 + +Their semantics match when `path_len` == the length of `path`, so in practice +this difference won't matter match. + +## fd_readdir + +### Why does "wasi_snapshot_preview1" require dot entries when POSIX does not? + +In October 2019, WASI project knew requiring dot entries ("." and "..") was not +documented in preview1, not required by POSIX and problematic to synthesize. +For example, Windows runtimes backed by `FindNextFileW` could not return these. +A year later, the tag representing WASI preview 1 (`snapshot-01`) was made. +This did not include the requested change of making dot entries optional. + +The `phases/snapshot/docs.md` document was altered in subsequent years in +significant ways, often in lock-step with wasmtime or wasi-libc. In January +2022, `sock_accept` was added to `phases/snapshot/docs.md`, a document later +renamed to later renamed to `legacy/preview1/docs.md`. + +As a result, the ABI and behavior remained unstable: The `snapshot-01` tag was +not an effective basis of portability. A test suite was requested well before +this tag, in April 2019. Meanwhile, compliance had no meaning. Developers had +to track changes to the latest doc, while clarifying with wasi-libc or wasmtime +behavior. This lack of stability could have permitted a fix to the dot entries +problem, just as it permitted changes desired by other users. + +In November 2022, the wasi-testsuite project began and started solidifying +expectations. This quickly led to changes in runtimes and the spec doc. WASI +began importing tests from wasmtime as required behaviors for all runtimes. +Some changes implied changes to wasi-libc. 
For example, `readdir` began to +imply inode fan-outs, which caused performance regressions. Most notably a +test merged in January required dot entries. Tests were merged without running +against any runtime, and even when run ad-hoc only against Linux. Hence, +portability issues mentioned over three years earlier did not trigger any +failure until wazero (which tests Windows) noticed. + +In the same month, wazero requested to revert this change primarily because +Go does not return them from `os.ReadDir`, and materializing them is +complicated due to tests also requiring inodes. Moreover, they are discarded by +not just Go, but other common programming languages. This was rejected by the +WASI lead for preview1, but considered for the completely different ABI named +preview2. + +In February 2023, the WASI chair declared that new rule requiring preview1 to +return dot entries "was decided by the subgroup as a whole", citing meeting +notes. According to these notes, the WASI lead stated incorrectly that POSIX +conformance required returning dot entries, something it explicitly says are +optional. In other words, he said filtering them out would make Preview1 +non-conforming, and asked if anyone objects to this. The co-chair was noted to +say "Because there are existing P1 programs, we shouldn’t make changes like +this." No other were recorded to say anything. + +In summary, preview1 was changed retrospectively to require dot entries and +preview2 was changed to require their absence. This rule was reverse engineered +from wasmtime tests, and affirmed on two false premises: + +* POSIX compliance requires dot entries + * POSIX literally says these are optional +* WASI cannot make changes because there are existing P1 programs. + * Changes to Preview 1 happened before and after this topic. + +As of June 2023, wasi-testsuite still only runs on Linux, so compliance of this +rule on Windows is left to runtimes to decide to validate. 
The preview2 adapter +uses fake cookies zero and one to refer to dot dirents, uses a real inode for +the dot(".") entry and zero inode for dot-dot(".."). + +See https://github.com/WebAssembly/wasi-filesystem/issues/3 +See https://github.com/WebAssembly/WASI/tree/snapshot-01 +See https://github.com/WebAssembly/WASI/issues/9 +See https://github.com/WebAssembly/WASI/pull/458 +See https://github.com/WebAssembly/wasi-testsuite/pull/32 +See https://github.com/WebAssembly/wasi-libc/pull/345 +See https://github.com/WebAssembly/wasi-testsuite/issues/52 +See https://github.com/WebAssembly/WASI/pull/516 +See https://github.com/WebAssembly/meetings/blob/main/wasi/2023/WASI-02-09.md#should-preview1-fd_readdir-filter-out--and- +See https://github.com/bytecodealliance/preview2-prototyping/blob/e4c04bcfbd11c42c27c28984948d501a3e168121/crates/wasi-preview1-component-adapter/src/lib.rs#L1026-L1041 + +### Why are dot (".") and dot-dot ("..") entries problematic? + +When reading a directory, dot (".") and dot-dot ("..") entries are problematic. +For example, Go does not return them from `os.ReadDir`, and materializing them +is complicated (at least dot-dot is). + +A directory entry has stat information in it. The stat information includes +inode which is used for comparing file equivalence. In the simple case of dot, +we could materialize a special entry to expose the same info as stat on the fd +would return. However, doing this and not doing dot-dot would cause confusion, +and dot-dot is far more tricky. To back-fill inode information about a parent +directory would be costly and subtle. For example, the pre-open (mount) of the +directory may be different than its logical parent. This is easy to understand +when considering the common case of mounting "/" and "/tmp" as pre-opens. To +implement ".." from "/tmp" requires information from a separate pre-open, this +includes state to even know the difference. There are easier edge cases as +well, such as the decision to not return ".." 
from a root path. In any case, +this should start to explain that faking entries when underlying stdlib doesn't +return them is tricky and requires quite a lot of state. + +Another issue is around the `Dirent.Off` value of a directory entry, sometimes +called a "cookie" in Linux man pagers. When the host operating system or +library function does not return dot entries, to support functions such as +`seekdir`, you still need a value for `Dirent.Off`. Naively, you can synthesize +these by choosing sequential offsets zero and one. However, POSIX strictly says +offsets should be treated opaquely. The backing filesystem could use these to +represent real entries. For example, a directory with one entry could use zero +as the `Dirent.Off` value. If you also used zero for the "." dirent, there +would be a clash. This means if you synthesize `Dirent.Off` for any entry, you +need to synthesize this value for all entries. In practice, the simplest way is +using an incrementing number, such as done in the WASI preview2 adapter. + +Working around these issues causes expense to all users of wazero, so we'd +then look to see if that would be justified or not. However, the most common +compilers involved in end user questions, as of early 2023 are TinyGo, Rust and +Zig. All of these compile code which ignores dot and dot-dot entries. In other +words, faking these entries would not only cost our codebase with complexity, +but it would also add unnecessary overhead as the values aren't commonly used. + +The final reason why we might do this, is an end users or a specification +requiring us to. As of early 2023, no end user has raised concern over Go and +by extension wazero not returning dot and dot-dot. The snapshot-01 spec of WASI +does not mention anything on this point. Also, POSIX has the following to say, +which summarizes to "these are optional" + +> The readdir() function shall not return directory entries containing empty names. 
If entries for dot or dot-dot exist, one entry shall be returned for dot and one entry shall be returned for dot-dot; otherwise, they shall not be returned. + +Unfortunately, as described above, the WASI project decided in early 2023 to +require dot entries in both the spec and the wasi-testsuite. For only this +reason, wazero adds overhead to synthesize dot entries despite it being +unnecessary for most users. + +See https://pubs.opengroup.org/onlinepubs/9699919799/functions/readdir.html +See https://github.com/golang/go/blob/go1.20/src/os/dir_unix.go#L108-L110 +See https://github.com/bytecodealliance/preview2-prototyping/blob/e4c04bcfbd11c42c27c28984948d501a3e168121/crates/wasi-preview1-component-adapter/src/lib.rs#L1026-L1041 + +### Why don't we pre-populate an inode for the dot-dot ("..") entry? + +We only populate an inode for dot (".") because wasi-testsuite requires it, and +we likely already have it (because we cache it). We could attempt to populate +one for dot-dot (".."), but chose not to. + +Firstly, wasi-testsuite does not require the inode of dot-dot, possibly because +the wasip2 adapter doesn't populate it (but we don't really know why). + +The only other reason to populate it would be to avoid wasi-libc's stat fanout +when it is missing. However, wasi-libc explicitly doesn't fan-out to lstat on +the ".." entry on a zero ino. + +Fetching dot-dot's inode despite the above not only doesn't help wasi-libc, but +it also hurts languages that don't use it, such as Go. These languages would +pay a stat syscall penalty even if they don't need the inode. In fact, Go +discards both dot entries! + +In summary, there are no significant upsides in attempting to pre-fetch +dot-dot's inode, and there are downsides to doing it anyway. 
+ +See + * https://github.com/WebAssembly/wasi-libc/blob/bd950eb128bff337153de217b11270f948d04bb4/libc-bottom-half/cloudlibc/src/libc/dirent/readdir.c#L87-L94 + * https://github.com/WebAssembly/wasi-testsuite/blob/main/tests/rust/src/bin/fd_readdir.rs#L108 + * https://github.com/bytecodealliance/preview2-prototyping/blob/e4c04bcfbd11c42c27c28984948d501a3e168121/crates/wasi-preview1-component-adapter/src/lib.rs#L1037 + +### Why don't we require inodes to be non-zero? + +We don't require a non-zero value for `Dirent.Ino` because doing so can prevent +a real one from resolving later via `Stat_t.Ino`. + +We define `Ino` like `d_ino` in POSIX which doesn't special-case zero. It can +be zero for a few reasons: + +* The file is not a regular file or directory. +* The underlying filesystem does not support inodes. e.g. embed:fs +* A directory doesn't include inodes, but a later stat can. e.g. Windows +* The backend is based on wasi-filesystem (a.k.a wasip2), which has + `directory_entry.inode` optional, and might remove it entirely. + +There are other downsides to returning a zero inode in widely used compilers: + +* File equivalence utilities, like `os.SameFile` will not work. +* wasi-libc's `wasip1` mode will call `lstat` and attempt to retrieve a + non-zero value (unless the entry is named ".."). + +A new compiler may accidentally skip a `Dirent` with a zero `Ino` if emulating +a non-POSIX function and re-using `Dirent.Ino` for `d_fileno`. + +* Linux `getdents` doesn't define `d_fileno` must be non-zero +* BSD `getdirentries` is implementation specific. For example, OpenBSD will + return dirents with a zero `d_fileno`, but Darwin will skip them. + +The above shouldn't be a problem, even in the case of BSD, because `wasip1` is +defined more in terms of `getdents` than `getdirentries`. 
The bottom half of +either should treat `wasip1` (or any similar ABI such as wasix or wasip2) as a +different operating system and either use different logic that doesn't skip, or +synthesize a fake non-zero `d_fileno` when `d_ino` is zero. + +However, this has been a problem. Go's `syscall.ParseDirent` utility is shared +for all `GOOS=unix`. For simplicity, this abstracts `direntIno` with data from +`d_fileno` or `d_ino`, and drops if either are zero, even if `d_fileno` is the +only field with zero explicitly defined. This led to a change to special case +`GOOS=wasip1` as otherwise virtual files would be unconditionally skipped. + +In practice, this problem is rather unique due to so many compilers relying on +wasi-libc, which tolerates a zero inode. For example, while issues were +reported about the performance regression when wasi-libc began doing a fan-out +on zero `Dirent.Ino`, no issues were reported about dirents being dropped as a +result. + +In summary, rather than complicating implementation and forcing non-zero inodes +for a rare case, we permit zero. We instead document this topic thoroughly, so +that emerging compilers can re-use the research and reference it on conflict. +We also document that `Ino` should be non-zero, so that users implementing that +field will attempt to get it. + +See + * https://github.com/WebAssembly/wasi-filesystem/pull/81 + * https://github.com/WebAssembly/wasi-libc/blob/bd950eb128bff337153de217b11270f948d04bb4/libc-bottom-half/cloudlibc/src/libc/dirent/readdir.c#L87-L94 + * https://linux.die.net/man/3/getdents + * https://www.unix.com/man-page/osx/2/getdirentries/ + * https://man.openbsd.org/OpenBSD-5.4/getdirentries.2 + * https://github.com/golang/go/blob/go1.20/src/syscall/dirent.go#L60-L102 + * https://go-review.googlesource.com/c/go/+/507915 + +## sys.Walltime and Nanotime + +The `sys` package has two function types, `Walltime` and `Nanotime` for real +and monotonic clock exports. The naming matches conventions used in Go. 
+ +```go +func time_now() (sec int64, nsec int32, mono int64) { + sec, nsec = walltime() + return sec, nsec, nanotime() +} +``` + +Splitting functions for wall and clock time allow implementations to choose +whether to implement the clock once (as in Go), or split them out. + +Each can be configured with a `ClockResolution`, although is it usually +incorrect as detailed in a sub-heading below. The only reason for exposing this +is to satisfy WASI: + +See https://github.com/WebAssembly/wasi-clocks + +### Why default to fake time? + +WebAssembly has an implicit design pattern of capabilities based security. By +defaulting to a fake time, we reduce the chance of timing attacks, at the cost +of requiring configuration to opt-into real clocks. + +See https://gruss.cc/files/fantastictimers.pdf for an example attacks. + +### Why does fake time increase on reading? + +Both the fake nanotime and walltime increase by 1ms on reading. Particularly in +the case of nanotime, this prevents spinning. + +### Why not `time.Clock`? + +wazero can't use `time.Clock` as a plugin for clock implementation as it is +only substitutable with build flags (`faketime`) and conflates wall and +monotonic time in the same call. + +Go's `time.Clock` was added monotonic time after the fact. For portability with +prior APIs, a decision was made to combine readings into the same API call. + +See https://go.googlesource.com/proposal/+/master/design/12914-monotonic.md + +WebAssembly time imports do not have the same concern. In fact even Go's +imports for clocks split walltime from nanotime readings. + +See https://github.com/golang/go/blob/go1.20/misc/wasm/wasm_exec.js#L243-L255 + +Finally, Go's clock is not an interface. WebAssembly users who want determinism +or security need to be able to substitute an alternative clock implementation +from the host process one. + +### `ClockResolution` + +A clock's resolution is hardware and OS dependent so requires a system call to retrieve an accurate value. 
+Go does not provide a function for getting resolution, so without CGO we don't have an easy way to get an actual +value. For now, we return fixed values of 1us for realtime and 1ns for monotonic, assuming that realtime clocks are +often lower precision than monotonic clocks. In the future, this could be improved by having OS+arch specific assembly +to make syscalls. + +For example, Go implements time.Now for linux-amd64 with this [assembly](https://github.com/golang/go/blob/go1.20/src/runtime/time_linux_amd64.s). +Because retrieving resolution is not generally called often, unlike getting time, it could be appropriate to only +implement the fallback logic that does not use VDSO (executing syscalls in user mode). The syscall for clock_getres +is 229 and should be usable. https://pkg.go.dev/syscall#pkg-constants. + +If implementing similar for Windows, [mingw](https://github.com/mirror/mingw-w64/blob/6a0e9165008f731bccadfc41a59719cf7c8efc02/mingw-w64-libraries/winpthreads/src/clock.c#L77 +) is often a good source to find the Windows API calls that correspond +to a POSIX method. + +Writing assembly would allow making syscalls without CGO, but comes with the cost that it will require implementations +across many combinations of OS and architecture. + +## sys.Nanosleep + +All major programming languages have a `sleep` mechanism to block for a +duration. Sleep is typically implemented by a WASI `poll_oneoff` relative clock +subscription. + +For example, the below ends up calling `wasi_snapshot_preview1.poll_oneoff`: + +```zig +const std = @import("std"); +pub fn main() !void { + std.time.sleep(std.time.ns_per_s * 5); +} +``` + +Besides Zig, this is also the case with TinyGo (`-target=wasi`) and Rust +(`--target wasm32-wasi`). + +We decided to expose `sys.Nanosleep` to allow overriding the implementation +used in the common case, even if it isn't used by Go, because this gives an +easy and efficient closure over a common program function. 
We also documented +`sys.Nanotime` to warn users that some compilers don't optimize sleep. + +## sys.Osyield + +We expose `sys.Osyield`, to allow users to control the behavior of WASI's +`sched_yield` without a new build of wazero. This is mainly for parity with +all other related features which we allow users to implement, including +`sys.Nanosleep`. Unlike others, we don't provide an out-of-box implementation +primarily because it will cause performance problems when accessed. + +For example, the below implementation uses CGO, which might result in a 1us +delay per invocation depending on the platform. + +See https://github.com/golang/go/issues/19409#issuecomment-284788196 +```go +//go:noescape +//go:linkname osyield runtime.osyield +func osyield() +``` + +In practice, a request to customize this is unlikely to happen until other +thread based functions are implemented. That said, as of early 2023, there are +a few signs of implementation interest and cross-referencing: + +See https://github.com/WebAssembly/stack-switching/discussions/38 +See https://github.com/WebAssembly/wasi-threads#what-can-be-skipped +See https://slinkydeveloper.com/Kubernetes-controllers-A-New-Hope/ + +## sys.Stat_t + +We expose `stat` information as `sys.Stat_t`, like `syscall.Stat_t` except +defined without build constraints. For example, you can use `sys.Stat_t` on +`GOOS=windows` which doesn't define `syscall.Stat_t`. + +The first use case of this is to return inodes from `fs.FileInfo` without +relying on platform-specifics. For example, a user could return `*sys.Stat_t` +from `info.Sys()` and define a non-zero inode for a virtual file, or map a +real inode to a virtual one. + +Notable choices per field are listed below, where `sys.Stat_t` is unlike +`syscall.Stat_t` on `GOOS=linux`, or needs clarification. One common issue +not repeated below is that numeric fields are 64-bit when at least one platform +defines it that large. Also, zero values are equivalent to nil or absent. 
+ +* `Dev` and `Ino` (`Inode`) are both defined unsigned as they are defined + opaque, and most `syscall.Stat_t` also defined them unsigned. There are + separate sections in this document discussing the impact of zero in `Ino`. +* `Mode` is defined as a `fs.FileMode` even though that is not defined in POSIX + and will not map to all possible values. This is because the current use is + WASI, which doesn't define any types or features not already supported. By + using `fs.FileMode`, we can re-use routine experience in Go. +* `NLink` is unsigned because it is defined that way in `syscall.Stat_t`: there + can never be less than zero links to a file. We suggest defaulting to 1 in + conversions when information is not knowable because at least that many links + exist. +* `Size` is signed because it is defined that way in `syscall.Stat_t`: while + regular files and directories will always be non-negative, irregular files + are possibly negative or not defined. Notably sparse files are known to + return negative values. +* `Atim`, `Mtim` and `Ctim` are signed because they are defined that way in + `syscall.Stat_t`: Negative values are time before 1970. The resolution is + nanosecond because that's the maximum resolution currently supported in Go. + +### Why do we use `sys.EpochNanos` instead of `time.Time` or similar? + +To simplify documentation, we defined a type alias `sys.EpochNanos` for int64. +`time.Time` is a data structure, and we could have used this for +`syscall.Stat_t` time values. The most important reason we do not is conversion +penalty deriving time from common types. + +The most common ABI used in `wasip2`. This, and compatible ABI such as `wasix`, +encode timestamps in memory as a 64-bit number. If we used `time.Time`, we +would have to convert an underlying type like `syscall.Timespec` to `time.Time` +only to later have to call `.UnixNano()` to convert it back to a 64-bit number. 
+ +In the future, the component model module "wasi-filesystem" may represent stat +timestamps with a type shared with "wasi-clocks", abstractly structured similar +to `time.Time`. However, component model intentionally does not define an ABI. +It is likely that the canonical ABI for timestamp will be in two parts, but it +is not required for it to be intermediately represented this way. A utility +like `syscall.NsecToTimespec` could split an int64 so that it could be written +to memory as 96 bytes (int64, int32), without allocating a struct. + +Finally, some may confuse epoch nanoseconds with 32-bit epoch seconds. While +32-bit epoch seconds has "The year 2038" problem, epoch nanoseconds has +"The Year 2262" problem, which is even less concerning for this library. If +the Go programming language and wazero exist in the 2200's, we can make a major +version increment to adjust the `sys.EpochNanos` approach. Meanwhile, we have +faster code. + +## poll_oneoff + +`poll_oneoff` is a WASI API for waiting for I/O events on multiple handles. +It is conceptually similar to the POSIX `poll(2)` syscall. +The name is not `poll`, because it references [“the fact that this function is not efficient +when used repeatedly with the same large set of handles”][poll_oneoff]. + +We chose to support this API in a handful of cases that work for regular files +and standard input. We currently do not support other types of file descriptors such +as socket handles. + +### Clock Subscriptions + +As detailed above in [sys.Nanosleep](#sysnanosleep), `poll_oneoff` handles +relative clock subscriptions. In our implementation we use `sys.Nanosleep()` +for this purpose in most cases, except when polling for interactive input +from `os.Stdin` (see more details below). + +### FdRead and FdWrite Subscriptions + +When subscribing a file descriptor (except `Stdin`) for reads or writes, +the implementation will generally return immediately with success, unless +the file descriptor is unknown. 
The file descriptor is not checked further +for new incoming data. Any timeout is cancelled, and the API call is able +to return, unless there are subscriptions to `Stdin`: these are handled +separately. + +### FdRead and FdWrite Subscription to Stdin + +Subscribing `Stdin` for reads (writes make no sense and cause an error), +requires extra care: wazero allows to configure a custom reader for `Stdin`. + +In general, if a custom reader is found, the behavior will be the same +as for regular file descriptors: data is assumed to be present and +a success is written back to the result buffer. + +However, if the reader is detected to read from `os.Stdin`, +a special code path is followed, invoking `sysfs.poll()`. + +`sysfs.poll()` is a wrapper for `poll(2)` on POSIX systems, +and it is emulated on Windows. + +### Poll on POSIX + +On POSIX systems, `poll(2)` allows to wait for incoming data on a file +descriptor, and block until either data becomes available or the timeout +expires. + +Usage of `syfs.poll()` is currently only reserved for standard input, because + +1. it is really only necessary to handle interactive input: otherwise, + there is no way in Go to peek from Standard Input without actually + reading (and thus consuming) from it; + +2. if `Stdin` is connected to a pipe, it is ok in most cases to return + with success immediately; + +3. `syfs.poll()` is currently a blocking call, irrespective of goroutines, + because the underlying syscall is; thus, it is better to limit its usage. + +So, if the subscription is for `os.Stdin` and the handle is detected +to correspond to an interactive session, then `sysfs.poll()` will be +invoked with a the `Stdin` handle *and* the timeout. + +This also means that in this specific case, the timeout is uninterruptible, +unless data becomes available on `Stdin` itself. 
+ +### Select on Windows + +On Windows `sysfs.poll()` cannot be delegated to a single +syscall, because there is no single syscall to handle sockets, +pipes and regular files. + +Instead, we emulate its behavior for the cases that are currently +of interest. + +- For regular files, we _always_ report them as ready, as +[most operating systems do anyway][async-io-windows]. + +- For pipes, we invoke [`PeekNamedPipe`][peeknamedpipe] +for each file handle we detect is a pipe open for reading. +We currently ignore pipes open for writing. + +- Notably, we include also support for sockets using the [WinSock +implementation of `poll`][wsapoll], but instead +of relying on the timeout argument of the `WSAPoll` function, +we set a 0-duration timeout so that it behaves like a peek. + +This way, we can check for regular files all at once, +at the beginning of the function, then we poll pipes and +sockets periodically using a cancellable `time.Tick`, +which plays nicely with the rest of the Go runtime. + +### Impact of blocking + +Because this is a blocking syscall, it will also block the carrier thread of +the goroutine, preventing any means to support context cancellation directly. + +There are ways to obviate this issue. We outline here one idea, that is however +not currently implemented. A common approach to support context cancellation is +to add a signal file descriptor to the set, e.g. the read-end of a pipe or an +eventfd on Linux. When the context is canceled, we may unblock a Select call by +writing to the fd, causing it to return immediately. This however requires to +do a bit of housekeeping to hide the "special" FD from the end-user. 
+ +[poll_oneoff]: https://github.com/WebAssembly/wasi-poll#why-is-the-function-called-poll_oneoff +[async-io-windows]: https://tinyclouds.org/iocp_links +[peeknamedpipe]: https://learn.microsoft.com/en-us/windows/win32/api/namedpipeapi/nf-namedpipeapi-peeknamedpipe +[wsapoll]: https://learn.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-wsapoll + +## Signed encoding of integer global constant initializers + +wazero treats integer global constant initializers signed as their interpretation is not known at declaration time. For +example, there is no signed integer [value type](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#value-types%E2%91%A0). + +To get at the problem, let's use an example. +``` +(global (export "start_epoch") i64 (i64.const 1620216263544)) +``` + +In both signed and unsigned LEB128 encoding, this value is the same bit pattern. The problem is that some numbers are +not. For example, 16256 is `807f` encoded as unsigned, but `80ff00` encoded as signed. + +While the specification mentions uninterpreted integers are in abstract [unsigned values](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#integers%E2%91%A0), +the binary encoding is clear that they are encoded [signed](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#integers%E2%91%A4). + +For consistency, we go with signed encoding in the special case of global constant initializers. + +## Implementation limitations + +WebAssembly 1.0 (20191205) specification allows runtimes to [limit certain aspects of Wasm module or execution](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#a2-implementation-limitations). + +wazero limitations are imposed pragmatically and described below. 
+ +### Number of functions in a module + +The possible number of function instances in [a module](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#module-instances%E2%91%A0) is not specified in the WebAssembly specifications since [`funcaddr`](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-funcaddr) corresponding to a function instance in a store can be arbitrary number. +wazero limits the maximum function instances to 2^27 as even that number would occupy 1GB in function pointers. + +That is because not only we _believe_ that all use cases are fine with the limitation, but also we have no way to test wazero runtimes under these unusual circumstances. + +### Number of function types in a store + +There's no limitation on the number of function types in [a store](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#store%E2%91%A0) according to the spec. In wazero implementation, we assign each function type to a unique ID, and choose to use `uint32` to represent the IDs. +Therefore the maximum number of function types a store can have is limited to 2^27 as even that number would occupy 512MB just to reference the function types. + +This is due to the same reason for the limitation on the number of functions above. + +### Number of values on the stack in a function + +While the the spec does not clarify a limitation of function stack values, wazero limits this to 2^27 = 134,217,728. +The reason is that we internally represent all the values as 64-bit integers regardless of its types (including f32, f64), and 2^27 values means +1 GiB = (2^30). 1 GiB is the reasonable for most applications [as we see a Goroutine has 250 MB as a limit on the stack for 32-bit arch](https://github.com/golang/go/blob/go1.20/src/runtime/proc.go#L152-L159), considering that WebAssembly is (currently) 32-bit environment. + +All the functions are statically analyzed at module instantiation phase, and if a function can potentially reach this limit, an error is returned. 
+ +### Number of globals in a module + +Theoretically, a module can declare globals (including imports) up to 2^32 times. However, wazero limits this to 2^27(134,217,728) per module. +That is because internally we store globals in a slice with pointer types (meaning 8 bytes on 64-bit platforms), and therefore 2^27 globals +means that we have 1 GiB size of slice which seems large enough for most applications. + +### Number of tables in a module + +While the the spec says that a module can have up to 2^32 tables, wazero limits this to 2^27 = 134,217,728. +One of the reasons is even that number would occupy 1GB in the pointers tables alone. Not only that, we access tables slice by +table index by using 32-bit signed offset in the compiler implementation, which means that the table index of 2^27 can reach 2^27 * 8 (pointer size on 64-bit machines) = 2^30 offsets in bytes. + +We _believe_ that all use cases are fine with the limitation, but also note that we have no way to test wazero runtimes under these unusual circumstances. + +If a module reaches this limit, an error is returned at the compilation phase. + +## Compiler engine implementation + +### Why it's safe to execute runtime-generated machine codes against async Goroutine preemption + +Goroutine preemption is the mechanism of the Go runtime to switch goroutines contexts on an OS thread. +There are two types of preemption: cooperative preemption and async preemption. The former happens, for example, +when making a function call, and it is not an issue for our runtime-generated functions as they do not make +direct function calls to Go-implemented functions. On the other hand, the latter, async preemption, can be problematic +since it tries to interrupt the execution of Goroutine at any point of function, and manipulates CPU register states. + +Fortunately, our runtime-generated machine codes do not need to take the async preemption into account. 
+All the assembly codes are entered via the trampoline implemented as Go Assembler Function (e.g. [arch_amd64.s](./arch_amd64.s)), +and as of Go 1.20, these assembler functions are considered as _unsafe_ for async preemption: +- https://github.com/golang/go/blob/go1.20rc1/src/runtime/preempt.go#L406-L407 +- https://github.com/golang/go/blob/9f0234214473dfb785a5ad84a8fc62a6a395cbc3/src/runtime/traceback.go#L227 + +From the Go runtime point of view, the execution of runtime-generated machine codes is considered as a part of +that trampoline function. Therefore, runtime-generated machine code is also correctly considered unsafe for async preemption. + +## Why context cancellation is handled in Go code rather than native code + +Since [wazero v1.0.0-pre.9](https://github.com/tetratelabs/wazero/releases/tag/v1.0.0-pre.9), the runtime +supports integration with Go contexts to interrupt execution after a timeout, or in response to explicit cancellation. +This support is internally implemented as a special opcode `builtinFunctionCheckExitCode` that triggers the execution of +a Go function (`ModuleInstance.FailIfClosed`) that atomically checks a sentinel value at strategic points in the code. + +[It _is indeed_ possible to check the sentinel value directly, without leaving the native world][native_check], thus sparing some cycles; +however, because native code never preempts (see section above), this may lead to a state where the other goroutines +never get the chance to run, and thus never get the chance to set the sentinel value; effectively preventing +cancellation from taking place. + +[native_check]: https://github.com/tetratelabs/wazero/issues/1409 + +## Golang patterns + +### Hammer tests +Code that uses concurrency primitives, such as locks or atomics, should include "hammer tests", which run large loops +inside a bounded amount of goroutines, run by half that many `GOMAXPROCS`. These are named consistently "hammer", so +they are easy to find. 
The name inherits from some existing tests in [golang/go](https://github.com/golang/go/search?q=hammer&type=code). + +Here is an annotated description of the key pieces of a hammer test: +1. `P` declares the count of goroutines to use, defaulting to 8 or 4 if `testing.Short`. + * Half this amount are the cores used, and 4 is less than a modern laptop's CPU. This allows multiple "hammer" tests to run in parallel. +2. `N` declares the scale of work (loop) per goroutine, defaulting to value that finishes in ~0.1s on a modern laptop. + * When in doubt, try 1000 or 100 if `testing.Short` + * Remember, there are multiple hammer tests and CI nodes are slow. Slower tests hurt feedback loops. +3. `defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P/2))` makes goroutines switch cores, testing visibility of shared data. +4. To ensure goroutines execute at the same time, block them with `sync.WaitGroup`, initialized to `Add(P)`. + * `sync.WaitGroup` internally uses `runtime_Semacquire` not available in any other library. + * `sync.WaitGroup.Add` with a negative value can unblock many goroutines at the same time, e.g. without a for loop. +5. Track goroutines progress via `finished := make(chan int)` where each goroutine in `P` defers `finished <- 1`. + 1. Tests use `require.XXX`, so `recover()` into `t.Fail` in a `defer` function before `finished <- 1`. + * This makes it easier to spot larger concurrency problems as you see each failure, not just the first. + 2. After the `defer` function, await unblocked, then run the stateful function `N` times in a normal loop. + * This loop should trigger shared state problems as locks or atomics are contended by `P` goroutines. +6. After all `P` goroutines launch, atomically release all of them with `WaitGroup.Add(-P)`. +7. Block the runner on goroutine completion, by (`<-finished`) for each `P`. +8. When all goroutines complete, `return` if `t.Failed()`, otherwise perform follow-up state checks. 
+ +This is implemented in wazero in [hammer.go](internal/testing/hammer/hammer.go) + +### Lock-free, cross-goroutine observations of updates + +How to achieve cross-goroutine reads of a variable are not explicitly defined in https://go.dev/ref/mem. wazero uses +atomics to implement this following unofficial practice. For example, a `Close` operation can be guarded to happen only +once via compare-and-swap (CAS) against a zero value. When we use this pattern, we consistently use atomics to both +read and update the same numeric field. + +In lieu of formal documentation, we infer this pattern works from other sources (besides tests): + * `sync.WaitGroup` by definition must support calling `Add` from other goroutines. Internally, it uses atomics. + * rsc in golang/go#5045 writes "atomics guarantee sequential consistency among the atomic variables". + +See https://github.com/golang/go/blob/go1.20/src/sync/waitgroup.go#L64 +See https://github.com/golang/go/issues/5045#issuecomment-252730563 +See https://www.youtube.com/watch?v=VmrEG-3bWyM diff --git a/vendor/github.com/tetratelabs/wazero/README.md b/vendor/github.com/tetratelabs/wazero/README.md new file mode 100644 index 000000000..657da2959 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/README.md @@ -0,0 +1,132 @@ +# wazero: the zero dependency WebAssembly runtime for Go developers + +[![WebAssembly Core Specification Test](https://github.com/tetratelabs/wazero/actions/workflows/spectest.yaml/badge.svg)](https://github.com/tetratelabs/wazero/actions/workflows/spectest.yaml) [![Go Reference](https://pkg.go.dev/badge/github.com/tetratelabs/wazero.svg)](https://pkg.go.dev/github.com/tetratelabs/wazero) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) + +WebAssembly is a way to safely run code compiled in other languages. Runtimes +execute WebAssembly Modules (Wasm), which are most often binaries with a `.wasm` +extension. 
+ +wazero is a WebAssembly Core Specification [1.0][1] and [2.0][2] compliant +runtime written in Go. It has *zero dependencies*, and doesn't rely on CGO. +This means you can run applications in other languages and still keep cross +compilation. + +Import wazero and extend your Go application with code written in any language! + +## Example + +The best way to learn wazero is by trying one of our [examples](examples/README.md). The +most [basic example](examples/basic) extends a Go application with an addition +function defined in WebAssembly. + +## Runtime + +There are two runtime configurations supported in wazero: _Compiler_ is default: + +By default, ex `wazero.NewRuntime(ctx)`, the Compiler is used if supported. You +can also force the interpreter like so: +```go +r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigInterpreter()) +``` + +### Interpreter +Interpreter is a naive interpreter-based implementation of Wasm virtual +machine. Its implementation doesn't have any platform (GOARCH, GOOS) specific +code, therefore _interpreter_ can be used for any compilation target available +for Go (such as `riscv64`). + +### Compiler +Compiler compiles WebAssembly modules into machine code ahead of time (AOT), +during `Runtime.CompileModule`. This means your WebAssembly functions execute +natively at runtime. Compiler is faster than Interpreter, often by order of +magnitude (10x) or more. This is done without host-specific dependencies. + +### Conformance + +Both runtimes pass WebAssembly Core [1.0][7] and [2.0][14] specification tests +on supported platforms: + +| Runtime | Usage | amd64 | arm64 | others | +|:-----------:|:--------------------------------------:|:-----:|:-----:|:------:| +| Interpreter | `wazero.NewRuntimeConfigInterpreter()` | ✅ | ✅ | ✅ | +| Compiler | `wazero.NewRuntimeConfigCompiler()` | ✅ | ✅ | ❌ | + +## Support Policy + +The below support policy focuses on compatibility concerns of those embedding +wazero into their Go applications. 
+ +### wazero + +wazero's [1.0 release][15] happened in March 2023, and is [in use][16] by many +projects and production sites. + +We offer an API stability promise with semantic versioning. In other words, we +promise to not break any exported function signature without incrementing the +major version. This does not mean no innovation: New features and behaviors +happen with a minor version increment, e.g. 1.0.11 to 1.2.0. We also fix bugs +or change internal details with a patch version, e.g. 1.0.0 to 1.0.1. + +You can get the latest version of wazero like this. +```bash +go get github.com/tetratelabs/wazero@latest +``` + +Please give us a [star][17] if you end up using wazero! + +### Go + +wazero has no dependencies except Go, so the only source of conflict in your +project's use of wazero is the Go version. + +wazero follows the same version policy as Go's [Release Policy][10]: two +versions. wazero will ensure these versions work and bugs are valid if there's +an issue with a current Go version. + +Additionally, wazero intentionally delays usage of language or standard library +features one additional version. For example, when Go 1.29 is released, wazero +can use language features or standard libraries added in 1.27. This is a +convenience for embedders who have a slower version policy than Go. However, +only supported Go versions may be used to raise support issues. + +### Platform + +wazero has two runtime modes: Interpreter and Compiler. The only supported operating +systems are ones we test, but that doesn't necessarily mean other operating +system versions won't work. + +We currently test Linux (Ubuntu and scratch), MacOS and Windows as packaged by +[GitHub Actions][11], as well compilation of 32-bit Linux and 64-bit FreeBSD. + +* Interpreter + * Linux is tested on amd64 (native) as well arm64 and riscv64 via emulation. + * MacOS and Windows are only tested on amd64. +* Compiler + * Linux is tested on amd64 (native) as well arm64 via emulation. 
+ * MacOS and Windows are only tested on amd64. + +wazero has no dependencies and doesn't require CGO. This means it can also be +embedded in an application that doesn't use an operating system. This is a main +differentiator between wazero and alternatives. + +We verify zero dependencies by running tests in Docker's [scratch image][12]. +This approach ensures compatibility with any parent image. + +----- +wazero is a registered trademark of Tetrate.io, Inc. in the United States and/or other countries + +[1]: https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/ +[2]: https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/ +[4]: https://github.com/WebAssembly/meetings/blob/main/process/subgroups.md +[5]: https://github.com/WebAssembly/WASI +[6]: https://pkg.go.dev/golang.org/x/sys/unix +[7]: https://github.com/WebAssembly/spec/tree/wg-1.0/test/core +[9]: https://github.com/tetratelabs/wazero/issues/506 +[10]: https://go.dev/doc/devel/release +[11]: https://github.com/actions/virtual-environments +[12]: https://docs.docker.com/develop/develop-images/baseimages/#create-a-simple-parent-image-using-scratch +[13]: https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md +[14]: https://github.com/WebAssembly/spec/tree/d39195773112a22b245ffbe864bab6d1182ccb06/test/core +[15]: https://tetrate.io/blog/introducing-wazero-from-tetrate/ +[16]: https://wazero.io/community/users/ +[17]: https://github.com/tetratelabs/wazero/stargazers diff --git a/vendor/github.com/tetratelabs/wazero/api/features.go b/vendor/github.com/tetratelabs/wazero/api/features.go new file mode 100644 index 000000000..c739d3bf7 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/api/features.go @@ -0,0 +1,214 @@ +package api + +import ( + "fmt" + "strings" +) + +// CoreFeatures is a bit flag of WebAssembly Core specification features. See +// https://github.com/WebAssembly/proposals for proposals and their status. 
+// +// Constants define individual features, such as CoreFeatureMultiValue, or +// groups of "finished" features, assigned to a WebAssembly Core Specification +// version, e.g. CoreFeaturesV1 or CoreFeaturesV2. +// +// Note: Numeric values are not intended to be interpreted except as bit flags. +type CoreFeatures uint64 + +// CoreFeaturesV1 are features included in the WebAssembly Core Specification +// 1.0. As of late 2022, this is the only version that is a Web Standard (W3C +// Recommendation). +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/ +const CoreFeaturesV1 = CoreFeatureMutableGlobal + +// CoreFeaturesV2 are features included in the WebAssembly Core Specification +// 2.0 (20220419). As of late 2022, version 2.0 is a W3C working draft, not yet +// a Web Standard (W3C Recommendation). +// +// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#release-1-1 +const CoreFeaturesV2 = CoreFeaturesV1 | + CoreFeatureBulkMemoryOperations | + CoreFeatureMultiValue | + CoreFeatureNonTrappingFloatToIntConversion | + CoreFeatureReferenceTypes | + CoreFeatureSignExtensionOps | + CoreFeatureSIMD + +const ( + // CoreFeatureBulkMemoryOperations adds instructions modify ranges of + // memory or table entries ("bulk-memory-operations"). This is included in + // CoreFeaturesV2, but not CoreFeaturesV1. + // + // Here are the notable effects: + // - Adds `memory.fill`, `memory.init`, `memory.copy` and `data.drop` + // instructions. + // - Adds `table.init`, `table.copy` and `elem.drop` instructions. + // - Introduces a "passive" form of element and data segments. + // - Stops checking "active" element and data segment boundaries at + // compile-time, meaning they can error at runtime. + // + // Note: "bulk-memory-operations" is mixed with the "reference-types" + // proposal due to the WebAssembly Working Group merging them + // "mutually dependent". 
Therefore, enabling this feature requires enabling + // CoreFeatureReferenceTypes, and vice-versa. + // + // See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md + // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md and + // https://github.com/WebAssembly/spec/pull/1287 + CoreFeatureBulkMemoryOperations CoreFeatures = 1 << iota + + // CoreFeatureMultiValue enables multiple values ("multi-value"). This is + // included in CoreFeaturesV2, but not CoreFeaturesV1. + // + // Here are the notable effects: + // - Function (`func`) types allow more than one result. + // - Block types (`block`, `loop` and `if`) can be arbitrary function + // types. + // + // See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/multi-value/Overview.md + CoreFeatureMultiValue + + // CoreFeatureMutableGlobal allows globals to be mutable. This is included + // in both CoreFeaturesV1 and CoreFeaturesV2. + // + // When false, an api.Global can never be cast to an api.MutableGlobal, and + // any wasm that includes global vars will fail to parse. + CoreFeatureMutableGlobal + + // CoreFeatureNonTrappingFloatToIntConversion enables non-trapping + // float-to-int conversions ("nontrapping-float-to-int-conversion"). This + // is included in CoreFeaturesV2, but not CoreFeaturesV1. + // + // The only effect of enabling is allowing the following instructions, + // which return 0 on NaN instead of panicking. 
+ // - `i32.trunc_sat_f32_s` + // - `i32.trunc_sat_f32_u` + // - `i32.trunc_sat_f64_s` + // - `i32.trunc_sat_f64_u` + // - `i64.trunc_sat_f32_s` + // - `i64.trunc_sat_f32_u` + // - `i64.trunc_sat_f64_s` + // - `i64.trunc_sat_f64_u` + // + // See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/nontrapping-float-to-int-conversion/Overview.md + CoreFeatureNonTrappingFloatToIntConversion + + // CoreFeatureReferenceTypes enables various instructions and features + // related to table and new reference types. This is included in + // CoreFeaturesV2, but not CoreFeaturesV1. + // + // - Introduction of new value types: `funcref` and `externref`. + // - Support for the following new instructions: + // - `ref.null` + // - `ref.func` + // - `ref.is_null` + // - `table.fill` + // - `table.get` + // - `table.grow` + // - `table.set` + // - `table.size` + // - Support for multiple tables per module: + // - `call_indirect`, `table.init`, `table.copy` and `elem.drop` + // - Support for instructions can take non-zero table index. + // - Element segments can take non-zero table index. + // + // Note: "reference-types" is mixed with the "bulk-memory-operations" + // proposal due to the WebAssembly Working Group merging them + // "mutually dependent". Therefore, enabling this feature requires enabling + // CoreFeatureBulkMemoryOperations, and vice-versa. + // + // See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md + // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md and + // https://github.com/WebAssembly/spec/pull/1287 + CoreFeatureReferenceTypes + + // CoreFeatureSignExtensionOps enables sign extension instructions + // ("sign-extension-ops"). This is included in CoreFeaturesV2, but not + // CoreFeaturesV1. 
+ // + // Adds instructions: + // - `i32.extend8_s` + // - `i32.extend16_s` + // - `i64.extend8_s` + // - `i64.extend16_s` + // - `i64.extend32_s` + // + // See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/sign-extension-ops/Overview.md + CoreFeatureSignExtensionOps + + // CoreFeatureSIMD enables the vector value type and vector instructions + // (aka SIMD). This is included in CoreFeaturesV2, but not CoreFeaturesV1. + // + // Note: The instruction list is too long to enumerate in godoc. + // See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md + CoreFeatureSIMD + + // Update experimental/features.go when adding elements here. +) + +// SetEnabled enables or disables the feature or group of features. +func (f CoreFeatures) SetEnabled(feature CoreFeatures, val bool) CoreFeatures { + if val { + return f | feature + } + return f &^ feature +} + +// IsEnabled returns true if the feature (or group of features) is enabled. +func (f CoreFeatures) IsEnabled(feature CoreFeatures) bool { + return f&feature != 0 +} + +// RequireEnabled returns an error if the feature (or group of features) is not +// enabled. +func (f CoreFeatures) RequireEnabled(feature CoreFeatures) error { + if f&feature == 0 { + return fmt.Errorf("feature %q is disabled", feature) + } + return nil +} + +// String implements fmt.Stringer by returning each enabled feature. 
+func (f CoreFeatures) String() string { + var builder strings.Builder + for i := 0; i <= 63; i++ { // cycle through all bits to reduce code and maintenance + target := CoreFeatures(1 << i) + if f.IsEnabled(target) { + if name := featureName(target); name != "" { + if builder.Len() > 0 { + builder.WriteByte('|') + } + builder.WriteString(name) + } + } + } + return builder.String() +} + +func featureName(f CoreFeatures) string { + switch f { + case CoreFeatureMutableGlobal: + // match https://github.com/WebAssembly/mutable-global + return "mutable-global" + case CoreFeatureSignExtensionOps: + // match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/sign-extension-ops/Overview.md + return "sign-extension-ops" + case CoreFeatureMultiValue: + // match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/multi-value/Overview.md + return "multi-value" + case CoreFeatureNonTrappingFloatToIntConversion: + // match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/nontrapping-float-to-int-conversion/Overview.md + return "nontrapping-float-to-int-conversion" + case CoreFeatureBulkMemoryOperations: + // match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md + return "bulk-memory-operations" + case CoreFeatureReferenceTypes: + // match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md + return "reference-types" + case CoreFeatureSIMD: + // match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md + return "simd" + } + return "" +} diff --git a/vendor/github.com/tetratelabs/wazero/api/wasm.go b/vendor/github.com/tetratelabs/wazero/api/wasm.go new file mode 100644 index 000000000..c66b582fa --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/api/wasm.go @@ -0,0 +1,762 @@ +// Package api includes constants and interfaces used by both end-users and internal implementations. 
+package api + +import ( + "context" + "fmt" + "math" + + "github.com/tetratelabs/wazero/internal/internalapi" +) + +// ExternType classifies imports and exports with their respective types. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#external-types%E2%91%A0 +type ExternType = byte + +const ( + ExternTypeFunc ExternType = 0x00 + ExternTypeTable ExternType = 0x01 + ExternTypeMemory ExternType = 0x02 + ExternTypeGlobal ExternType = 0x03 +) + +// The below are exported to consolidate parsing behavior for external types. +const ( + // ExternTypeFuncName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeFunc. + ExternTypeFuncName = "func" + // ExternTypeTableName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeTable. + ExternTypeTableName = "table" + // ExternTypeMemoryName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeMemory. + ExternTypeMemoryName = "memory" + // ExternTypeGlobalName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeGlobal. + ExternTypeGlobalName = "global" +) + +// ExternTypeName returns the name of the WebAssembly 1.0 (20191205) Text Format field of the given type. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A4 +func ExternTypeName(et ExternType) string { + switch et { + case ExternTypeFunc: + return ExternTypeFuncName + case ExternTypeTable: + return ExternTypeTableName + case ExternTypeMemory: + return ExternTypeMemoryName + case ExternTypeGlobal: + return ExternTypeGlobalName + } + return fmt.Sprintf("%#x", et) +} + +// ValueType describes a parameter or result type mapped to a WebAssembly +// function signature. 
+// +// The following describes how to convert between Wasm and Golang types: +// +// - ValueTypeI32 - EncodeU32 DecodeU32 for uint32 / EncodeI32 DecodeI32 for int32 +// - ValueTypeI64 - uint64(int64) +// - ValueTypeF32 - EncodeF32 DecodeF32 from float32 +// - ValueTypeF64 - EncodeF64 DecodeF64 from float64 +// - ValueTypeExternref - unintptr(unsafe.Pointer(p)) where p is any pointer +// type in Go (e.g. *string) +// +// e.g. Given a Text Format type use (param i64) (result i64), no conversion is +// necessary. +// +// results, _ := fn(ctx, input) +// result := result[0] +// +// e.g. Given a Text Format type use (param f64) (result f64), conversion is +// necessary. +// +// results, _ := fn(ctx, api.EncodeF64(input)) +// result := api.DecodeF64(result[0]) +// +// Note: This is a type alias as it is easier to encode and decode in the +// binary format. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-valtype +type ValueType = byte + +const ( + // ValueTypeI32 is a 32-bit integer. + ValueTypeI32 ValueType = 0x7f + // ValueTypeI64 is a 64-bit integer. + ValueTypeI64 ValueType = 0x7e + // ValueTypeF32 is a 32-bit floating point number. + ValueTypeF32 ValueType = 0x7d + // ValueTypeF64 is a 64-bit floating point number. + ValueTypeF64 ValueType = 0x7c + + // ValueTypeExternref is a externref type. + // + // Note: in wazero, externref type value are opaque raw 64-bit pointers, + // and the ValueTypeExternref type in the signature will be translated as + // uintptr in wazero's API level. + // + // For example, given the import function: + // (func (import "env" "f") (param externref) (result externref)) + // + // This can be defined in Go as: + // r.NewHostModuleBuilder("env"). + // NewFunctionBuilder(). + // WithFunc(func(context.Context, _ uintptr) (_ uintptr) { return }). + // Export("f") + // + // Note: The usage of this type is toggled with api.CoreFeatureBulkMemoryOperations. 
+ ValueTypeExternref ValueType = 0x6f +) + +// ValueTypeName returns the type name of the given ValueType as a string. +// These type names match the names used in the WebAssembly text format. +// +// Note: This returns "unknown", if an undefined ValueType value is passed. +func ValueTypeName(t ValueType) string { + switch t { + case ValueTypeI32: + return "i32" + case ValueTypeI64: + return "i64" + case ValueTypeF32: + return "f32" + case ValueTypeF64: + return "f64" + case ValueTypeExternref: + return "externref" + } + return "unknown" +} + +// Module is a sandboxed, ready to execute Wasm module. This can be used to get exported functions, etc. +// +// In WebAssembly terminology, this corresponds to a "Module Instance", but wazero calls pre-instantiation module as +// "Compiled Module" as in wazero.CompiledModule, therefore we call this post-instantiation module simply "Module". +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#module-instances%E2%91%A0 +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +// - Closing the wazero.Runtime closes any Module it instantiated. +type Module interface { + fmt.Stringer + + // Name is the name this module was instantiated with. Exported functions can be imported with this name. + Name() string + + // Memory returns a memory defined in this module or nil if there are none wasn't. + Memory() Memory + + // ExportedFunction returns a function exported from this module or nil if it wasn't. + // + // Note: The default wazero.ModuleConfig attempts to invoke `_start`, which + // in rare cases can close the module. When in doubt, check IsClosed prior + // to invoking a function export after instantiation. + ExportedFunction(name string) Function + + // ExportedFunctionDefinitions returns all the exported function + // definitions in this module, keyed on export name. 
+ ExportedFunctionDefinitions() map[string]FunctionDefinition + + // TODO: Table + + // ExportedMemory returns a memory exported from this module or nil if it wasn't. + // + // WASI modules require exporting a Memory named "memory". This means that a module successfully initialized + // as a WASI Command or Reactor will never return nil for this name. + // + // See https://github.com/WebAssembly/WASI/blob/snapshot-01/design/application-abi.md#current-unstable-abi + ExportedMemory(name string) Memory + + // ExportedMemoryDefinitions returns all the exported memory definitions + // in this module, keyed on export name. + // + // Note: As of WebAssembly Core Specification 2.0, there can be at most one + // memory. + ExportedMemoryDefinitions() map[string]MemoryDefinition + + // ExportedGlobal a global exported from this module or nil if it wasn't. + ExportedGlobal(name string) Global + + // CloseWithExitCode releases resources allocated for this Module. Use a non-zero exitCode parameter to indicate a + // failure to ExportedFunction callers. + // + // The error returned here, if present, is about resource de-allocation (such as I/O errors). Only the last error is + // returned, so a non-nil return means at least one error happened. Regardless of error, this Module will + // be removed, making its name available again. + // + // Calling this inside a host function is safe, and may cause ExportedFunction callers to receive a sys.ExitError + // with the exitCode. + CloseWithExitCode(ctx context.Context, exitCode uint32) error + + // Closer closes this module by delegating to CloseWithExitCode with an exit code of zero. + Closer + + // IsClosed returns true if the module is closed, so no longer usable. + // + // This can happen for the following reasons: + // - Closer was called directly. + // - A guest function called Closer indirectly, such as `_start` calling + // `proc_exit`, which internally closed the module. 
+ // - wazero.RuntimeConfig `WithCloseOnContextDone` was enabled and a + // context completion closed the module. + // + // Where any of the above are possible, check this value before calling an + // ExportedFunction, even if you didn't formerly receive a sys.ExitError. + // sys.ExitError is only returned on non-zero code, something that closes + // the module successfully will not result it one. + IsClosed() bool + + internalapi.WazeroOnly +} + +// Closer closes a resource. +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +type Closer interface { + // Close closes the resource. + // + // Note: The context parameter is used for value lookup, such as for + // logging. A canceled or otherwise done context will not prevent Close + // from succeeding. + Close(context.Context) error +} + +// ExportDefinition is a WebAssembly type exported in a module +// (wazero.CompiledModule). +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0 +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +type ExportDefinition interface { + // ModuleName is the possibly empty name of the module defining this + // export. + // + // Note: This may be different from Module.Name, because a compiled module + // can be instantiated multiple times as different names. + ModuleName() string + + // Index is the position in the module's index, imports first. + Index() uint32 + + // Import returns true with the module and name when this was imported. + // Otherwise, it returns false. + // + // Note: Empty string is valid for both names in the WebAssembly Core + // Specification, so "" "" is possible. + Import() (moduleName, name string, isImport bool) + + // ExportNames include all exported names. + // + // Note: The empty name is allowed in the WebAssembly Core Specification, + // so "" is possible. 
+ ExportNames() []string + + internalapi.WazeroOnly +} + +// MemoryDefinition is a WebAssembly memory exported in a module +// (wazero.CompiledModule). Units are in pages (64KB). +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0 +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +type MemoryDefinition interface { + ExportDefinition + + // Min returns the possibly zero initial count of 64KB pages. + Min() uint32 + + // Max returns the possibly zero max count of 64KB pages, or false if + // unbounded. + Max() (uint32, bool) + + internalapi.WazeroOnly +} + +// FunctionDefinition is a WebAssembly function exported in a module +// (wazero.CompiledModule). +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0 +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +type FunctionDefinition interface { + ExportDefinition + + // Name is the module-defined name of the function, which is not necessarily + // the same as its export name. + Name() string + + // DebugName identifies this function based on its Index or Name in the + // module. This is used for errors and stack traces. e.g. "env.abort". + // + // When the function name is empty, a substitute name is generated by + // prefixing '$' to its position in the index. Ex ".$0" is the + // first function (possibly imported) in an unnamed module. + // + // The format is dot-delimited module and function name, but there are no + // restrictions on the module and function name. This means either can be + // empty or include dots. e.g. "x.x.x" could mean module "x" and name "x.x", + // or it could mean module "x.x" and name "x". + // + // Note: This name is stable regardless of import or export. 
For example, + // if Import returns true, the value is still based on the Name or Index + // and not the imported function name. + DebugName() string + + // GoFunction is non-nil when implemented by the embedder instead of a wasm + // binary, e.g. via wazero.HostModuleBuilder + // + // The expected results are nil, GoFunction or GoModuleFunction. + GoFunction() interface{} + + // ParamTypes are the possibly empty sequence of value types accepted by a + // function with this signature. + // + // See ValueType documentation for encoding rules. + ParamTypes() []ValueType + + // ParamNames are index-correlated with ParamTypes or nil if not available + // for one or more parameters. + ParamNames() []string + + // ResultTypes are the results of the function. + // + // When WebAssembly 1.0 (20191205), there can be at most one result. + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#result-types%E2%91%A0 + // + // See ValueType documentation for encoding rules. + ResultTypes() []ValueType + + // ResultNames are index-correlated with ResultTypes or nil if not + // available for one or more results. + ResultNames() []string + + internalapi.WazeroOnly +} + +// Function is a WebAssembly function exported from an instantiated module +// (wazero.Runtime InstantiateModule). +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-func +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +type Function interface { + // Definition is metadata about this function from its defining module. + Definition() FunctionDefinition + + // Call invokes the function with the given parameters and returns any + // results or an error for any failure looking up or invoking the function. + // + // Encoding is described in Definition, and supplying an incorrect count of + // parameters vs FunctionDefinition.ParamTypes is an error. 
+ // + // If the exporting Module was closed during this call, the error returned + // may be a sys.ExitError. See Module.CloseWithExitCode for details. + // + // Call is not goroutine-safe, therefore it is recommended to create + // another Function if you want to invoke the same function concurrently. + // On the other hand, sequential invocations of Call is allowed. + // However, this should not be called multiple times until the previous Call returns. + // + // To safely encode/decode params/results expressed as uint64, users are encouraged to + // use api.EncodeXXX or DecodeXXX functions. See the docs on api.ValueType. + // + // When RuntimeConfig.WithCloseOnContextDone is toggled, the invocation of this Call method is ensured to be closed + // whenever one of the three conditions is met. In the event of close, sys.ExitError will be returned and + // the api.Module from which this api.Function is derived will be made closed. See the documentation of + // WithCloseOnContextDone on wazero.RuntimeConfig for detail. See examples in context_done_example_test.go for + // the end-to-end demonstrations of how these terminations can be performed. + Call(ctx context.Context, params ...uint64) ([]uint64, error) + + // CallWithStack is an optimized variation of Call that saves memory + // allocations when the stack slice is reused across calls. + // + // Stack length must be at least the max of parameter or result length. + // The caller adds parameters in order to the stack, and reads any results + // in order from the stack, except in the error case. 
+ // + // For example, the following reuses the same stack slice to call searchFn + // repeatedly saving one allocation per iteration: + // + // stack := make([]uint64, 4) + // for i, search := range searchParams { + // // copy the next params to the stack + // copy(stack, search) + // if err := searchFn.CallWithStack(ctx, stack); err != nil { + // return err + // } else if stack[0] == 1 { // found + // return i // searchParams[i] matched! + // } + // } + // + // # Notes + // + // - This is similar to GoModuleFunction, except for using calling functions + // instead of implementing them. Moreover, this is used regardless of + // whether the callee is a host or wasm defined function. + CallWithStack(ctx context.Context, stack []uint64) error + + internalapi.WazeroOnly +} + +// GoModuleFunction is a Function implemented in Go instead of a wasm binary. +// The Module parameter is the calling module, used to access memory or +// exported functions. See GoModuleFunc for an example. +// +// The stack is includes any parameters encoded according to their ValueType. +// Its length is the max of parameter or result length. When there are results, +// write them in order beginning at index zero. Do not use the stack after the +// function returns. +// +// Here's a typical way to read three parameters and write back one. +// +// // read parameters off the stack in index order +// argv, argvBuf := api.DecodeU32(stack[0]), api.DecodeU32(stack[1]) +// +// // write results back to the stack in index order +// stack[0] = api.EncodeU32(ErrnoSuccess) +// +// This function can be non-deterministic or cause side effects. It also +// has special properties not defined in the WebAssembly Core specification. +// Notably, this uses the caller's memory (via Module.Memory). See +// https://www.w3.org/TR/wasm-core-1/#host-functions%E2%91%A0 +// +// Most end users will not define functions directly with this, as they will +// use reflection or code generators instead. 
These approaches are more +// idiomatic as they can map go types to ValueType. This type is exposed for +// those willing to trade usability and safety for performance. +// +// To safely decode/encode values from/to the uint64 stack, users are encouraged to use +// api.EncodeXXX or api.DecodeXXX functions. See the docs on api.ValueType. +type GoModuleFunction interface { + Call(ctx context.Context, mod Module, stack []uint64) +} + +// GoModuleFunc is a convenience for defining an inlined function. +// +// For example, the following returns an uint32 value read from parameter zero: +// +// api.GoModuleFunc(func(ctx context.Context, mod api.Module, stack []uint64) { +// offset := api.DecodeU32(stack[0]) // read the parameter from the stack +// +// ret, ok := mod.Memory().ReadUint32Le(offset) +// if !ok { +// panic("out of memory") +// } +// +// stack[0] = api.EncodeU32(ret) // add the result back to the stack. +// }) +type GoModuleFunc func(ctx context.Context, mod Module, stack []uint64) + +// Call implements GoModuleFunction.Call. +func (f GoModuleFunc) Call(ctx context.Context, mod Module, stack []uint64) { + f(ctx, mod, stack) +} + +// GoFunction is an optimized form of GoModuleFunction which doesn't require +// the Module parameter. See GoFunc for an example. +// +// For example, this function does not need to use the importing module's +// memory or exported functions. +type GoFunction interface { + Call(ctx context.Context, stack []uint64) +} + +// GoFunc is a convenience for defining an inlined function. +// +// For example, the following returns the sum of two uint32 parameters: +// +// api.GoFunc(func(ctx context.Context, stack []uint64) { +// x, y := api.DecodeU32(stack[0]), api.DecodeU32(stack[1]) +// stack[0] = api.EncodeU32(x + y) +// }) +type GoFunc func(ctx context.Context, stack []uint64) + +// Call implements GoFunction.Call. 
+func (f GoFunc) Call(ctx context.Context, stack []uint64) { + f(ctx, stack) +} + +// Global is a WebAssembly 1.0 (20191205) global exported from an instantiated module (wazero.Runtime InstantiateModule). +// +// For example, if the value is not mutable, you can read it once: +// +// offset := module.ExportedGlobal("memory.offset").Get() +// +// Globals are allowed by specification to be mutable. However, this can be disabled by configuration. When in doubt, +// safe cast to find out if the value can change. Here's an example: +// +// offset := module.ExportedGlobal("memory.offset") +// if _, ok := offset.(api.MutableGlobal); ok { +// // value can change +// } else { +// // value is constant +// } +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#globals%E2%91%A0 +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +type Global interface { + fmt.Stringer + + // Type describes the numeric type of the global. + Type() ValueType + + // Get returns the last known value of this global. + // + // See Type for how to decode this value to a Go type. + Get() uint64 +} + +// MutableGlobal is a Global whose value can be updated at runtime (variable). +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +type MutableGlobal interface { + Global + + // Set updates the value of this global. + // + // See Global.Type for how to encode this value from a Go type. + Set(v uint64) + + internalapi.WazeroOnly +} + +// Memory allows restricted access to a module's memory. Notably, this does not allow growing. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#storage%E2%91%A0 +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. 
+// - This includes all value types available in WebAssembly 1.0 (20191205) and all are encoded little-endian. +type Memory interface { + // Definition is metadata about this memory from its defining module. + Definition() MemoryDefinition + + // Size returns the memory size in bytes available. + // e.g. If the underlying memory has 1 page: 65536 + // + // # Notes + // + // - This overflows (returns zero) if the memory has the maximum 65536 pages. + // As a workaround until wazero v2 to fix the return type, use Grow(0) to obtain the current pages and + // multiply by 65536. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefsyntax-instr-memorymathsfmemorysize%E2%91%A0 + Size() uint32 + + // Grow increases memory by the delta in pages (65536 bytes per page). + // The return val is the previous memory size in pages, or false if the + // delta was ignored as it exceeds MemoryDefinition.Max. + // + // # Notes + // + // - This is the same as the "memory.grow" instruction defined in the + // WebAssembly Core Specification, except returns false instead of -1. + // - When this returns true, any shared views via Read must be refreshed. + // + // See MemorySizer Read and https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem + Grow(deltaPages uint32) (previousPages uint32, ok bool) + + // ReadByte reads a single byte from the underlying buffer at the offset or returns false if out of range. + ReadByte(offset uint32) (byte, bool) + + // ReadUint16Le reads a uint16 in little-endian encoding from the underlying buffer at the offset in or returns + // false if out of range. + ReadUint16Le(offset uint32) (uint16, bool) + + // ReadUint32Le reads a uint32 in little-endian encoding from the underlying buffer at the offset in or returns + // false if out of range. 
+ ReadUint32Le(offset uint32) (uint32, bool) + + // ReadFloat32Le reads a float32 from 32 IEEE 754 little-endian encoded bits in the underlying buffer at the offset + // or returns false if out of range. + // See math.Float32bits + ReadFloat32Le(offset uint32) (float32, bool) + + // ReadUint64Le reads a uint64 in little-endian encoding from the underlying buffer at the offset or returns false + // if out of range. + ReadUint64Le(offset uint32) (uint64, bool) + + // ReadFloat64Le reads a float64 from 64 IEEE 754 little-endian encoded bits in the underlying buffer at the offset + // or returns false if out of range. + // + // See math.Float64bits + ReadFloat64Le(offset uint32) (float64, bool) + + // Read reads byteCount bytes from the underlying buffer at the offset or + // returns false if out of range. + // + // For example, to search for a NUL-terminated string: + // buf, _ = memory.Read(offset, byteCount) + // n := bytes.IndexByte(buf, 0) + // if n < 0 { + // // Not found! + // } + // + // Write-through + // + // This returns a view of the underlying memory, not a copy. This means any + // writes to the slice returned are visible to Wasm, and any updates from + // Wasm are visible reading the returned slice. + // + // For example: + // buf, _ = memory.Read(offset, byteCount) + // buf[1] = 'a' // writes through to memory, meaning Wasm code see 'a'. + // + // If you don't intend-write through, make a copy of the returned slice. + // + // When to refresh Read + // + // The returned slice disconnects on any capacity change. For example, + // `buf = append(buf, 'a')` might result in a slice that is no longer + // shared. The same exists Wasm side. For example, if Wasm changes its + // memory capacity, ex via "memory.grow"), the host slice is no longer + // shared. Those who need a stable view must set Wasm memory min=max, or + // use wazero.RuntimeConfig WithMemoryCapacityPages to ensure max is always + // allocated. 
+ Read(offset, byteCount uint32) ([]byte, bool) + + // WriteByte writes a single byte to the underlying buffer at the offset in or returns false if out of range. + WriteByte(offset uint32, v byte) bool + + // WriteUint16Le writes the value in little-endian encoding to the underlying buffer at the offset in or returns + // false if out of range. + WriteUint16Le(offset uint32, v uint16) bool + + // WriteUint32Le writes the value in little-endian encoding to the underlying buffer at the offset in or returns + // false if out of range. + WriteUint32Le(offset, v uint32) bool + + // WriteFloat32Le writes the value in 32 IEEE 754 little-endian encoded bits to the underlying buffer at the offset + // or returns false if out of range. + // + // See math.Float32bits + WriteFloat32Le(offset uint32, v float32) bool + + // WriteUint64Le writes the value in little-endian encoding to the underlying buffer at the offset in or returns + // false if out of range. + WriteUint64Le(offset uint32, v uint64) bool + + // WriteFloat64Le writes the value in 64 IEEE 754 little-endian encoded bits to the underlying buffer at the offset + // or returns false if out of range. + // + // See math.Float64bits + WriteFloat64Le(offset uint32, v float64) bool + + // Write writes the slice to the underlying buffer at the offset or returns false if out of range. + Write(offset uint32, v []byte) bool + + // WriteString writes the string to the underlying buffer at the offset or returns false if out of range. + WriteString(offset uint32, v string) bool + + internalapi.WazeroOnly +} + +// CustomSection contains the name and raw data of a custom section. +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. 
+type CustomSection interface { + // Name is the name of the custom section + Name() string + // Data is the raw data of the custom section + Data() []byte + + internalapi.WazeroOnly +} + +// EncodeExternref encodes the input as a ValueTypeExternref. +// +// See DecodeExternref +func EncodeExternref(input uintptr) uint64 { + return uint64(input) +} + +// DecodeExternref decodes the input as a ValueTypeExternref. +// +// See EncodeExternref +func DecodeExternref(input uint64) uintptr { + return uintptr(input) +} + +// EncodeI32 encodes the input as a ValueTypeI32. +func EncodeI32(input int32) uint64 { + return uint64(uint32(input)) +} + +// DecodeI32 decodes the input as a ValueTypeI32. +func DecodeI32(input uint64) int32 { + return int32(input) +} + +// EncodeU32 encodes the input as a ValueTypeI32. +func EncodeU32(input uint32) uint64 { + return uint64(input) +} + +// DecodeU32 decodes the input as a ValueTypeI32. +func DecodeU32(input uint64) uint32 { + return uint32(input) +} + +// EncodeI64 encodes the input as a ValueTypeI64. +func EncodeI64(input int64) uint64 { + return uint64(input) +} + +// EncodeF32 encodes the input as a ValueTypeF32. +// +// See DecodeF32 +func EncodeF32(input float32) uint64 { + return uint64(math.Float32bits(input)) +} + +// DecodeF32 decodes the input as a ValueTypeF32. +// +// See EncodeF32 +func DecodeF32(input uint64) float32 { + return math.Float32frombits(uint32(input)) +} + +// EncodeF64 encodes the input as a ValueTypeF64. +// +// See EncodeF32 +func EncodeF64(input float64) uint64 { + return math.Float64bits(input) +} + +// DecodeF64 decodes the input as a ValueTypeF64. 
+// +// See EncodeF64 +func DecodeF64(input uint64) float64 { + return math.Float64frombits(input) +} diff --git a/vendor/github.com/tetratelabs/wazero/builder.go b/vendor/github.com/tetratelabs/wazero/builder.go new file mode 100644 index 000000000..f64afabdf --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/builder.go @@ -0,0 +1,352 @@ +package wazero + +import ( + "context" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/wasm" +) + +// HostFunctionBuilder defines a host function (in Go), so that a +// WebAssembly binary (e.g. %.wasm file) can import and use it. +// +// Here's an example of an addition function: +// +// hostModuleBuilder.NewFunctionBuilder(). +// WithFunc(func(cxt context.Context, x, y uint32) uint32 { +// return x + y +// }). +// Export("add") +// +// # Memory +// +// All host functions act on the importing api.Module, including any memory +// exported in its binary (%.wasm file). If you are reading or writing memory, +// it is sand-boxed Wasm memory defined by the guest. +// +// Below, `m` is the importing module, defined in Wasm. `fn` is a host function +// added via Export. This means that `x` was read from memory defined in Wasm, +// not arbitrary memory in the process. +// +// fn := func(ctx context.Context, m api.Module, offset uint32) uint32 { +// x, _ := m.Memory().ReadUint32Le(ctx, offset) +// return x +// } +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +type HostFunctionBuilder interface { + // WithGoFunction is an advanced feature for those who need higher + // performance than WithFunc at the cost of more complexity. 
+ // + // Here's an example addition function: + // + // builder.WithGoFunction(api.GoFunc(func(ctx context.Context, stack []uint64) { + // x, y := api.DecodeI32(stack[0]), api.DecodeI32(stack[1]) + // sum := x + y + // stack[0] = api.EncodeI32(sum) + // }), []api.ValueType{api.ValueTypeI32, api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32}) + // + // As you can see above, defining in this way implies knowledge of which + // WebAssembly api.ValueType is appropriate for each parameter and result. + // + // See WithGoModuleFunction if you also need to access the calling module. + WithGoFunction(fn api.GoFunction, params, results []api.ValueType) HostFunctionBuilder + + // WithGoModuleFunction is an advanced feature for those who need higher + // performance than WithFunc at the cost of more complexity. + // + // Here's an example addition function that loads operands from memory: + // + // builder.WithGoModuleFunction(api.GoModuleFunc(func(ctx context.Context, m api.Module, stack []uint64) { + // mem := m.Memory() + // offset := api.DecodeU32(stack[0]) + // + // x, _ := mem.ReadUint32Le(ctx, offset) + // y, _ := mem.ReadUint32Le(ctx, offset + 4) // 32 bits == 4 bytes! + // sum := x + y + // + // stack[0] = api.EncodeU32(sum) + // }), []api.ValueType{api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32}) + // + // As you can see above, defining in this way implies knowledge of which + // WebAssembly api.ValueType is appropriate for each parameter and result. + // + // See WithGoFunction if you don't need access to the calling module. + WithGoModuleFunction(fn api.GoModuleFunction, params, results []api.ValueType) HostFunctionBuilder + + // WithFunc uses reflect.Value to map a go `func` to a WebAssembly + // compatible Signature. An input that isn't a `func` will fail to + // instantiate. 
+ // + // Here's an example of an addition function: + // + // builder.WithFunc(func(cxt context.Context, x, y uint32) uint32 { + // return x + y + // }) + // + // # Defining a function + // + // Except for the context.Context and optional api.Module, all parameters + // or result types must map to WebAssembly numeric value types. This means + // uint32, int32, uint64, int64, float32 or float64. + // + // api.Module may be specified as the second parameter, usually to access + // memory. This is important because there are only numeric types in Wasm. + // The only way to share other data is via writing memory and sharing + // offsets. + // + // builder.WithFunc(func(ctx context.Context, m api.Module, offset uint32) uint32 { + // mem := m.Memory() + // x, _ := mem.ReadUint32Le(ctx, offset) + // y, _ := mem.ReadUint32Le(ctx, offset + 4) // 32 bits == 4 bytes! + // return x + y + // }) + // + // This example propagates context properly when calling other functions + // exported in the api.Module: + // + // builder.WithFunc(func(ctx context.Context, m api.Module, offset, byteCount uint32) uint32 { + // fn = m.ExportedFunction("__read") + // results, err := fn(ctx, offset, byteCount) + // --snip-- + WithFunc(interface{}) HostFunctionBuilder + + // WithName defines the optional module-local name of this function, e.g. + // "random_get" + // + // Note: This is not required to match the Export name. + WithName(name string) HostFunctionBuilder + + // WithParameterNames defines optional parameter names of the function + // signature, e.x. "buf", "buf_len" + // + // Note: When defined, names must be provided for all parameters. + WithParameterNames(names ...string) HostFunctionBuilder + + // WithResultNames defines optional result names of the function + // signature, e.x. "errno" + // + // Note: When defined, names must be provided for all results. 
+ WithResultNames(names ...string) HostFunctionBuilder + + // Export exports this to the HostModuleBuilder as the given name, e.g. + // "random_get" + Export(name string) HostModuleBuilder +} + +// HostModuleBuilder is a way to define host functions (in Go), so that a +// WebAssembly binary (e.g. %.wasm file) can import and use them. +// +// Specifically, this implements the host side of an Application Binary +// Interface (ABI) like WASI or AssemblyScript. +// +// For example, this defines and instantiates a module named "env" with one +// function: +// +// ctx := context.Background() +// r := wazero.NewRuntime(ctx) +// defer r.Close(ctx) // This closes everything this Runtime created. +// +// hello := func() { +// println("hello!") +// } +// env, _ := r.NewHostModuleBuilder("env"). +// NewFunctionBuilder().WithFunc(hello).Export("hello"). +// Instantiate(ctx) +// +// If the same module may be instantiated multiple times, it is more efficient +// to separate steps. Here's an example: +// +// compiled, _ := r.NewHostModuleBuilder("env"). +// NewFunctionBuilder().WithFunc(getRandomString).Export("get_random_string"). +// Compile(ctx) +// +// env1, _ := r.InstantiateModule(ctx, compiled, wazero.NewModuleConfig().WithName("env.1")) +// env2, _ := r.InstantiateModule(ctx, compiled, wazero.NewModuleConfig().WithName("env.2")) +// +// See HostFunctionBuilder for valid host function signatures and other details. +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +// - HostModuleBuilder is mutable: each method returns the same instance for +// chaining. +// - methods do not return errors, to allow chaining. Any validation errors +// are deferred until Compile. +// - Functions are indexed in order of calls to NewFunctionBuilder as +// insertion ordering is needed by ABI such as Emscripten (invoke_*). 
+type HostModuleBuilder interface { + // Note: until golang/go#5860, we can't use example tests to embed code in interface godocs. + + // NewFunctionBuilder begins the definition of a host function. + NewFunctionBuilder() HostFunctionBuilder + + // Compile returns a CompiledModule that can be instantiated by Runtime. + Compile(context.Context) (CompiledModule, error) + + // Instantiate is a convenience that calls Compile, then Runtime.InstantiateModule. + // This can fail for reasons documented on Runtime.InstantiateModule. + // + // Here's an example: + // + // ctx := context.Background() + // r := wazero.NewRuntime(ctx) + // defer r.Close(ctx) // This closes everything this Runtime created. + // + // hello := func() { + // println("hello!") + // } + // env, _ := r.NewHostModuleBuilder("env"). + // NewFunctionBuilder().WithFunc(hello).Export("hello"). + // Instantiate(ctx) + // + // # Notes + // + // - Closing the Runtime has the same effect as closing the result. + // - Fields in the builder are copied during instantiation: Later changes do not affect the instantiated result. + // - To avoid using configuration defaults, use Compile instead. 
+ Instantiate(context.Context) (api.Module, error) +} + +// hostModuleBuilder implements HostModuleBuilder +type hostModuleBuilder struct { + r *runtime + moduleName string + exportNames []string + nameToHostFunc map[string]*wasm.HostFunc +} + +// NewHostModuleBuilder implements Runtime.NewHostModuleBuilder +func (r *runtime) NewHostModuleBuilder(moduleName string) HostModuleBuilder { + return &hostModuleBuilder{ + r: r, + moduleName: moduleName, + nameToHostFunc: map[string]*wasm.HostFunc{}, + } +} + +// hostFunctionBuilder implements HostFunctionBuilder +type hostFunctionBuilder struct { + b *hostModuleBuilder + fn interface{} + name string + paramNames []string + resultNames []string +} + +// WithGoFunction implements HostFunctionBuilder.WithGoFunction +func (h *hostFunctionBuilder) WithGoFunction(fn api.GoFunction, params, results []api.ValueType) HostFunctionBuilder { + h.fn = &wasm.HostFunc{ParamTypes: params, ResultTypes: results, Code: wasm.Code{GoFunc: fn}} + return h +} + +// WithGoModuleFunction implements HostFunctionBuilder.WithGoModuleFunction +func (h *hostFunctionBuilder) WithGoModuleFunction(fn api.GoModuleFunction, params, results []api.ValueType) HostFunctionBuilder { + h.fn = &wasm.HostFunc{ParamTypes: params, ResultTypes: results, Code: wasm.Code{GoFunc: fn}} + return h +} + +// WithFunc implements HostFunctionBuilder.WithFunc +func (h *hostFunctionBuilder) WithFunc(fn interface{}) HostFunctionBuilder { + h.fn = fn + return h +} + +// WithName implements HostFunctionBuilder.WithName +func (h *hostFunctionBuilder) WithName(name string) HostFunctionBuilder { + h.name = name + return h +} + +// WithParameterNames implements HostFunctionBuilder.WithParameterNames +func (h *hostFunctionBuilder) WithParameterNames(names ...string) HostFunctionBuilder { + h.paramNames = names + return h +} + +// WithResultNames implements HostFunctionBuilder.WithResultNames +func (h *hostFunctionBuilder) WithResultNames(names ...string) HostFunctionBuilder { + 
h.resultNames = names + return h +} + +// Export implements HostFunctionBuilder.Export +func (h *hostFunctionBuilder) Export(exportName string) HostModuleBuilder { + var hostFn *wasm.HostFunc + if fn, ok := h.fn.(*wasm.HostFunc); ok { + hostFn = fn + } else { + hostFn = &wasm.HostFunc{Code: wasm.Code{GoFunc: h.fn}} + } + + // Assign any names from the builder + hostFn.ExportName = exportName + if h.name != "" { + hostFn.Name = h.name + } + if len(h.paramNames) != 0 { + hostFn.ParamNames = h.paramNames + } + if len(h.resultNames) != 0 { + hostFn.ResultNames = h.resultNames + } + + h.b.ExportHostFunc(hostFn) + return h.b +} + +// ExportHostFunc implements wasm.HostFuncExporter +func (b *hostModuleBuilder) ExportHostFunc(fn *wasm.HostFunc) { + if _, ok := b.nameToHostFunc[fn.ExportName]; !ok { // add a new name + b.exportNames = append(b.exportNames, fn.ExportName) + } + b.nameToHostFunc[fn.ExportName] = fn +} + +// NewFunctionBuilder implements HostModuleBuilder.NewFunctionBuilder +func (b *hostModuleBuilder) NewFunctionBuilder() HostFunctionBuilder { + return &hostFunctionBuilder{b: b} +} + +// Compile implements HostModuleBuilder.Compile +func (b *hostModuleBuilder) Compile(ctx context.Context) (CompiledModule, error) { + module, err := wasm.NewHostModule(b.moduleName, b.exportNames, b.nameToHostFunc, b.r.enabledFeatures) + if err != nil { + return nil, err + } else if err = module.Validate(b.r.enabledFeatures); err != nil { + return nil, err + } + + c := &compiledModule{module: module, compiledEngine: b.r.store.Engine} + listeners, err := buildFunctionListeners(ctx, module) + if err != nil { + return nil, err + } + + if err = b.r.store.Engine.CompileModule(ctx, module, listeners, false); err != nil { + return nil, err + } + + // typeIDs are static and compile-time known. 
+ typeIDs, err := b.r.store.GetFunctionTypeIDs(module.TypeSection) + if err != nil { + return nil, err + } + c.typeIDs = typeIDs + + return c, nil +} + +// Instantiate implements HostModuleBuilder.Instantiate +func (b *hostModuleBuilder) Instantiate(ctx context.Context) (api.Module, error) { + if compiled, err := b.Compile(ctx); err != nil { + return nil, err + } else { + compiled.(*compiledModule).closeWithModule = true + return b.r.InstantiateModule(ctx, compiled, NewModuleConfig()) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/cache.go b/vendor/github.com/tetratelabs/wazero/cache.go new file mode 100644 index 000000000..2d1b4e3b9 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/cache.go @@ -0,0 +1,116 @@ +package wazero + +import ( + "context" + "errors" + "fmt" + "os" + "path" + "path/filepath" + goruntime "runtime" + "sync" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/filecache" + "github.com/tetratelabs/wazero/internal/version" + "github.com/tetratelabs/wazero/internal/wasm" +) + +// CompilationCache reduces time spent compiling (Runtime.CompileModule) the same wasm module. +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +// - Instances of this can be reused across multiple runtimes, if configured +// via RuntimeConfig. +type CompilationCache interface{ api.Closer } + +// NewCompilationCache returns a new CompilationCache to be passed to RuntimeConfig. +// This configures only in-memory cache, and doesn't persist to the file system. See wazero.NewCompilationCacheWithDir for detail. +// +// The returned CompilationCache can be used to share the in-memory compilation results across multiple instances of wazero.Runtime. 
+func NewCompilationCache() CompilationCache { + return &cache{} +} + +// NewCompilationCacheWithDir is like wazero.NewCompilationCache except the result also writes +// state into the directory specified by `dirname` parameter. +// +// If the dirname doesn't exist, this creates it or returns an error. +// +// Those running wazero as a CLI or frequently restarting a process using the same wasm should +// use this feature to reduce time waiting to compile the same module a second time. +// +// The contents written into dirname are wazero-version specific, meaning different versions of +// wazero will duplicate entries for the same input wasm. +// +// Note: The embedder must safeguard this directory from external changes. +func NewCompilationCacheWithDir(dirname string) (CompilationCache, error) { + c := &cache{} + err := c.ensuresFileCache(dirname, version.GetWazeroVersion()) + return c, err +} + +// cache implements Cache interface. +type cache struct { + // eng is the engine for this cache. If the cache is configured, the engine is shared across multiple instances of + // Runtime, and its lifetime is not bound to them. Instead, the engine is alive until Cache.Close is called. + engs [engineKindCount]wasm.Engine + fileCache filecache.Cache + initOnces [engineKindCount]sync.Once +} + +func (c *cache) initEngine(ek engineKind, ne newEngine, ctx context.Context, features api.CoreFeatures) wasm.Engine { + c.initOnces[ek].Do(func() { c.engs[ek] = ne(ctx, features, c.fileCache) }) + return c.engs[ek] +} + +// Close implements the same method on the Cache interface. +func (c *cache) Close(_ context.Context) (err error) { + for _, eng := range c.engs { + if eng != nil { + if err = eng.Close(); err != nil { + return + } + } + } + return +} + +func (c *cache) ensuresFileCache(dir string, wazeroVersion string) error { + // Resolve a potentially relative directory into an absolute one. 
+ var err error + dir, err = filepath.Abs(dir) + if err != nil { + return err + } + + // Ensure the user-supplied directory. + if err = mkdir(dir); err != nil { + return err + } + + // Create a version-specific directory to avoid conflicts. + dirname := path.Join(dir, "wazero-"+wazeroVersion+"-"+goruntime.GOARCH+"-"+goruntime.GOOS) + if err = mkdir(dirname); err != nil { + return err + } + + c.fileCache = filecache.New(dirname) + return nil +} + +func mkdir(dirname string) error { + if st, err := os.Stat(dirname); errors.Is(err, os.ErrNotExist) { + // If the directory not found, create the cache dir. + if err = os.MkdirAll(dirname, 0o700); err != nil { + return fmt.Errorf("create directory %s: %v", dirname, err) + } + } else if err != nil { + return err + } else if !st.IsDir() { + return fmt.Errorf("%s is not dir", dirname) + } + return nil +} diff --git a/vendor/github.com/tetratelabs/wazero/codecov.yml b/vendor/github.com/tetratelabs/wazero/codecov.yml new file mode 100644 index 000000000..cf9d94df4 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/codecov.yml @@ -0,0 +1,9 @@ +# Codecov for main is visible here https://app.codecov.io/gh/tetratelabs/wazero + +# We use codecov only as a UI, so we disable PR comments and commit status. 
+# See https://docs.codecov.com/docs/pull-request-comments +comment: false +coverage: + status: + project: off + patch: off diff --git a/vendor/github.com/tetratelabs/wazero/config.go b/vendor/github.com/tetratelabs/wazero/config.go new file mode 100644 index 000000000..819a76df5 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/config.go @@ -0,0 +1,876 @@ +package wazero + +import ( + "context" + "errors" + "fmt" + "io" + "io/fs" + "math" + "net" + "time" + + "github.com/tetratelabs/wazero/api" + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/engine/interpreter" + "github.com/tetratelabs/wazero/internal/engine/wazevo" + "github.com/tetratelabs/wazero/internal/filecache" + "github.com/tetratelabs/wazero/internal/internalapi" + "github.com/tetratelabs/wazero/internal/platform" + internalsock "github.com/tetratelabs/wazero/internal/sock" + internalsys "github.com/tetratelabs/wazero/internal/sys" + "github.com/tetratelabs/wazero/internal/wasm" + "github.com/tetratelabs/wazero/sys" +) + +// RuntimeConfig controls runtime behavior, with the default implementation as +// NewRuntimeConfig +// +// The example below explicitly limits to Wasm Core 1.0 features as opposed to +// relying on defaults: +// +// rConfig = wazero.NewRuntimeConfig().WithCoreFeatures(api.CoreFeaturesV1) +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +// - RuntimeConfig is immutable. Each WithXXX function returns a new instance +// including the corresponding change. +type RuntimeConfig interface { + // WithCoreFeatures sets the WebAssembly Core specification features this + // runtime supports. Defaults to api.CoreFeaturesV2. 
+ // + // Example of disabling a specific feature: + // features := api.CoreFeaturesV2.SetEnabled(api.CoreFeatureMutableGlobal, false) + // rConfig = wazero.NewRuntimeConfig().WithCoreFeatures(features) + // + // # Why default to version 2.0? + // + // Many compilers that target WebAssembly require features after + // api.CoreFeaturesV1 by default. For example, TinyGo v0.24+ requires + // api.CoreFeatureBulkMemoryOperations. To avoid runtime errors, wazero + // defaults to api.CoreFeaturesV2, even though it is not yet a Web + // Standard (REC). + WithCoreFeatures(api.CoreFeatures) RuntimeConfig + + // WithMemoryLimitPages overrides the maximum pages allowed per memory. The + // default is 65536, allowing 4GB total memory per instance if the maximum is + // not encoded in a Wasm binary. Setting a value larger than default will panic. + // + // This example reduces the largest possible memory size from 4GB to 128KB: + // rConfig = wazero.NewRuntimeConfig().WithMemoryLimitPages(2) + // + // Note: Wasm has 32-bit memory and each page is 65536 (2^16) bytes. This + // implies a max of 65536 (2^16) addressable pages. + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem + WithMemoryLimitPages(memoryLimitPages uint32) RuntimeConfig + + // WithMemoryCapacityFromMax eagerly allocates max memory, unless max is + // not defined. The default is false, which means minimum memory is + // allocated and any call to grow memory results in re-allocations. + // + // This example ensures any memory.grow instruction will never re-allocate: + // rConfig = wazero.NewRuntimeConfig().WithMemoryCapacityFromMax(true) + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem + // + // Note: if the memory maximum is not encoded in a Wasm binary, this + // results in allocating 4GB. See the doc on WithMemoryLimitPages for detail. 
+ WithMemoryCapacityFromMax(memoryCapacityFromMax bool) RuntimeConfig + + // WithDebugInfoEnabled toggles DWARF based stack traces in the face of + // runtime errors. Defaults to true. + // + // Those who wish to disable this, can like so: + // + // r := wazero.NewRuntimeWithConfig(wazero.NewRuntimeConfig().WithDebugInfoEnabled(false) + // + // When disabled, a stack trace message looks like: + // + // wasm stack trace: + // .runtime._panic(i32) + // .myFunc() + // .main.main() + // .runtime.run() + // ._start() + // + // When enabled, the stack trace includes source code information: + // + // wasm stack trace: + // .runtime._panic(i32) + // 0x16e2: /opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/runtime_tinygowasm.go:73:6 + // .myFunc() + // 0x190b: /Users/XXXXX/wazero/internal/testing/dwarftestdata/testdata/main.go:19:7 + // .main.main() + // 0x18ed: /Users/XXXXX/wazero/internal/testing/dwarftestdata/testdata/main.go:4:3 + // .runtime.run() + // 0x18cc: /opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/scheduler_none.go:26:10 + // ._start() + // 0x18b6: /opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/runtime_wasm_wasi.go:22:5 + // + // Note: This only takes into effect when the original Wasm binary has the + // DWARF "custom sections" that are often stripped, depending on + // optimization flags passed to the compiler. + WithDebugInfoEnabled(bool) RuntimeConfig + + // WithCompilationCache configures how runtime caches the compiled modules. In the default configuration, compilation results are + // only in-memory until Runtime.Close is closed, and not shareable by multiple Runtime. + // + // Below defines the shared cache across multiple instances of Runtime: + // + // // Creates the new Cache and the runtime configuration with it. + // cache := wazero.NewCompilationCache() + // defer cache.Close() + // config := wazero.NewRuntimeConfig().WithCompilationCache(c) + // + // // Creates two runtimes while sharing compilation caches. 
+ // foo := wazero.NewRuntimeWithConfig(context.Background(), config) + // bar := wazero.NewRuntimeWithConfig(context.Background(), config) + // + // # Cache Key + // + // Cached files are keyed on the version of wazero. This is obtained from go.mod of your application, + // and we use it to verify the compatibility of caches against the currently-running wazero. + // However, if you use this in tests of a package not named as `main`, then wazero cannot obtain the correct + // version of wazero due to the known issue of debug.BuildInfo function: https://github.com/golang/go/issues/33976. + // As a consequence, your cache won't contain the correct version information and always be treated as `dev` version. + // To avoid this issue, you can pass -ldflags "-X github.com/tetratelabs/wazero/internal/version.version=foo" when running tests. + WithCompilationCache(CompilationCache) RuntimeConfig + + // WithCustomSections toggles parsing of "custom sections". Defaults to false. + // + // When enabled, it is possible to retrieve custom sections from a CompiledModule: + // + // config := wazero.NewRuntimeConfig().WithCustomSections(true) + // r := wazero.NewRuntimeWithConfig(ctx, config) + // c, err := r.CompileModule(ctx, wasm) + // customSections := c.CustomSections() + WithCustomSections(bool) RuntimeConfig + + // WithCloseOnContextDone ensures the executions of functions to be closed under one of the following circumstances: + // + // - context.Context passed to the Call method of api.Function is canceled during execution. (i.e. ctx by context.WithCancel) + // - context.Context passed to the Call method of api.Function reaches timeout during execution. (i.e. ctx by context.WithTimeout or context.WithDeadline) + // - Close or CloseWithExitCode of api.Module is explicitly called during execution. 
+ // + // This is especially useful when one wants to run untrusted Wasm binaries since otherwise, any invocation of + // api.Function can potentially block the corresponding Goroutine forever. Moreover, it might block the + // entire underlying OS thread which runs the api.Function call. See "Why it's safe to execute runtime-generated + // machine codes against async Goroutine preemption" section in RATIONALE.md for detail. + // + // Note that this comes with a bit of extra cost when enabled. The reason is that internally this forces + // interpreter and compiler runtimes to insert the periodical checks on the conditions above. For that reason, + // this is disabled by default. + // + // See examples in context_done_example_test.go for the end-to-end demonstrations. + // + // When the invocations of api.Function are closed due to this, sys.ExitError is raised to the callers and + // the api.Module from which the functions are derived is made closed. + WithCloseOnContextDone(bool) RuntimeConfig +} + +// NewRuntimeConfig returns a RuntimeConfig using the compiler if it is supported in this environment, +// or the interpreter otherwise. +func NewRuntimeConfig() RuntimeConfig { + return newRuntimeConfig() +} + +type newEngine func(context.Context, api.CoreFeatures, filecache.Cache) wasm.Engine + +type runtimeConfig struct { + enabledFeatures api.CoreFeatures + memoryLimitPages uint32 + memoryCapacityFromMax bool + engineKind engineKind + dwarfDisabled bool // negative as defaults to enabled + newEngine newEngine + cache CompilationCache + storeCustomSections bool + ensureTermination bool +} + +// engineLessConfig helps avoid copy/pasting the wrong defaults. 
+var engineLessConfig = &runtimeConfig{ + enabledFeatures: api.CoreFeaturesV2, + memoryLimitPages: wasm.MemoryLimitPages, + memoryCapacityFromMax: false, + dwarfDisabled: false, +} + +type engineKind int + +const ( + engineKindCompiler engineKind = iota + engineKindInterpreter + engineKindCount +) + +// NewRuntimeConfigCompiler compiles WebAssembly modules into +// runtime.GOARCH-specific assembly for optimal performance. +// +// The default implementation is AOT (Ahead of Time) compilation, applied at +// Runtime.CompileModule. This allows consistent runtime performance, as well +// the ability to reduce any first request penalty. +// +// Note: While this is technically AOT, this does not imply any action on your +// part. wazero automatically performs ahead-of-time compilation as needed when +// Runtime.CompileModule is invoked. +// +// Warning: This panics at runtime if the runtime.GOOS or runtime.GOARCH does not +// support compiler. Use NewRuntimeConfig to safely detect and fallback to +// NewRuntimeConfigInterpreter if needed. +func NewRuntimeConfigCompiler() RuntimeConfig { + ret := engineLessConfig.clone() + ret.engineKind = engineKindCompiler + ret.newEngine = wazevo.NewEngine + return ret +} + +// NewRuntimeConfigInterpreter interprets WebAssembly modules instead of compiling them into assembly. +func NewRuntimeConfigInterpreter() RuntimeConfig { + ret := engineLessConfig.clone() + ret.engineKind = engineKindInterpreter + ret.newEngine = interpreter.NewEngine + return ret +} + +// clone makes a deep copy of this runtime config. 
+func (c *runtimeConfig) clone() *runtimeConfig { + ret := *c // copy except maps which share a ref + return &ret +} + +// WithCoreFeatures implements RuntimeConfig.WithCoreFeatures +func (c *runtimeConfig) WithCoreFeatures(features api.CoreFeatures) RuntimeConfig { + ret := c.clone() + ret.enabledFeatures = features + return ret +} + +// WithCloseOnContextDone implements RuntimeConfig.WithCloseOnContextDone +func (c *runtimeConfig) WithCloseOnContextDone(ensure bool) RuntimeConfig { + ret := c.clone() + ret.ensureTermination = ensure + return ret +} + +// WithMemoryLimitPages implements RuntimeConfig.WithMemoryLimitPages +func (c *runtimeConfig) WithMemoryLimitPages(memoryLimitPages uint32) RuntimeConfig { + ret := c.clone() + // This panics instead of returning an error as it is unlikely. + if memoryLimitPages > wasm.MemoryLimitPages { + panic(fmt.Errorf("memoryLimitPages invalid: %d > %d", memoryLimitPages, wasm.MemoryLimitPages)) + } + ret.memoryLimitPages = memoryLimitPages + return ret +} + +// WithCompilationCache implements RuntimeConfig.WithCompilationCache +func (c *runtimeConfig) WithCompilationCache(ca CompilationCache) RuntimeConfig { + ret := c.clone() + ret.cache = ca + return ret +} + +// WithMemoryCapacityFromMax implements RuntimeConfig.WithMemoryCapacityFromMax +func (c *runtimeConfig) WithMemoryCapacityFromMax(memoryCapacityFromMax bool) RuntimeConfig { + ret := c.clone() + ret.memoryCapacityFromMax = memoryCapacityFromMax + return ret +} + +// WithDebugInfoEnabled implements RuntimeConfig.WithDebugInfoEnabled +func (c *runtimeConfig) WithDebugInfoEnabled(dwarfEnabled bool) RuntimeConfig { + ret := c.clone() + ret.dwarfDisabled = !dwarfEnabled + return ret +} + +// WithCustomSections implements RuntimeConfig.WithCustomSections +func (c *runtimeConfig) WithCustomSections(storeCustomSections bool) RuntimeConfig { + ret := c.clone() + ret.storeCustomSections = storeCustomSections + return ret +} + +// CompiledModule is a WebAssembly module ready to 
be instantiated (Runtime.InstantiateModule) as an api.Module. +// +// In WebAssembly terminology, this is a decoded, validated, and possibly also compiled module. wazero avoids using +// the name "Module" for both before and after instantiation as the name conflation has caused confusion. +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#semantic-phases%E2%91%A0 +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +// - Closing the wazero.Runtime closes any CompiledModule it compiled. +type CompiledModule interface { + // Name returns the module name encoded into the binary or empty if not. + Name() string + + // ImportedFunctions returns all the imported functions + // (api.FunctionDefinition) in this module or nil if there are none. + // + // Note: Unlike ExportedFunctions, there is no unique constraint on + // imports. + ImportedFunctions() []api.FunctionDefinition + + // ExportedFunctions returns all the exported functions + // (api.FunctionDefinition) in this module keyed on export name. + ExportedFunctions() map[string]api.FunctionDefinition + + // ImportedMemories returns all the imported memories + // (api.MemoryDefinition) in this module or nil if there are none. + // + // ## Notes + // - As of WebAssembly Core Specification 2.0, there can be at most one + // memory. + // - Unlike ExportedMemories, there is no unique constraint on imports. + ImportedMemories() []api.MemoryDefinition + + // ExportedMemories returns all the exported memories + // (api.MemoryDefinition) in this module keyed on export name. + // + // Note: As of WebAssembly Core Specification 2.0, there can be at most one + // memory. + ExportedMemories() map[string]api.MemoryDefinition + + // CustomSections returns all the custom sections + // (api.CustomSection) in this module keyed on the section name. 
+ CustomSections() []api.CustomSection + + // Close releases all the allocated resources for this CompiledModule. + // + // Note: It is safe to call Close while having outstanding calls from an + // api.Module instantiated from this. + Close(context.Context) error +} + +// compile-time check to ensure compiledModule implements CompiledModule +var _ CompiledModule = &compiledModule{} + +type compiledModule struct { + module *wasm.Module + // compiledEngine holds an engine on which `module` is compiled. + compiledEngine wasm.Engine + // closeWithModule prevents leaking compiled code when a module is compiled implicitly. + closeWithModule bool + typeIDs []wasm.FunctionTypeID +} + +// Name implements CompiledModule.Name +func (c *compiledModule) Name() (moduleName string) { + if ns := c.module.NameSection; ns != nil { + moduleName = ns.ModuleName + } + return +} + +// Close implements CompiledModule.Close +func (c *compiledModule) Close(context.Context) error { + c.compiledEngine.DeleteCompiledModule(c.module) + // It is possible the underlying may need to return an error later, but in any case this matches api.Module.Close. 
+ return nil +} + +// ImportedFunctions implements CompiledModule.ImportedFunctions +func (c *compiledModule) ImportedFunctions() []api.FunctionDefinition { + return c.module.ImportedFunctions() +} + +// ExportedFunctions implements CompiledModule.ExportedFunctions +func (c *compiledModule) ExportedFunctions() map[string]api.FunctionDefinition { + return c.module.ExportedFunctions() +} + +// ImportedMemories implements CompiledModule.ImportedMemories +func (c *compiledModule) ImportedMemories() []api.MemoryDefinition { + return c.module.ImportedMemories() +} + +// ExportedMemories implements CompiledModule.ExportedMemories +func (c *compiledModule) ExportedMemories() map[string]api.MemoryDefinition { + return c.module.ExportedMemories() +} + +// CustomSections implements CompiledModule.CustomSections +func (c *compiledModule) CustomSections() []api.CustomSection { + ret := make([]api.CustomSection, len(c.module.CustomSections)) + for i, d := range c.module.CustomSections { + ret[i] = &customSection{data: d.Data, name: d.Name} + } + return ret +} + +// customSection implements wasm.CustomSection +type customSection struct { + internalapi.WazeroOnlyType + name string + data []byte +} + +// Name implements wasm.CustomSection.Name +func (c *customSection) Name() string { + return c.name +} + +// Data implements wasm.CustomSection.Data +func (c *customSection) Data() []byte { + return c.data +} + +// ModuleConfig configures resources needed by functions that have low-level interactions with the host operating +// system. Using this, resources such as STDIN can be isolated, so that the same module can be safely instantiated +// multiple times. 
+// +// Here's an example: +// +// // Initialize base configuration: +// config := wazero.NewModuleConfig().WithStdout(buf).WithSysNanotime() +// +// // Assign different configuration on each instantiation +// mod, _ := r.InstantiateModule(ctx, compiled, config.WithName("rotate").WithArgs("rotate", "angle=90", "dir=cw")) +// +// While wazero supports Windows as a platform, host functions using ModuleConfig follow a UNIX dialect. +// See RATIONALE.md for design background and relationship to WebAssembly System Interfaces (WASI). +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +// - ModuleConfig is immutable. Each WithXXX function returns a new instance +// including the corresponding change. +type ModuleConfig interface { + // WithArgs assigns command-line arguments visible to an imported function that reads an arg vector (argv). Defaults to + // none. Runtime.InstantiateModule errs if any arg is empty. + // + // These values are commonly read by the functions like "args_get" in "wasi_snapshot_preview1" although they could be + // read by functions imported from other modules. + // + // Similar to os.Args and exec.Cmd Env, many implementations would expect a program name to be argv[0]. However, neither + // WebAssembly nor WebAssembly System Interfaces (WASI) define this. Regardless, you may choose to set the first + // argument to the same value set via WithName. + // + // Note: This does not default to os.Args as that violates sandboxing. + // + // See https://linux.die.net/man/3/argv and https://en.wikipedia.org/wiki/Null-terminated_string + WithArgs(...string) ModuleConfig + + // WithEnv sets an environment variable visible to a Module that imports functions. Defaults to none. + // Runtime.InstantiateModule errs if the key is empty or contains a NULL(0) or equals("") character. + // + // Validation is the same as os.Setenv on Linux and replaces any existing value. 
Unlike exec.Cmd Env, this does not + // default to the current process environment as that would violate sandboxing. This also does not preserve order. + // + // Environment variables are commonly read by the functions like "environ_get" in "wasi_snapshot_preview1" although + // they could be read by functions imported from other modules. + // + // While similar to process configuration, there are no assumptions that can be made about anything OS-specific. For + // example, neither WebAssembly nor WebAssembly System Interfaces (WASI) define concerns processes have, such as + // case-sensitivity on environment keys. For portability, define entries with case-insensitively unique keys. + // + // See https://linux.die.net/man/3/environ and https://en.wikipedia.org/wiki/Null-terminated_string + WithEnv(key, value string) ModuleConfig + + // WithFS is a convenience that calls WithFSConfig with an FSConfig of the + // input for the root ("/") guest path. + WithFS(fs.FS) ModuleConfig + + // WithFSConfig configures the filesystem available to each guest + // instantiated with this configuration. By default, no file access is + // allowed, so functions like `path_open` result in unsupported errors + // (e.g. syscall.ENOSYS). + WithFSConfig(FSConfig) ModuleConfig + + // WithName configures the module name. Defaults to what was decoded from + // the name section. Empty string ("") clears any name. + WithName(string) ModuleConfig + + // WithStartFunctions configures the functions to call after the module is + // instantiated. Defaults to "_start". + // + // Clearing the default is supported, via `WithStartFunctions()`. + // + // # Notes + // + // - If a start function doesn't exist, it is skipped. However, any that + // do exist are called in order. + // - Start functions are not intended to be called multiple times. + // Functions that should be called multiple times should be invoked + // manually via api.Module's `ExportedFunction` method. 
+ // - Start functions commonly exit the module during instantiation, + // preventing use of any functions later. This is the case in "wasip1", + // which defines the default value "_start". + // - See /RATIONALE.md for motivation of this feature. + WithStartFunctions(...string) ModuleConfig + + // WithStderr configures where standard error (file descriptor 2) is written. Defaults to io.Discard. + // + // This writer is most commonly used by the functions like "fd_write" in "wasi_snapshot_preview1" although it could + // be used by functions imported from other modules. + // + // # Notes + // + // - The caller is responsible to close any io.Writer they supply: It is not closed on api.Module Close. + // - This does not default to os.Stderr as that both violates sandboxing and prevents concurrent modules. + // + // See https://linux.die.net/man/3/stderr + WithStderr(io.Writer) ModuleConfig + + // WithStdin configures where standard input (file descriptor 0) is read. Defaults to return io.EOF. + // + // This reader is most commonly used by the functions like "fd_read" in "wasi_snapshot_preview1" although it could + // be used by functions imported from other modules. + // + // # Notes + // + // - The caller is responsible to close any io.Reader they supply: It is not closed on api.Module Close. + // - This does not default to os.Stdin as that both violates sandboxing and prevents concurrent modules. + // + // See https://linux.die.net/man/3/stdin + WithStdin(io.Reader) ModuleConfig + + // WithStdout configures where standard output (file descriptor 1) is written. Defaults to io.Discard. + // + // This writer is most commonly used by the functions like "fd_write" in "wasi_snapshot_preview1" although it could + // be used by functions imported from other modules. + // + // # Notes + // + // - The caller is responsible to close any io.Writer they supply: It is not closed on api.Module Close. 
+ // - This does not default to os.Stdout as that both violates sandboxing and prevents concurrent modules. + // + // See https://linux.die.net/man/3/stdout + WithStdout(io.Writer) ModuleConfig + + // WithWalltime configures the wall clock, sometimes referred to as the + // real time clock. sys.Walltime returns the current unix/epoch time, + // seconds since midnight UTC 1 January 1970, with a nanosecond fraction. + // This defaults to a fake result that increases by 1ms on each reading. + // + // Here's an example that uses a custom clock: + // moduleConfig = moduleConfig. + // WithWalltime(func(context.Context) (sec int64, nsec int32) { + // return clock.walltime() + // }, sys.ClockResolution(time.Microsecond.Nanoseconds())) + // + // # Notes: + // - This does not default to time.Now as that violates sandboxing. + // - This is used to implement host functions such as WASI + // `clock_time_get` with the `realtime` clock ID. + // - Use WithSysWalltime for a usable implementation. + WithWalltime(sys.Walltime, sys.ClockResolution) ModuleConfig + + // WithSysWalltime uses time.Now for sys.Walltime with a resolution of 1us + // (1000ns). + // + // See WithWalltime + WithSysWalltime() ModuleConfig + + // WithNanotime configures the monotonic clock, used to measure elapsed + // time in nanoseconds. Defaults to a fake result that increases by 1ms + // on each reading. + // + // Here's an example that uses a custom clock: + // moduleConfig = moduleConfig. + // WithNanotime(func(context.Context) int64 { + // return clock.nanotime() + // }, sys.ClockResolution(time.Microsecond.Nanoseconds())) + // + // # Notes: + // - This does not default to time.Since as that violates sandboxing. + // - This is used to implement host functions such as WASI + // `clock_time_get` with the `monotonic` clock ID. + // - Some compilers implement sleep by looping on sys.Nanotime (e.g. Go). + // - If you set this, you should probably set WithNanosleep also. 
+ // - Use WithSysNanotime for a usable implementation. + WithNanotime(sys.Nanotime, sys.ClockResolution) ModuleConfig + + // WithSysNanotime uses time.Now for sys.Nanotime with a resolution of 1us. + // + // See WithNanotime + WithSysNanotime() ModuleConfig + + // WithNanosleep configures the how to pause the current goroutine for at + // least the configured nanoseconds. Defaults to return immediately. + // + // This example uses a custom sleep function: + // moduleConfig = moduleConfig. + // WithNanosleep(func(ns int64) { + // rel := unix.NsecToTimespec(ns) + // remain := unix.Timespec{} + // for { // loop until no more time remaining + // err := unix.ClockNanosleep(unix.CLOCK_MONOTONIC, 0, &rel, &remain) + // --snip-- + // + // # Notes: + // - This does not default to time.Sleep as that violates sandboxing. + // - This is used to implement host functions such as WASI `poll_oneoff`. + // - Some compilers implement sleep by looping on sys.Nanotime (e.g. Go). + // - If you set this, you should probably set WithNanotime also. + // - Use WithSysNanosleep for a usable implementation. + WithNanosleep(sys.Nanosleep) ModuleConfig + + // WithOsyield yields the processor, typically to implement spin-wait + // loops. Defaults to return immediately. + // + // # Notes: + // - This primarily supports `sched_yield` in WASI + // - This does not default to runtime.osyield as that violates sandboxing. + WithOsyield(sys.Osyield) ModuleConfig + + // WithSysNanosleep uses time.Sleep for sys.Nanosleep. + // + // See WithNanosleep + WithSysNanosleep() ModuleConfig + + // WithRandSource configures a source of random bytes. Defaults to return a + // deterministic source. You might override this with crypto/rand.Reader + // + // This reader is most commonly used by the functions like "random_get" in + // "wasi_snapshot_preview1", "seed" in AssemblyScript standard "env", and + // "getRandomData" when runtime.GOOS is "js". 
+ // + // Note: The caller is responsible to close any io.Reader they supply: It + // is not closed on api.Module Close. + WithRandSource(io.Reader) ModuleConfig +} + +type moduleConfig struct { + name string + nameSet bool + startFunctions []string + stdin io.Reader + stdout io.Writer + stderr io.Writer + randSource io.Reader + walltime sys.Walltime + walltimeResolution sys.ClockResolution + nanotime sys.Nanotime + nanotimeResolution sys.ClockResolution + nanosleep sys.Nanosleep + osyield sys.Osyield + args [][]byte + // environ is pair-indexed to retain order similar to os.Environ. + environ [][]byte + // environKeys allow overwriting of existing values. + environKeys map[string]int + // fsConfig is the file system configuration for ABI like WASI. + fsConfig FSConfig + // sockConfig is the network listener configuration for ABI like WASI. + sockConfig *internalsock.Config +} + +// NewModuleConfig returns a ModuleConfig that can be used for configuring module instantiation. +func NewModuleConfig() ModuleConfig { + return &moduleConfig{ + startFunctions: []string{"_start"}, + environKeys: map[string]int{}, + } +} + +// clone makes a deep copy of this module config. 
+func (c *moduleConfig) clone() *moduleConfig { + ret := *c // copy except maps which share a ref + ret.environKeys = make(map[string]int, len(c.environKeys)) + for key, value := range c.environKeys { + ret.environKeys[key] = value + } + return &ret +} + +// WithArgs implements ModuleConfig.WithArgs +func (c *moduleConfig) WithArgs(args ...string) ModuleConfig { + ret := c.clone() + ret.args = toByteSlices(args) + return ret +} + +func toByteSlices(strings []string) (result [][]byte) { + if len(strings) == 0 { + return + } + result = make([][]byte, len(strings)) + for i, a := range strings { + result[i] = []byte(a) + } + return +} + +// WithEnv implements ModuleConfig.WithEnv +func (c *moduleConfig) WithEnv(key, value string) ModuleConfig { + ret := c.clone() + // Check to see if this key already exists and update it. + if i, ok := ret.environKeys[key]; ok { + ret.environ[i+1] = []byte(value) // environ is pair-indexed, so the value is 1 after the key. + } else { + ret.environKeys[key] = len(ret.environ) + ret.environ = append(ret.environ, []byte(key), []byte(value)) + } + return ret +} + +// WithFS implements ModuleConfig.WithFS +func (c *moduleConfig) WithFS(fs fs.FS) ModuleConfig { + var config FSConfig + if fs != nil { + config = NewFSConfig().WithFSMount(fs, "") + } + return c.WithFSConfig(config) +} + +// WithFSConfig implements ModuleConfig.WithFSConfig +func (c *moduleConfig) WithFSConfig(config FSConfig) ModuleConfig { + ret := c.clone() + ret.fsConfig = config + return ret +} + +// WithName implements ModuleConfig.WithName +func (c *moduleConfig) WithName(name string) ModuleConfig { + ret := c.clone() + ret.nameSet = true + ret.name = name + return ret +} + +// WithStartFunctions implements ModuleConfig.WithStartFunctions +func (c *moduleConfig) WithStartFunctions(startFunctions ...string) ModuleConfig { + ret := c.clone() + ret.startFunctions = startFunctions + return ret +} + +// WithStderr implements ModuleConfig.WithStderr +func (c *moduleConfig) 
WithStderr(stderr io.Writer) ModuleConfig { + ret := c.clone() + ret.stderr = stderr + return ret +} + +// WithStdin implements ModuleConfig.WithStdin +func (c *moduleConfig) WithStdin(stdin io.Reader) ModuleConfig { + ret := c.clone() + ret.stdin = stdin + return ret +} + +// WithStdout implements ModuleConfig.WithStdout +func (c *moduleConfig) WithStdout(stdout io.Writer) ModuleConfig { + ret := c.clone() + ret.stdout = stdout + return ret +} + +// WithWalltime implements ModuleConfig.WithWalltime +func (c *moduleConfig) WithWalltime(walltime sys.Walltime, resolution sys.ClockResolution) ModuleConfig { + ret := c.clone() + ret.walltime = walltime + ret.walltimeResolution = resolution + return ret +} + +// We choose arbitrary resolutions here because there's no perfect alternative. For example, according to the +// source in time.go, windows monotonic resolution can be 15ms. This chooses arbitrarily 1us for wall time and +// 1ns for monotonic. See RATIONALE.md for more context. + +// WithSysWalltime implements ModuleConfig.WithSysWalltime +func (c *moduleConfig) WithSysWalltime() ModuleConfig { + return c.WithWalltime(platform.Walltime, sys.ClockResolution(time.Microsecond.Nanoseconds())) +} + +// WithNanotime implements ModuleConfig.WithNanotime +func (c *moduleConfig) WithNanotime(nanotime sys.Nanotime, resolution sys.ClockResolution) ModuleConfig { + ret := c.clone() + ret.nanotime = nanotime + ret.nanotimeResolution = resolution + return ret +} + +// WithSysNanotime implements ModuleConfig.WithSysNanotime +func (c *moduleConfig) WithSysNanotime() ModuleConfig { + return c.WithNanotime(platform.Nanotime, sys.ClockResolution(1)) +} + +// WithNanosleep implements ModuleConfig.WithNanosleep +func (c *moduleConfig) WithNanosleep(nanosleep sys.Nanosleep) ModuleConfig { + ret := *c // copy + ret.nanosleep = nanosleep + return &ret +} + +// WithOsyield implements ModuleConfig.WithOsyield +func (c *moduleConfig) WithOsyield(osyield sys.Osyield) ModuleConfig { + ret := 
*c // copy + ret.osyield = osyield + return &ret +} + +// WithSysNanosleep implements ModuleConfig.WithSysNanosleep +func (c *moduleConfig) WithSysNanosleep() ModuleConfig { + return c.WithNanosleep(platform.Nanosleep) +} + +// WithRandSource implements ModuleConfig.WithRandSource +func (c *moduleConfig) WithRandSource(source io.Reader) ModuleConfig { + ret := c.clone() + ret.randSource = source + return ret +} + +// toSysContext creates a baseline wasm.Context configured by ModuleConfig. +func (c *moduleConfig) toSysContext() (sysCtx *internalsys.Context, err error) { + var environ [][]byte // Intentionally doesn't pre-allocate to reduce logic to default to nil. + // Same validation as syscall.Setenv for Linux + for i := 0; i < len(c.environ); i += 2 { + key, value := c.environ[i], c.environ[i+1] + keyLen := len(key) + if keyLen == 0 { + err = errors.New("environ invalid: empty key") + return + } + valueLen := len(value) + result := make([]byte, keyLen+valueLen+1) + j := 0 + for ; j < keyLen; j++ { + if k := key[j]; k == '=' { // NUL enforced in NewContext + err = errors.New("environ invalid: key contains '=' character") + return + } else { + result[j] = k + } + } + result[j] = '=' + copy(result[j+1:], value) + environ = append(environ, result) + } + + var fs []experimentalsys.FS + var guestPaths []string + if f, ok := c.fsConfig.(*fsConfig); ok { + fs, guestPaths = f.preopens() + } + + var listeners []*net.TCPListener + if n := c.sockConfig; n != nil { + if listeners, err = n.BuildTCPListeners(); err != nil { + return + } + } + + return internalsys.NewContext( + math.MaxUint32, + c.args, + environ, + c.stdin, + c.stdout, + c.stderr, + c.randSource, + c.walltime, c.walltimeResolution, + c.nanotime, c.nanotimeResolution, + c.nanosleep, c.osyield, + fs, guestPaths, + listeners, + ) +} diff --git a/vendor/github.com/tetratelabs/wazero/config_supported.go b/vendor/github.com/tetratelabs/wazero/config_supported.go new file mode 100644 index 000000000..eb31ab935 --- 
/dev/null +++ b/vendor/github.com/tetratelabs/wazero/config_supported.go @@ -0,0 +1,14 @@ +// Note: The build constraints here are about the compiler, which is more +// narrow than the architectures supported by the assembler. +// +// Constraints here must match platform.CompilerSupported. +// +// Meanwhile, users who know their runtime.GOOS can operate with the compiler +// may choose to use NewRuntimeConfigCompiler explicitly. +//go:build (amd64 || arm64) && (darwin || linux || freebsd || windows) + +package wazero + +func newRuntimeConfig() RuntimeConfig { + return NewRuntimeConfigCompiler() +} diff --git a/vendor/github.com/tetratelabs/wazero/config_unsupported.go b/vendor/github.com/tetratelabs/wazero/config_unsupported.go new file mode 100644 index 000000000..3e5a53cda --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/config_unsupported.go @@ -0,0 +1,8 @@ +// This is the opposite constraint of config_supported.go +//go:build !(amd64 || arm64) || !(darwin || linux || freebsd || windows) + +package wazero + +func newRuntimeConfig() RuntimeConfig { + return NewRuntimeConfigInterpreter() +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go new file mode 100644 index 000000000..443c5a294 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go @@ -0,0 +1,48 @@ +package experimental + +import ( + "context" + + "github.com/tetratelabs/wazero/internal/expctxkeys" +) + +// Snapshot holds the execution state at the time of a Snapshotter.Snapshot call. +type Snapshot interface { + // Restore sets the Wasm execution state to the capture. Because a host function + // calling this is resetting the pointer to the executation stack, the host function + // will not be able to return values in the normal way. ret is a slice of values the + // host function intends to return from the restored function. 
+ Restore(ret []uint64) +} + +// Snapshotter allows host functions to snapshot the WebAssembly execution environment. +type Snapshotter interface { + // Snapshot captures the current execution state. + Snapshot() Snapshot +} + +// EnableSnapshotterKey is a context key to indicate that snapshotting should be enabled. +// The context.Context passed to a exported function invocation should have this key set +// to a non-nil value, and host functions will be able to retrieve it using SnapshotterKey. +// +// Deprecated: use WithSnapshotter to enable snapshots. +type EnableSnapshotterKey = expctxkeys.EnableSnapshotterKey + +// WithSnapshotter enables snapshots. +// Passing the returned context to a exported function invocation enables snapshots, +// and allows host functions to retrieve the Snapshotter using GetSnapshotter. +func WithSnapshotter(ctx context.Context) context.Context { + return context.WithValue(ctx, expctxkeys.EnableSnapshotterKey{}, struct{}{}) +} + +// SnapshotterKey is a context key to access a Snapshotter from a host function. +// It is only present if EnableSnapshotter was set in the function invocation context. +// +// Deprecated: use GetSnapshotter to get the snapshotter. +type SnapshotterKey = expctxkeys.SnapshotterKey + +// GetSnapshotter gets the Snapshotter from a host function. +// It is only present if WithSnapshotter was called with the function invocation context. +func GetSnapshotter(ctx context.Context) Snapshotter { + return ctx.Value(expctxkeys.SnapshotterKey{}).(Snapshotter) +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/close.go b/vendor/github.com/tetratelabs/wazero/experimental/close.go new file mode 100644 index 000000000..babecaec4 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/close.go @@ -0,0 +1,63 @@ +package experimental + +import ( + "context" + + "github.com/tetratelabs/wazero/internal/expctxkeys" +) + +// CloseNotifier is a notification hook, invoked when a module is closed. 
+// +// Note: This is experimental progress towards #1197, and likely to change. Do +// not expose this in shared libraries as it can cause version locks. +type CloseNotifier interface { + // CloseNotify is a notification that occurs *before* an api.Module is + // closed. `exitCode` is zero on success or in the case there was no exit + // code. + // + // Notes: + // - This does not return an error because the module will be closed + // unconditionally. + // - Do not panic from this function as it doing so could cause resource + // leaks. + // - While this is only called once per module, if configured for + // multiple modules, it will be called for each, e.g. on runtime close. + CloseNotify(ctx context.Context, exitCode uint32) +} + +// ^-- Note: This might need to be a part of the listener or become a part of +// host state implementation. For example, if this is used to implement state +// cleanup for host modules, possibly something like below would be better, as +// it could be implemented in a way that allows concurrent module use. +// +// // key is like a context key, stateFactory is invoked per instantiate and +// // is associated with the key (exposed as `Module.State` similar to go +// // context). Using a key is better than the module name because we can +// // de-dupe it for host modules that can be instantiated into different +// // names. Also, you can make the key package private. +// HostModuleBuilder.WithState(key any, stateFactory func() Cleanup)` +// +// Such a design could work to isolate state only needed for wasip1, for +// example the dirent cache. However, if end users use this for different +// things, we may need separate designs. +// +// In summary, the purpose of this iteration is to identify projects that +// would use something like this, and then we can figure out which way it +// should go. + +// CloseNotifyFunc is a convenience for defining inlining a CloseNotifier. 
+type CloseNotifyFunc func(ctx context.Context, exitCode uint32) + +// CloseNotify implements CloseNotifier.CloseNotify. +func (f CloseNotifyFunc) CloseNotify(ctx context.Context, exitCode uint32) { + f(ctx, exitCode) +} + +// WithCloseNotifier registers the given CloseNotifier into the given +// context.Context. +func WithCloseNotifier(ctx context.Context, notifier CloseNotifier) context.Context { + if notifier != nil { + return context.WithValue(ctx, expctxkeys.CloseNotifierKey{}, notifier) + } + return ctx +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/experimental.go b/vendor/github.com/tetratelabs/wazero/experimental/experimental.go new file mode 100644 index 000000000..63fd564da --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/experimental.go @@ -0,0 +1,41 @@ +// Package experimental includes features we aren't yet sure about. These are enabled with context.Context keys. +// +// Note: All features here may be changed or deleted at any time, so use with caution! +package experimental + +import ( + "github.com/tetratelabs/wazero/api" +) + +// InternalModule is an api.Module that exposes additional +// information. +type InternalModule interface { + api.Module + + // NumGlobal returns the count of all globals in the module. + NumGlobal() int + + // Global provides a read-only view for a given global index. + // + // The methods panics if i is out of bounds. + Global(i int) api.Global +} + +// ProgramCounter is an opaque value representing a specific execution point in +// a module. It is meant to be used with Function.SourceOffsetForPC and +// StackIterator. +type ProgramCounter uint64 + +// InternalFunction exposes some information about a function instance. +type InternalFunction interface { + // Definition provides introspection into the function's names and + // signature. 
+ Definition() api.FunctionDefinition + + // SourceOffsetForPC resolves a program counter into its corresponding + // offset in the Code section of the module this function belongs to. + // The source offset is meant to help map the function calls to their + // location in the original source files. Returns 0 if the offset cannot + // be calculated. + SourceOffsetForPC(pc ProgramCounter) uint64 +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/features.go b/vendor/github.com/tetratelabs/wazero/experimental/features.go new file mode 100644 index 000000000..b2a5b9069 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/features.go @@ -0,0 +1,15 @@ +package experimental + +import "github.com/tetratelabs/wazero/api" + +// CoreFeaturesThreads enables threads instructions ("threads"). +// +// # Notes +// +// - The instruction list is too long to enumerate in godoc. +// See https://github.com/WebAssembly/threads/blob/main/proposals/threads/Overview.md +// - Atomic operations are guest-only until api.Memory or otherwise expose them to host functions. +// - On systems without mmap available, the memory will pre-allocate to the maximum size. Many +// binaries will use a theroetical maximum like 4GB, so if using such a binary on a system +// without mmap, consider editing the binary to reduce the max size setting of memory. +const CoreFeaturesThreads = api.CoreFeatureSIMD << 1 diff --git a/vendor/github.com/tetratelabs/wazero/experimental/listener.go b/vendor/github.com/tetratelabs/wazero/experimental/listener.go new file mode 100644 index 000000000..b2ba1fe83 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/listener.go @@ -0,0 +1,330 @@ +package experimental + +import ( + "context" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/expctxkeys" +) + +// StackIterator allows iterating on each function of the call stack, starting +// from the top. 
At least one call to Next() is required to start the iteration. +// +// Note: The iterator provides a view of the call stack at the time of +// iteration. As a result, parameter values may be different than the ones their +// function was called with. +type StackIterator interface { + // Next moves the iterator to the next function in the stack. Returns + // false if it reached the bottom of the stack. + Next() bool + // Function describes the function called by the current frame. + Function() InternalFunction + // ProgramCounter returns the program counter associated with the + // function call. + ProgramCounter() ProgramCounter +} + +// FunctionListenerFactoryKey is a context.Context Value key. +// Its associated value should be a FunctionListenerFactory. +// +// Deprecated: use WithFunctionListenerFactory to enable snapshots. +type FunctionListenerFactoryKey = expctxkeys.FunctionListenerFactoryKey + +// WithFunctionListenerFactory registers a FunctionListenerFactory +// with the context. +func WithFunctionListenerFactory(ctx context.Context, factory FunctionListenerFactory) context.Context { + return context.WithValue(ctx, expctxkeys.FunctionListenerFactoryKey{}, factory) +} + +// FunctionListenerFactory returns FunctionListeners to be notified when a +// function is called. +type FunctionListenerFactory interface { + // NewFunctionListener returns a FunctionListener for a defined function. + // If nil is returned, no listener will be notified. + NewFunctionListener(api.FunctionDefinition) FunctionListener + // ^^ A single instance can be returned to avoid instantiating a listener + // per function, especially as they may be thousands of functions. Shared + // listeners use their FunctionDefinition parameter to clarify. +} + +// FunctionListener can be registered for any function via +// FunctionListenerFactory to be notified when the function is called. +type FunctionListener interface { + // Before is invoked before a function is called. 
+ // + // There is always one corresponding call to After or Abort for each call to + // Before. This guarantee allows the listener to maintain an internal stack + // to perform correlations between the entry and exit of functions. + // + // # Params + // + // - ctx: the context of the caller function which must be the same + // instance or parent of the result. + // - mod: the calling module. + // - def: the function definition. + // - params: api.ValueType encoded parameters. + // - stackIterator: iterator on the call stack. At least one entry is + // guaranteed (the called function), whose Args() will be equal to + // params. The iterator will be reused between calls to Before. + // + // Note: api.Memory is meant for inspection, not modification. + // mod can be cast to InternalModule to read non-exported globals. + Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, params []uint64, stackIterator StackIterator) + + // After is invoked after a function is called. + // + // # Params + // + // - ctx: the context of the caller function. + // - mod: the calling module. + // - def: the function definition. + // - results: api.ValueType encoded results. + // + // # Notes + // + // - api.Memory is meant for inspection, not modification. + // - This is not called when a host function panics, or a guest function traps. + // See Abort for more details. + After(ctx context.Context, mod api.Module, def api.FunctionDefinition, results []uint64) + + // Abort is invoked when a function does not return due to a trap or panic. + // + // # Params + // + // - ctx: the context of the caller function. + // - mod: the calling module. + // - def: the function definition. + // - err: the error value representing the reason why the function aborted. + // + // # Notes + // + // - api.Memory is meant for inspection, not modification. 
+	Abort(ctx context.Context, mod api.Module, def api.FunctionDefinition, err error)
+}
+
+// FunctionListenerFunc is a function type implementing the FunctionListener
+// interface, making it possible to use regular functions and methods as
+// listeners of function invocation.
+//
+// The FunctionListener interface declares two methods (Before and After),
+// but this type invokes its value only when Before is called. It is best
+// suited for cases where the host does not need to perform correlation
+// between the start and end of the function call.
+type FunctionListenerFunc func(context.Context, api.Module, api.FunctionDefinition, []uint64, StackIterator)
+
+// Before satisfies the FunctionListener interface, calls f.
+func (f FunctionListenerFunc) Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, params []uint64, stackIterator StackIterator) {
+	f(ctx, mod, def, params, stackIterator)
+}
+
+// After is declared to satisfy the FunctionListener interface, but it does
+// nothing.
+func (f FunctionListenerFunc) After(context.Context, api.Module, api.FunctionDefinition, []uint64) {
+}
+
+// Abort is declared to satisfy the FunctionListener interface, but it does
+// nothing.
+func (f FunctionListenerFunc) Abort(context.Context, api.Module, api.FunctionDefinition, error) {
+}
+
+// FunctionListenerFactoryFunc is a function type implementing the
+// FunctionListenerFactory interface, making it possible to use regular
+// functions and methods as factory of function listeners.
+type FunctionListenerFactoryFunc func(api.FunctionDefinition) FunctionListener
+
+// NewFunctionListener satisfies the FunctionListenerFactory interface, calls f.
+func (f FunctionListenerFactoryFunc) NewFunctionListener(def api.FunctionDefinition) FunctionListener {
+	return f(def)
+}
+
+// MultiFunctionListenerFactory constructs a FunctionListenerFactory which
+// combines the listeners created by each of the factories passed as arguments.
+// +// This function is useful when multiple listeners need to be hooked to a module +// because the propagation mechanism based on installing a listener factory in +// the context.Context used when instantiating modules allows for a single +// listener to be installed. +// +// The stack iterator passed to the Before method is reset so that each listener +// can iterate the call stack independently without impacting the ability of +// other listeners to do so. +func MultiFunctionListenerFactory(factories ...FunctionListenerFactory) FunctionListenerFactory { + multi := make(multiFunctionListenerFactory, len(factories)) + copy(multi, factories) + return multi +} + +type multiFunctionListenerFactory []FunctionListenerFactory + +func (multi multiFunctionListenerFactory) NewFunctionListener(def api.FunctionDefinition) FunctionListener { + var lstns []FunctionListener + for _, factory := range multi { + if lstn := factory.NewFunctionListener(def); lstn != nil { + lstns = append(lstns, lstn) + } + } + switch len(lstns) { + case 0: + return nil + case 1: + return lstns[0] + default: + return &multiFunctionListener{lstns: lstns} + } +} + +type multiFunctionListener struct { + lstns []FunctionListener + stack stackIterator +} + +func (multi *multiFunctionListener) Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, params []uint64, si StackIterator) { + multi.stack.base = si + for _, lstn := range multi.lstns { + multi.stack.index = -1 + lstn.Before(ctx, mod, def, params, &multi.stack) + } +} + +func (multi *multiFunctionListener) After(ctx context.Context, mod api.Module, def api.FunctionDefinition, results []uint64) { + for _, lstn := range multi.lstns { + lstn.After(ctx, mod, def, results) + } +} + +func (multi *multiFunctionListener) Abort(ctx context.Context, mod api.Module, def api.FunctionDefinition, err error) { + for _, lstn := range multi.lstns { + lstn.Abort(ctx, mod, def, err) + } +} + +type stackIterator struct { + base StackIterator + index 
int + pcs []uint64 + fns []InternalFunction +} + +func (si *stackIterator) Next() bool { + if si.base != nil { + si.pcs = si.pcs[:0] + si.fns = si.fns[:0] + + for si.base.Next() { + si.pcs = append(si.pcs, uint64(si.base.ProgramCounter())) + si.fns = append(si.fns, si.base.Function()) + } + + si.base = nil + } + si.index++ + return si.index < len(si.pcs) +} + +func (si *stackIterator) ProgramCounter() ProgramCounter { + return ProgramCounter(si.pcs[si.index]) +} + +func (si *stackIterator) Function() InternalFunction { + return si.fns[si.index] +} + +// StackFrame represents a frame on the call stack. +type StackFrame struct { + Function api.Function + Params []uint64 + Results []uint64 + PC uint64 + SourceOffset uint64 +} + +type internalFunction struct { + definition api.FunctionDefinition + sourceOffset uint64 +} + +func (f internalFunction) Definition() api.FunctionDefinition { + return f.definition +} + +func (f internalFunction) SourceOffsetForPC(pc ProgramCounter) uint64 { + return f.sourceOffset +} + +// stackFrameIterator is an implementation of the experimental.stackFrameIterator +// interface. +type stackFrameIterator struct { + index int + stack []StackFrame + fndef []api.FunctionDefinition +} + +func (si *stackFrameIterator) Next() bool { + si.index++ + return si.index < len(si.stack) +} + +func (si *stackFrameIterator) Function() InternalFunction { + return internalFunction{ + definition: si.fndef[si.index], + sourceOffset: si.stack[si.index].SourceOffset, + } +} + +func (si *stackFrameIterator) ProgramCounter() ProgramCounter { + return ProgramCounter(si.stack[si.index].PC) +} + +// NewStackIterator constructs a stack iterator from a list of stack frames. +// The top most frame is the last one. 
+func NewStackIterator(stack ...StackFrame) StackIterator { + si := &stackFrameIterator{ + index: -1, + stack: make([]StackFrame, len(stack)), + fndef: make([]api.FunctionDefinition, len(stack)), + } + for i := range stack { + si.stack[i] = stack[len(stack)-(i+1)] + } + // The size of function definition is only one pointer which should allow + // the compiler to optimize the conversion to api.FunctionDefinition; but + // the presence of internal.WazeroOnlyType, despite being defined as an + // empty struct, forces a heap allocation that we amortize by caching the + // result. + for i, frame := range stack { + si.fndef[i] = frame.Function.Definition() + } + return si +} + +// BenchmarkFunctionListener implements a benchmark for function listeners. +// +// The benchmark calls Before and After methods repeatedly using the provided +// module an stack frames to invoke the methods. +// +// The stack frame is a representation of the call stack that the Before method +// will be invoked with. The top of the stack is stored at index zero. The stack +// must contain at least one frame or the benchmark will fail. +func BenchmarkFunctionListener(n int, module api.Module, stack []StackFrame, listener FunctionListener) { + if len(stack) == 0 { + panic("cannot benchmark function listener with an empty stack") + } + + ctx := context.Background() + def := stack[0].Function.Definition() + params := stack[0].Params + results := stack[0].Results + stackIterator := &stackIterator{base: NewStackIterator(stack...)} + + for i := 0; i < n; i++ { + stackIterator.index = -1 + listener.Before(ctx, module, def, params, stackIterator) + listener.After(ctx, module, def, results) + } +} + +// TODO: the calls to Abort are not yet tested in internal/testing/enginetest, +// but they are validated indirectly in tests which exercise host logging, +// like Test_procExit in imports/wasi_snapshot_preview1. 
Eventually we should +// add dedicated tests to validate the behavior of the interpreter and compiler +// engines independently. diff --git a/vendor/github.com/tetratelabs/wazero/experimental/memory.go b/vendor/github.com/tetratelabs/wazero/experimental/memory.go new file mode 100644 index 000000000..e379bf053 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/memory.go @@ -0,0 +1,50 @@ +package experimental + +import ( + "context" + + "github.com/tetratelabs/wazero/internal/expctxkeys" +) + +// MemoryAllocator is a memory allocation hook, +// invoked to create a LinearMemory. +type MemoryAllocator interface { + // Allocate should create a new LinearMemory with the given specification: + // cap is the suggested initial capacity for the backing []byte, + // and max the maximum length that will ever be requested. + // + // Notes: + // - To back a shared memory, the address of the backing []byte cannot + // change. This is checked at runtime. Implementations should document + // if the returned LinearMemory meets this requirement. + Allocate(cap, max uint64) LinearMemory +} + +// MemoryAllocatorFunc is a convenience for defining inlining a MemoryAllocator. +type MemoryAllocatorFunc func(cap, max uint64) LinearMemory + +// Allocate implements MemoryAllocator.Allocate. +func (f MemoryAllocatorFunc) Allocate(cap, max uint64) LinearMemory { + return f(cap, max) +} + +// LinearMemory is an expandable []byte that backs a Wasm linear memory. +type LinearMemory interface { + // Reallocates the linear memory to size bytes in length. + // + // Notes: + // - To back a shared memory, Reallocate can't change the address of the + // backing []byte (only its length/capacity may change). + Reallocate(size uint64) []byte + // Free the backing memory buffer. + Free() +} + +// WithMemoryAllocator registers the given MemoryAllocator into the given +// context.Context. 
+func WithMemoryAllocator(ctx context.Context, allocator MemoryAllocator) context.Context { + if allocator != nil { + return context.WithValue(ctx, expctxkeys.MemoryAllocatorKey{}, allocator) + } + return ctx +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/dir.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/dir.go new file mode 100644 index 000000000..0b997cb8f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/dir.go @@ -0,0 +1,92 @@ +package sys + +import ( + "fmt" + "io/fs" + + "github.com/tetratelabs/wazero/sys" +) + +// FileType is fs.FileMode masked on fs.ModeType. For example, zero is a +// regular file, fs.ModeDir is a directory and fs.ModeIrregular is unknown. +// +// Note: This is defined by Linux, not POSIX. +type FileType = fs.FileMode + +// Dirent is an entry read from a directory via File.Readdir. +// +// # Notes +// +// - This extends `dirent` defined in POSIX with some fields defined by +// Linux. See https://man7.org/linux/man-pages/man3/readdir.3.html and +// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html +// - This has a subset of fields defined in sys.Stat_t. Notably, there is no +// field corresponding to Stat_t.Dev because that value will be constant +// for all files in a directory. To get the Dev value, call File.Stat on +// the directory File.Readdir was called on. +type Dirent struct { + // Ino is the file serial number, or zero if not available. See Ino for + // more details including impact returning a zero value. + Ino sys.Inode + + // Name is the base name of the directory entry. Empty is invalid. + Name string + + // Type is fs.FileMode masked on fs.ModeType. For example, zero is a + // regular file, fs.ModeDir is a directory and fs.ModeIrregular is unknown. + // + // Note: This is defined by Linux, not POSIX. 
+ Type fs.FileMode +} + +func (d *Dirent) String() string { + return fmt.Sprintf("name=%s, type=%v, ino=%d", d.Name, d.Type, d.Ino) +} + +// IsDir returns true if the Type is fs.ModeDir. +func (d *Dirent) IsDir() bool { + return d.Type == fs.ModeDir +} + +// DirFile is embeddable to reduce the amount of functions to implement a file. +type DirFile struct{} + +// IsAppend implements File.IsAppend +func (DirFile) IsAppend() bool { + return false +} + +// SetAppend implements File.SetAppend +func (DirFile) SetAppend(bool) Errno { + return EISDIR +} + +// IsDir implements File.IsDir +func (DirFile) IsDir() (bool, Errno) { + return true, 0 +} + +// Read implements File.Read +func (DirFile) Read([]byte) (int, Errno) { + return 0, EISDIR +} + +// Pread implements File.Pread +func (DirFile) Pread([]byte, int64) (int, Errno) { + return 0, EISDIR +} + +// Write implements File.Write +func (DirFile) Write([]byte) (int, Errno) { + return 0, EISDIR +} + +// Pwrite implements File.Pwrite +func (DirFile) Pwrite([]byte, int64) (int, Errno) { + return 0, EISDIR +} + +// Truncate implements File.Truncate +func (DirFile) Truncate(int64) Errno { + return EISDIR +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/errno.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/errno.go new file mode 100644 index 000000000..238949496 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/errno.go @@ -0,0 +1,98 @@ +package sys + +import "strconv" + +// Errno is a subset of POSIX errno used by wazero interfaces. Zero is not an +// error. Other values should not be interpreted numerically, rather by constants +// prefixed with 'E'. +// +// See https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html +type Errno uint16 + +// ^-- Note: This will eventually move to the public /sys package. It is +// experimental until we audit the socket related APIs to ensure we have all +// the Errno it returns, and we export fs.FS. 
This is not in /internal/sys as +// that would introduce a package cycle. + +// This is a subset of errors to reduce implementation burden. `wasip1` defines +// almost all POSIX error numbers, but not all are used in practice. wazero +// will add ones needed in POSIX order, as needed by functions that explicitly +// document returning them. +// +// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-errno-enumu16 +const ( + EACCES Errno = iota + 1 + EAGAIN + EBADF + EEXIST + EFAULT + EINTR + EINVAL + EIO + EISDIR + ELOOP + ENAMETOOLONG + ENOENT + ENOSYS + ENOTDIR + ERANGE + ENOTEMPTY + ENOTSOCK + ENOTSUP + EPERM + EROFS + + // NOTE ENOTCAPABLE is defined in wasip1, but not in POSIX. wasi-libc + // converts it to EBADF, ESPIPE or EINVAL depending on the call site. + // It isn't known if compilers who don't use ENOTCAPABLE would crash on it. +) + +// Error implements error +func (e Errno) Error() string { + switch e { + case 0: // not an error + return "success" + case EACCES: + return "permission denied" + case EAGAIN: + return "resource unavailable, try again" + case EBADF: + return "bad file descriptor" + case EEXIST: + return "file exists" + case EFAULT: + return "bad address" + case EINTR: + return "interrupted function" + case EINVAL: + return "invalid argument" + case EIO: + return "input/output error" + case EISDIR: + return "is a directory" + case ELOOP: + return "too many levels of symbolic links" + case ENAMETOOLONG: + return "filename too long" + case ENOENT: + return "no such file or directory" + case ENOSYS: + return "functionality not supported" + case ENOTDIR: + return "not a directory or a symbolic link to a directory" + case ERANGE: + return "result too large" + case ENOTEMPTY: + return "directory not empty" + case ENOTSOCK: + return "not a socket" + case ENOTSUP: + return "not supported (may be the same value as [EOPNOTSUPP])" + case EPERM: + return "operation not permitted" + case EROFS: + return "read-only file system" + 
default: + return "Errno(" + strconv.Itoa(int(e)) + ")" + } +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/error.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/error.go new file mode 100644 index 000000000..a0c76019a --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/error.go @@ -0,0 +1,45 @@ +package sys + +import ( + "io" + "io/fs" + "os" +) + +// UnwrapOSError returns an Errno or zero if the input is nil. +func UnwrapOSError(err error) Errno { + if err == nil { + return 0 + } + err = underlyingError(err) + switch err { + case nil, io.EOF: + return 0 // EOF is not a Errno + case fs.ErrInvalid: + return EINVAL + case fs.ErrPermission: + return EPERM + case fs.ErrExist: + return EEXIST + case fs.ErrNotExist: + return ENOENT + case fs.ErrClosed: + return EBADF + } + return errorToErrno(err) +} + +// underlyingError returns the underlying error if a well-known OS error type. +// +// This impl is basically the same as os.underlyingError in os/error.go +func underlyingError(err error) error { + switch err := err.(type) { + case *os.PathError: + return err.Err + case *os.LinkError: + return err.Err + case *os.SyscallError: + return err.Err + } + return err +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/file.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/file.go new file mode 100644 index 000000000..b6bfbcfeb --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/file.go @@ -0,0 +1,316 @@ +package sys + +import "github.com/tetratelabs/wazero/sys" + +// File is a writeable fs.File bridge backed by syscall functions needed for ABI +// including WASI. +// +// Implementations should embed UnimplementedFile for forward compatibility. Any +// unsupported method or parameter should return ENOSYS. +// +// # Errors +// +// All methods that can return an error return a Errno, which is zero +// on success. 
+// +// Restricting to Errno matches current WebAssembly host functions, +// which are constrained to well-known error codes. For example, WASI maps syscall +// errors to u32 numeric values. +// +// # Notes +// +// - You must call Close to avoid file resource conflicts. For example, +// Windows cannot delete the underlying directory while a handle to it +// remains open. +// - A writable filesystem abstraction is not yet implemented as of Go 1.20. +// See https://github.com/golang/go/issues/45757 +type File interface { + // Dev returns the device ID (Stat_t.Dev) of this file, zero if unknown or + // an error retrieving it. + // + // # Errors + // + // Possible errors are those from Stat, except ENOSYS should not + // be returned. Zero should be returned if there is no implementation. + // + // # Notes + // + // - Implementations should cache this result. + // - This combined with Ino can implement os.SameFile. + Dev() (uint64, Errno) + + // Ino returns the serial number (Stat_t.Ino) of this file, zero if unknown + // or an error retrieving it. + // + // # Errors + // + // Possible errors are those from Stat, except ENOSYS should not + // be returned. Zero should be returned if there is no implementation. + // + // # Notes + // + // - Implementations should cache this result. + // - This combined with Dev can implement os.SameFile. + Ino() (sys.Inode, Errno) + + // IsDir returns true if this file is a directory or an error there was an + // error retrieving this information. + // + // # Errors + // + // Possible errors are those from Stat, except ENOSYS should not + // be returned. false should be returned if there is no implementation. + // + // # Notes + // + // - Implementations should cache this result. + IsDir() (bool, Errno) + + // IsAppend returns true if the file was opened with O_APPEND, or + // SetAppend was successfully enabled on this file. 
+ // + // # Notes + // + // - This might not match the underlying state of the file descriptor if + // the file was not opened via OpenFile. + IsAppend() bool + + // SetAppend toggles the append mode (O_APPEND) of this file. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file or directory was closed. + // + // # Notes + // + // - There is no `O_APPEND` for `fcntl` in POSIX, so implementations may + // have to re-open the underlying file to apply this. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html + SetAppend(enable bool) Errno + + // Stat is similar to syscall.Fstat. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file or directory was closed. + // + // # Notes + // + // - This is like syscall.Fstat and `fstatat` with `AT_FDCWD` in POSIX. + // See https://pubs.opengroup.org/onlinepubs/9699919799/functions/stat.html + // - A fs.FileInfo backed implementation sets atim, mtim and ctim to the + // same value. + // - Windows allows you to stat a closed directory. + Stat() (sys.Stat_t, Errno) + + // Read attempts to read all bytes in the file into `buf`, and returns the + // count read even on error. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file or directory was closed or not readable. + // - EISDIR: the file was a directory. + // + // # Notes + // + // - This is like io.Reader and `read` in POSIX, preferring semantics of + // io.Reader. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/read.html + // - Unlike io.Reader, there is no io.EOF returned on end-of-file. To + // read the file completely, the caller must repeat until `n` is zero. 
+ Read(buf []byte) (n int, errno Errno) + + // Pread attempts to read all bytes in the file into `p`, starting at the + // offset `off`, and returns the count read even on error. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file or directory was closed or not readable. + // - EINVAL: the offset was negative. + // - EISDIR: the file was a directory. + // + // # Notes + // + // - This is like io.ReaderAt and `pread` in POSIX, preferring semantics + // of io.ReaderAt. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/pread.html + // - Unlike io.ReaderAt, there is no io.EOF returned on end-of-file. To + // read the file completely, the caller must repeat until `n` is zero. + Pread(buf []byte, off int64) (n int, errno Errno) + + // Seek attempts to set the next offset for Read or Write and returns the + // resulting absolute offset or an error. + // + // # Parameters + // + // The `offset` parameters is interpreted in terms of `whence`: + // - io.SeekStart: relative to the start of the file, e.g. offset=0 sets + // the next Read or Write to the beginning of the file. + // - io.SeekCurrent: relative to the current offset, e.g. offset=16 sets + // the next Read or Write 16 bytes past the prior. + // - io.SeekEnd: relative to the end of the file, e.g. offset=-1 sets the + // next Read or Write to the last byte in the file. + // + // # Behavior when a directory + // + // The only supported use case for a directory is seeking to `offset` zero + // (`whence` = io.SeekStart). This should have the same behavior as + // os.File, which resets any internal state used by Readdir. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file or directory was closed or not readable. + // - EINVAL: the offset was negative. 
+ // + // # Notes + // + // - This is like io.Seeker and `fseek` in POSIX, preferring semantics + // of io.Seeker. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/fseek.html + Seek(offset int64, whence int) (newOffset int64, errno Errno) + + // Readdir reads the contents of the directory associated with file and + // returns a slice of up to n Dirent values in an arbitrary order. This is + // a stateful function, so subsequent calls return any next values. + // + // If n > 0, Readdir returns at most n entries or an error. + // If n <= 0, Readdir returns all remaining entries or an error. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file was closed or not a directory. + // - ENOENT: the directory could not be read (e.g. deleted). + // + // # Notes + // + // - This is like `Readdir` on os.File, but unlike `readdir` in POSIX. + // See https://pubs.opengroup.org/onlinepubs/9699919799/functions/readdir.html + // - Unlike os.File, there is no io.EOF returned on end-of-directory. To + // read the directory completely, the caller must repeat until the + // count read (`len(dirents)`) is less than `n`. + // - See /RATIONALE.md for design notes. + Readdir(n int) (dirents []Dirent, errno Errno) + + // Write attempts to write all bytes in `p` to the file, and returns the + // count written even on error. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file was closed, not writeable, or a directory. + // + // # Notes + // + // - This is like io.Writer and `write` in POSIX, preferring semantics of + // io.Writer. 
See https://pubs.opengroup.org/onlinepubs/9699919799/functions/write.html + Write(buf []byte) (n int, errno Errno) + + // Pwrite attempts to write all bytes in `p` to the file at the given + // offset `off`, and returns the count written even on error. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file or directory was closed or not writeable. + // - EINVAL: the offset was negative. + // - EISDIR: the file was a directory. + // + // # Notes + // + // - This is like io.WriterAt and `pwrite` in POSIX, preferring semantics + // of io.WriterAt. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/pwrite.html + Pwrite(buf []byte, off int64) (n int, errno Errno) + + // Truncate truncates a file to a specified length. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file or directory was closed. + // - EINVAL: the `size` is negative. + // - EISDIR: the file was a directory. + // + // # Notes + // + // - This is like syscall.Ftruncate and `ftruncate` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/ftruncate.html + // - Windows does not error when calling Truncate on a closed file. + Truncate(size int64) Errno + + // Sync synchronizes changes to the file. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - EBADF: the file or directory was closed. + // + // # Notes + // + // - This is like syscall.Fsync and `fsync` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/fsync.html + // - This returns with no error instead of ENOSYS when + // unimplemented. This prevents fake filesystems from erring. + // - Windows does not error when calling Sync on a closed file. + Sync() Errno + + // Datasync synchronizes the data of a file. 
+ // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - EBADF: the file or directory was closed. + // + // # Notes + // + // - This is like syscall.Fdatasync and `fdatasync` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/fdatasync.html + // - This returns with no error instead of ENOSYS when + // unimplemented. This prevents fake filesystems from erring. + // - As this is commonly missing, some implementations dispatch to Sync. + Datasync() Errno + + // Utimens set file access and modification times of this file, at + // nanosecond precision. + // + // # Parameters + // + // The `atim` and `mtim` parameters refer to access and modification time + // stamps as defined in sys.Stat_t. To retain one or the other, substitute + // it with the pseudo-timestamp UTIME_OMIT. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file or directory was closed. + // + // # Notes + // + // - This is like syscall.UtimesNano and `futimens` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/futimens.html + // - Windows requires files to be open with O_RDWR, which means you + // cannot use this to update timestamps on a directory (EPERM). + Utimens(atim, mtim int64) Errno + + // Close closes the underlying file. + // + // A zero Errno is returned if unimplemented or success. + // + // # Notes + // + // - This is like syscall.Close and `close` in POSIX. 
See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html + Close() Errno +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/fs.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/fs.go new file mode 100644 index 000000000..87810510a --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/fs.go @@ -0,0 +1,292 @@ +package sys + +import ( + "io/fs" + + "github.com/tetratelabs/wazero/sys" +) + +// FS is a writeable fs.FS bridge backed by syscall functions needed for ABI +// including WASI. +// +// Implementations should embed UnimplementedFS for forward compatibility. Any +// unsupported method or parameter should return ENO +// +// # Errors +// +// All methods that can return an error return a Errno, which is zero +// on success. +// +// Restricting to Errno matches current WebAssembly host functions, +// which are constrained to well-known error codes. For example, WASI maps syscall +// errors to u32 numeric values. +// +// # Notes +// +// A writable filesystem abstraction is not yet implemented as of Go 1.20. See +// https://github.com/golang/go/issues/45757 +type FS interface { + // OpenFile opens a file. It should be closed via Close on File. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EINVAL: `path` or `flag` is invalid. + // - EISDIR: the path was a directory, but flag included O_RDWR or + // O_WRONLY + // - ENOENT: `path` doesn't exist and `flag` doesn't contain O_CREAT. + // + // # Constraints on the returned file + // + // Implementations that can read flags should enforce them regardless of + // the type returned. For example, while os.File implements io.Writer, + // attempts to write to a directory or a file opened with O_RDONLY fail + // with a EBADF. + // + // Some implementations choose whether to enforce read-only opens, namely + // fs.FS. 
While fs.FS is supported (Adapt), wazero cannot runtime enforce + // open flags. Instead, we encourage good behavior and test our built-in + // implementations. + // + // # Notes + // + // - This is like os.OpenFile, except the path is relative to this file + // system, and Errno is returned instead of os.PathError. + // - Implications of permissions when O_CREAT are described in Chmod notes. + // - This is like `open` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html + OpenFile(path string, flag Oflag, perm fs.FileMode) (File, Errno) + + // Lstat gets file status without following symbolic links. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - ENOENT: `path` doesn't exist. + // + // # Notes + // + // - This is like syscall.Lstat, except the `path` is relative to this + // file system. + // - This is like `lstat` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/lstat.html + // - An fs.FileInfo backed implementation sets atim, mtim and ctim to the + // same value. + // - When the path is a symbolic link, the stat returned is for the link, + // not the file it refers to. + Lstat(path string) (sys.Stat_t, Errno) + + // Stat gets file status. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - ENOENT: `path` doesn't exist. + // + // # Notes + // + // - This is like syscall.Stat, except the `path` is relative to this + // file system. + // - This is like `stat` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/stat.html + // - An fs.FileInfo backed implementation sets atim, mtim and ctim to the + // same value. + // - When the path is a symbolic link, the stat returned is for the file + // it refers to. 
+ Stat(path string) (sys.Stat_t, Errno) + + // Mkdir makes a directory. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EINVAL: `path` is invalid. + // - EEXIST: `path` exists and is a directory. + // - ENOTDIR: `path` exists and is a file. + // + // # Notes + // + // - This is like syscall.Mkdir, except the `path` is relative to this + // file system. + // - This is like `mkdir` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/mkdir.html + // - Implications of permissions are described in Chmod notes. + Mkdir(path string, perm fs.FileMode) Errno + + // Chmod changes the mode of the file. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EINVAL: `path` is invalid. + // - ENOENT: `path` does not exist. + // + // # Notes + // + // - This is like syscall.Chmod, except the `path` is relative to this + // file system. + // - This is like `chmod` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/chmod.html + // - Windows ignores the execute bit, and any permissions come back as + // group and world. For example, chmod of 0400 reads back as 0444, and + // 0700 0666. Also, permissions on directories aren't supported at all. + Chmod(path string, perm fs.FileMode) Errno + + // Rename renames file or directory. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EINVAL: `from` or `to` is invalid. + // - ENOENT: `from` or `to` don't exist. + // - ENOTDIR: `from` is a directory and `to` exists as a file. + // - EISDIR: `from` is a file and `to` exists as a directory. + // - ENOTEMPTY: `both from` and `to` are existing directory, but + // `to` is not empty. 
+ // + // # Notes + // + // - This is like syscall.Rename, except the paths are relative to this + // file system. + // - This is like `rename` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/rename.html + // - Windows doesn't let you overwrite an existing directory. + Rename(from, to string) Errno + + // Rmdir removes a directory. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EINVAL: `path` is invalid. + // - ENOENT: `path` doesn't exist. + // - ENOTDIR: `path` exists, but isn't a directory. + // - ENOTEMPTY: `path` exists, but isn't empty. + // + // # Notes + // + // - This is like syscall.Rmdir, except the `path` is relative to this + // file system. + // - This is like `rmdir` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/rmdir.html + // - As of Go 1.19, Windows maps ENOTDIR to ENOENT. + Rmdir(path string) Errno + + // Unlink removes a directory entry. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EINVAL: `path` is invalid. + // - ENOENT: `path` doesn't exist. + // - EISDIR: `path` exists, but is a directory. + // + // # Notes + // + // - This is like syscall.Unlink, except the `path` is relative to this + // file system. + // - This is like `unlink` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/unlink.html + // - On Windows, syscall.Unlink doesn't delete symlink to directory unlike other platforms. Implementations might + // want to combine syscall.RemoveDirectory with syscall.Unlink in order to delete such links on Windows. 
+ // See https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-removedirectorya + Unlink(path string) Errno + + // Link creates a "hard" link from oldPath to newPath, in contrast to a + // soft link (via Symlink). + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EPERM: `oldPath` is invalid. + // - ENOENT: `oldPath` doesn't exist. + // - EISDIR: `newPath` exists, but is a directory. + // + // # Notes + // + // - This is like syscall.Link, except the `oldPath` is relative to this + // file system. + // - This is like `link` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/link.html + Link(oldPath, newPath string) Errno + + // Symlink creates a "soft" link from oldPath to newPath, in contrast to a + // hard link (via Link). + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EPERM: `oldPath` or `newPath` is invalid. + // - EEXIST: `newPath` exists. + // + // # Notes + // + // - This is like syscall.Symlink, except the `oldPath` is relative to + // this file system. + // - This is like `symlink` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/symlink.html + // - Only `newPath` is relative to this file system and `oldPath` is kept + // as-is. That is because the link is only resolved relative to the + // directory when dereferencing it (e.g. ReadLink). + // See https://github.com/bytecodealliance/cap-std/blob/v1.0.4/cap-std/src/fs/dir.rs#L404-L409 + // for how others implement this. + // - Symlinks in Windows requires `SeCreateSymbolicLinkPrivilege`. + // Otherwise, EPERM results. 
+ // See https://learn.microsoft.com/en-us/windows/security/threat-protection/security-policy-settings/create-symbolic-links + Symlink(oldPath, linkName string) Errno + + // Readlink reads the contents of a symbolic link. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EINVAL: `path` is invalid. + // + // # Notes + // + // - This is like syscall.Readlink, except the path is relative to this + // filesystem. + // - This is like `readlink` in POSIX. See + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/readlink.html + // - On Windows, the path separator is different from other platforms, + // but to provide consistent results to Wasm, this normalizes to a "/" + // separator. + Readlink(path string) (string, Errno) + + // Utimens set file access and modification times on a path relative to + // this file system, at nanosecond precision. + // + // # Parameters + // + // If the path is a symbolic link, the target of expanding that link is + // updated. + // + // The `atim` and `mtim` parameters refer to access and modification time + // stamps as defined in sys.Stat_t. To retain one or the other, substitute + // it with the pseudo-timestamp UTIME_OMIT. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EINVAL: `path` is invalid. + // - EEXIST: `path` exists and is a directory. + // - ENOTDIR: `path` exists and is a file. + // + // # Notes + // + // - This is like syscall.UtimesNano and `utimensat` with `AT_FDCWD` in + // POSIX. 
See https://pubs.opengroup.org/onlinepubs/9699919799/functions/futimens.html + Utimens(path string, atim, mtim int64) Errno +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/oflag.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/oflag.go new file mode 100644 index 000000000..39ebd378f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/oflag.go @@ -0,0 +1,70 @@ +package sys + +// Oflag are flags used for FS.OpenFile. Values, including zero, should not be +// interpreted numerically. Instead, use by constants prefixed with 'O_' with +// special casing noted below. +// +// # Notes +// +// - O_RDONLY, O_RDWR and O_WRONLY are mutually exclusive, while the other +// flags can coexist bitwise. +// - This is like `flag` in os.OpenFile and `oflag` in POSIX. See +// https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html +type Oflag uint32 + +// This is a subset of oflags to reduce implementation burden. `wasip1` splits +// these across `oflags` and `fdflags`. We can't rely on the Go `os` package, +// as it is missing some values. Any flags added will be defined in POSIX +// order, as needed by functions that explicitly document accepting them. +// +// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-oflags-flagsu16 +// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-fdflags-flagsu16 +const ( + // O_RDONLY is like os.O_RDONLY + O_RDONLY Oflag = iota + + // O_RDWR is like os.O_RDWR + O_RDWR + + // O_WRONLY is like os.O_WRONLY + O_WRONLY + + // Define bitflags as they are in POSIX `open`: alphabetically + + // O_APPEND is like os.O_APPEND + O_APPEND Oflag = 1 << iota + + // O_CREAT is link os.O_CREATE + O_CREAT + + // O_DIRECTORY is defined on some platforms as syscall.O_DIRECTORY. + // + // Note: This ensures that the opened file is a directory. 
Those emulating + // on platforms that don't support the O_DIRECTORY, can double-check the + // result with File.IsDir (or stat) and err if not a directory. + O_DIRECTORY + + // O_DSYNC is defined on some platforms as syscall.O_DSYNC. + O_DSYNC + + // O_EXCL is defined on some platforms as syscall.O_EXCL. + O_EXCL + + // O_NOFOLLOW is defined on some platforms as syscall.O_NOFOLLOW. + // + // Note: This allows programs to ensure that if the opened file is a + // symbolic link, the link itself is opened instead of its target. + O_NOFOLLOW + + // O_NONBLOCK is defined on some platforms as syscall.O_NONBLOCK. + O_NONBLOCK + + // O_RSYNC is defined on some platforms as syscall.O_RSYNC. + O_RSYNC + + // O_SYNC is defined on some platforms as syscall.O_SYNC. + O_SYNC + + // O_TRUNC is defined on some platforms as syscall.O_TRUNC. + O_TRUNC +) diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno.go new file mode 100644 index 000000000..ea511ec25 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno.go @@ -0,0 +1,106 @@ +//go:build !plan9 && !aix + +package sys + +import "syscall" + +func syscallToErrno(err error) (Errno, bool) { + errno, ok := err.(syscall.Errno) + if !ok { + return 0, false + } + switch errno { + case 0: + return 0, true + case syscall.EACCES: + return EACCES, true + case syscall.EAGAIN: + return EAGAIN, true + case syscall.EBADF: + return EBADF, true + case syscall.EEXIST: + return EEXIST, true + case syscall.EFAULT: + return EFAULT, true + case syscall.EINTR: + return EINTR, true + case syscall.EINVAL: + return EINVAL, true + case syscall.EIO: + return EIO, true + case syscall.EISDIR: + return EISDIR, true + case syscall.ELOOP: + return ELOOP, true + case syscall.ENAMETOOLONG: + return ENAMETOOLONG, true + case syscall.ENOENT: + return ENOENT, true + case syscall.ENOSYS: + return ENOSYS, true + case syscall.ENOTDIR: + 
return ENOTDIR, true + case syscall.ERANGE: + return ERANGE, true + case syscall.ENOTEMPTY: + return ENOTEMPTY, true + case syscall.ENOTSOCK: + return ENOTSOCK, true + case syscall.ENOTSUP: + return ENOTSUP, true + case syscall.EPERM: + return EPERM, true + case syscall.EROFS: + return EROFS, true + default: + return EIO, true + } +} + +// Unwrap is a convenience for runtime.GOOS which define syscall.Errno. +func (e Errno) Unwrap() error { + switch e { + case 0: + return nil + case EACCES: + return syscall.EACCES + case EAGAIN: + return syscall.EAGAIN + case EBADF: + return syscall.EBADF + case EEXIST: + return syscall.EEXIST + case EFAULT: + return syscall.EFAULT + case EINTR: + return syscall.EINTR + case EINVAL: + return syscall.EINVAL + case EIO: + return syscall.EIO + case EISDIR: + return syscall.EISDIR + case ELOOP: + return syscall.ELOOP + case ENAMETOOLONG: + return syscall.ENAMETOOLONG + case ENOENT: + return syscall.ENOENT + case ENOSYS: + return syscall.ENOSYS + case ENOTDIR: + return syscall.ENOTDIR + case ENOTEMPTY: + return syscall.ENOTEMPTY + case ENOTSOCK: + return syscall.ENOTSOCK + case ENOTSUP: + return syscall.ENOTSUP + case EPERM: + return syscall.EPERM + case EROFS: + return syscall.EROFS + default: + return syscall.EIO + } +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_notwindows.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_notwindows.go new file mode 100644 index 000000000..8a88ed765 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_notwindows.go @@ -0,0 +1,13 @@ +//go:build !windows + +package sys + +func errorToErrno(err error) Errno { + if errno, ok := err.(Errno); ok { + return errno + } + if errno, ok := syscallToErrno(err); ok { + return errno + } + return EIO +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_unsupported.go 
b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_unsupported.go new file mode 100644 index 000000000..1c6d423d0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_unsupported.go @@ -0,0 +1,7 @@ +//go:build plan9 || aix + +package sys + +func syscallToErrno(err error) (Errno, bool) { + return 0, false +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go new file mode 100644 index 000000000..761a1f9dc --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go @@ -0,0 +1,62 @@ +package sys + +import "syscall" + +// These are errors not defined in the syscall package. They are prefixed with +// underscore to avoid exporting them. +// +// See https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499- +const ( + // _ERROR_INVALID_HANDLE is a Windows error returned by syscall.Write + // instead of syscall.EBADF + _ERROR_INVALID_HANDLE = syscall.Errno(6) + + // _ERROR_INVALID_NAME is a Windows error returned by open when a file + // path has a trailing slash + _ERROR_INVALID_NAME = syscall.Errno(0x7B) + + // _ERROR_NEGATIVE_SEEK is a Windows error returned by os.Truncate + // instead of syscall.EINVAL + _ERROR_NEGATIVE_SEEK = syscall.Errno(0x83) + + // _ERROR_DIRECTORY is a Windows error returned by syscall.Rmdir + // instead of syscall.ENOTDIR + _ERROR_DIRECTORY = syscall.Errno(0x10B) + + // _ERROR_INVALID_SOCKET is a Windows error returned by winsock_select + // when a given handle is not a socket. 
+ _ERROR_INVALID_SOCKET = syscall.Errno(0x2736) +) + +func errorToErrno(err error) Errno { + switch err := err.(type) { + case Errno: + return err + case syscall.Errno: + // Note: In windows, _ERROR_PATH_NOT_FOUND(0x3) maps to syscall.ENOTDIR + switch err { + case syscall.ERROR_ALREADY_EXISTS: + return EEXIST + case _ERROR_DIRECTORY: + return ENOTDIR + case syscall.ERROR_DIR_NOT_EMPTY: + return ENOTEMPTY + case syscall.ERROR_FILE_EXISTS: + return EEXIST + case _ERROR_INVALID_HANDLE, _ERROR_INVALID_SOCKET: + return EBADF + case syscall.ERROR_ACCESS_DENIED: + // POSIX read and write functions expect EBADF, not EACCES when not + // open for reading or writing. + return EBADF + case syscall.ERROR_PRIVILEGE_NOT_HELD: + return EPERM + case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME: + return EINVAL + } + errno, _ := syscallToErrno(err) + return errno + default: + return EIO + } +} diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/time.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/time.go new file mode 100644 index 000000000..4f3e01fef --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/time.go @@ -0,0 +1,10 @@ +package sys + +import "math" + +// UTIME_OMIT is a special constant for use in updating times via FS.Utimens +// or File.Utimens. When used for atim or mtim, the value is retained. +// +// Note: This may be implemented via a stat when the underlying filesystem +// does not support this value. 
+const UTIME_OMIT int64 = math.MinInt64 diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/unimplemented.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/unimplemented.go new file mode 100644 index 000000000..d853d9e8f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/unimplemented.go @@ -0,0 +1,160 @@ +package sys + +import ( + "io/fs" + + "github.com/tetratelabs/wazero/sys" +) + +// UnimplementedFS is an FS that returns ENOSYS for all functions, +// This should be embedded to have forward compatible implementations. +type UnimplementedFS struct{} + +// OpenFile implements FS.OpenFile +func (UnimplementedFS) OpenFile(path string, flag Oflag, perm fs.FileMode) (File, Errno) { + return nil, ENOSYS +} + +// Lstat implements FS.Lstat +func (UnimplementedFS) Lstat(path string) (sys.Stat_t, Errno) { + return sys.Stat_t{}, ENOSYS +} + +// Stat implements FS.Stat +func (UnimplementedFS) Stat(path string) (sys.Stat_t, Errno) { + return sys.Stat_t{}, ENOSYS +} + +// Readlink implements FS.Readlink +func (UnimplementedFS) Readlink(path string) (string, Errno) { + return "", ENOSYS +} + +// Mkdir implements FS.Mkdir +func (UnimplementedFS) Mkdir(path string, perm fs.FileMode) Errno { + return ENOSYS +} + +// Chmod implements FS.Chmod +func (UnimplementedFS) Chmod(path string, perm fs.FileMode) Errno { + return ENOSYS +} + +// Rename implements FS.Rename +func (UnimplementedFS) Rename(from, to string) Errno { + return ENOSYS +} + +// Rmdir implements FS.Rmdir +func (UnimplementedFS) Rmdir(path string) Errno { + return ENOSYS +} + +// Link implements FS.Link +func (UnimplementedFS) Link(_, _ string) Errno { + return ENOSYS +} + +// Symlink implements FS.Symlink +func (UnimplementedFS) Symlink(_, _ string) Errno { + return ENOSYS +} + +// Unlink implements FS.Unlink +func (UnimplementedFS) Unlink(path string) Errno { + return ENOSYS +} + +// Utimens implements FS.Utimens +func (UnimplementedFS) Utimens(path string, atim, mtim 
int64) Errno { + return ENOSYS +} + +// UnimplementedFile is a File that returns ENOSYS for all functions, +// except where no-op are otherwise documented. +// +// This should be embedded to have forward compatible implementations. +type UnimplementedFile struct{} + +// Dev implements File.Dev +func (UnimplementedFile) Dev() (uint64, Errno) { + return 0, 0 +} + +// Ino implements File.Ino +func (UnimplementedFile) Ino() (sys.Inode, Errno) { + return 0, 0 +} + +// IsDir implements File.IsDir +func (UnimplementedFile) IsDir() (bool, Errno) { + return false, 0 +} + +// IsAppend implements File.IsAppend +func (UnimplementedFile) IsAppend() bool { + return false +} + +// SetAppend implements File.SetAppend +func (UnimplementedFile) SetAppend(bool) Errno { + return ENOSYS +} + +// Stat implements File.Stat +func (UnimplementedFile) Stat() (sys.Stat_t, Errno) { + return sys.Stat_t{}, ENOSYS +} + +// Read implements File.Read +func (UnimplementedFile) Read([]byte) (int, Errno) { + return 0, ENOSYS +} + +// Pread implements File.Pread +func (UnimplementedFile) Pread([]byte, int64) (int, Errno) { + return 0, ENOSYS +} + +// Seek implements File.Seek +func (UnimplementedFile) Seek(int64, int) (int64, Errno) { + return 0, ENOSYS +} + +// Readdir implements File.Readdir +func (UnimplementedFile) Readdir(int) (dirents []Dirent, errno Errno) { + return nil, ENOSYS +} + +// Write implements File.Write +func (UnimplementedFile) Write([]byte) (int, Errno) { + return 0, ENOSYS +} + +// Pwrite implements File.Pwrite +func (UnimplementedFile) Pwrite([]byte, int64) (int, Errno) { + return 0, ENOSYS +} + +// Truncate implements File.Truncate +func (UnimplementedFile) Truncate(int64) Errno { + return ENOSYS +} + +// Sync implements File.Sync +func (UnimplementedFile) Sync() Errno { + return 0 // not ENOSYS +} + +// Datasync implements File.Datasync +func (UnimplementedFile) Datasync() Errno { + return 0 // not ENOSYS +} + +// Utimens implements File.Utimens +func (UnimplementedFile) 
Utimens(int64, int64) Errno { + return ENOSYS +} + +// Close implements File.Close +func (UnimplementedFile) Close() (errno Errno) { return } diff --git a/vendor/github.com/tetratelabs/wazero/fsconfig.go b/vendor/github.com/tetratelabs/wazero/fsconfig.go new file mode 100644 index 000000000..c21b6e80b --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/fsconfig.go @@ -0,0 +1,213 @@ +package wazero + +import ( + "io/fs" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/sys" + "github.com/tetratelabs/wazero/internal/sysfs" +) + +// FSConfig configures filesystem paths the embedding host allows the wasm +// guest to access. Unconfigured paths are not allowed, so functions like +// `path_open` result in unsupported errors (e.g. syscall.ENOSYS). +// +// # Guest Path +// +// `guestPath` is the name of the path the guest should use a filesystem for, or +// empty for any files. +// +// All `guestPath` paths are normalized, specifically removing any leading or +// trailing slashes. This means "/", "./" or "." all coerce to empty "". +// +// Multiple `guestPath` values can be configured, but the last longest match +// wins. For example, if "tmp", then "" were added, a request to open +// "tmp/foo.txt" use the filesystem associated with "tmp" even though a wider +// path, "" (all files), was added later. +// +// A `guestPath` of "." coerces to the empty string "" because the current +// directory is handled by the guest. In other words, the guest resolves ites +// current directory prior to requesting files. +// +// More notes on `guestPath` +// - Working directories are typically tracked in wasm, though possible some +// relative paths are requested. For example, TinyGo may attempt to resolve +// a path "../.." in unit tests. +// - Zig uses the first path name it sees as the initial working directory of +// the process. +// +// # Scope +// +// Configuration here is module instance scoped. 
This means you can use the +// same configuration for multiple calls to Runtime.InstantiateModule. Each +// module will have a different file descriptor table. Any errors accessing +// resources allowed here are deferred to instantiation time of each module. +// +// Any host resources present at the time of configuration, but deleted before +// Runtime.InstantiateModule will trap/panic when the guest wasm initializes or +// calls functions like `fd_read`. +// +// # Windows +// +// While wazero supports Windows as a platform, all known compilers use POSIX +// conventions at runtime. For example, even when running on Windows, paths +// used by wasm are separated by forward slash (/), not backslash (\). +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. +// - FSConfig is immutable. Each WithXXX function returns a new instance +// including the corresponding change. +// - RATIONALE.md includes design background and relationship to WebAssembly +// System Interfaces (WASI). +type FSConfig interface { + // WithDirMount assigns a directory at `dir` to any paths beginning at + // `guestPath`. + // + // For example, `dirPath` as / (or c:\ in Windows), makes the entire host + // volume writeable to the path on the guest. The `guestPath` is always a + // POSIX style path, slash (/) delimited, even if run on Windows. + // + // If the same `guestPath` was assigned before, this overrides its value, + // retaining the original precedence. See the documentation of FSConfig for + // more details on `guestPath`. + // + // # Isolation + // + // The guest will have full access to this directory including escaping it + // via relative path lookups like "../../". Full access includes operations + // such as creating or deleting files, limited to any host level access + // controls. 
+ // + // # os.DirFS + // + // This configuration optimizes for WASI compatibility which is sometimes + // at odds with the behavior of os.DirFS. Hence, this will not behave + // exactly the same as os.DirFS. See /RATIONALE.md for more. + WithDirMount(dir, guestPath string) FSConfig + + // WithReadOnlyDirMount assigns a directory at `dir` to any paths + // beginning at `guestPath`. + // + // This is the same as WithDirMount except only read operations are + // permitted. However, escaping the directory via relative path lookups + // like "../../" is still allowed. + WithReadOnlyDirMount(dir, guestPath string) FSConfig + + // WithFSMount assigns a fs.FS file system for any paths beginning at + // `guestPath`. + // + // If the same `guestPath` was assigned before, this overrides its value, + // retaining the original precedence. See the documentation of FSConfig for + // more details on `guestPath`. + // + // # Isolation + // + // fs.FS does not restrict the ability to overwrite returned files via + // io.Writer. Moreover, os.DirFS documentation includes important notes + // about isolation, which also applies to fs.Sub. As of Go 1.19, the + // built-in file-systems are not jailed (chroot). See + // https://github.com/golang/go/issues/42322 + // + // # os.DirFS + // + // Due to limited control and functionality available in os.DirFS, we + // advise using WithDirMount instead. There will be behavior differences + // between os.DirFS and WithDirMount, as the latter biases towards what's + // expected from WASI implementations. + // + // # Custom fs.FileInfo + // + // The underlying implementation supports data not usually in fs.FileInfo + // when `info.Sys` returns *sys.Stat_t. For example, a custom fs.FS can use + // this approach to generate or mask sys.Inode data. 
Such a filesystem + // needs to decorate any functions that can return fs.FileInfo: + // + // - `Stat` as defined on `fs.File` (always) + // - `Readdir` as defined on `os.File` (if defined) + // + // See sys.NewStat_t for examples. + WithFSMount(fs fs.FS, guestPath string) FSConfig +} + +type fsConfig struct { + // fs are the currently configured filesystems. + fs []experimentalsys.FS + // guestPaths are the user-supplied names of the filesystems, retained for + // error messages and fmt.Stringer. + guestPaths []string + // guestPathToFS are the normalized paths to the currently configured + // filesystems, used for de-duplicating. + guestPathToFS map[string]int +} + +// NewFSConfig returns a FSConfig that can be used for configuring module instantiation. +func NewFSConfig() FSConfig { + return &fsConfig{guestPathToFS: map[string]int{}} +} + +// clone makes a deep copy of this module config. +func (c *fsConfig) clone() *fsConfig { + ret := *c // copy except slice and maps which share a ref + ret.fs = make([]experimentalsys.FS, 0, len(c.fs)) + ret.fs = append(ret.fs, c.fs...) + ret.guestPaths = make([]string, 0, len(c.guestPaths)) + ret.guestPaths = append(ret.guestPaths, c.guestPaths...) 
+ ret.guestPathToFS = make(map[string]int, len(c.guestPathToFS)) + for key, value := range c.guestPathToFS { + ret.guestPathToFS[key] = value + } + return &ret +} + +// WithDirMount implements FSConfig.WithDirMount +func (c *fsConfig) WithDirMount(dir, guestPath string) FSConfig { + return c.WithSysFSMount(sysfs.DirFS(dir), guestPath) +} + +// WithReadOnlyDirMount implements FSConfig.WithReadOnlyDirMount +func (c *fsConfig) WithReadOnlyDirMount(dir, guestPath string) FSConfig { + return c.WithSysFSMount(&sysfs.ReadFS{FS: sysfs.DirFS(dir)}, guestPath) +} + +// WithFSMount implements FSConfig.WithFSMount +func (c *fsConfig) WithFSMount(fs fs.FS, guestPath string) FSConfig { + var adapted experimentalsys.FS + if fs != nil { + adapted = &sysfs.AdaptFS{FS: fs} + } + return c.WithSysFSMount(adapted, guestPath) +} + +// WithSysFSMount implements sysfs.FSConfig +func (c *fsConfig) WithSysFSMount(fs experimentalsys.FS, guestPath string) FSConfig { + if _, ok := fs.(experimentalsys.UnimplementedFS); ok { + return c // don't add fake paths. + } + cleaned := sys.StripPrefixesAndTrailingSlash(guestPath) + ret := c.clone() + if i, ok := ret.guestPathToFS[cleaned]; ok { + ret.fs[i] = fs + ret.guestPaths[i] = guestPath + } else if fs != nil { + ret.guestPathToFS[cleaned] = len(ret.fs) + ret.fs = append(ret.fs, fs) + ret.guestPaths = append(ret.guestPaths, guestPath) + } + return ret +} + +// preopens returns the possible nil index-correlated preopened filesystems +// with guest paths. 
+func (c *fsConfig) preopens() ([]experimentalsys.FS, []string) { + preopenCount := len(c.fs) + if preopenCount == 0 { + return nil, nil + } + fs := make([]experimentalsys.FS, len(c.fs)) + copy(fs, c.fs) + guestPaths := make([]string, len(c.guestPaths)) + copy(guestPaths, c.guestPaths) + return fs, guestPaths +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/descriptor/table.go b/vendor/github.com/tetratelabs/wazero/internal/descriptor/table.go new file mode 100644 index 000000000..542958bc7 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/descriptor/table.go @@ -0,0 +1,164 @@ +package descriptor + +import "math/bits" + +// Table is a data structure mapping 32 bit descriptor to items. +// +// # Negative keys are invalid. +// +// Negative keys (e.g. -1) are invalid inputs and will return a corresponding +// not-found value. This matches POSIX behavior of file descriptors. +// See https://pubs.opengroup.org/onlinepubs/9699919799/functions/dirfd.html#tag_16_90 +// +// # Data structure design +// +// The data structure optimizes for memory density and lookup performance, +// trading off compute at insertion time. This is a useful compromise for the +// use cases we employ it with: items are usually accessed a lot more often +// than they are inserted, each operation requires a table lookup, so we are +// better off spending extra compute to insert items in the table in order to +// get cheaper lookups. Memory efficiency is also crucial to support scaling +// with programs that maintain thousands of items: having a high or non-linear +// memory-to-item ratio could otherwise be used as an attack vector by +// malicious applications attempting to damage performance of the host. +type Table[Key ~int32, Item any] struct { + masks []uint64 + items []Item +} + +// Len returns the number of items stored in the table. 
+func (t *Table[Key, Item]) Len() (n int) { + // We could make this a O(1) operation if we cached the number of items in + // the table. More state usually means more problems, so until we have a + // clear need for this, the simple implementation may be a better trade off. + for _, mask := range t.masks { + n += bits.OnesCount64(mask) + } + return n +} + +// grow ensures that t has enough room for n items, potentially reallocating the +// internal buffers if their capacity was too small to hold this many items. +func (t *Table[Key, Item]) grow(n int) { + // Round up to a multiple of 64 since this is the smallest increment due to + // using 64 bits masks. + n = (n*64 + 63) / 64 + + if n > len(t.masks) { + masks := make([]uint64, n) + copy(masks, t.masks) + + items := make([]Item, n*64) + copy(items, t.items) + + t.masks = masks + t.items = items + } +} + +// Insert inserts the given item to the table, returning the key that it is +// mapped to or false if the table was full. +// +// The method does not perform deduplication, it is possible for the same item +// to be inserted multiple times, each insertion will return a different key. +func (t *Table[Key, Item]) Insert(item Item) (key Key, ok bool) { + offset := 0 +insert: + // Note: this loop could be made a lot more efficient using vectorized + // operations: 256 bits vector registers would yield a theoretical 4x + // speed up (e.g. using AVX2). + for index, mask := range t.masks[offset:] { + if ^mask != 0 { // not full? + shift := bits.TrailingZeros64(^mask) + index += offset + key = Key(index)*64 + Key(shift) + t.items[key] = item + t.masks[index] = mask | uint64(1<= 0 + } + } + + offset = len(t.masks) + n := 2 * len(t.masks) + if n == 0 { + n = 1 + } + + t.grow(n) + goto insert +} + +// Lookup returns the item associated with the given key (may be nil). 
+func (t *Table[Key, Item]) Lookup(key Key) (item Item, found bool) { + if key < 0 { // invalid key + return + } + if i := int(key); i >= 0 && i < len(t.items) { + index := uint(key) / 64 + shift := uint(key) % 64 + if (t.masks[index] & (1 << shift)) != 0 { + item, found = t.items[i], true + } + } + return +} + +// InsertAt inserts the given `item` at the item descriptor `key`. This returns +// false if the insert was impossible due to negative key. +func (t *Table[Key, Item]) InsertAt(item Item, key Key) bool { + if key < 0 { + return false + } + if diff := int(key) - t.Len(); diff > 0 { + t.grow(diff) + } + index := uint(key) / 64 + shift := uint(key) % 64 + t.masks[index] |= 1 << shift + t.items[key] = item + return true +} + +// Delete deletes the item stored at the given key from the table. +func (t *Table[Key, Item]) Delete(key Key) { + if key < 0 { // invalid key + return + } + if index, shift := key/64, key%64; int(index) < len(t.masks) { + mask := t.masks[index] + if (mask & (1 << shift)) != 0 { + var zero Item + t.items[key] = zero + t.masks[index] = mask & ^uint64(1< 0 { + // We reserve the stack slots for result values below the return call frame slots. + if diff := c.sig.ResultNumInUint64 - c.sig.ParamNumInUint64; diff > 0 { + current += diff + } + } + + // Non-func param locals Start after the return call frame. + current += c.callFrameStackSizeInUint64 + + for _, lt := range c.localTypes { + c.localIndexToStackHeightInUint64 = append(c.localIndexToStackHeightInUint64, current) + if lt == wasm.ValueTypeV128 { + current++ + } + current++ + } + + // Push function arguments. + for _, t := range c.sig.Params { + c.stackPush(wasmValueTypeTounsignedType(t)) + } + + if c.callFrameStackSizeInUint64 > 0 { + // Reserve the stack slots for results. + for i := 0; i < c.sig.ResultNumInUint64-c.sig.ParamNumInUint64; i++ { + c.stackPush(unsignedTypeI64) + } + + // Reserve the stack slots for call frame. 
+ for i := 0; i < c.callFrameStackSizeInUint64; i++ { + c.stackPush(unsignedTypeI64) + } + } +} + +// compiler is in charge of lowering raw Wasm function body to get compilationResult. +// This is created per *wasm.Module and reused for all functions in it to reduce memory allocations. +type compiler struct { + module *wasm.Module + enabledFeatures api.CoreFeatures + callFrameStackSizeInUint64 int + stack []unsignedType + currentFrameID uint32 + controlFrames controlFrames + unreachableState struct { + on bool + depth int + } + pc, currentOpPC uint64 + result compilationResult + + // body holds the code for the function's body where Wasm instructions are stored. + body []byte + // sig is the function type of the target function. + sig *wasm.FunctionType + // localTypes holds the target function locals' value types except function params. + localTypes []wasm.ValueType + // localIndexToStackHeightInUint64 maps the local index (starting with function params) to the stack height + // where the local is places. This is the necessary mapping for functions who contain vector type locals. + localIndexToStackHeightInUint64 []int + + // types hold all the function types in the module where the targe function exists. + types []wasm.FunctionType + // funcs holds the type indexes for all declared functions in the module where the target function exists. + funcs []uint32 + // globals holds the global types for all declared globals in the module where the target function exists. + globals []wasm.GlobalType + + // needSourceOffset is true if this module requires DWARF based stack trace. + needSourceOffset bool + // bodyOffsetInCodeSection is the offset of the body of this function in the original Wasm binary's code section. + bodyOffsetInCodeSection uint64 + + ensureTermination bool + // Pre-allocated bytes.Reader to be used in various places. + br *bytes.Reader + funcTypeToSigs funcTypeToIRSignatures + + next int +} + +//lint:ignore U1000 for debugging only. 
+func (c *compiler) stackDump() string { + strs := make([]string, 0, len(c.stack)) + for _, s := range c.stack { + strs = append(strs, s.String()) + } + return "[" + strings.Join(strs, ", ") + "]" +} + +func (c *compiler) markUnreachable() { + c.unreachableState.on = true +} + +func (c *compiler) resetUnreachable() { + c.unreachableState.on = false +} + +// memoryType is the type of memory in a compiled module. +type memoryType byte + +const ( + // memoryTypeNone indicates there is no memory. + memoryTypeNone memoryType = iota + // memoryTypeStandard indicates there is a non-shared memory. + memoryTypeStandard + // memoryTypeShared indicates there is a shared memory. + memoryTypeShared +) + +type compilationResult struct { + // Operations holds interpreterir operations compiled from Wasm instructions in a Wasm function. + Operations []unionOperation + + // IROperationSourceOffsetsInWasmBinary is index-correlated with Operation and maps each operation to the corresponding source instruction's + // offset in the original WebAssembly binary. + // Non nil only when the given Wasm module has the DWARF section. + IROperationSourceOffsetsInWasmBinary []uint64 + + // LabelCallers maps label to the number of callers to that label. + // Here "callers" means that the call-sites which jumps to the label with br, br_if or br_table + // instructions. + // + // Note: zero possible and allowed in wasm. e.g. + // + // (block + // (br 0) + // (block i32.const 1111) + // ) + // + // This example the label corresponding to `(block i32.const 1111)` is never be reached at runtime because `br 0` exits the function before we reach there + LabelCallers map[label]uint32 + // UsesMemory is true if this function might use memory. + UsesMemory bool + + // The following fields are per-module values, not per-function. + + // Globals holds all the declarations of globals in the module from which this function is compiled. 
+ Globals []wasm.GlobalType + // Functions holds all the declarations of function in the module from which this function is compiled, including itself. + Functions []wasm.Index + // Types holds all the types in the module from which this function is compiled. + Types []wasm.FunctionType + // Memory indicates the type of memory of the module. + Memory memoryType + // HasTable is true if the module from which this function is compiled has table declaration. + HasTable bool + // HasDataInstances is true if the module has data instances which might be used by memory.init or data.drop instructions. + HasDataInstances bool + // HasDataInstances is true if the module has element instances which might be used by table.init or elem.drop instructions. + HasElementInstances bool +} + +// newCompiler returns the new *compiler for the given parameters. +// Use compiler.Next function to get compilation result per function. +func newCompiler(enabledFeatures api.CoreFeatures, callFrameStackSizeInUint64 int, module *wasm.Module, ensureTermination bool) (*compiler, error) { + functions, globals, mem, tables, err := module.AllDeclarations() + if err != nil { + return nil, err + } + + hasTable, hasDataInstances, hasElementInstances := len(tables) > 0, + len(module.DataSection) > 0, len(module.ElementSection) > 0 + + var mt memoryType + switch { + case mem == nil: + mt = memoryTypeNone + case mem.IsShared: + mt = memoryTypeShared + default: + mt = memoryTypeStandard + } + + types := module.TypeSection + + c := &compiler{ + module: module, + enabledFeatures: enabledFeatures, + controlFrames: controlFrames{}, + callFrameStackSizeInUint64: callFrameStackSizeInUint64, + result: compilationResult{ + Globals: globals, + Functions: functions, + Types: types, + Memory: mt, + HasTable: hasTable, + HasDataInstances: hasDataInstances, + HasElementInstances: hasElementInstances, + LabelCallers: map[label]uint32{}, + }, + globals: globals, + funcs: functions, + types: types, + ensureTermination: 
ensureTermination, + br: bytes.NewReader(nil), + funcTypeToSigs: funcTypeToIRSignatures{ + indirectCalls: make([]*signature, len(types)), + directCalls: make([]*signature, len(types)), + wasmTypes: types, + }, + needSourceOffset: module.DWARFLines != nil, + } + return c, nil +} + +// Next returns the next compilationResult for this compiler. +func (c *compiler) Next() (*compilationResult, error) { + funcIndex := c.next + code := &c.module.CodeSection[funcIndex] + sig := &c.types[c.module.FunctionSection[funcIndex]] + + // Reset the previous result. + c.result.Operations = c.result.Operations[:0] + c.result.IROperationSourceOffsetsInWasmBinary = c.result.IROperationSourceOffsetsInWasmBinary[:0] + c.result.UsesMemory = false + // Clears the existing entries in LabelCallers. + for frameID := uint32(0); frameID <= c.currentFrameID; frameID++ { + for k := labelKind(0); k < labelKindNum; k++ { + delete(c.result.LabelCallers, newLabel(k, frameID)) + } + } + // Reset the previous states. + c.pc = 0 + c.currentOpPC = 0 + c.currentFrameID = 0 + c.unreachableState.on, c.unreachableState.depth = false, 0 + + if err := c.compile(sig, code.Body, code.LocalTypes, code.BodyOffsetInCodeSection); err != nil { + return nil, err + } + c.next++ + return &c.result, nil +} + +// Compile lowers given function instance into interpreterir operations +// so that the resulting operations can be consumed by the interpreter +// or the compiler compilation engine. +func (c *compiler) compile(sig *wasm.FunctionType, body []byte, localTypes []wasm.ValueType, bodyOffsetInCodeSection uint64) error { + // Set function specific fields. + c.body = body + c.localTypes = localTypes + c.sig = sig + c.bodyOffsetInCodeSection = bodyOffsetInCodeSection + + // Reuses the underlying slices. + c.stack = c.stack[:0] + c.controlFrames.frames = c.controlFrames.frames[:0] + + c.initializeStack() + + // Emit const expressions for locals. 
+ // Note that here we don't take function arguments + // into account, meaning that callers must push + // arguments before entering into the function body. + for _, t := range c.localTypes { + c.emitDefaultValue(t) + } + + // Insert the function control frame. + c.controlFrames.push(controlFrame{ + frameID: c.nextFrameID(), + blockType: c.sig, + kind: controlFrameKindFunction, + }) + + // Now, enter the function body. + for !c.controlFrames.empty() && c.pc < uint64(len(c.body)) { + if err := c.handleInstruction(); err != nil { + return fmt.Errorf("handling instruction: %w", err) + } + } + return nil +} + +// Translate the current Wasm instruction to interpreterir's operations, +// and emit the results into c.results. +func (c *compiler) handleInstruction() error { + op := c.body[c.pc] + c.currentOpPC = c.pc + if false { + var instName string + if op == wasm.OpcodeVecPrefix { + instName = wasm.VectorInstructionName(c.body[c.pc+1]) + } else if op == wasm.OpcodeAtomicPrefix { + instName = wasm.AtomicInstructionName(c.body[c.pc+1]) + } else if op == wasm.OpcodeMiscPrefix { + instName = wasm.MiscInstructionName(c.body[c.pc+1]) + } else { + instName = wasm.InstructionName(op) + } + fmt.Printf("handling %s, unreachable_state(on=%v,depth=%d), stack=%v\n", + instName, c.unreachableState.on, c.unreachableState.depth, c.stack, + ) + } + + var peekValueType unsignedType + if len(c.stack) > 0 { + peekValueType = c.stackPeek() + } + + // Modify the stack according the current instruction. + // Note that some instructions will read "index" in + // applyToStack and advance c.pc inside the function. + index, err := c.applyToStack(op) + if err != nil { + return fmt.Errorf("apply stack failed for %s: %w", wasm.InstructionName(op), err) + } + // Now we handle each instruction, and + // emit the corresponding interpreterir operations to the results. 
+operatorSwitch: + switch op { + case wasm.OpcodeUnreachable: + c.emit(newOperationUnreachable()) + c.markUnreachable() + case wasm.OpcodeNop: + // Nop is noop! + case wasm.OpcodeBlock: + c.br.Reset(c.body[c.pc+1:]) + bt, num, err := wasm.DecodeBlockType(c.types, c.br, c.enabledFeatures) + if err != nil { + return fmt.Errorf("reading block type for block instruction: %w", err) + } + c.pc += num + + if c.unreachableState.on { + // If it is currently in unreachable, + // just remove the entire block. + c.unreachableState.depth++ + break operatorSwitch + } + + // Create a new frame -- entering this block. + frame := controlFrame{ + frameID: c.nextFrameID(), + originalStackLenWithoutParam: len(c.stack) - len(bt.Params), + kind: controlFrameKindBlockWithoutContinuationLabel, + blockType: bt, + } + c.controlFrames.push(frame) + + case wasm.OpcodeLoop: + c.br.Reset(c.body[c.pc+1:]) + bt, num, err := wasm.DecodeBlockType(c.types, c.br, c.enabledFeatures) + if err != nil { + return fmt.Errorf("reading block type for loop instruction: %w", err) + } + c.pc += num + + if c.unreachableState.on { + // If it is currently in unreachable, + // just remove the entire block. + c.unreachableState.depth++ + break operatorSwitch + } + + // Create a new frame -- entering loop. + frame := controlFrame{ + frameID: c.nextFrameID(), + originalStackLenWithoutParam: len(c.stack) - len(bt.Params), + kind: controlFrameKindLoop, + blockType: bt, + } + c.controlFrames.push(frame) + + // Prep labels for inside and the continuation of this loop. + loopLabel := newLabel(labelKindHeader, frame.frameID) + c.result.LabelCallers[loopLabel]++ + + // Emit the branch operation to enter inside the loop. + c.emit(newOperationBr(loopLabel)) + c.emit(newOperationLabel(loopLabel)) + + // Insert the exit code check on the loop header, which is the only necessary point in the function body + // to prevent infinite loop. 
+ // + // Note that this is a little aggressive: this checks the exit code regardless the loop header is actually + // the loop. In other words, this checks even when no br/br_if/br_table instructions jumping to this loop + // exist. However, in reality, that shouldn't be an issue since such "noop" loop header will highly likely be + // optimized out by almost all guest language compilers which have the control flow optimization passes. + if c.ensureTermination { + c.emit(newOperationBuiltinFunctionCheckExitCode()) + } + case wasm.OpcodeIf: + c.br.Reset(c.body[c.pc+1:]) + bt, num, err := wasm.DecodeBlockType(c.types, c.br, c.enabledFeatures) + if err != nil { + return fmt.Errorf("reading block type for if instruction: %w", err) + } + c.pc += num + + if c.unreachableState.on { + // If it is currently in unreachable, + // just remove the entire block. + c.unreachableState.depth++ + break operatorSwitch + } + + // Create a new frame -- entering if. + frame := controlFrame{ + frameID: c.nextFrameID(), + originalStackLenWithoutParam: len(c.stack) - len(bt.Params), + // Note this will be set to controlFrameKindIfWithElse + // when else opcode found later. + kind: controlFrameKindIfWithoutElse, + blockType: bt, + } + c.controlFrames.push(frame) + + // Prep labels for if and else of this if. + thenLabel := newLabel(labelKindHeader, frame.frameID) + elseLabel := newLabel(labelKindElse, frame.frameID) + c.result.LabelCallers[thenLabel]++ + c.result.LabelCallers[elseLabel]++ + + // Emit the branch operation to enter the then block. + c.emit(newOperationBrIf(thenLabel, elseLabel, nopinclusiveRange)) + c.emit(newOperationLabel(thenLabel)) + case wasm.OpcodeElse: + frame := c.controlFrames.top() + if c.unreachableState.on && c.unreachableState.depth > 0 { + // If it is currently in unreachable, and the nested if, + // just remove the entire else block. 
+ break operatorSwitch + } else if c.unreachableState.on { + // If it is currently in unreachable, and the non-nested if, + // reset the stack so we can correctly handle the else block. + top := c.controlFrames.top() + c.stack = c.stack[:top.originalStackLenWithoutParam] + top.kind = controlFrameKindIfWithElse + + // Re-push the parameters to the if block so that else block can use them. + for _, t := range frame.blockType.Params { + c.stackPush(wasmValueTypeTounsignedType(t)) + } + + // We are no longer unreachable in else frame, + // so emit the correct label, and reset the unreachable state. + elseLabel := newLabel(labelKindElse, frame.frameID) + c.resetUnreachable() + c.emit( + newOperationLabel(elseLabel), + ) + break operatorSwitch + } + + // Change the Kind of this If block, indicating that + // the if has else block. + frame.kind = controlFrameKindIfWithElse + + // We need to reset the stack so that + // the values pushed inside the then block + // do not affect the else block. + dropOp := newOperationDrop(c.getFrameDropRange(frame, false)) + + // Reset the stack manipulated by the then block, and re-push the block param types to the stack. + + c.stack = c.stack[:frame.originalStackLenWithoutParam] + for _, t := range frame.blockType.Params { + c.stackPush(wasmValueTypeTounsignedType(t)) + } + + // Prep labels for else and the continuation of this if block. + elseLabel := newLabel(labelKindElse, frame.frameID) + continuationLabel := newLabel(labelKindContinuation, frame.frameID) + c.result.LabelCallers[continuationLabel]++ + + // Emit the instructions for exiting the if loop, + // and then the initiation of else block. + c.emit(dropOp) + // Jump to the continuation of this block. + c.emit(newOperationBr(continuationLabel)) + // Initiate the else block. 
+ c.emit(newOperationLabel(elseLabel)) + case wasm.OpcodeEnd: + if c.unreachableState.on && c.unreachableState.depth > 0 { + c.unreachableState.depth-- + break operatorSwitch + } else if c.unreachableState.on { + c.resetUnreachable() + + frame := c.controlFrames.pop() + if c.controlFrames.empty() { + return nil + } + + c.stack = c.stack[:frame.originalStackLenWithoutParam] + for _, t := range frame.blockType.Results { + c.stackPush(wasmValueTypeTounsignedType(t)) + } + + continuationLabel := newLabel(labelKindContinuation, frame.frameID) + if frame.kind == controlFrameKindIfWithoutElse { + // Emit the else label. + elseLabel := newLabel(labelKindElse, frame.frameID) + c.result.LabelCallers[continuationLabel]++ + c.emit(newOperationLabel(elseLabel)) + c.emit(newOperationBr(continuationLabel)) + c.emit(newOperationLabel(continuationLabel)) + } else { + c.emit( + newOperationLabel(continuationLabel), + ) + } + + break operatorSwitch + } + + frame := c.controlFrames.pop() + + // We need to reset the stack so that + // the values pushed inside the block. + dropOp := newOperationDrop(c.getFrameDropRange(frame, true)) + c.stack = c.stack[:frame.originalStackLenWithoutParam] + + // Push the result types onto the stack. + for _, t := range frame.blockType.Results { + c.stackPush(wasmValueTypeTounsignedType(t)) + } + + // Emit the instructions according to the Kind of the current control frame. + switch frame.kind { + case controlFrameKindFunction: + if !c.controlFrames.empty() { + // Should never happen. If so, there's a bug in the translation. + panic("bug: found more function control frames") + } + // Return from function. + c.emit(dropOp) + c.emit(newOperationBr(newLabel(labelKindReturn, 0))) + case controlFrameKindIfWithoutElse: + // This case we have to emit "empty" else label. 
+ elseLabel := newLabel(labelKindElse, frame.frameID) + continuationLabel := newLabel(labelKindContinuation, frame.frameID) + c.result.LabelCallers[continuationLabel] += 2 + c.emit(dropOp) + c.emit(newOperationBr(continuationLabel)) + // Emit the else which soon branches into the continuation. + c.emit(newOperationLabel(elseLabel)) + c.emit(newOperationBr(continuationLabel)) + // Initiate the continuation. + c.emit(newOperationLabel(continuationLabel)) + case controlFrameKindBlockWithContinuationLabel, + controlFrameKindIfWithElse: + continuationLabel := newLabel(labelKindContinuation, frame.frameID) + c.result.LabelCallers[continuationLabel]++ + c.emit(dropOp) + c.emit(newOperationBr(continuationLabel)) + c.emit(newOperationLabel(continuationLabel)) + case controlFrameKindLoop, controlFrameKindBlockWithoutContinuationLabel: + c.emit( + dropOp, + ) + default: + // Should never happen. If so, there's a bug in the translation. + panic(fmt.Errorf("bug: invalid control frame Kind: 0x%x", frame.kind)) + } + + case wasm.OpcodeBr: + targetIndex, n, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("read the target for br_if: %w", err) + } + c.pc += n + + if c.unreachableState.on { + // If it is currently in unreachable, br is no-op. + break operatorSwitch + } + + targetFrame := c.controlFrames.get(int(targetIndex)) + targetFrame.ensureContinuation() + dropOp := newOperationDrop(c.getFrameDropRange(targetFrame, false)) + targetID := targetFrame.asLabel() + c.result.LabelCallers[targetID]++ + c.emit(dropOp) + c.emit(newOperationBr(targetID)) + // Br operation is stack-polymorphic, and mark the state as unreachable. + // That means subsequent instructions in the current control frame are "unreachable" + // and can be safely removed. 
+ c.markUnreachable() + case wasm.OpcodeBrIf: + targetIndex, n, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("read the target for br_if: %w", err) + } + c.pc += n + + if c.unreachableState.on { + // If it is currently in unreachable, br-if is no-op. + break operatorSwitch + } + + targetFrame := c.controlFrames.get(int(targetIndex)) + targetFrame.ensureContinuation() + drop := c.getFrameDropRange(targetFrame, false) + target := targetFrame.asLabel() + c.result.LabelCallers[target]++ + + continuationLabel := newLabel(labelKindHeader, c.nextFrameID()) + c.result.LabelCallers[continuationLabel]++ + c.emit(newOperationBrIf(target, continuationLabel, drop)) + // Start emitting else block operations. + c.emit(newOperationLabel(continuationLabel)) + case wasm.OpcodeBrTable: + c.br.Reset(c.body[c.pc+1:]) + r := c.br + numTargets, n, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("error reading number of targets in br_table: %w", err) + } + c.pc += n + + if c.unreachableState.on { + // If it is currently in unreachable, br_table is no-op. + // But before proceeding to the next instruction, we must advance the pc + // according to the number of br_table targets. + for i := uint32(0); i <= numTargets; i++ { // inclusive as we also need to read the index of default target. + _, n, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("error reading target %d in br_table: %w", i, err) + } + c.pc += n + } + break operatorSwitch + } + + // Read the branch targets. 
+ s := numTargets * 2 + targetLabels := make([]uint64, 2+s) // (label, inclusiveRange) * (default+numTargets) + for i := uint32(0); i < s; i += 2 { + l, n, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("error reading target %d in br_table: %w", i, err) + } + c.pc += n + targetFrame := c.controlFrames.get(int(l)) + targetFrame.ensureContinuation() + drop := c.getFrameDropRange(targetFrame, false) + targetLabel := targetFrame.asLabel() + targetLabels[i] = uint64(targetLabel) + targetLabels[i+1] = drop.AsU64() + c.result.LabelCallers[targetLabel]++ + } + + // Prep default target control frame. + l, n, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("error reading default target of br_table: %w", err) + } + c.pc += n + defaultTargetFrame := c.controlFrames.get(int(l)) + defaultTargetFrame.ensureContinuation() + defaultTargetDrop := c.getFrameDropRange(defaultTargetFrame, false) + defaultLabel := defaultTargetFrame.asLabel() + c.result.LabelCallers[defaultLabel]++ + targetLabels[s] = uint64(defaultLabel) + targetLabels[s+1] = defaultTargetDrop.AsU64() + c.emit(newOperationBrTable(targetLabels)) + + // br_table operation is stack-polymorphic, and mark the state as unreachable. + // That means subsequent instructions in the current control frame are "unreachable" + // and can be safely removed. + c.markUnreachable() + case wasm.OpcodeReturn: + functionFrame := c.controlFrames.functionFrame() + dropOp := newOperationDrop(c.getFrameDropRange(functionFrame, false)) + + // Cleanup the stack and then jmp to function frame's continuation (meaning return). + c.emit(dropOp) + c.emit(newOperationBr(functionFrame.asLabel())) + + // Return operation is stack-polymorphic, and mark the state as unreachable. + // That means subsequent instructions in the current control frame are "unreachable" + // and can be safely removed. 
+ c.markUnreachable() + case wasm.OpcodeCall: + c.emit( + newOperationCall(index), + ) + case wasm.OpcodeCallIndirect: + typeIndex := index + tableIndex, n, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("read target for br_table: %w", err) + } + c.pc += n + c.emit( + newOperationCallIndirect(typeIndex, tableIndex), + ) + case wasm.OpcodeDrop: + r := inclusiveRange{Start: 0, End: 0} + if peekValueType == unsignedTypeV128 { + // inclusiveRange is the range in uint64 representation, so dropping a vector value on top + // should be translated as drop [0..1] inclusively. + r.End++ + } + c.emit(newOperationDrop(r)) + case wasm.OpcodeSelect: + // If it is on the unreachable state, ignore the instruction. + if c.unreachableState.on { + break operatorSwitch + } + isTargetVector := c.stackPeek() == unsignedTypeV128 + c.emit( + newOperationSelect(isTargetVector), + ) + case wasm.OpcodeTypedSelect: + // Skips two bytes: vector size fixed to 1, and the value type for select. + c.pc += 2 + // If it is on the unreachable state, ignore the instruction. + if c.unreachableState.on { + break operatorSwitch + } + // Typed select is semantically equivalent to select at runtime. + isTargetVector := c.stackPeek() == unsignedTypeV128 + c.emit( + newOperationSelect(isTargetVector), + ) + case wasm.OpcodeLocalGet: + depth := c.localDepth(index) + if isVector := c.localType(index) == wasm.ValueTypeV128; !isVector { + c.emit( + // -1 because we already manipulated the stack before + // called localDepth ^^. + newOperationPick(depth-1, isVector), + ) + } else { + c.emit( + // -2 because we already manipulated the stack before + // called localDepth ^^. 
+ newOperationPick(depth-2, isVector), + ) + } + case wasm.OpcodeLocalSet: + depth := c.localDepth(index) + + isVector := c.localType(index) == wasm.ValueTypeV128 + if isVector { + c.emit( + // +2 because we already popped the operands for this operation from the c.stack before + // called localDepth ^^, + newOperationSet(depth+2, isVector), + ) + } else { + c.emit( + // +1 because we already popped the operands for this operation from the c.stack before + // called localDepth ^^, + newOperationSet(depth+1, isVector), + ) + } + case wasm.OpcodeLocalTee: + depth := c.localDepth(index) + isVector := c.localType(index) == wasm.ValueTypeV128 + if isVector { + c.emit(newOperationPick(1, isVector)) + c.emit(newOperationSet(depth+2, isVector)) + } else { + c.emit( + newOperationPick(0, isVector)) + c.emit(newOperationSet(depth+1, isVector)) + } + case wasm.OpcodeGlobalGet: + c.emit( + newOperationGlobalGet(index), + ) + case wasm.OpcodeGlobalSet: + c.emit( + newOperationGlobalSet(index), + ) + case wasm.OpcodeI32Load: + imm, err := c.readMemoryArg(wasm.OpcodeI32LoadName) + if err != nil { + return err + } + c.emit(newOperationLoad(unsignedTypeI32, imm)) + case wasm.OpcodeI64Load: + imm, err := c.readMemoryArg(wasm.OpcodeI64LoadName) + if err != nil { + return err + } + c.emit(newOperationLoad(unsignedTypeI64, imm)) + case wasm.OpcodeF32Load: + imm, err := c.readMemoryArg(wasm.OpcodeF32LoadName) + if err != nil { + return err + } + c.emit(newOperationLoad(unsignedTypeF32, imm)) + case wasm.OpcodeF64Load: + imm, err := c.readMemoryArg(wasm.OpcodeF64LoadName) + if err != nil { + return err + } + c.emit(newOperationLoad(unsignedTypeF64, imm)) + case wasm.OpcodeI32Load8S: + imm, err := c.readMemoryArg(wasm.OpcodeI32Load8SName) + if err != nil { + return err + } + c.emit(newOperationLoad8(signedInt32, imm)) + case wasm.OpcodeI32Load8U: + imm, err := c.readMemoryArg(wasm.OpcodeI32Load8UName) + if err != nil { + return err + } + c.emit(newOperationLoad8(signedUint32, imm)) + case 
wasm.OpcodeI32Load16S: + imm, err := c.readMemoryArg(wasm.OpcodeI32Load16SName) + if err != nil { + return err + } + c.emit(newOperationLoad16(signedInt32, imm)) + case wasm.OpcodeI32Load16U: + imm, err := c.readMemoryArg(wasm.OpcodeI32Load16UName) + if err != nil { + return err + } + c.emit(newOperationLoad16(signedUint32, imm)) + case wasm.OpcodeI64Load8S: + imm, err := c.readMemoryArg(wasm.OpcodeI64Load8SName) + if err != nil { + return err + } + c.emit(newOperationLoad8(signedInt64, imm)) + case wasm.OpcodeI64Load8U: + imm, err := c.readMemoryArg(wasm.OpcodeI64Load8UName) + if err != nil { + return err + } + c.emit(newOperationLoad8(signedUint64, imm)) + case wasm.OpcodeI64Load16S: + imm, err := c.readMemoryArg(wasm.OpcodeI64Load16SName) + if err != nil { + return err + } + c.emit(newOperationLoad16(signedInt64, imm)) + case wasm.OpcodeI64Load16U: + imm, err := c.readMemoryArg(wasm.OpcodeI64Load16UName) + if err != nil { + return err + } + c.emit(newOperationLoad16(signedUint64, imm)) + case wasm.OpcodeI64Load32S: + imm, err := c.readMemoryArg(wasm.OpcodeI64Load32SName) + if err != nil { + return err + } + c.emit(newOperationLoad32(true, imm)) + case wasm.OpcodeI64Load32U: + imm, err := c.readMemoryArg(wasm.OpcodeI64Load32UName) + if err != nil { + return err + } + c.emit(newOperationLoad32(false, imm)) + case wasm.OpcodeI32Store: + imm, err := c.readMemoryArg(wasm.OpcodeI32StoreName) + if err != nil { + return err + } + c.emit( + newOperationStore(unsignedTypeI32, imm), + ) + case wasm.OpcodeI64Store: + imm, err := c.readMemoryArg(wasm.OpcodeI64StoreName) + if err != nil { + return err + } + c.emit( + newOperationStore(unsignedTypeI64, imm), + ) + case wasm.OpcodeF32Store: + imm, err := c.readMemoryArg(wasm.OpcodeF32StoreName) + if err != nil { + return err + } + c.emit( + newOperationStore(unsignedTypeF32, imm), + ) + case wasm.OpcodeF64Store: + imm, err := c.readMemoryArg(wasm.OpcodeF64StoreName) + if err != nil { + return err + } + c.emit( + 
newOperationStore(unsignedTypeF64, imm), + ) + case wasm.OpcodeI32Store8: + imm, err := c.readMemoryArg(wasm.OpcodeI32Store8Name) + if err != nil { + return err + } + c.emit( + newOperationStore8(imm), + ) + case wasm.OpcodeI32Store16: + imm, err := c.readMemoryArg(wasm.OpcodeI32Store16Name) + if err != nil { + return err + } + c.emit( + newOperationStore16(imm), + ) + case wasm.OpcodeI64Store8: + imm, err := c.readMemoryArg(wasm.OpcodeI64Store8Name) + if err != nil { + return err + } + c.emit( + newOperationStore8(imm), + ) + case wasm.OpcodeI64Store16: + imm, err := c.readMemoryArg(wasm.OpcodeI64Store16Name) + if err != nil { + return err + } + c.emit( + newOperationStore16(imm), + ) + case wasm.OpcodeI64Store32: + imm, err := c.readMemoryArg(wasm.OpcodeI64Store32Name) + if err != nil { + return err + } + c.emit( + newOperationStore32(imm), + ) + case wasm.OpcodeMemorySize: + c.result.UsesMemory = true + c.pc++ // Skip the reserved one byte. + c.emit( + newOperationMemorySize(), + ) + case wasm.OpcodeMemoryGrow: + c.result.UsesMemory = true + c.pc++ // Skip the reserved one byte. 
+ c.emit( + newOperationMemoryGrow(), + ) + case wasm.OpcodeI32Const: + val, num, err := leb128.LoadInt32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + c.emit( + newOperationConstI32(uint32(val)), + ) + case wasm.OpcodeI64Const: + val, num, err := leb128.LoadInt64(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i64.const value: %v", err) + } + c.pc += num + c.emit( + newOperationConstI64(uint64(val)), + ) + case wasm.OpcodeF32Const: + v := math.Float32frombits(binary.LittleEndian.Uint32(c.body[c.pc+1:])) + c.pc += 4 + c.emit( + newOperationConstF32(v), + ) + case wasm.OpcodeF64Const: + v := math.Float64frombits(binary.LittleEndian.Uint64(c.body[c.pc+1:])) + c.pc += 8 + c.emit( + newOperationConstF64(v), + ) + case wasm.OpcodeI32Eqz: + c.emit( + newOperationEqz(unsignedInt32), + ) + case wasm.OpcodeI32Eq: + c.emit( + newOperationEq(unsignedTypeI32), + ) + case wasm.OpcodeI32Ne: + c.emit( + newOperationNe(unsignedTypeI32), + ) + case wasm.OpcodeI32LtS: + c.emit( + newOperationLt(signedTypeInt32), + ) + case wasm.OpcodeI32LtU: + c.emit( + newOperationLt(signedTypeUint32), + ) + case wasm.OpcodeI32GtS: + c.emit( + newOperationGt(signedTypeInt32), + ) + case wasm.OpcodeI32GtU: + c.emit( + newOperationGt(signedTypeUint32), + ) + case wasm.OpcodeI32LeS: + c.emit( + newOperationLe(signedTypeInt32), + ) + case wasm.OpcodeI32LeU: + c.emit( + newOperationLe(signedTypeUint32), + ) + case wasm.OpcodeI32GeS: + c.emit( + newOperationGe(signedTypeInt32), + ) + case wasm.OpcodeI32GeU: + c.emit( + newOperationGe(signedTypeUint32), + ) + case wasm.OpcodeI64Eqz: + c.emit( + newOperationEqz(unsignedInt64), + ) + case wasm.OpcodeI64Eq: + c.emit( + newOperationEq(unsignedTypeI64), + ) + case wasm.OpcodeI64Ne: + c.emit( + newOperationNe(unsignedTypeI64), + ) + case wasm.OpcodeI64LtS: + c.emit( + newOperationLt(signedTypeInt64), + ) + case wasm.OpcodeI64LtU: + c.emit( + newOperationLt(signedTypeUint64), + ) + 
case wasm.OpcodeI64GtS: + c.emit( + newOperationGt(signedTypeInt64), + ) + case wasm.OpcodeI64GtU: + c.emit( + newOperationGt(signedTypeUint64), + ) + case wasm.OpcodeI64LeS: + c.emit( + newOperationLe(signedTypeInt64), + ) + case wasm.OpcodeI64LeU: + c.emit( + newOperationLe(signedTypeUint64), + ) + case wasm.OpcodeI64GeS: + c.emit( + newOperationGe(signedTypeInt64), + ) + case wasm.OpcodeI64GeU: + c.emit( + newOperationGe(signedTypeUint64), + ) + case wasm.OpcodeF32Eq: + c.emit( + newOperationEq(unsignedTypeF32), + ) + case wasm.OpcodeF32Ne: + c.emit( + newOperationNe(unsignedTypeF32), + ) + case wasm.OpcodeF32Lt: + c.emit( + newOperationLt(signedTypeFloat32), + ) + case wasm.OpcodeF32Gt: + c.emit( + newOperationGt(signedTypeFloat32), + ) + case wasm.OpcodeF32Le: + c.emit( + newOperationLe(signedTypeFloat32), + ) + case wasm.OpcodeF32Ge: + c.emit( + newOperationGe(signedTypeFloat32), + ) + case wasm.OpcodeF64Eq: + c.emit( + newOperationEq(unsignedTypeF64), + ) + case wasm.OpcodeF64Ne: + c.emit( + newOperationNe(unsignedTypeF64), + ) + case wasm.OpcodeF64Lt: + c.emit( + newOperationLt(signedTypeFloat64), + ) + case wasm.OpcodeF64Gt: + c.emit( + newOperationGt(signedTypeFloat64), + ) + case wasm.OpcodeF64Le: + c.emit( + newOperationLe(signedTypeFloat64), + ) + case wasm.OpcodeF64Ge: + c.emit( + newOperationGe(signedTypeFloat64), + ) + case wasm.OpcodeI32Clz: + c.emit( + newOperationClz(unsignedInt32), + ) + case wasm.OpcodeI32Ctz: + c.emit( + newOperationCtz(unsignedInt32), + ) + case wasm.OpcodeI32Popcnt: + c.emit( + newOperationPopcnt(unsignedInt32), + ) + case wasm.OpcodeI32Add: + c.emit( + newOperationAdd(unsignedTypeI32), + ) + case wasm.OpcodeI32Sub: + c.emit( + newOperationSub(unsignedTypeI32), + ) + case wasm.OpcodeI32Mul: + c.emit( + newOperationMul(unsignedTypeI32), + ) + case wasm.OpcodeI32DivS: + c.emit( + newOperationDiv(signedTypeInt32), + ) + case wasm.OpcodeI32DivU: + c.emit( + newOperationDiv(signedTypeUint32), + ) + case wasm.OpcodeI32RemS: + 
c.emit( + newOperationRem(signedInt32), + ) + case wasm.OpcodeI32RemU: + c.emit( + newOperationRem(signedUint32), + ) + case wasm.OpcodeI32And: + c.emit( + newOperationAnd(unsignedInt32), + ) + case wasm.OpcodeI32Or: + c.emit( + newOperationOr(unsignedInt32), + ) + case wasm.OpcodeI32Xor: + c.emit( + newOperationXor(unsignedInt64), + ) + case wasm.OpcodeI32Shl: + c.emit( + newOperationShl(unsignedInt32), + ) + case wasm.OpcodeI32ShrS: + c.emit( + newOperationShr(signedInt32), + ) + case wasm.OpcodeI32ShrU: + c.emit( + newOperationShr(signedUint32), + ) + case wasm.OpcodeI32Rotl: + c.emit( + newOperationRotl(unsignedInt32), + ) + case wasm.OpcodeI32Rotr: + c.emit( + newOperationRotr(unsignedInt32), + ) + case wasm.OpcodeI64Clz: + c.emit( + newOperationClz(unsignedInt64), + ) + case wasm.OpcodeI64Ctz: + c.emit( + newOperationCtz(unsignedInt64), + ) + case wasm.OpcodeI64Popcnt: + c.emit( + newOperationPopcnt(unsignedInt64), + ) + case wasm.OpcodeI64Add: + c.emit( + newOperationAdd(unsignedTypeI64), + ) + case wasm.OpcodeI64Sub: + c.emit( + newOperationSub(unsignedTypeI64), + ) + case wasm.OpcodeI64Mul: + c.emit( + newOperationMul(unsignedTypeI64), + ) + case wasm.OpcodeI64DivS: + c.emit( + newOperationDiv(signedTypeInt64), + ) + case wasm.OpcodeI64DivU: + c.emit( + newOperationDiv(signedTypeUint64), + ) + case wasm.OpcodeI64RemS: + c.emit( + newOperationRem(signedInt64), + ) + case wasm.OpcodeI64RemU: + c.emit( + newOperationRem(signedUint64), + ) + case wasm.OpcodeI64And: + c.emit( + newOperationAnd(unsignedInt64), + ) + case wasm.OpcodeI64Or: + c.emit( + newOperationOr(unsignedInt64), + ) + case wasm.OpcodeI64Xor: + c.emit( + newOperationXor(unsignedInt64), + ) + case wasm.OpcodeI64Shl: + c.emit( + newOperationShl(unsignedInt64), + ) + case wasm.OpcodeI64ShrS: + c.emit( + newOperationShr(signedInt64), + ) + case wasm.OpcodeI64ShrU: + c.emit( + newOperationShr(signedUint64), + ) + case wasm.OpcodeI64Rotl: + c.emit( + newOperationRotl(unsignedInt64), + ) + case 
wasm.OpcodeI64Rotr: + c.emit( + newOperationRotr(unsignedInt64), + ) + case wasm.OpcodeF32Abs: + c.emit( + newOperationAbs(f32), + ) + case wasm.OpcodeF32Neg: + c.emit( + newOperationNeg(f32), + ) + case wasm.OpcodeF32Ceil: + c.emit( + newOperationCeil(f32), + ) + case wasm.OpcodeF32Floor: + c.emit( + newOperationFloor(f32), + ) + case wasm.OpcodeF32Trunc: + c.emit( + newOperationTrunc(f32), + ) + case wasm.OpcodeF32Nearest: + c.emit( + newOperationNearest(f32), + ) + case wasm.OpcodeF32Sqrt: + c.emit( + newOperationSqrt(f32), + ) + case wasm.OpcodeF32Add: + c.emit( + newOperationAdd(unsignedTypeF32), + ) + case wasm.OpcodeF32Sub: + c.emit( + newOperationSub(unsignedTypeF32), + ) + case wasm.OpcodeF32Mul: + c.emit( + newOperationMul(unsignedTypeF32), + ) + case wasm.OpcodeF32Div: + c.emit( + newOperationDiv(signedTypeFloat32), + ) + case wasm.OpcodeF32Min: + c.emit( + newOperationMin(f32), + ) + case wasm.OpcodeF32Max: + c.emit( + newOperationMax(f32), + ) + case wasm.OpcodeF32Copysign: + c.emit( + newOperationCopysign(f32), + ) + case wasm.OpcodeF64Abs: + c.emit( + newOperationAbs(f64), + ) + case wasm.OpcodeF64Neg: + c.emit( + newOperationNeg(f64), + ) + case wasm.OpcodeF64Ceil: + c.emit( + newOperationCeil(f64), + ) + case wasm.OpcodeF64Floor: + c.emit( + newOperationFloor(f64), + ) + case wasm.OpcodeF64Trunc: + c.emit( + newOperationTrunc(f64), + ) + case wasm.OpcodeF64Nearest: + c.emit( + newOperationNearest(f64), + ) + case wasm.OpcodeF64Sqrt: + c.emit( + newOperationSqrt(f64), + ) + case wasm.OpcodeF64Add: + c.emit( + newOperationAdd(unsignedTypeF64), + ) + case wasm.OpcodeF64Sub: + c.emit( + newOperationSub(unsignedTypeF64), + ) + case wasm.OpcodeF64Mul: + c.emit( + newOperationMul(unsignedTypeF64), + ) + case wasm.OpcodeF64Div: + c.emit( + newOperationDiv(signedTypeFloat64), + ) + case wasm.OpcodeF64Min: + c.emit( + newOperationMin(f64), + ) + case wasm.OpcodeF64Max: + c.emit( + newOperationMax(f64), + ) + case wasm.OpcodeF64Copysign: + c.emit( + 
newOperationCopysign(f64), + ) + case wasm.OpcodeI32WrapI64: + c.emit( + newOperationI32WrapFromI64(), + ) + case wasm.OpcodeI32TruncF32S: + c.emit( + newOperationITruncFromF(f32, signedInt32, false), + ) + case wasm.OpcodeI32TruncF32U: + c.emit( + newOperationITruncFromF(f32, signedUint32, false), + ) + case wasm.OpcodeI32TruncF64S: + c.emit( + newOperationITruncFromF(f64, signedInt32, false), + ) + case wasm.OpcodeI32TruncF64U: + c.emit( + newOperationITruncFromF(f64, signedUint32, false), + ) + case wasm.OpcodeI64ExtendI32S: + c.emit( + newOperationExtend(true), + ) + case wasm.OpcodeI64ExtendI32U: + c.emit( + newOperationExtend(false), + ) + case wasm.OpcodeI64TruncF32S: + c.emit( + newOperationITruncFromF(f32, signedInt64, false), + ) + case wasm.OpcodeI64TruncF32U: + c.emit( + newOperationITruncFromF(f32, signedUint64, false), + ) + case wasm.OpcodeI64TruncF64S: + c.emit( + newOperationITruncFromF(f64, signedInt64, false), + ) + case wasm.OpcodeI64TruncF64U: + c.emit( + newOperationITruncFromF(f64, signedUint64, false), + ) + case wasm.OpcodeF32ConvertI32S: + c.emit( + newOperationFConvertFromI(signedInt32, f32), + ) + case wasm.OpcodeF32ConvertI32U: + c.emit( + newOperationFConvertFromI(signedUint32, f32), + ) + case wasm.OpcodeF32ConvertI64S: + c.emit( + newOperationFConvertFromI(signedInt64, f32), + ) + case wasm.OpcodeF32ConvertI64U: + c.emit( + newOperationFConvertFromI(signedUint64, f32), + ) + case wasm.OpcodeF32DemoteF64: + c.emit( + newOperationF32DemoteFromF64(), + ) + case wasm.OpcodeF64ConvertI32S: + c.emit( + newOperationFConvertFromI(signedInt32, f64), + ) + case wasm.OpcodeF64ConvertI32U: + c.emit( + newOperationFConvertFromI(signedUint32, f64), + ) + case wasm.OpcodeF64ConvertI64S: + c.emit( + newOperationFConvertFromI(signedInt64, f64), + ) + case wasm.OpcodeF64ConvertI64U: + c.emit( + newOperationFConvertFromI(signedUint64, f64), + ) + case wasm.OpcodeF64PromoteF32: + c.emit( + newOperationF64PromoteFromF32(), + ) + case 
wasm.OpcodeI32ReinterpretF32: + c.emit( + newOperationI32ReinterpretFromF32(), + ) + case wasm.OpcodeI64ReinterpretF64: + c.emit( + newOperationI64ReinterpretFromF64(), + ) + case wasm.OpcodeF32ReinterpretI32: + c.emit( + newOperationF32ReinterpretFromI32(), + ) + case wasm.OpcodeF64ReinterpretI64: + c.emit( + newOperationF64ReinterpretFromI64(), + ) + case wasm.OpcodeI32Extend8S: + c.emit( + newOperationSignExtend32From8(), + ) + case wasm.OpcodeI32Extend16S: + c.emit( + newOperationSignExtend32From16(), + ) + case wasm.OpcodeI64Extend8S: + c.emit( + newOperationSignExtend64From8(), + ) + case wasm.OpcodeI64Extend16S: + c.emit( + newOperationSignExtend64From16(), + ) + case wasm.OpcodeI64Extend32S: + c.emit( + newOperationSignExtend64From32(), + ) + case wasm.OpcodeRefFunc: + c.pc++ + index, num, err := leb128.LoadUint32(c.body[c.pc:]) + if err != nil { + return fmt.Errorf("failed to read function index for ref.func: %v", err) + } + c.pc += num - 1 + c.emit( + newOperationRefFunc(index), + ) + case wasm.OpcodeRefNull: + c.pc++ // Skip the type of reftype as every ref value is opaque pointer. + c.emit( + newOperationConstI64(0), + ) + case wasm.OpcodeRefIsNull: + // Simply compare the opaque pointer (i64) with zero. + c.emit( + newOperationEqz(unsignedInt64), + ) + case wasm.OpcodeTableGet: + c.pc++ + tableIndex, num, err := leb128.LoadUint32(c.body[c.pc:]) + if err != nil { + return fmt.Errorf("failed to read function index for table.get: %v", err) + } + c.pc += num - 1 + c.emit( + newOperationTableGet(tableIndex), + ) + case wasm.OpcodeTableSet: + c.pc++ + tableIndex, num, err := leb128.LoadUint32(c.body[c.pc:]) + if err != nil { + return fmt.Errorf("failed to read function index for table.set: %v", err) + } + c.pc += num - 1 + c.emit( + newOperationTableSet(tableIndex), + ) + case wasm.OpcodeMiscPrefix: + c.pc++ + // A misc opcode is encoded as an unsigned variable 32-bit integer. 
+ miscOp, num, err := leb128.LoadUint32(c.body[c.pc:]) + if err != nil { + return fmt.Errorf("failed to read misc opcode: %v", err) + } + c.pc += num - 1 + switch byte(miscOp) { + case wasm.OpcodeMiscI32TruncSatF32S: + c.emit( + newOperationITruncFromF(f32, signedInt32, true), + ) + case wasm.OpcodeMiscI32TruncSatF32U: + c.emit( + newOperationITruncFromF(f32, signedUint32, true), + ) + case wasm.OpcodeMiscI32TruncSatF64S: + c.emit( + newOperationITruncFromF(f64, signedInt32, true), + ) + case wasm.OpcodeMiscI32TruncSatF64U: + c.emit( + newOperationITruncFromF(f64, signedUint32, true), + ) + case wasm.OpcodeMiscI64TruncSatF32S: + c.emit( + newOperationITruncFromF(f32, signedInt64, true), + ) + case wasm.OpcodeMiscI64TruncSatF32U: + c.emit( + newOperationITruncFromF(f32, signedUint64, true), + ) + case wasm.OpcodeMiscI64TruncSatF64S: + c.emit( + newOperationITruncFromF(f64, signedInt64, true), + ) + case wasm.OpcodeMiscI64TruncSatF64U: + c.emit( + newOperationITruncFromF(f64, signedUint64, true), + ) + case wasm.OpcodeMiscMemoryInit: + c.result.UsesMemory = true + dataIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + 1 // +1 to skip the memory index which is fixed to zero. + c.emit( + newOperationMemoryInit(dataIndex), + ) + case wasm.OpcodeMiscDataDrop: + dataIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + c.emit( + newOperationDataDrop(dataIndex), + ) + case wasm.OpcodeMiscMemoryCopy: + c.result.UsesMemory = true + c.pc += 2 // +2 to skip two memory indexes which are fixed to zero. + c.emit( + newOperationMemoryCopy(), + ) + case wasm.OpcodeMiscMemoryFill: + c.result.UsesMemory = true + c.pc += 1 // +1 to skip the memory index which is fixed to zero. 
+ c.emit( + newOperationMemoryFill(), + ) + case wasm.OpcodeMiscTableInit: + elemIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + // Read table index which is fixed to zero currently. + tableIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + c.emit( + newOperationTableInit(elemIndex, tableIndex), + ) + case wasm.OpcodeMiscElemDrop: + elemIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + c.emit( + newOperationElemDrop(elemIndex), + ) + case wasm.OpcodeMiscTableCopy: + // Read the source table inde.g. + dst, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + // Read the destination table inde.g. + src, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + c.emit( + newOperationTableCopy(src, dst), + ) + case wasm.OpcodeMiscTableGrow: + // Read the source table inde.g. + tableIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + c.emit( + newOperationTableGrow(tableIndex), + ) + case wasm.OpcodeMiscTableSize: + // Read the source table inde.g. + tableIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + c.emit( + newOperationTableSize(tableIndex), + ) + case wasm.OpcodeMiscTableFill: + // Read the source table index. 
+ tableIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("reading i32.const value: %v", err) + } + c.pc += num + c.emit( + newOperationTableFill(tableIndex), + ) + default: + return fmt.Errorf("unsupported misc instruction in interpreterir: 0x%x", op) + } + case wasm.OpcodeVecPrefix: + c.pc++ + switch vecOp := c.body[c.pc]; vecOp { + case wasm.OpcodeVecV128Const: + c.pc++ + lo := binary.LittleEndian.Uint64(c.body[c.pc : c.pc+8]) + c.pc += 8 + hi := binary.LittleEndian.Uint64(c.body[c.pc : c.pc+8]) + c.emit( + newOperationV128Const(lo, hi), + ) + c.pc += 7 + case wasm.OpcodeVecV128Load: + arg, err := c.readMemoryArg(wasm.OpcodeI32LoadName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType128, arg), + ) + case wasm.OpcodeVecV128Load8x8s: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8x8SName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType8x8s, arg), + ) + case wasm.OpcodeVecV128Load8x8u: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8x8UName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType8x8u, arg), + ) + case wasm.OpcodeVecV128Load16x4s: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16x4SName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType16x4s, arg), + ) + case wasm.OpcodeVecV128Load16x4u: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16x4UName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType16x4u, arg), + ) + case wasm.OpcodeVecV128Load32x2s: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32x2SName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType32x2s, arg), + ) + case wasm.OpcodeVecV128Load32x2u: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32x2UName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType32x2u, arg), + ) + case wasm.OpcodeVecV128Load8Splat: + arg, 
err := c.readMemoryArg(wasm.OpcodeVecV128Load8SplatName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType8Splat, arg), + ) + case wasm.OpcodeVecV128Load16Splat: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16SplatName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType16Splat, arg), + ) + case wasm.OpcodeVecV128Load32Splat: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32SplatName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType32Splat, arg), + ) + case wasm.OpcodeVecV128Load64Splat: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load64SplatName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType64Splat, arg), + ) + case wasm.OpcodeVecV128Load32zero: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32zeroName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType32zero, arg), + ) + case wasm.OpcodeVecV128Load64zero: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load64zeroName) + if err != nil { + return err + } + c.emit( + newOperationV128Load(v128LoadType64zero, arg), + ) + case wasm.OpcodeVecV128Load8Lane: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8LaneName) + if err != nil { + return err + } + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128LoadLane(laneIndex, 8, arg), + ) + case wasm.OpcodeVecV128Load16Lane: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16LaneName) + if err != nil { + return err + } + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128LoadLane(laneIndex, 16, arg), + ) + case wasm.OpcodeVecV128Load32Lane: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32LaneName) + if err != nil { + return err + } + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128LoadLane(laneIndex, 32, arg), + ) + case wasm.OpcodeVecV128Load64Lane: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load64LaneName) + if err != nil { + return err + } + 
c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128LoadLane(laneIndex, 64, arg), + ) + case wasm.OpcodeVecV128Store: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128StoreName) + if err != nil { + return err + } + c.emit( + newOperationV128Store(arg), + ) + case wasm.OpcodeVecV128Store8Lane: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Store8LaneName) + if err != nil { + return err + } + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128StoreLane(laneIndex, 8, arg), + ) + case wasm.OpcodeVecV128Store16Lane: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Store16LaneName) + if err != nil { + return err + } + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128StoreLane(laneIndex, 16, arg), + ) + case wasm.OpcodeVecV128Store32Lane: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Store32LaneName) + if err != nil { + return err + } + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128StoreLane(laneIndex, 32, arg), + ) + case wasm.OpcodeVecV128Store64Lane: + arg, err := c.readMemoryArg(wasm.OpcodeVecV128Store64LaneName) + if err != nil { + return err + } + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128StoreLane(laneIndex, 64, arg), + ) + case wasm.OpcodeVecI8x16ExtractLaneS: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ExtractLane(laneIndex, true, shapeI8x16), + ) + case wasm.OpcodeVecI8x16ExtractLaneU: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ExtractLane(laneIndex, false, shapeI8x16), + ) + case wasm.OpcodeVecI16x8ExtractLaneS: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ExtractLane(laneIndex, true, shapeI16x8), + ) + case wasm.OpcodeVecI16x8ExtractLaneU: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ExtractLane(laneIndex, false, shapeI16x8), + ) + case wasm.OpcodeVecI32x4ExtractLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ExtractLane(laneIndex, false, shapeI32x4), + ) + case 
wasm.OpcodeVecI64x2ExtractLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ExtractLane(laneIndex, false, shapeI64x2), + ) + case wasm.OpcodeVecF32x4ExtractLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ExtractLane(laneIndex, false, shapeF32x4), + ) + case wasm.OpcodeVecF64x2ExtractLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ExtractLane(laneIndex, false, shapeF64x2), + ) + case wasm.OpcodeVecI8x16ReplaceLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ReplaceLane(laneIndex, shapeI8x16), + ) + case wasm.OpcodeVecI16x8ReplaceLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ReplaceLane(laneIndex, shapeI16x8), + ) + case wasm.OpcodeVecI32x4ReplaceLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ReplaceLane(laneIndex, shapeI32x4), + ) + case wasm.OpcodeVecI64x2ReplaceLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ReplaceLane(laneIndex, shapeI64x2), + ) + case wasm.OpcodeVecF32x4ReplaceLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ReplaceLane(laneIndex, shapeF32x4), + ) + case wasm.OpcodeVecF64x2ReplaceLane: + c.pc++ + laneIndex := c.body[c.pc] + c.emit( + newOperationV128ReplaceLane(laneIndex, shapeF64x2), + ) + case wasm.OpcodeVecI8x16Splat: + c.emit( + newOperationV128Splat(shapeI8x16), + ) + case wasm.OpcodeVecI16x8Splat: + c.emit( + newOperationV128Splat(shapeI16x8), + ) + case wasm.OpcodeVecI32x4Splat: + c.emit( + newOperationV128Splat(shapeI32x4), + ) + case wasm.OpcodeVecI64x2Splat: + c.emit( + newOperationV128Splat(shapeI64x2), + ) + case wasm.OpcodeVecF32x4Splat: + c.emit( + newOperationV128Splat(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Splat: + c.emit( + newOperationV128Splat(shapeF64x2), + ) + case wasm.OpcodeVecI8x16Swizzle: + c.emit( + newOperationV128Swizzle(), + ) + case wasm.OpcodeVecV128i8x16Shuffle: + c.pc++ + lanes := make([]uint64, 16) + for i := uint64(0); i < 16; 
i++ { + lanes[i] = uint64(c.body[c.pc+i]) + } + op := newOperationV128Shuffle(lanes) + c.emit(op) + c.pc += 15 + case wasm.OpcodeVecV128AnyTrue: + c.emit( + newOperationV128AnyTrue(), + ) + case wasm.OpcodeVecI8x16AllTrue: + c.emit( + newOperationV128AllTrue(shapeI8x16), + ) + case wasm.OpcodeVecI16x8AllTrue: + c.emit( + newOperationV128AllTrue(shapeI16x8), + ) + case wasm.OpcodeVecI32x4AllTrue: + c.emit( + newOperationV128AllTrue(shapeI32x4), + ) + case wasm.OpcodeVecI64x2AllTrue: + c.emit( + newOperationV128AllTrue(shapeI64x2), + ) + case wasm.OpcodeVecI8x16BitMask: + c.emit( + newOperationV128BitMask(shapeI8x16), + ) + case wasm.OpcodeVecI16x8BitMask: + c.emit( + newOperationV128BitMask(shapeI16x8), + ) + case wasm.OpcodeVecI32x4BitMask: + c.emit( + newOperationV128BitMask(shapeI32x4), + ) + case wasm.OpcodeVecI64x2BitMask: + c.emit( + newOperationV128BitMask(shapeI64x2), + ) + case wasm.OpcodeVecV128And: + c.emit( + newOperationV128And(), + ) + case wasm.OpcodeVecV128Not: + c.emit( + newOperationV128Not(), + ) + case wasm.OpcodeVecV128Or: + c.emit( + newOperationV128Or(), + ) + case wasm.OpcodeVecV128Xor: + c.emit( + newOperationV128Xor(), + ) + case wasm.OpcodeVecV128Bitselect: + c.emit( + newOperationV128Bitselect(), + ) + case wasm.OpcodeVecV128AndNot: + c.emit( + newOperationV128AndNot(), + ) + case wasm.OpcodeVecI8x16Shl: + c.emit( + newOperationV128Shl(shapeI8x16), + ) + case wasm.OpcodeVecI8x16ShrS: + c.emit( + newOperationV128Shr(shapeI8x16, true), + ) + case wasm.OpcodeVecI8x16ShrU: + c.emit( + newOperationV128Shr(shapeI8x16, false), + ) + case wasm.OpcodeVecI16x8Shl: + c.emit( + newOperationV128Shl(shapeI16x8), + ) + case wasm.OpcodeVecI16x8ShrS: + c.emit( + newOperationV128Shr(shapeI16x8, true), + ) + case wasm.OpcodeVecI16x8ShrU: + c.emit( + newOperationV128Shr(shapeI16x8, false), + ) + case wasm.OpcodeVecI32x4Shl: + c.emit( + newOperationV128Shl(shapeI32x4), + ) + case wasm.OpcodeVecI32x4ShrS: + c.emit( + newOperationV128Shr(shapeI32x4, true), + ) 
+ case wasm.OpcodeVecI32x4ShrU: + c.emit( + newOperationV128Shr(shapeI32x4, false), + ) + case wasm.OpcodeVecI64x2Shl: + c.emit( + newOperationV128Shl(shapeI64x2), + ) + case wasm.OpcodeVecI64x2ShrS: + c.emit( + newOperationV128Shr(shapeI64x2, true), + ) + case wasm.OpcodeVecI64x2ShrU: + c.emit( + newOperationV128Shr(shapeI64x2, false), + ) + case wasm.OpcodeVecI8x16Eq: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16Eq), + ) + case wasm.OpcodeVecI8x16Ne: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16Ne), + ) + case wasm.OpcodeVecI8x16LtS: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16LtS), + ) + case wasm.OpcodeVecI8x16LtU: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16LtU), + ) + case wasm.OpcodeVecI8x16GtS: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16GtS), + ) + case wasm.OpcodeVecI8x16GtU: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16GtU), + ) + case wasm.OpcodeVecI8x16LeS: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16LeS), + ) + case wasm.OpcodeVecI8x16LeU: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16LeU), + ) + case wasm.OpcodeVecI8x16GeS: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16GeS), + ) + case wasm.OpcodeVecI8x16GeU: + c.emit( + newOperationV128Cmp(v128CmpTypeI8x16GeU), + ) + case wasm.OpcodeVecI16x8Eq: + c.emit( + newOperationV128Cmp(v128CmpTypeI16x8Eq), + ) + case wasm.OpcodeVecI16x8Ne: + c.emit( + newOperationV128Cmp(v128CmpTypeI16x8Ne), + ) + case wasm.OpcodeVecI16x8LtS: + c.emit( + newOperationV128Cmp(v128CmpTypeI16x8LtS), + ) + case wasm.OpcodeVecI16x8LtU: + c.emit( + newOperationV128Cmp(v128CmpTypeI16x8LtU), + ) + case wasm.OpcodeVecI16x8GtS: + c.emit( + newOperationV128Cmp(v128CmpTypeI16x8GtS), + ) + case wasm.OpcodeVecI16x8GtU: + c.emit( + newOperationV128Cmp(v128CmpTypeI16x8GtU), + ) + case wasm.OpcodeVecI16x8LeS: + c.emit( + newOperationV128Cmp(v128CmpTypeI16x8LeS), + ) + case wasm.OpcodeVecI16x8LeU: + c.emit( + newOperationV128Cmp(v128CmpTypeI16x8LeU), + ) + case wasm.OpcodeVecI16x8GeS: + c.emit( + 
newOperationV128Cmp(v128CmpTypeI16x8GeS), + ) + case wasm.OpcodeVecI16x8GeU: + c.emit( + newOperationV128Cmp(v128CmpTypeI16x8GeU), + ) + case wasm.OpcodeVecI32x4Eq: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4Eq), + ) + case wasm.OpcodeVecI32x4Ne: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4Ne), + ) + case wasm.OpcodeVecI32x4LtS: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4LtS), + ) + case wasm.OpcodeVecI32x4LtU: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4LtU), + ) + case wasm.OpcodeVecI32x4GtS: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4GtS), + ) + case wasm.OpcodeVecI32x4GtU: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4GtU), + ) + case wasm.OpcodeVecI32x4LeS: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4LeS), + ) + case wasm.OpcodeVecI32x4LeU: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4LeU), + ) + case wasm.OpcodeVecI32x4GeS: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4GeS), + ) + case wasm.OpcodeVecI32x4GeU: + c.emit( + newOperationV128Cmp(v128CmpTypeI32x4GeU), + ) + case wasm.OpcodeVecI64x2Eq: + c.emit( + newOperationV128Cmp(v128CmpTypeI64x2Eq), + ) + case wasm.OpcodeVecI64x2Ne: + c.emit( + newOperationV128Cmp(v128CmpTypeI64x2Ne), + ) + case wasm.OpcodeVecI64x2LtS: + c.emit( + newOperationV128Cmp(v128CmpTypeI64x2LtS), + ) + case wasm.OpcodeVecI64x2GtS: + c.emit( + newOperationV128Cmp(v128CmpTypeI64x2GtS), + ) + case wasm.OpcodeVecI64x2LeS: + c.emit( + newOperationV128Cmp(v128CmpTypeI64x2LeS), + ) + case wasm.OpcodeVecI64x2GeS: + c.emit( + newOperationV128Cmp(v128CmpTypeI64x2GeS), + ) + case wasm.OpcodeVecF32x4Eq: + c.emit( + newOperationV128Cmp(v128CmpTypeF32x4Eq), + ) + case wasm.OpcodeVecF32x4Ne: + c.emit( + newOperationV128Cmp(v128CmpTypeF32x4Ne), + ) + case wasm.OpcodeVecF32x4Lt: + c.emit( + newOperationV128Cmp(v128CmpTypeF32x4Lt), + ) + case wasm.OpcodeVecF32x4Gt: + c.emit( + newOperationV128Cmp(v128CmpTypeF32x4Gt), + ) + case wasm.OpcodeVecF32x4Le: + c.emit( + newOperationV128Cmp(v128CmpTypeF32x4Le), + ) + case 
wasm.OpcodeVecF32x4Ge: + c.emit( + newOperationV128Cmp(v128CmpTypeF32x4Ge), + ) + case wasm.OpcodeVecF64x2Eq: + c.emit( + newOperationV128Cmp(v128CmpTypeF64x2Eq), + ) + case wasm.OpcodeVecF64x2Ne: + c.emit( + newOperationV128Cmp(v128CmpTypeF64x2Ne), + ) + case wasm.OpcodeVecF64x2Lt: + c.emit( + newOperationV128Cmp(v128CmpTypeF64x2Lt), + ) + case wasm.OpcodeVecF64x2Gt: + c.emit( + newOperationV128Cmp(v128CmpTypeF64x2Gt), + ) + case wasm.OpcodeVecF64x2Le: + c.emit( + newOperationV128Cmp(v128CmpTypeF64x2Le), + ) + case wasm.OpcodeVecF64x2Ge: + c.emit( + newOperationV128Cmp(v128CmpTypeF64x2Ge), + ) + case wasm.OpcodeVecI8x16Neg: + c.emit( + newOperationV128Neg(shapeI8x16), + ) + case wasm.OpcodeVecI16x8Neg: + c.emit( + newOperationV128Neg(shapeI16x8), + ) + case wasm.OpcodeVecI32x4Neg: + c.emit( + newOperationV128Neg(shapeI32x4), + ) + case wasm.OpcodeVecI64x2Neg: + c.emit( + newOperationV128Neg(shapeI64x2), + ) + case wasm.OpcodeVecF32x4Neg: + c.emit( + newOperationV128Neg(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Neg: + c.emit( + newOperationV128Neg(shapeF64x2), + ) + case wasm.OpcodeVecI8x16Add: + c.emit( + newOperationV128Add(shapeI8x16), + ) + case wasm.OpcodeVecI16x8Add: + c.emit( + newOperationV128Add(shapeI16x8), + ) + case wasm.OpcodeVecI32x4Add: + c.emit( + newOperationV128Add(shapeI32x4), + ) + case wasm.OpcodeVecI64x2Add: + c.emit( + newOperationV128Add(shapeI64x2), + ) + case wasm.OpcodeVecF32x4Add: + c.emit( + newOperationV128Add(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Add: + c.emit( + newOperationV128Add(shapeF64x2), + ) + case wasm.OpcodeVecI8x16Sub: + c.emit( + newOperationV128Sub(shapeI8x16), + ) + case wasm.OpcodeVecI16x8Sub: + c.emit( + newOperationV128Sub(shapeI16x8), + ) + case wasm.OpcodeVecI32x4Sub: + c.emit( + newOperationV128Sub(shapeI32x4), + ) + case wasm.OpcodeVecI64x2Sub: + c.emit( + newOperationV128Sub(shapeI64x2), + ) + case wasm.OpcodeVecF32x4Sub: + c.emit( + newOperationV128Sub(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Sub: + 
c.emit( + newOperationV128Sub(shapeF64x2), + ) + case wasm.OpcodeVecI8x16AddSatS: + c.emit( + newOperationV128AddSat(shapeI8x16, true), + ) + case wasm.OpcodeVecI8x16AddSatU: + c.emit( + newOperationV128AddSat(shapeI8x16, false), + ) + case wasm.OpcodeVecI16x8AddSatS: + c.emit( + newOperationV128AddSat(shapeI16x8, true), + ) + case wasm.OpcodeVecI16x8AddSatU: + c.emit( + newOperationV128AddSat(shapeI16x8, false), + ) + case wasm.OpcodeVecI8x16SubSatS: + c.emit( + newOperationV128SubSat(shapeI8x16, true), + ) + case wasm.OpcodeVecI8x16SubSatU: + c.emit( + newOperationV128SubSat(shapeI8x16, false), + ) + case wasm.OpcodeVecI16x8SubSatS: + c.emit( + newOperationV128SubSat(shapeI16x8, true), + ) + case wasm.OpcodeVecI16x8SubSatU: + c.emit( + newOperationV128SubSat(shapeI16x8, false), + ) + case wasm.OpcodeVecI16x8Mul: + c.emit( + newOperationV128Mul(shapeI16x8), + ) + case wasm.OpcodeVecI32x4Mul: + c.emit( + newOperationV128Mul(shapeI32x4), + ) + case wasm.OpcodeVecI64x2Mul: + c.emit( + newOperationV128Mul(shapeI64x2), + ) + case wasm.OpcodeVecF32x4Mul: + c.emit( + newOperationV128Mul(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Mul: + c.emit( + newOperationV128Mul(shapeF64x2), + ) + case wasm.OpcodeVecF32x4Sqrt: + c.emit( + newOperationV128Sqrt(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Sqrt: + c.emit( + newOperationV128Sqrt(shapeF64x2), + ) + case wasm.OpcodeVecF32x4Div: + c.emit( + newOperationV128Div(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Div: + c.emit( + newOperationV128Div(shapeF64x2), + ) + case wasm.OpcodeVecI8x16Abs: + c.emit( + newOperationV128Abs(shapeI8x16), + ) + case wasm.OpcodeVecI8x16Popcnt: + c.emit( + newOperationV128Popcnt(shapeI8x16), + ) + case wasm.OpcodeVecI16x8Abs: + c.emit( + newOperationV128Abs(shapeI16x8), + ) + case wasm.OpcodeVecI32x4Abs: + c.emit( + newOperationV128Abs(shapeI32x4), + ) + case wasm.OpcodeVecI64x2Abs: + c.emit( + newOperationV128Abs(shapeI64x2), + ) + case wasm.OpcodeVecF32x4Abs: + c.emit( + 
newOperationV128Abs(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Abs: + c.emit( + newOperationV128Abs(shapeF64x2), + ) + case wasm.OpcodeVecI8x16MinS: + c.emit( + newOperationV128Min(shapeI8x16, true), + ) + case wasm.OpcodeVecI8x16MinU: + c.emit( + newOperationV128Min(shapeI8x16, false), + ) + case wasm.OpcodeVecI8x16MaxS: + c.emit( + newOperationV128Max(shapeI8x16, true), + ) + case wasm.OpcodeVecI8x16MaxU: + c.emit( + newOperationV128Max(shapeI8x16, false), + ) + case wasm.OpcodeVecI8x16AvgrU: + c.emit( + newOperationV128AvgrU(shapeI8x16), + ) + case wasm.OpcodeVecI16x8MinS: + c.emit( + newOperationV128Min(shapeI16x8, true), + ) + case wasm.OpcodeVecI16x8MinU: + c.emit( + newOperationV128Min(shapeI16x8, false), + ) + case wasm.OpcodeVecI16x8MaxS: + c.emit( + newOperationV128Max(shapeI16x8, true), + ) + case wasm.OpcodeVecI16x8MaxU: + c.emit( + newOperationV128Max(shapeI16x8, false), + ) + case wasm.OpcodeVecI16x8AvgrU: + c.emit( + newOperationV128AvgrU(shapeI16x8), + ) + case wasm.OpcodeVecI32x4MinS: + c.emit( + newOperationV128Min(shapeI32x4, true), + ) + case wasm.OpcodeVecI32x4MinU: + c.emit( + newOperationV128Min(shapeI32x4, false), + ) + case wasm.OpcodeVecI32x4MaxS: + c.emit( + newOperationV128Max(shapeI32x4, true), + ) + case wasm.OpcodeVecI32x4MaxU: + c.emit( + newOperationV128Max(shapeI32x4, false), + ) + case wasm.OpcodeVecF32x4Min: + c.emit( + newOperationV128Min(shapeF32x4, false), + ) + case wasm.OpcodeVecF32x4Max: + c.emit( + newOperationV128Max(shapeF32x4, false), + ) + case wasm.OpcodeVecF64x2Min: + c.emit( + newOperationV128Min(shapeF64x2, false), + ) + case wasm.OpcodeVecF64x2Max: + c.emit( + newOperationV128Max(shapeF64x2, false), + ) + case wasm.OpcodeVecF32x4Pmin: + c.emit( + newOperationV128Pmin(shapeF32x4), + ) + case wasm.OpcodeVecF32x4Pmax: + c.emit( + newOperationV128Pmax(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Pmin: + c.emit( + newOperationV128Pmin(shapeF64x2), + ) + case wasm.OpcodeVecF64x2Pmax: + c.emit( + 
newOperationV128Pmax(shapeF64x2), + ) + case wasm.OpcodeVecF32x4Ceil: + c.emit( + newOperationV128Ceil(shapeF32x4), + ) + case wasm.OpcodeVecF32x4Floor: + c.emit( + newOperationV128Floor(shapeF32x4), + ) + case wasm.OpcodeVecF32x4Trunc: + c.emit( + newOperationV128Trunc(shapeF32x4), + ) + case wasm.OpcodeVecF32x4Nearest: + c.emit( + newOperationV128Nearest(shapeF32x4), + ) + case wasm.OpcodeVecF64x2Ceil: + c.emit( + newOperationV128Ceil(shapeF64x2), + ) + case wasm.OpcodeVecF64x2Floor: + c.emit( + newOperationV128Floor(shapeF64x2), + ) + case wasm.OpcodeVecF64x2Trunc: + c.emit( + newOperationV128Trunc(shapeF64x2), + ) + case wasm.OpcodeVecF64x2Nearest: + c.emit( + newOperationV128Nearest(shapeF64x2), + ) + case wasm.OpcodeVecI16x8ExtendLowI8x16S: + c.emit( + newOperationV128Extend(shapeI8x16, true, true), + ) + case wasm.OpcodeVecI16x8ExtendHighI8x16S: + c.emit( + newOperationV128Extend(shapeI8x16, true, false), + ) + case wasm.OpcodeVecI16x8ExtendLowI8x16U: + c.emit( + newOperationV128Extend(shapeI8x16, false, true), + ) + case wasm.OpcodeVecI16x8ExtendHighI8x16U: + c.emit( + newOperationV128Extend(shapeI8x16, false, false), + ) + case wasm.OpcodeVecI32x4ExtendLowI16x8S: + c.emit( + newOperationV128Extend(shapeI16x8, true, true), + ) + case wasm.OpcodeVecI32x4ExtendHighI16x8S: + c.emit( + newOperationV128Extend(shapeI16x8, true, false), + ) + case wasm.OpcodeVecI32x4ExtendLowI16x8U: + c.emit( + newOperationV128Extend(shapeI16x8, false, true), + ) + case wasm.OpcodeVecI32x4ExtendHighI16x8U: + c.emit( + newOperationV128Extend(shapeI16x8, false, false), + ) + case wasm.OpcodeVecI64x2ExtendLowI32x4S: + c.emit( + newOperationV128Extend(shapeI32x4, true, true), + ) + case wasm.OpcodeVecI64x2ExtendHighI32x4S: + c.emit( + newOperationV128Extend(shapeI32x4, true, false), + ) + case wasm.OpcodeVecI64x2ExtendLowI32x4U: + c.emit( + newOperationV128Extend(shapeI32x4, false, true), + ) + case wasm.OpcodeVecI64x2ExtendHighI32x4U: + c.emit( + newOperationV128Extend(shapeI32x4, 
false, false), + ) + case wasm.OpcodeVecI16x8Q15mulrSatS: + c.emit( + newOperationV128Q15mulrSatS(), + ) + case wasm.OpcodeVecI16x8ExtMulLowI8x16S: + c.emit( + newOperationV128ExtMul(shapeI8x16, true, true), + ) + case wasm.OpcodeVecI16x8ExtMulHighI8x16S: + c.emit( + newOperationV128ExtMul(shapeI8x16, true, false), + ) + case wasm.OpcodeVecI16x8ExtMulLowI8x16U: + c.emit( + newOperationV128ExtMul(shapeI8x16, false, true), + ) + case wasm.OpcodeVecI16x8ExtMulHighI8x16U: + c.emit( + newOperationV128ExtMul(shapeI8x16, false, false), + ) + case wasm.OpcodeVecI32x4ExtMulLowI16x8S: + c.emit( + newOperationV128ExtMul(shapeI16x8, true, true), + ) + case wasm.OpcodeVecI32x4ExtMulHighI16x8S: + c.emit( + newOperationV128ExtMul(shapeI16x8, true, false), + ) + case wasm.OpcodeVecI32x4ExtMulLowI16x8U: + c.emit( + newOperationV128ExtMul(shapeI16x8, false, true), + ) + case wasm.OpcodeVecI32x4ExtMulHighI16x8U: + c.emit( + newOperationV128ExtMul(shapeI16x8, false, false), + ) + case wasm.OpcodeVecI64x2ExtMulLowI32x4S: + c.emit( + newOperationV128ExtMul(shapeI32x4, true, true), + ) + case wasm.OpcodeVecI64x2ExtMulHighI32x4S: + c.emit( + newOperationV128ExtMul(shapeI32x4, true, false), + ) + case wasm.OpcodeVecI64x2ExtMulLowI32x4U: + c.emit( + newOperationV128ExtMul(shapeI32x4, false, true), + ) + case wasm.OpcodeVecI64x2ExtMulHighI32x4U: + c.emit( + newOperationV128ExtMul(shapeI32x4, false, false), + ) + case wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S: + c.emit( + newOperationV128ExtAddPairwise(shapeI8x16, true), + ) + case wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U: + c.emit( + newOperationV128ExtAddPairwise(shapeI8x16, false), + ) + case wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S: + c.emit( + newOperationV128ExtAddPairwise(shapeI16x8, true), + ) + case wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U: + c.emit( + newOperationV128ExtAddPairwise(shapeI16x8, false), + ) + case wasm.OpcodeVecF64x2PromoteLowF32x4Zero: + c.emit( + newOperationV128FloatPromote(), + ) + case 
wasm.OpcodeVecF32x4DemoteF64x2Zero: + c.emit( + newOperationV128FloatDemote(), + ) + case wasm.OpcodeVecF32x4ConvertI32x4S: + c.emit( + newOperationV128FConvertFromI(shapeF32x4, true), + ) + case wasm.OpcodeVecF32x4ConvertI32x4U: + c.emit( + newOperationV128FConvertFromI(shapeF32x4, false), + ) + case wasm.OpcodeVecF64x2ConvertLowI32x4S: + c.emit( + newOperationV128FConvertFromI(shapeF64x2, true), + ) + case wasm.OpcodeVecF64x2ConvertLowI32x4U: + c.emit( + newOperationV128FConvertFromI(shapeF64x2, false), + ) + case wasm.OpcodeVecI32x4DotI16x8S: + c.emit( + newOperationV128Dot(), + ) + case wasm.OpcodeVecI8x16NarrowI16x8S: + c.emit( + newOperationV128Narrow(shapeI16x8, true), + ) + case wasm.OpcodeVecI8x16NarrowI16x8U: + c.emit( + newOperationV128Narrow(shapeI16x8, false), + ) + case wasm.OpcodeVecI16x8NarrowI32x4S: + c.emit( + newOperationV128Narrow(shapeI32x4, true), + ) + case wasm.OpcodeVecI16x8NarrowI32x4U: + c.emit( + newOperationV128Narrow(shapeI32x4, false), + ) + case wasm.OpcodeVecI32x4TruncSatF32x4S: + c.emit( + newOperationV128ITruncSatFromF(shapeF32x4, true), + ) + case wasm.OpcodeVecI32x4TruncSatF32x4U: + c.emit( + newOperationV128ITruncSatFromF(shapeF32x4, false), + ) + case wasm.OpcodeVecI32x4TruncSatF64x2SZero: + c.emit( + newOperationV128ITruncSatFromF(shapeF64x2, true), + ) + case wasm.OpcodeVecI32x4TruncSatF64x2UZero: + c.emit( + newOperationV128ITruncSatFromF(shapeF64x2, false), + ) + default: + return fmt.Errorf("unsupported vector instruction in interpreterir: %s", wasm.VectorInstructionName(vecOp)) + } + case wasm.OpcodeAtomicPrefix: + c.pc++ + atomicOp := c.body[c.pc] + switch atomicOp { + case wasm.OpcodeAtomicMemoryWait32: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicMemoryWait32Name) + if err != nil { + return err + } + c.emit( + newOperationAtomicMemoryWait(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicMemoryWait64: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicMemoryWait64Name) + if err != nil { + return err + } + c.emit( + 
newOperationAtomicMemoryWait(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicMemoryNotify: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicMemoryNotifyName) + if err != nil { + return err + } + c.emit( + newOperationAtomicMemoryNotify(imm), + ) + case wasm.OpcodeAtomicFence: + // Skip immediate value + c.pc++ + _ = c.body[c.pc] + c.emit( + newOperationAtomicFence(), + ) + case wasm.OpcodeAtomicI32Load: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32LoadName) + if err != nil { + return err + } + c.emit( + newOperationAtomicLoad(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI64Load: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64LoadName) + if err != nil { + return err + } + c.emit( + newOperationAtomicLoad(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicI32Load8U: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Load8UName) + if err != nil { + return err + } + c.emit( + newOperationAtomicLoad8(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI32Load16U: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Load16UName) + if err != nil { + return err + } + c.emit( + newOperationAtomicLoad16(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI64Load8U: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Load8UName) + if err != nil { + return err + } + c.emit( + newOperationAtomicLoad8(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicI64Load16U: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Load16UName) + if err != nil { + return err + } + c.emit( + newOperationAtomicLoad16(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicI64Load32U: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Load32UName) + if err != nil { + return err + } + c.emit( + newOperationAtomicLoad(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI32Store: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32StoreName) + if err != nil { + return err + } + c.emit( + newOperationAtomicStore(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI32Store8: + imm, err := 
c.readMemoryArg(wasm.OpcodeAtomicI32Store8Name) + if err != nil { + return err + } + c.emit( + newOperationAtomicStore8(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI32Store16: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Store16Name) + if err != nil { + return err + } + c.emit( + newOperationAtomicStore16(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI64Store: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64StoreName) + if err != nil { + return err + } + c.emit( + newOperationAtomicStore(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicI64Store8: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Store8Name) + if err != nil { + return err + } + c.emit( + newOperationAtomicStore8(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicI64Store16: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Store16Name) + if err != nil { + return err + } + c.emit( + newOperationAtomicStore16(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicI64Store32: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Store32Name) + if err != nil { + return err + } + c.emit( + newOperationAtomicStore(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI32RmwAdd: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwAddName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpAdd), + ) + case wasm.OpcodeAtomicI64RmwAdd: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwAddName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpAdd), + ) + case wasm.OpcodeAtomicI32Rmw8AddU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8AddUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpAdd), + ) + case wasm.OpcodeAtomicI64Rmw8AddU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8AddUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI64, imm, 
atomicArithmeticOpAdd), + ) + case wasm.OpcodeAtomicI32Rmw16AddU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16AddUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpAdd), + ) + case wasm.OpcodeAtomicI64Rmw16AddU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16AddUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpAdd), + ) + case wasm.OpcodeAtomicI64Rmw32AddU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32AddUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpAdd), + ) + case wasm.OpcodeAtomicI32RmwSub: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwSubName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpSub), + ) + case wasm.OpcodeAtomicI64RmwSub: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwSubName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpSub), + ) + case wasm.OpcodeAtomicI32Rmw8SubU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8SubUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpSub), + ) + case wasm.OpcodeAtomicI64Rmw8SubU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8SubUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpSub), + ) + case wasm.OpcodeAtomicI32Rmw16SubU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16SubUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpSub), + ) + case wasm.OpcodeAtomicI64Rmw16SubU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16SubUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI64, imm, 
atomicArithmeticOpSub), + ) + case wasm.OpcodeAtomicI64Rmw32SubU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32SubUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpSub), + ) + case wasm.OpcodeAtomicI32RmwAnd: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwAndName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpAnd), + ) + case wasm.OpcodeAtomicI64RmwAnd: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwAndName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpAnd), + ) + case wasm.OpcodeAtomicI32Rmw8AndU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8AndUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpAnd), + ) + case wasm.OpcodeAtomicI64Rmw8AndU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8AndUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpAnd), + ) + case wasm.OpcodeAtomicI32Rmw16AndU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16AndUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpAnd), + ) + case wasm.OpcodeAtomicI64Rmw16AndU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16AndUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpAnd), + ) + case wasm.OpcodeAtomicI64Rmw32AndU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32AndUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpAnd), + ) + case wasm.OpcodeAtomicI32RmwOr: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwOrName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, 
atomicArithmeticOpOr), + ) + case wasm.OpcodeAtomicI64RmwOr: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwOrName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpOr), + ) + case wasm.OpcodeAtomicI32Rmw8OrU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8OrUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpOr), + ) + case wasm.OpcodeAtomicI64Rmw8OrU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8OrUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpOr), + ) + case wasm.OpcodeAtomicI32Rmw16OrU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16OrUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpOr), + ) + case wasm.OpcodeAtomicI64Rmw16OrU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16OrUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpOr), + ) + case wasm.OpcodeAtomicI64Rmw32OrU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32OrUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpOr), + ) + case wasm.OpcodeAtomicI32RmwXor: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwXorName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpXor), + ) + case wasm.OpcodeAtomicI64RmwXor: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwXorName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpXor), + ) + case wasm.OpcodeAtomicI32Rmw8XorU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8XorUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpXor), + ) + case 
wasm.OpcodeAtomicI64Rmw8XorU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8XorUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpXor), + ) + case wasm.OpcodeAtomicI32Rmw16XorU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16XorUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpXor), + ) + case wasm.OpcodeAtomicI64Rmw16XorU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16XorUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpXor), + ) + case wasm.OpcodeAtomicI64Rmw32XorU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32XorUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpXor), + ) + case wasm.OpcodeAtomicI32RmwXchg: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwXchgName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpNop), + ) + case wasm.OpcodeAtomicI64RmwXchg: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwXchgName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpNop), + ) + case wasm.OpcodeAtomicI32Rmw8XchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8XchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpNop), + ) + case wasm.OpcodeAtomicI64Rmw8XchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8XchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpNop), + ) + case wasm.OpcodeAtomicI32Rmw16XchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16XchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpNop), + ) + case 
wasm.OpcodeAtomicI64Rmw16XchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16XchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpNop), + ) + case wasm.OpcodeAtomicI64Rmw32XchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32XchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpNop), + ) + case wasm.OpcodeAtomicI32RmwCmpxchg: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwCmpxchgName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMWCmpxchg(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI64RmwCmpxchg: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwCmpxchgName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMWCmpxchg(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicI32Rmw8CmpxchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8CmpxchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8Cmpxchg(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI64Rmw8CmpxchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8CmpxchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW8Cmpxchg(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicI32Rmw16CmpxchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16CmpxchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16Cmpxchg(unsignedTypeI32, imm), + ) + case wasm.OpcodeAtomicI64Rmw16CmpxchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16CmpxchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMW16Cmpxchg(unsignedTypeI64, imm), + ) + case wasm.OpcodeAtomicI64Rmw32CmpxchgU: + imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32CmpxchgUName) + if err != nil { + return err + } + c.emit( + newOperationAtomicRMWCmpxchg(unsignedTypeI32, imm), + ) + default: + return fmt.Errorf("unsupported atomic instruction in 
interpreterir: %s", wasm.AtomicInstructionName(atomicOp)) + } + default: + return fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op) + } + + // Move the program counter to point to the next instruction. + c.pc++ + return nil +} + +func (c *compiler) nextFrameID() (id uint32) { + id = c.currentFrameID + 1 + c.currentFrameID++ + return +} + +func (c *compiler) applyToStack(opcode wasm.Opcode) (index uint32, err error) { + switch opcode { + case + // These are the opcodes that is coupled with "index" immediate + // and it DOES affect the signature of opcode. + wasm.OpcodeCall, + wasm.OpcodeCallIndirect, + wasm.OpcodeLocalGet, + wasm.OpcodeLocalSet, + wasm.OpcodeLocalTee, + wasm.OpcodeGlobalGet, + wasm.OpcodeGlobalSet: + // Assumes that we are at the opcode now so skip it before read immediates. + v, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return 0, fmt.Errorf("reading immediates: %w", err) + } + c.pc += num + index = v + default: + // Note that other opcodes are free of index + // as it doesn't affect the signature of opt code. + // In other words, the "index" argument of wasmOpcodeSignature + // is ignored there. + } + + if c.unreachableState.on { + return 0, nil + } + + // Retrieve the signature of the opcode. + s, err := c.wasmOpcodeSignature(opcode, index) + if err != nil { + return 0, err + } + + // Manipulate the stack according to the signature. + // Note that the following algorithm assumes that + // the unknown type is unique in the signature, + // and is determined by the actual type on the stack. + // The determined type is stored in this typeParam. 
+ var typeParam unsignedType + var typeParamFound bool + for i := range s.in { + want := s.in[len(s.in)-1-i] + actual := c.stackPop() + if want == unsignedTypeUnknown && typeParamFound { + want = typeParam + } else if want == unsignedTypeUnknown { + want = actual + typeParam = want + typeParamFound = true + } + if want != actual { + return 0, fmt.Errorf("input signature mismatch: want %s but have %s", want, actual) + } + } + + for _, target := range s.out { + if target == unsignedTypeUnknown && !typeParamFound { + return 0, fmt.Errorf("cannot determine type of unknown result") + } else if target == unsignedTypeUnknown { + c.stackPush(typeParam) + } else { + c.stackPush(target) + } + } + + return index, nil +} + +func (c *compiler) stackPeek() (ret unsignedType) { + ret = c.stack[len(c.stack)-1] + return +} + +func (c *compiler) stackPop() (ret unsignedType) { + // No need to check stack bound + // as we can assume that all the operations + // are valid thanks to validateFunction + // at module validation phase. + ret = c.stack[len(c.stack)-1] + c.stack = c.stack[:len(c.stack)-1] + return +} + +func (c *compiler) stackPush(ts unsignedType) { + c.stack = append(c.stack, ts) +} + +// emit adds the operations into the result. +func (c *compiler) emit(op unionOperation) { + if !c.unreachableState.on { + switch op.Kind { + case operationKindDrop: + // If the drop range is nil, + // we could remove such operations. + // That happens when drop operation is unnecessary. + // i.e. when there's no need to adjust stack before jmp. + if int64(op.U1) == -1 { + return + } + } + c.result.Operations = append(c.result.Operations, op) + if c.needSourceOffset { + c.result.IROperationSourceOffsetsInWasmBinary = append(c.result.IROperationSourceOffsetsInWasmBinary, + c.currentOpPC+c.bodyOffsetInCodeSection) + } + } +} + +// Emit const expression with default values of the given type. 
+func (c *compiler) emitDefaultValue(t wasm.ValueType) { + switch t { + case wasm.ValueTypeI32: + c.stackPush(unsignedTypeI32) + c.emit(newOperationConstI32(0)) + case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref: + c.stackPush(unsignedTypeI64) + c.emit(newOperationConstI64(0)) + case wasm.ValueTypeF32: + c.stackPush(unsignedTypeF32) + c.emit(newOperationConstF32(0)) + case wasm.ValueTypeF64: + c.stackPush(unsignedTypeF64) + c.emit(newOperationConstF64(0)) + case wasm.ValueTypeV128: + c.stackPush(unsignedTypeV128) + c.emit(newOperationV128Const(0, 0)) + } +} + +// Returns the "depth" (starting from top of the stack) +// of the n-th local. +func (c *compiler) localDepth(index wasm.Index) int { + height := c.localIndexToStackHeightInUint64[index] + return c.stackLenInUint64(len(c.stack)) - 1 - int(height) +} + +func (c *compiler) localType(index wasm.Index) (t wasm.ValueType) { + if params := uint32(len(c.sig.Params)); index < params { + t = c.sig.Params[index] + } else { + t = c.localTypes[index-params] + } + return +} + +// getFrameDropRange returns the range (starting from top of the stack) that spans across the (uint64) stack. The range is +// supposed to be dropped from the stack when the given frame exists or branch into it. +// +// * frame is the control frame which the call-site is trying to branch into or exit. +// * isEnd true if the call-site is handling wasm.OpcodeEnd. +func (c *compiler) getFrameDropRange(frame *controlFrame, isEnd bool) inclusiveRange { + var start int + if !isEnd && frame.kind == controlFrameKindLoop { + // If this is not End and the call-site is trying to branch into the Loop control frame, + // we have to Start executing from the beginning of the loop block. + // Therefore, we have to pass the inputs to the frame. 
+ start = frame.blockType.ParamNumInUint64 + } else { + start = frame.blockType.ResultNumInUint64 + } + var end int + if frame.kind == controlFrameKindFunction { + // On the function return, we eliminate all the contents on the stack + // including locals (existing below of frame.originalStackLen) + end = c.stackLenInUint64(len(c.stack)) - 1 + } else { + end = c.stackLenInUint64(len(c.stack)) - 1 - c.stackLenInUint64(frame.originalStackLenWithoutParam) + } + if start <= end { + return inclusiveRange{Start: int32(start), End: int32(end)} + } else { + return nopinclusiveRange + } +} + +func (c *compiler) stackLenInUint64(ceil int) (ret int) { + for i := 0; i < ceil; i++ { + if c.stack[i] == unsignedTypeV128 { + ret += 2 + } else { + ret++ + } + } + return +} + +func (c *compiler) readMemoryArg(tag string) (memoryArg, error) { + c.result.UsesMemory = true + alignment, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return memoryArg{}, fmt.Errorf("reading alignment for %s: %w", tag, err) + } + c.pc += num + offset, num, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return memoryArg{}, fmt.Errorf("reading offset for %s: %w", tag, err) + } + c.pc += num + return memoryArg{Offset: offset, Alignment: alignment}, nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/format.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/format.go new file mode 100644 index 000000000..8af1d94b0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/format.go @@ -0,0 +1,22 @@ +package interpreter + +import ( + "bytes" +) + +func format(ops []unionOperation) string { + buf := bytes.NewBuffer(nil) + + _, _ = buf.WriteString(".entrypoint\n") + for i := range ops { + op := &ops[i] + str := op.String() + isLabel := op.Kind == operationKindLabel + if !isLabel { + const indent = "\t" + str = indent + str + } + _, _ = buf.WriteString(str + "\n") + } + return buf.String() +} diff --git 
a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go new file mode 100644 index 000000000..a89ddc457 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go @@ -0,0 +1,4583 @@ +package interpreter + +import ( + "context" + "encoding/binary" + "errors" + "fmt" + "math" + "math/bits" + "sync" + "unsafe" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/expctxkeys" + "github.com/tetratelabs/wazero/internal/filecache" + "github.com/tetratelabs/wazero/internal/internalapi" + "github.com/tetratelabs/wazero/internal/moremath" + "github.com/tetratelabs/wazero/internal/wasm" + "github.com/tetratelabs/wazero/internal/wasmdebug" + "github.com/tetratelabs/wazero/internal/wasmruntime" +) + +// callStackCeiling is the maximum WebAssembly call frame stack height. This allows wazero to raise +// wasm.ErrCallStackOverflow instead of overflowing the Go runtime. +// +// The default value should suffice for most use cases. Those wishing to change this can via `go build -ldflags`. +var callStackCeiling = 2000 + +// engine is an interpreter implementation of wasm.Engine +type engine struct { + enabledFeatures api.CoreFeatures + compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mutex. + mux sync.RWMutex +} + +func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine { + return &engine{ + enabledFeatures: enabledFeatures, + compiledFunctions: map[wasm.ModuleID][]compiledFunction{}, + } +} + +// Close implements the same method as documented on wasm.Engine. +func (e *engine) Close() (err error) { + return +} + +// CompiledModuleCount implements the same method as documented on wasm.Engine. 
+func (e *engine) CompiledModuleCount() uint32 { + return uint32(len(e.compiledFunctions)) +} + +// DeleteCompiledModule implements the same method as documented on wasm.Engine. +func (e *engine) DeleteCompiledModule(m *wasm.Module) { + e.deleteCompiledFunctions(m) +} + +func (e *engine) deleteCompiledFunctions(module *wasm.Module) { + e.mux.Lock() + defer e.mux.Unlock() + delete(e.compiledFunctions, module.ID) +} + +func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) { + e.mux.Lock() + defer e.mux.Unlock() + e.compiledFunctions[module.ID] = fs +} + +func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) { + e.mux.RLock() + defer e.mux.RUnlock() + fs, ok = e.compiledFunctions[module.ID] + return +} + +// moduleEngine implements wasm.ModuleEngine +type moduleEngine struct { + // codes are the compiled functions in a module instances. + // The index is module instance-scoped. + functions []function + + // parentEngine holds *engine from which this module engine is created from. + parentEngine *engine +} + +// GetGlobalValue implements the same method as documented on wasm.ModuleEngine. +func (e *moduleEngine) GetGlobalValue(wasm.Index) (lo, hi uint64) { + panic("BUG: GetGlobalValue should never be called on interpreter mode") +} + +// SetGlobalValue implements the same method as documented on wasm.ModuleEngine. +func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) { + panic("BUG: SetGlobalValue should never be called on interpreter mode") +} + +// OwnsGlobals implements the same method as documented on wasm.ModuleEngine. +func (e *moduleEngine) OwnsGlobals() bool { return false } + +// callEngine holds context per moduleEngine.Call, and shared across all the +// function calls originating from the same moduleEngine.Call execution. +// +// This implements api.Function. +type callEngine struct { + internalapi.WazeroOnlyType + + // stack contains the operands. 
+ // Note that all the values are represented as uint64. + stack []uint64 + + // frames are the function call stack. + frames []*callFrame + + // f is the initial function for this call engine. + f *function + + // stackiterator for Listeners to walk frames and stack. + stackIterator stackIterator +} + +func (e *moduleEngine) newCallEngine(compiled *function) *callEngine { + return &callEngine{f: compiled} +} + +func (ce *callEngine) pushValue(v uint64) { + ce.stack = append(ce.stack, v) +} + +func (ce *callEngine) pushValues(v []uint64) { + ce.stack = append(ce.stack, v...) +} + +func (ce *callEngine) popValue() (v uint64) { + // No need to check stack bound + // as we can assume that all the operations + // are valid thanks to validateFunction + // at module validation phase + // and interpreterir translation + // before compilation. + stackTopIndex := len(ce.stack) - 1 + v = ce.stack[stackTopIndex] + ce.stack = ce.stack[:stackTopIndex] + return +} + +func (ce *callEngine) popValues(v []uint64) { + stackTopIndex := len(ce.stack) - len(v) + copy(v, ce.stack[stackTopIndex:]) + ce.stack = ce.stack[:stackTopIndex] +} + +// peekValues peeks api.ValueType values from the stack and returns them. +func (ce *callEngine) peekValues(count int) []uint64 { + if count == 0 { + return nil + } + stackLen := len(ce.stack) + return ce.stack[stackLen-count : stackLen] +} + +func (ce *callEngine) drop(raw uint64) { + r := inclusiveRangeFromU64(raw) + if r.Start == -1 { + return + } else if r.Start == 0 { + ce.stack = ce.stack[:int32(len(ce.stack))-1-r.End] + } else { + newStack := ce.stack[:int32(len(ce.stack))-1-r.End] + newStack = append(newStack, ce.stack[int32(len(ce.stack))-r.Start:]...) 
+ ce.stack = newStack + } +} + +func (ce *callEngine) pushFrame(frame *callFrame) { + if callStackCeiling <= len(ce.frames) { + panic(wasmruntime.ErrRuntimeStackOverflow) + } + ce.frames = append(ce.frames, frame) +} + +func (ce *callEngine) popFrame() (frame *callFrame) { + // No need to check stack bound as we can assume that all the operations are valid thanks to validateFunction at + // module validation phase and interpreterir translation before compilation. + oneLess := len(ce.frames) - 1 + frame = ce.frames[oneLess] + ce.frames = ce.frames[:oneLess] + return +} + +type callFrame struct { + // pc is the program counter representing the current position in code.body. + pc uint64 + // f is the compiled function used in this function frame. + f *function + // base index in the frame of this function, used to detect the count of + // values on the stack. + base int +} + +type compiledFunction struct { + source *wasm.Module + body []unionOperation + listener experimental.FunctionListener + offsetsInWasmBinary []uint64 + hostFn interface{} + ensureTermination bool + index wasm.Index +} + +type function struct { + funcType *wasm.FunctionType + moduleInstance *wasm.ModuleInstance + typeID wasm.FunctionTypeID + parent *compiledFunction +} + +// functionFromUintptr resurrects the original *function from the given uintptr +// which comes from either funcref table or OpcodeRefFunc instruction. +func functionFromUintptr(ptr uintptr) *function { + // Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector. 
+ // + // For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr" + // subroutine wanrs as "checkptr: pointer arithmetic result points to invalid allocation" + // https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69 + var wrapped *uintptr = &ptr + return *(**function)(unsafe.Pointer(wrapped)) +} + +type snapshot struct { + stack []uint64 + frames []*callFrame + pc uint64 + + ret []uint64 + + ce *callEngine +} + +// Snapshot implements the same method as documented on experimental.Snapshotter. +func (ce *callEngine) Snapshot() experimental.Snapshot { + stack := make([]uint64, len(ce.stack)) + copy(stack, ce.stack) + + frames := make([]*callFrame, len(ce.frames)) + copy(frames, ce.frames) + + return &snapshot{ + stack: stack, + frames: frames, + ce: ce, + } +} + +// Restore implements the same method as documented on experimental.Snapshot. +func (s *snapshot) Restore(ret []uint64) { + s.ret = ret + panic(s) +} + +func (s *snapshot) doRestore() { + ce := s.ce + + ce.stack = s.stack + ce.frames = s.frames + ce.frames[len(ce.frames)-1].pc = s.pc + + copy(ce.stack[len(ce.stack)-len(s.ret):], s.ret) +} + +// Error implements the same method on error. +func (s *snapshot) Error() string { + return "unhandled snapshot restore, this generally indicates restore was called from a different " + + "exported function invocation than snapshot" +} + +// stackIterator implements experimental.StackIterator. +type stackIterator struct { + stack []uint64 + frames []*callFrame + started bool + fn *function + pc uint64 +} + +func (si *stackIterator) reset(stack []uint64, frames []*callFrame, f *function) { + si.fn = f + si.pc = 0 + si.stack = stack + si.frames = frames + si.started = false +} + +func (si *stackIterator) clear() { + si.stack = nil + si.frames = nil + si.started = false + si.fn = nil +} + +// Next implements the same method as documented on experimental.StackIterator. 
+func (si *stackIterator) Next() bool { + if !si.started { + si.started = true + return true + } + + if len(si.frames) == 0 { + return false + } + + frame := si.frames[len(si.frames)-1] + si.stack = si.stack[:frame.base] + si.fn = frame.f + si.pc = frame.pc + si.frames = si.frames[:len(si.frames)-1] + return true +} + +// Function implements the same method as documented on +// experimental.StackIterator. +func (si *stackIterator) Function() experimental.InternalFunction { + return internalFunction{si.fn} +} + +// ProgramCounter implements the same method as documented on +// experimental.StackIterator. +func (si *stackIterator) ProgramCounter() experimental.ProgramCounter { + return experimental.ProgramCounter(si.pc) +} + +// internalFunction implements experimental.InternalFunction. +type internalFunction struct{ *function } + +// Definition implements the same method as documented on +// experimental.InternalFunction. +func (f internalFunction) Definition() api.FunctionDefinition { + return f.definition() +} + +// SourceOffsetForPC implements the same method as documented on +// experimental.InternalFunction. +func (f internalFunction) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 { + offsetsMap := f.parent.offsetsInWasmBinary + if uint64(pc) < uint64(len(offsetsMap)) { + return offsetsMap[pc] + } + return 0 +} + +// interpreter mode doesn't maintain call frames in the stack, so pass the zero size to the IR. +const callFrameStackSize = 0 + +// CompileModule implements the same method as documented on wasm.Engine. +func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error { + if _, ok := e.getCompiledFunctions(module); ok { // cache hit! 
+ return nil + } + + funcs := make([]compiledFunction, len(module.FunctionSection)) + irCompiler, err := newCompiler(e.enabledFeatures, callFrameStackSize, module, ensureTermination) + if err != nil { + return err + } + imported := module.ImportFunctionCount + for i := range module.CodeSection { + var lsn experimental.FunctionListener + if i < len(listeners) { + lsn = listeners[i] + } + + compiled := &funcs[i] + // If this is the host function, there's nothing to do as the runtime representation of + // host function in interpreter is its Go function itself as opposed to Wasm functions, + // which need to be compiled down to + if codeSeg := &module.CodeSection[i]; codeSeg.GoFunc != nil { + compiled.hostFn = codeSeg.GoFunc + } else { + ir, err := irCompiler.Next() + if err != nil { + return err + } + err = e.lowerIR(ir, compiled) + if err != nil { + def := module.FunctionDefinition(uint32(i) + module.ImportFunctionCount) + return fmt.Errorf("failed to lower func[%s] to interpreterir: %w", def.DebugName(), err) + } + } + compiled.source = module + compiled.ensureTermination = ensureTermination + compiled.listener = lsn + compiled.index = imported + uint32(i) + } + e.addCompiledFunctions(module, funcs) + return nil +} + +// NewModuleEngine implements the same method as documented on wasm.Engine. 
+func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInstance) (wasm.ModuleEngine, error) { + me := &moduleEngine{ + parentEngine: e, + functions: make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)), + } + + codes, ok := e.getCompiledFunctions(module) + if !ok { + return nil, errors.New("source module must be compiled before instantiation") + } + + for i := range codes { + c := &codes[i] + offset := i + int(module.ImportFunctionCount) + typeIndex := module.FunctionSection[i] + me.functions[offset] = function{ + moduleInstance: instance, + typeID: instance.TypeIDs[typeIndex], + funcType: &module.TypeSection[typeIndex], + parent: c, + } + } + return me, nil +} + +// lowerIR lowers the interpreterir operations to engine friendly struct. +func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error { + // Copy the body from the result. + ret.body = make([]unionOperation, len(ir.Operations)) + copy(ret.body, ir.Operations) + // Also copy the offsets if necessary. + if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 { + ret.offsetsInWasmBinary = make([]uint64, len(offsets)) + copy(ret.offsetsInWasmBinary, offsets) + } + + labelAddressResolutions := [labelKindNum][]uint64{} + + // First, we iterate all labels, and resolve the address. + for i := range ret.body { + op := &ret.body[i] + switch op.Kind { + case operationKindLabel: + label := label(op.U1) + address := uint64(i) + + kind, fid := label.Kind(), label.FrameID() + frameToAddresses := labelAddressResolutions[label.Kind()] + // Expand the slice if necessary. + if diff := fid - len(frameToAddresses) + 1; diff > 0 { + for j := 0; j < diff; j++ { + frameToAddresses = append(frameToAddresses, 0) + } + } + frameToAddresses[fid] = address + labelAddressResolutions[kind] = frameToAddresses + } + } + + // Then resolve the label as the index to the body. 
+ for i := range ret.body { + op := &ret.body[i] + switch op.Kind { + case operationKindBr: + e.setLabelAddress(&op.U1, label(op.U1), labelAddressResolutions) + case operationKindBrIf: + e.setLabelAddress(&op.U1, label(op.U1), labelAddressResolutions) + e.setLabelAddress(&op.U2, label(op.U2), labelAddressResolutions) + case operationKindBrTable: + for j := 0; j < len(op.Us); j += 2 { + target := op.Us[j] + e.setLabelAddress(&op.Us[j], label(target), labelAddressResolutions) + } + } + } + return nil +} + +func (e *engine) setLabelAddress(op *uint64, label label, labelAddressResolutions [labelKindNum][]uint64) { + if label.IsReturnTarget() { + // Jmp to the end of the possible binary. + *op = math.MaxUint64 + } else { + *op = labelAddressResolutions[label.Kind()][label.FrameID()] + } +} + +// ResolveImportedFunction implements wasm.ModuleEngine. +func (e *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) { + imported := importedModuleEngine.(*moduleEngine) + e.functions[index] = imported.functions[indexInImportedModule] +} + +// ResolveImportedMemory implements wasm.ModuleEngine. +func (e *moduleEngine) ResolveImportedMemory(wasm.ModuleEngine) {} + +// DoneInstantiation implements wasm.ModuleEngine. +func (e *moduleEngine) DoneInstantiation() {} + +// FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine. +func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference { + return uintptr(unsafe.Pointer(&e.functions[funcIndex])) +} + +// NewFunction implements the same method as documented on wasm.ModuleEngine. +func (e *moduleEngine) NewFunction(index wasm.Index) (ce api.Function) { + // Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to + // code on close aren't locked, neither is this read. 
+ compiled := &e.functions[index] + return e.newCallEngine(compiled) +} + +// LookupFunction implements the same method as documented on wasm.ModuleEngine. +func (e *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) { + if tableOffset >= uint32(len(t.References)) { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + rawPtr := t.References[tableOffset] + if rawPtr == 0 { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + + tf := functionFromUintptr(rawPtr) + if tf.typeID != typeId { + panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) + } + return tf.moduleInstance, tf.parent.index +} + +// Definition implements the same method as documented on api.Function. +func (ce *callEngine) Definition() api.FunctionDefinition { + return ce.f.definition() +} + +func (f *function) definition() api.FunctionDefinition { + compiled := f.parent + return compiled.source.FunctionDefinition(compiled.index) +} + +// Call implements the same method as documented on api.Function. +func (ce *callEngine) Call(ctx context.Context, params ...uint64) (results []uint64, err error) { + ft := ce.f.funcType + if n := ft.ParamNumInUint64; n != len(params) { + return nil, fmt.Errorf("expected %d params, but passed %d", n, len(params)) + } + return ce.call(ctx, params, nil) +} + +// CallWithStack implements the same method as documented on api.Function. +func (ce *callEngine) CallWithStack(ctx context.Context, stack []uint64) error { + params, results, err := wasm.SplitCallStack(ce.f.funcType, stack) + if err != nil { + return err + } + _, err = ce.call(ctx, params, results) + return err +} + +func (ce *callEngine) call(ctx context.Context, params, results []uint64) (_ []uint64, err error) { + m := ce.f.moduleInstance + if ce.f.parent.ensureTermination { + select { + case <-ctx.Done(): + // If the provided context is already done, close the call context + // and return the error. 
+ m.CloseWithCtxErr(ctx) + return nil, m.FailIfClosed() + default: + } + } + + if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil { + ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, ce) + } + + defer func() { + // If the module closed during the call, and the call didn't err for another reason, set an ExitError. + if err == nil { + err = m.FailIfClosed() + } + // TODO: ^^ Will not fail if the function was imported from a closed module. + + if v := recover(); v != nil { + err = ce.recoverOnCall(ctx, m, v) + } + }() + + ce.pushValues(params) + + if ce.f.parent.ensureTermination { + done := m.CloseModuleOnCanceledOrTimeout(ctx) + defer done() + } + + ce.callFunction(ctx, m, ce.f) + + // This returns a safe copy of the results, instead of a slice view. If we + // returned a re-slice, the caller could accidentally or purposefully + // corrupt the stack of subsequent calls. + ft := ce.f.funcType + if results == nil && ft.ResultNumInUint64 > 0 { + results = make([]uint64, ft.ResultNumInUint64) + } + ce.popValues(results) + return results, nil +} + +// functionListenerInvocation captures arguments needed to perform function +// listener invocations when unwinding the call stack. +type functionListenerInvocation struct { + experimental.FunctionListener + def api.FunctionDefinition +} + +// recoverOnCall takes the recovered value `recoverOnCall`, and wraps it +// with the call frame stack traces. Also, reset the state of callEngine +// so that it can be used for the subsequent calls. +func (ce *callEngine) recoverOnCall(ctx context.Context, m *wasm.ModuleInstance, v interface{}) (err error) { + if s, ok := v.(*snapshot); ok { + // A snapshot that wasn't handled was created by a different call engine possibly from a nested wasm invocation, + // let it propagate up to be handled by the caller. 
+ panic(s) + } + + builder := wasmdebug.NewErrorBuilder() + frameCount := len(ce.frames) + functionListeners := make([]functionListenerInvocation, 0, 16) + + if frameCount > wasmdebug.MaxFrames { + frameCount = wasmdebug.MaxFrames + } + for i := 0; i < frameCount; i++ { + frame := ce.popFrame() + f := frame.f + def := f.definition() + var sources []string + if parent := frame.f.parent; parent.body != nil && len(parent.offsetsInWasmBinary) > 0 { + sources = parent.source.DWARFLines.Line(parent.offsetsInWasmBinary[frame.pc]) + } + builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources) + if f.parent.listener != nil { + functionListeners = append(functionListeners, functionListenerInvocation{ + FunctionListener: f.parent.listener, + def: f.definition(), + }) + } + } + + err = builder.FromRecovered(v) + for i := range functionListeners { + functionListeners[i].Abort(ctx, m, functionListeners[i].def, err) + } + + // Allows the reuse of CallEngine. + ce.stack, ce.frames = ce.stack[:0], ce.frames[:0] + return +} + +func (ce *callEngine) callFunction(ctx context.Context, m *wasm.ModuleInstance, f *function) { + if f.parent.hostFn != nil { + ce.callGoFuncWithStack(ctx, m, f) + } else if lsn := f.parent.listener; lsn != nil { + ce.callNativeFuncWithListener(ctx, m, f, lsn) + } else { + ce.callNativeFunc(ctx, m, f) + } +} + +func (ce *callEngine) callGoFunc(ctx context.Context, m *wasm.ModuleInstance, f *function, stack []uint64) { + typ := f.funcType + lsn := f.parent.listener + if lsn != nil { + params := stack[:typ.ParamNumInUint64] + ce.stackIterator.reset(ce.stack, ce.frames, f) + lsn.Before(ctx, m, f.definition(), params, &ce.stackIterator) + ce.stackIterator.clear() + } + frame := &callFrame{f: f, base: len(ce.stack)} + ce.pushFrame(frame) + + fn := f.parent.hostFn + switch fn := fn.(type) { + case api.GoModuleFunction: + fn.Call(ctx, m, stack) + case api.GoFunction: + fn.Call(ctx, stack) + } + + ce.popFrame() + if lsn != nil { + // TODO: This 
doesn't get the error due to use of panic to propagate them. + results := stack[:typ.ResultNumInUint64] + lsn.After(ctx, m, f.definition(), results) + } +} + +func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance, f *function) { + frame := &callFrame{f: f, base: len(ce.stack)} + moduleInst := f.moduleInstance + functions := moduleInst.Engine.(*moduleEngine).functions + memoryInst := moduleInst.MemoryInstance + globals := moduleInst.Globals + tables := moduleInst.Tables + typeIDs := moduleInst.TypeIDs + dataInstances := moduleInst.DataInstances + elementInstances := moduleInst.ElementInstances + ce.pushFrame(frame) + body := frame.f.parent.body + bodyLen := uint64(len(body)) + for frame.pc < bodyLen { + op := &body[frame.pc] + // TODO: add description of each operation/case + // on, for example, how many args are used, + // how the stack is modified, etc. + switch op.Kind { + case operationKindBuiltinFunctionCheckExitCode: + if err := m.FailIfClosed(); err != nil { + panic(err) + } + frame.pc++ + case operationKindUnreachable: + panic(wasmruntime.ErrRuntimeUnreachable) + case operationKindBr: + frame.pc = op.U1 + case operationKindBrIf: + if ce.popValue() > 0 { + ce.drop(op.U3) + frame.pc = op.U1 + } else { + frame.pc = op.U2 + } + case operationKindBrTable: + v := ce.popValue() + defaultAt := uint64(len(op.Us))/2 - 1 + if v > defaultAt { + v = defaultAt + } + v *= 2 + ce.drop(op.Us[v+1]) + frame.pc = op.Us[v] + case operationKindCall: + func() { + if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil { + defer func() { + if r := recover(); r != nil { + if s, ok := r.(*snapshot); ok && s.ce == ce { + s.doRestore() + frame = ce.frames[len(ce.frames)-1] + body = frame.f.parent.body + bodyLen = uint64(len(body)) + } else { + panic(r) + } + } + }() + } + ce.callFunction(ctx, f.moduleInstance, &functions[op.U1]) + }() + frame.pc++ + case operationKindCallIndirect: + offset := ce.popValue() + table := tables[op.U2] + if offset >= 
uint64(len(table.References)) { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + rawPtr := table.References[offset] + if rawPtr == 0 { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + + tf := functionFromUintptr(rawPtr) + if tf.typeID != typeIDs[op.U1] { + panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) + } + + ce.callFunction(ctx, f.moduleInstance, tf) + frame.pc++ + case operationKindDrop: + ce.drop(op.U1) + frame.pc++ + case operationKindSelect: + c := ce.popValue() + if op.B3 { // Target is vector. + x2Hi, x2Lo := ce.popValue(), ce.popValue() + if c == 0 { + _, _ = ce.popValue(), ce.popValue() // discard the x1's lo and hi bits. + ce.pushValue(x2Lo) + ce.pushValue(x2Hi) + } + } else { + v2 := ce.popValue() + if c == 0 { + _ = ce.popValue() + ce.pushValue(v2) + } + } + frame.pc++ + case operationKindPick: + index := len(ce.stack) - 1 - int(op.U1) + ce.pushValue(ce.stack[index]) + if op.B3 { // V128 value target. + ce.pushValue(ce.stack[index+1]) + } + frame.pc++ + case operationKindSet: + if op.B3 { // V128 value target. 
+ lowIndex := len(ce.stack) - 1 - int(op.U1) + highIndex := lowIndex + 1 + hi, lo := ce.popValue(), ce.popValue() + ce.stack[lowIndex], ce.stack[highIndex] = lo, hi + } else { + index := len(ce.stack) - 1 - int(op.U1) + ce.stack[index] = ce.popValue() + } + frame.pc++ + case operationKindGlobalGet: + g := globals[op.U1] + ce.pushValue(g.Val) + if g.Type.ValType == wasm.ValueTypeV128 { + ce.pushValue(g.ValHi) + } + frame.pc++ + case operationKindGlobalSet: + g := globals[op.U1] + if g.Type.ValType == wasm.ValueTypeV128 { + g.ValHi = ce.popValue() + } + g.Val = ce.popValue() + frame.pc++ + case operationKindLoad: + offset := ce.popMemoryOffset(op) + switch unsignedType(op.B1) { + case unsignedTypeI32, unsignedTypeF32: + if val, ok := memoryInst.ReadUint32Le(offset); !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } else { + ce.pushValue(uint64(val)) + } + case unsignedTypeI64, unsignedTypeF64: + if val, ok := memoryInst.ReadUint64Le(offset); !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } else { + ce.pushValue(val) + } + } + frame.pc++ + case operationKindLoad8: + val, ok := memoryInst.ReadByte(ce.popMemoryOffset(op)) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + + switch signedInt(op.B1) { + case signedInt32: + ce.pushValue(uint64(uint32(int8(val)))) + case signedInt64: + ce.pushValue(uint64(int8(val))) + case signedUint32, signedUint64: + ce.pushValue(uint64(val)) + } + frame.pc++ + case operationKindLoad16: + + val, ok := memoryInst.ReadUint16Le(ce.popMemoryOffset(op)) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + + switch signedInt(op.B1) { + case signedInt32: + ce.pushValue(uint64(uint32(int16(val)))) + case signedInt64: + ce.pushValue(uint64(int16(val))) + case signedUint32, signedUint64: + ce.pushValue(uint64(val)) + } + frame.pc++ + case operationKindLoad32: + val, ok := memoryInst.ReadUint32Le(ce.popMemoryOffset(op)) + if !ok { + 
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + + if op.B1 == 1 { // Signed + ce.pushValue(uint64(int32(val))) + } else { + ce.pushValue(uint64(val)) + } + frame.pc++ + case operationKindStore: + val := ce.popValue() + offset := ce.popMemoryOffset(op) + switch unsignedType(op.B1) { + case unsignedTypeI32, unsignedTypeF32: + if !memoryInst.WriteUint32Le(offset, uint32(val)) { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + case unsignedTypeI64, unsignedTypeF64: + if !memoryInst.WriteUint64Le(offset, val) { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + } + frame.pc++ + case operationKindStore8: + val := byte(ce.popValue()) + offset := ce.popMemoryOffset(op) + if !memoryInst.WriteByte(offset, val) { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + frame.pc++ + case operationKindStore16: + val := uint16(ce.popValue()) + offset := ce.popMemoryOffset(op) + if !memoryInst.WriteUint16Le(offset, val) { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + frame.pc++ + case operationKindStore32: + val := uint32(ce.popValue()) + offset := ce.popMemoryOffset(op) + if !memoryInst.WriteUint32Le(offset, val) { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + frame.pc++ + case operationKindMemorySize: + ce.pushValue(uint64(memoryInst.Pages())) + frame.pc++ + case operationKindMemoryGrow: + n := ce.popValue() + if res, ok := memoryInst.Grow(uint32(n)); !ok { + ce.pushValue(uint64(0xffffffff)) // = -1 in signed 32-bit integer. 
+ } else { + ce.pushValue(uint64(res)) + } + frame.pc++ + case operationKindConstI32, operationKindConstI64, + operationKindConstF32, operationKindConstF64: + ce.pushValue(op.U1) + frame.pc++ + case operationKindEq: + var b bool + switch unsignedType(op.B1) { + case unsignedTypeI32: + v2, v1 := ce.popValue(), ce.popValue() + b = uint32(v1) == uint32(v2) + case unsignedTypeI64: + v2, v1 := ce.popValue(), ce.popValue() + b = v1 == v2 + case unsignedTypeF32: + v2, v1 := ce.popValue(), ce.popValue() + b = math.Float32frombits(uint32(v2)) == math.Float32frombits(uint32(v1)) + case unsignedTypeF64: + v2, v1 := ce.popValue(), ce.popValue() + b = math.Float64frombits(v2) == math.Float64frombits(v1) + } + if b { + ce.pushValue(1) + } else { + ce.pushValue(0) + } + frame.pc++ + case operationKindNe: + var b bool + switch unsignedType(op.B1) { + case unsignedTypeI32, unsignedTypeI64: + v2, v1 := ce.popValue(), ce.popValue() + b = v1 != v2 + case unsignedTypeF32: + v2, v1 := ce.popValue(), ce.popValue() + b = math.Float32frombits(uint32(v2)) != math.Float32frombits(uint32(v1)) + case unsignedTypeF64: + v2, v1 := ce.popValue(), ce.popValue() + b = math.Float64frombits(v2) != math.Float64frombits(v1) + } + if b { + ce.pushValue(1) + } else { + ce.pushValue(0) + } + frame.pc++ + case operationKindEqz: + if ce.popValue() == 0 { + ce.pushValue(1) + } else { + ce.pushValue(0) + } + frame.pc++ + case operationKindLt: + v2 := ce.popValue() + v1 := ce.popValue() + var b bool + switch signedType(op.B1) { + case signedTypeInt32: + b = int32(v1) < int32(v2) + case signedTypeInt64: + b = int64(v1) < int64(v2) + case signedTypeUint32, signedTypeUint64: + b = v1 < v2 + case signedTypeFloat32: + b = math.Float32frombits(uint32(v1)) < math.Float32frombits(uint32(v2)) + case signedTypeFloat64: + b = math.Float64frombits(v1) < math.Float64frombits(v2) + } + if b { + ce.pushValue(1) + } else { + ce.pushValue(0) + } + frame.pc++ + case operationKindGt: + v2 := ce.popValue() + v1 := ce.popValue() + 
var b bool + switch signedType(op.B1) { + case signedTypeInt32: + b = int32(v1) > int32(v2) + case signedTypeInt64: + b = int64(v1) > int64(v2) + case signedTypeUint32, signedTypeUint64: + b = v1 > v2 + case signedTypeFloat32: + b = math.Float32frombits(uint32(v1)) > math.Float32frombits(uint32(v2)) + case signedTypeFloat64: + b = math.Float64frombits(v1) > math.Float64frombits(v2) + } + if b { + ce.pushValue(1) + } else { + ce.pushValue(0) + } + frame.pc++ + case operationKindLe: + v2 := ce.popValue() + v1 := ce.popValue() + var b bool + switch signedType(op.B1) { + case signedTypeInt32: + b = int32(v1) <= int32(v2) + case signedTypeInt64: + b = int64(v1) <= int64(v2) + case signedTypeUint32, signedTypeUint64: + b = v1 <= v2 + case signedTypeFloat32: + b = math.Float32frombits(uint32(v1)) <= math.Float32frombits(uint32(v2)) + case signedTypeFloat64: + b = math.Float64frombits(v1) <= math.Float64frombits(v2) + } + if b { + ce.pushValue(1) + } else { + ce.pushValue(0) + } + frame.pc++ + case operationKindGe: + v2 := ce.popValue() + v1 := ce.popValue() + var b bool + switch signedType(op.B1) { + case signedTypeInt32: + b = int32(v1) >= int32(v2) + case signedTypeInt64: + b = int64(v1) >= int64(v2) + case signedTypeUint32, signedTypeUint64: + b = v1 >= v2 + case signedTypeFloat32: + b = math.Float32frombits(uint32(v1)) >= math.Float32frombits(uint32(v2)) + case signedTypeFloat64: + b = math.Float64frombits(v1) >= math.Float64frombits(v2) + } + if b { + ce.pushValue(1) + } else { + ce.pushValue(0) + } + frame.pc++ + case operationKindAdd: + v2 := ce.popValue() + v1 := ce.popValue() + switch unsignedType(op.B1) { + case unsignedTypeI32: + v := uint32(v1) + uint32(v2) + ce.pushValue(uint64(v)) + case unsignedTypeI64: + ce.pushValue(v1 + v2) + case unsignedTypeF32: + ce.pushValue(addFloat32bits(uint32(v1), uint32(v2))) + case unsignedTypeF64: + v := math.Float64frombits(v1) + math.Float64frombits(v2) + ce.pushValue(math.Float64bits(v)) + } + frame.pc++ + case 
operationKindSub: + v2 := ce.popValue() + v1 := ce.popValue() + switch unsignedType(op.B1) { + case unsignedTypeI32: + ce.pushValue(uint64(uint32(v1) - uint32(v2))) + case unsignedTypeI64: + ce.pushValue(v1 - v2) + case unsignedTypeF32: + ce.pushValue(subFloat32bits(uint32(v1), uint32(v2))) + case unsignedTypeF64: + v := math.Float64frombits(v1) - math.Float64frombits(v2) + ce.pushValue(math.Float64bits(v)) + } + frame.pc++ + case operationKindMul: + v2 := ce.popValue() + v1 := ce.popValue() + switch unsignedType(op.B1) { + case unsignedTypeI32: + ce.pushValue(uint64(uint32(v1) * uint32(v2))) + case unsignedTypeI64: + ce.pushValue(v1 * v2) + case unsignedTypeF32: + ce.pushValue(mulFloat32bits(uint32(v1), uint32(v2))) + case unsignedTypeF64: + v := math.Float64frombits(v2) * math.Float64frombits(v1) + ce.pushValue(math.Float64bits(v)) + } + frame.pc++ + case operationKindClz: + v := ce.popValue() + if op.B1 == 0 { + // unsignedInt32 + ce.pushValue(uint64(bits.LeadingZeros32(uint32(v)))) + } else { + // unsignedInt64 + ce.pushValue(uint64(bits.LeadingZeros64(v))) + } + frame.pc++ + case operationKindCtz: + v := ce.popValue() + if op.B1 == 0 { + // unsignedInt32 + ce.pushValue(uint64(bits.TrailingZeros32(uint32(v)))) + } else { + // unsignedInt64 + ce.pushValue(uint64(bits.TrailingZeros64(v))) + } + frame.pc++ + case operationKindPopcnt: + v := ce.popValue() + if op.B1 == 0 { + // unsignedInt32 + ce.pushValue(uint64(bits.OnesCount32(uint32(v)))) + } else { + // unsignedInt64 + ce.pushValue(uint64(bits.OnesCount64(v))) + } + frame.pc++ + case operationKindDiv: + // If an integer, check we won't divide by zero. 
+ t := signedType(op.B1) + v2, v1 := ce.popValue(), ce.popValue() + switch t { + case signedTypeFloat32, signedTypeFloat64: // not integers + default: + if v2 == 0 { + panic(wasmruntime.ErrRuntimeIntegerDivideByZero) + } + } + + switch t { + case signedTypeInt32: + d := int32(v2) + n := int32(v1) + if n == math.MinInt32 && d == -1 { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + ce.pushValue(uint64(uint32(n / d))) + case signedTypeInt64: + d := int64(v2) + n := int64(v1) + if n == math.MinInt64 && d == -1 { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + ce.pushValue(uint64(n / d)) + case signedTypeUint32: + d := uint32(v2) + n := uint32(v1) + ce.pushValue(uint64(n / d)) + case signedTypeUint64: + d := v2 + n := v1 + ce.pushValue(n / d) + case signedTypeFloat32: + ce.pushValue(divFloat32bits(uint32(v1), uint32(v2))) + case signedTypeFloat64: + ce.pushValue(math.Float64bits(math.Float64frombits(v1) / math.Float64frombits(v2))) + } + frame.pc++ + case operationKindRem: + v2, v1 := ce.popValue(), ce.popValue() + if v2 == 0 { + panic(wasmruntime.ErrRuntimeIntegerDivideByZero) + } + switch signedInt(op.B1) { + case signedInt32: + d := int32(v2) + n := int32(v1) + ce.pushValue(uint64(uint32(n % d))) + case signedInt64: + d := int64(v2) + n := int64(v1) + ce.pushValue(uint64(n % d)) + case signedUint32: + d := uint32(v2) + n := uint32(v1) + ce.pushValue(uint64(n % d)) + case signedUint64: + d := v2 + n := v1 + ce.pushValue(n % d) + } + frame.pc++ + case operationKindAnd: + v2 := ce.popValue() + v1 := ce.popValue() + if op.B1 == 0 { + // unsignedInt32 + ce.pushValue(uint64(uint32(v2) & uint32(v1))) + } else { + // unsignedInt64 + ce.pushValue(uint64(v2 & v1)) + } + frame.pc++ + case operationKindOr: + v2 := ce.popValue() + v1 := ce.popValue() + if op.B1 == 0 { + // unsignedInt32 + ce.pushValue(uint64(uint32(v2) | uint32(v1))) + } else { + // unsignedInt64 + ce.pushValue(uint64(v2 | v1)) + } + frame.pc++ + case operationKindXor: + v2 := ce.popValue() + v1 := 
ce.popValue() + if op.B1 == 0 { + // unsignedInt32 + ce.pushValue(uint64(uint32(v2) ^ uint32(v1))) + } else { + // unsignedInt64 + ce.pushValue(uint64(v2 ^ v1)) + } + frame.pc++ + case operationKindShl: + v2 := ce.popValue() + v1 := ce.popValue() + if op.B1 == 0 { + // unsignedInt32 + ce.pushValue(uint64(uint32(v1) << (uint32(v2) % 32))) + } else { + // unsignedInt64 + ce.pushValue(v1 << (v2 % 64)) + } + frame.pc++ + case operationKindShr: + v2 := ce.popValue() + v1 := ce.popValue() + switch signedInt(op.B1) { + case signedInt32: + ce.pushValue(uint64(uint32(int32(v1) >> (uint32(v2) % 32)))) + case signedInt64: + ce.pushValue(uint64(int64(v1) >> (v2 % 64))) + case signedUint32: + ce.pushValue(uint64(uint32(v1) >> (uint32(v2) % 32))) + case signedUint64: + ce.pushValue(v1 >> (v2 % 64)) + } + frame.pc++ + case operationKindRotl: + v2 := ce.popValue() + v1 := ce.popValue() + if op.B1 == 0 { + // unsignedInt32 + ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), int(v2)))) + } else { + // unsignedInt64 + ce.pushValue(uint64(bits.RotateLeft64(v1, int(v2)))) + } + frame.pc++ + case operationKindRotr: + v2 := ce.popValue() + v1 := ce.popValue() + if op.B1 == 0 { + // unsignedInt32 + ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), -int(v2)))) + } else { + // unsignedInt64 + ce.pushValue(uint64(bits.RotateLeft64(v1, -int(v2)))) + } + frame.pc++ + case operationKindAbs: + if op.B1 == 0 { + // float32 + const mask uint32 = 1 << 31 + ce.pushValue(uint64(uint32(ce.popValue()) &^ mask)) + } else { + // float64 + const mask uint64 = 1 << 63 + ce.pushValue(ce.popValue() &^ mask) + } + frame.pc++ + case operationKindNeg: + if op.B1 == 0 { + // float32 + v := -math.Float32frombits(uint32(ce.popValue())) + ce.pushValue(uint64(math.Float32bits(v))) + } else { + // float64 + v := -math.Float64frombits(ce.popValue()) + ce.pushValue(math.Float64bits(v)) + } + frame.pc++ + case operationKindCeil: + if op.B1 == 0 { + // float32 + v := 
moremath.WasmCompatCeilF32(math.Float32frombits(uint32(ce.popValue()))) + ce.pushValue(uint64(math.Float32bits(v))) + } else { + // float64 + v := moremath.WasmCompatCeilF64(math.Float64frombits(ce.popValue())) + ce.pushValue(math.Float64bits(v)) + } + frame.pc++ + case operationKindFloor: + if op.B1 == 0 { + // float32 + v := moremath.WasmCompatFloorF32(math.Float32frombits(uint32(ce.popValue()))) + ce.pushValue(uint64(math.Float32bits(v))) + } else { + // float64 + v := moremath.WasmCompatFloorF64(math.Float64frombits(ce.popValue())) + ce.pushValue(math.Float64bits(v)) + } + frame.pc++ + case operationKindTrunc: + if op.B1 == 0 { + // float32 + v := moremath.WasmCompatTruncF32(math.Float32frombits(uint32(ce.popValue()))) + ce.pushValue(uint64(math.Float32bits(v))) + } else { + // float64 + v := moremath.WasmCompatTruncF64(math.Float64frombits(ce.popValue())) + ce.pushValue(math.Float64bits(v)) + } + frame.pc++ + case operationKindNearest: + if op.B1 == 0 { + // float32 + f := math.Float32frombits(uint32(ce.popValue())) + ce.pushValue(uint64(math.Float32bits(moremath.WasmCompatNearestF32(f)))) + } else { + // float64 + f := math.Float64frombits(ce.popValue()) + ce.pushValue(math.Float64bits(moremath.WasmCompatNearestF64(f))) + } + frame.pc++ + case operationKindSqrt: + if op.B1 == 0 { + // float32 + v := math.Sqrt(float64(math.Float32frombits(uint32(ce.popValue())))) + ce.pushValue(uint64(math.Float32bits(float32(v)))) + } else { + // float64 + v := math.Sqrt(math.Float64frombits(ce.popValue())) + ce.pushValue(math.Float64bits(v)) + } + frame.pc++ + case operationKindMin: + if op.B1 == 0 { + // float32 + ce.pushValue(wasmCompatMin32bits(uint32(ce.popValue()), uint32(ce.popValue()))) + } else { + v2 := math.Float64frombits(ce.popValue()) + v1 := math.Float64frombits(ce.popValue()) + ce.pushValue(math.Float64bits(moremath.WasmCompatMin64(v1, v2))) + } + frame.pc++ + case operationKindMax: + if op.B1 == 0 { + ce.pushValue(wasmCompatMax32bits(uint32(ce.popValue()), 
uint32(ce.popValue()))) + } else { + // float64 + v2 := math.Float64frombits(ce.popValue()) + v1 := math.Float64frombits(ce.popValue()) + ce.pushValue(math.Float64bits(moremath.WasmCompatMax64(v1, v2))) + } + frame.pc++ + case operationKindCopysign: + if op.B1 == 0 { + // float32 + v2 := uint32(ce.popValue()) + v1 := uint32(ce.popValue()) + const signbit = 1 << 31 + ce.pushValue(uint64(v1&^signbit | v2&signbit)) + } else { + // float64 + v2 := ce.popValue() + v1 := ce.popValue() + const signbit = 1 << 63 + ce.pushValue(v1&^signbit | v2&signbit) + } + frame.pc++ + case operationKindI32WrapFromI64: + ce.pushValue(uint64(uint32(ce.popValue()))) + frame.pc++ + case operationKindITruncFromF: + if op.B1 == 0 { + // float32 + switch signedInt(op.B2) { + case signedInt32: + v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue())))) + if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN + if op.B3 { + // non-trapping conversion must cast nan to zero. + v = 0 + } else { + panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) + } + } else if v < math.MinInt32 || v > math.MaxInt32 { + if op.B3 { + // non-trapping conversion must "saturate" the value for overflowing sources. + if v < 0 { + v = math.MinInt32 + } else { + v = math.MaxInt32 + } + } else { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + } + ce.pushValue(uint64(uint32(int32(v)))) + case signedInt64: + v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue())))) + res := int64(v) + if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN + if op.B3 { + // non-trapping conversion must cast nan to zero. + res = 0 + } else { + panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) + } + } else if v < math.MinInt64 || v >= math.MaxInt64 { + // Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation, + // and that's why we use '>=' not '>' to check overflow. 
+ if op.B3 { + // non-trapping conversion must "saturate" the value for overflowing sources. + if v < 0 { + res = math.MinInt64 + } else { + res = math.MaxInt64 + } + } else { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + } + ce.pushValue(uint64(res)) + case signedUint32: + v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue())))) + if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN + if op.B3 { + // non-trapping conversion must cast nan to zero. + v = 0 + } else { + panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) + } + } else if v < 0 || v > math.MaxUint32 { + if op.B3 { + // non-trapping conversion must "saturate" the value for overflowing source. + if v < 0 { + v = 0 + } else { + v = math.MaxUint32 + } + } else { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + } + ce.pushValue(uint64(uint32(v))) + case signedUint64: + v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue())))) + res := uint64(v) + if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN + if op.B3 { + // non-trapping conversion must cast nan to zero. + res = 0 + } else { + panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) + } + } else if v < 0 || v >= math.MaxUint64 { + // Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation, + // and that's why we use '>=' not '>' to check overflow. + if op.B3 { + // non-trapping conversion must "saturate" the value for overflowing source. + if v < 0 { + res = 0 + } else { + res = math.MaxUint64 + } + } else { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + } + ce.pushValue(res) + } + } else { + // float64 + switch signedInt(op.B2) { + case signedInt32: + v := math.Trunc(math.Float64frombits(ce.popValue())) + if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN + if op.B3 { + // non-trapping conversion must cast nan to zero. 
+ v = 0 + } else { + panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) + } + } else if v < math.MinInt32 || v > math.MaxInt32 { + if op.B3 { + // non-trapping conversion must "saturate" the value for overflowing source. + if v < 0 { + v = math.MinInt32 + } else { + v = math.MaxInt32 + } + } else { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + } + ce.pushValue(uint64(uint32(int32(v)))) + case signedInt64: + v := math.Trunc(math.Float64frombits(ce.popValue())) + res := int64(v) + if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN + if op.B3 { + // non-trapping conversion must cast nan to zero. + res = 0 + } else { + panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) + } + } else if v < math.MinInt64 || v >= math.MaxInt64 { + // Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation, + // and that's why we use '>=' not '>' to check overflow. + if op.B3 { + // non-trapping conversion must "saturate" the value for overflowing source. + if v < 0 { + res = math.MinInt64 + } else { + res = math.MaxInt64 + } + } else { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + } + ce.pushValue(uint64(res)) + case signedUint32: + v := math.Trunc(math.Float64frombits(ce.popValue())) + if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN + if op.B3 { + // non-trapping conversion must cast nan to zero. + v = 0 + } else { + panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) + } + } else if v < 0 || v > math.MaxUint32 { + if op.B3 { + // non-trapping conversion must "saturate" the value for overflowing source. 
+ if v < 0 { + v = 0 + } else { + v = math.MaxUint32 + } + } else { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + } + ce.pushValue(uint64(uint32(v))) + case signedUint64: + v := math.Trunc(math.Float64frombits(ce.popValue())) + res := uint64(v) + if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN + if op.B3 { + // non-trapping conversion must cast nan to zero. + res = 0 + } else { + panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) + } + } else if v < 0 || v >= math.MaxUint64 { + // Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation, + // and that's why we use '>=' not '>' to check overflow. + if op.B3 { + // non-trapping conversion must "saturate" the value for overflowing source. + if v < 0 { + res = 0 + } else { + res = math.MaxUint64 + } + } else { + panic(wasmruntime.ErrRuntimeIntegerOverflow) + } + } + ce.pushValue(res) + } + } + frame.pc++ + case operationKindFConvertFromI: + switch signedInt(op.B1) { + case signedInt32: + if op.B2 == 0 { + // float32 + v := float32(int32(ce.popValue())) + ce.pushValue(uint64(math.Float32bits(v))) + } else { + // float64 + v := float64(int32(ce.popValue())) + ce.pushValue(math.Float64bits(v)) + } + case signedInt64: + if op.B2 == 0 { + // float32 + v := float32(int64(ce.popValue())) + ce.pushValue(uint64(math.Float32bits(v))) + } else { + // float64 + v := float64(int64(ce.popValue())) + ce.pushValue(math.Float64bits(v)) + } + case signedUint32: + if op.B2 == 0 { + // float32 + v := float32(uint32(ce.popValue())) + ce.pushValue(uint64(math.Float32bits(v))) + } else { + // float64 + v := float64(uint32(ce.popValue())) + ce.pushValue(math.Float64bits(v)) + } + case signedUint64: + if op.B2 == 0 { + // float32 + v := float32(ce.popValue()) + ce.pushValue(uint64(math.Float32bits(v))) + } else { + // float64 + v := float64(ce.popValue()) + ce.pushValue(math.Float64bits(v)) + } + } + frame.pc++ + case operationKindF32DemoteFromF64: + v := 
float32(math.Float64frombits(ce.popValue())) + ce.pushValue(uint64(math.Float32bits(v))) + frame.pc++ + case operationKindF64PromoteFromF32: + v := float64(math.Float32frombits(uint32(ce.popValue()))) + ce.pushValue(math.Float64bits(v)) + frame.pc++ + case operationKindExtend: + if op.B1 == 1 { + // Signed. + v := int64(int32(ce.popValue())) + ce.pushValue(uint64(v)) + } else { + v := uint64(uint32(ce.popValue())) + ce.pushValue(v) + } + frame.pc++ + case operationKindSignExtend32From8: + v := uint32(int8(ce.popValue())) + ce.pushValue(uint64(v)) + frame.pc++ + case operationKindSignExtend32From16: + v := uint32(int16(ce.popValue())) + ce.pushValue(uint64(v)) + frame.pc++ + case operationKindSignExtend64From8: + v := int64(int8(ce.popValue())) + ce.pushValue(uint64(v)) + frame.pc++ + case operationKindSignExtend64From16: + v := int64(int16(ce.popValue())) + ce.pushValue(uint64(v)) + frame.pc++ + case operationKindSignExtend64From32: + v := int64(int32(ce.popValue())) + ce.pushValue(uint64(v)) + frame.pc++ + case operationKindMemoryInit: + dataInstance := dataInstances[op.U1] + copySize := ce.popValue() + inDataOffset := ce.popValue() + inMemoryOffset := ce.popValue() + if inDataOffset+copySize > uint64(len(dataInstance)) || + inMemoryOffset+copySize > uint64(len(memoryInst.Buffer)) { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } else if copySize != 0 { + copy(memoryInst.Buffer[inMemoryOffset:inMemoryOffset+copySize], dataInstance[inDataOffset:]) + } + frame.pc++ + case operationKindDataDrop: + dataInstances[op.U1] = nil + frame.pc++ + case operationKindMemoryCopy: + memLen := uint64(len(memoryInst.Buffer)) + copySize := ce.popValue() + sourceOffset := ce.popValue() + destinationOffset := ce.popValue() + if sourceOffset+copySize > memLen || destinationOffset+copySize > memLen { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } else if copySize != 0 { + copy(memoryInst.Buffer[destinationOffset:], + 
memoryInst.Buffer[sourceOffset:sourceOffset+copySize]) + } + frame.pc++ + case operationKindMemoryFill: + fillSize := ce.popValue() + value := byte(ce.popValue()) + offset := ce.popValue() + if fillSize+offset > uint64(len(memoryInst.Buffer)) { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } else if fillSize != 0 { + // Uses the copy trick for faster filling buffer. + // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d + buf := memoryInst.Buffer[offset : offset+fillSize] + buf[0] = value + for i := 1; i < len(buf); i *= 2 { + copy(buf[i:], buf[:i]) + } + } + frame.pc++ + case operationKindTableInit: + elementInstance := elementInstances[op.U1] + copySize := ce.popValue() + inElementOffset := ce.popValue() + inTableOffset := ce.popValue() + table := tables[op.U2] + if inElementOffset+copySize > uint64(len(elementInstance)) || + inTableOffset+copySize > uint64(len(table.References)) { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } else if copySize != 0 { + copy(table.References[inTableOffset:inTableOffset+copySize], elementInstance[inElementOffset:]) + } + frame.pc++ + case operationKindElemDrop: + elementInstances[op.U1] = nil + frame.pc++ + case operationKindTableCopy: + srcTable, dstTable := tables[op.U1].References, tables[op.U2].References + copySize := ce.popValue() + sourceOffset := ce.popValue() + destinationOffset := ce.popValue() + if sourceOffset+copySize > uint64(len(srcTable)) || destinationOffset+copySize > uint64(len(dstTable)) { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } else if copySize != 0 { + copy(dstTable[destinationOffset:], srcTable[sourceOffset:sourceOffset+copySize]) + } + frame.pc++ + case operationKindRefFunc: + ce.pushValue(uint64(uintptr(unsafe.Pointer(&functions[op.U1])))) + frame.pc++ + case operationKindTableGet: + table := tables[op.U1] + + offset := ce.popValue() + if offset >= uint64(len(table.References)) { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + + 
ce.pushValue(uint64(table.References[offset])) + frame.pc++ + case operationKindTableSet: + table := tables[op.U1] + ref := ce.popValue() + + offset := ce.popValue() + if offset >= uint64(len(table.References)) { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + + table.References[offset] = uintptr(ref) // externrefs are opaque uint64. + frame.pc++ + case operationKindTableSize: + table := tables[op.U1] + ce.pushValue(uint64(len(table.References))) + frame.pc++ + case operationKindTableGrow: + table := tables[op.U1] + num, ref := ce.popValue(), ce.popValue() + ret := table.Grow(uint32(num), uintptr(ref)) + ce.pushValue(uint64(ret)) + frame.pc++ + case operationKindTableFill: + table := tables[op.U1] + num := ce.popValue() + ref := uintptr(ce.popValue()) + offset := ce.popValue() + if num+offset > uint64(len(table.References)) { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } else if num > 0 { + // Uses the copy trick for faster filling the region with the value. + // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d + targetRegion := table.References[offset : offset+num] + targetRegion[0] = ref + for i := 1; i < len(targetRegion); i *= 2 { + copy(targetRegion[i:], targetRegion[:i]) + } + } + frame.pc++ + case operationKindV128Const: + lo, hi := op.U1, op.U2 + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case operationKindV128Add: + yHigh, yLow := ce.popValue(), ce.popValue() + xHigh, xLow := ce.popValue(), ce.popValue() + switch op.B1 { + case shapeI8x16: + ce.pushValue( + uint64(uint8(xLow>>8)+uint8(yLow>>8))<<8 | uint64(uint8(xLow)+uint8(yLow)) | + uint64(uint8(xLow>>24)+uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)+uint8(yLow>>16))<<16 | + uint64(uint8(xLow>>40)+uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)+uint8(yLow>>32))<<32 | + uint64(uint8(xLow>>56)+uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)+uint8(yLow>>48))<<48, + ) + ce.pushValue( + uint64(uint8(xHigh>>8)+uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)+uint8(yHigh)) | + 
uint64(uint8(xHigh>>24)+uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)+uint8(yHigh>>16))<<16 | + uint64(uint8(xHigh>>40)+uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)+uint8(yHigh>>32))<<32 | + uint64(uint8(xHigh>>56)+uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)+uint8(yHigh>>48))<<48, + ) + case shapeI16x8: + ce.pushValue( + uint64(uint16(xLow>>16+yLow>>16))<<16 | uint64(uint16(xLow)+uint16(yLow)) | + uint64(uint16(xLow>>48+yLow>>48))<<48 | uint64(uint16(xLow>>32+yLow>>32))<<32, + ) + ce.pushValue( + uint64(uint16(xHigh>>16)+uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)+uint16(yHigh)) | + uint64(uint16(xHigh>>48)+uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)+uint16(yHigh>>32))<<32, + ) + case shapeI32x4: + ce.pushValue(uint64(uint32(xLow>>32)+uint32(yLow>>32))<<32 | uint64(uint32(xLow)+uint32(yLow))) + ce.pushValue(uint64(uint32(xHigh>>32)+uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)+uint32(yHigh))) + case shapeI64x2: + ce.pushValue(xLow + yLow) + ce.pushValue(xHigh + yHigh) + case shapeF32x4: + ce.pushValue( + addFloat32bits(uint32(xLow), uint32(yLow)) | addFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32, + ) + ce.pushValue( + addFloat32bits(uint32(xHigh), uint32(yHigh)) | addFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32, + ) + case shapeF64x2: + ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow))) + ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh))) + } + frame.pc++ + case operationKindV128Sub: + yHigh, yLow := ce.popValue(), ce.popValue() + xHigh, xLow := ce.popValue(), ce.popValue() + switch op.B1 { + case shapeI8x16: + ce.pushValue( + uint64(uint8(xLow>>8)-uint8(yLow>>8))<<8 | uint64(uint8(xLow)-uint8(yLow)) | + uint64(uint8(xLow>>24)-uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)-uint8(yLow>>16))<<16 | + uint64(uint8(xLow>>40)-uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)-uint8(yLow>>32))<<32 | + uint64(uint8(xLow>>56)-uint8(yLow>>56))<<56 | 
uint64(uint8(xLow>>48)-uint8(yLow>>48))<<48, + ) + ce.pushValue( + uint64(uint8(xHigh>>8)-uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)-uint8(yHigh)) | + uint64(uint8(xHigh>>24)-uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)-uint8(yHigh>>16))<<16 | + uint64(uint8(xHigh>>40)-uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)-uint8(yHigh>>32))<<32 | + uint64(uint8(xHigh>>56)-uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)-uint8(yHigh>>48))<<48, + ) + case shapeI16x8: + ce.pushValue( + uint64(uint16(xLow>>16)-uint16(yLow>>16))<<16 | uint64(uint16(xLow)-uint16(yLow)) | + uint64(uint16(xLow>>48)-uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)-uint16(yLow>>32))<<32, + ) + ce.pushValue( + uint64(uint16(xHigh>>16)-uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)-uint16(yHigh)) | + uint64(uint16(xHigh>>48)-uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)-uint16(yHigh>>32))<<32, + ) + case shapeI32x4: + ce.pushValue(uint64(uint32(xLow>>32-yLow>>32))<<32 | uint64(uint32(xLow)-uint32(yLow))) + ce.pushValue(uint64(uint32(xHigh>>32-yHigh>>32))<<32 | uint64(uint32(xHigh)-uint32(yHigh))) + case shapeI64x2: + ce.pushValue(xLow - yLow) + ce.pushValue(xHigh - yHigh) + case shapeF32x4: + ce.pushValue( + subFloat32bits(uint32(xLow), uint32(yLow)) | subFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32, + ) + ce.pushValue( + subFloat32bits(uint32(xHigh), uint32(yHigh)) | subFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32, + ) + case shapeF64x2: + ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow))) + ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh))) + } + frame.pc++ + case operationKindV128Load: + offset := ce.popMemoryOffset(op) + switch op.B1 { + case v128LoadType128: + lo, ok := memoryInst.ReadUint64Le(offset) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(lo) + hi, ok := memoryInst.ReadUint64Le(offset + 8) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + 
} + ce.pushValue(hi) + case v128LoadType8x8s: + data, ok := memoryInst.Read(offset, 8) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue( + uint64(uint16(int8(data[3])))<<48 | uint64(uint16(int8(data[2])))<<32 | uint64(uint16(int8(data[1])))<<16 | uint64(uint16(int8(data[0]))), + ) + ce.pushValue( + uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))), + ) + case v128LoadType8x8u: + data, ok := memoryInst.Read(offset, 8) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue( + uint64(data[3])<<48 | uint64(data[2])<<32 | uint64(data[1])<<16 | uint64(data[0]), + ) + ce.pushValue( + uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]), + ) + case v128LoadType16x4s: + data, ok := memoryInst.Read(offset, 8) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue( + uint64(int16(binary.LittleEndian.Uint16(data[2:])))<<32 | + uint64(uint32(int16(binary.LittleEndian.Uint16(data)))), + ) + ce.pushValue( + uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 | + uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))), + ) + case v128LoadType16x4u: + data, ok := memoryInst.Read(offset, 8) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue( + uint64(binary.LittleEndian.Uint16(data[2:]))<<32 | uint64(binary.LittleEndian.Uint16(data)), + ) + ce.pushValue( + uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])), + ) + case v128LoadType32x2s: + data, ok := memoryInst.Read(offset, 8) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data)))) + ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:])))) + case v128LoadType32x2u: + data, ok := memoryInst.Read(offset, 8) + if !ok { + 
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(uint64(binary.LittleEndian.Uint32(data))) + ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:]))) + case v128LoadType8Splat: + v, ok := memoryInst.ReadByte(offset) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + v8 := uint64(v)<<56 | uint64(v)<<48 | uint64(v)<<40 | uint64(v)<<32 | + uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v) + ce.pushValue(v8) + ce.pushValue(v8) + case v128LoadType16Splat: + v, ok := memoryInst.ReadUint16Le(offset) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v) + ce.pushValue(v4) + ce.pushValue(v4) + case v128LoadType32Splat: + v, ok := memoryInst.ReadUint32Le(offset) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + vv := uint64(v)<<32 | uint64(v) + ce.pushValue(vv) + ce.pushValue(vv) + case v128LoadType64Splat: + lo, ok := memoryInst.ReadUint64Le(offset) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(lo) + ce.pushValue(lo) + case v128LoadType32zero: + lo, ok := memoryInst.ReadUint32Le(offset) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(uint64(lo)) + ce.pushValue(0) + case v128LoadType64zero: + lo, ok := memoryInst.ReadUint64Le(offset) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(lo) + ce.pushValue(0) + } + frame.pc++ + case operationKindV128LoadLane: + hi, lo := ce.popValue(), ce.popValue() + offset := ce.popMemoryOffset(op) + switch op.B1 { + case 8: + b, ok := memoryInst.ReadByte(offset) + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + if op.B2 < 8 { + s := op.B2 << 3 + lo = (lo & ^(0xff << s)) | uint64(b)< math.MaxUint32 { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + if ok := memoryInst.WriteUint64Le(offset+8, hi); !ok { + 
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + if ok := memoryInst.WriteUint64Le(offset, lo); !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + frame.pc++ + case operationKindV128StoreLane: + hi, lo := ce.popValue(), ce.popValue() + offset := ce.popMemoryOffset(op) + var ok bool + switch op.B1 { + case 8: + if op.B2 < 8 { + ok = memoryInst.WriteByte(offset, byte(lo>>(op.B2*8))) + } else { + ok = memoryInst.WriteByte(offset, byte(hi>>((op.B2-8)*8))) + } + case 16: + if op.B2 < 4 { + ok = memoryInst.WriteUint16Le(offset, uint16(lo>>(op.B2*16))) + } else { + ok = memoryInst.WriteUint16Le(offset, uint16(hi>>((op.B2-4)*16))) + } + case 32: + if op.B2 < 2 { + ok = memoryInst.WriteUint32Le(offset, uint32(lo>>(op.B2*32))) + } else { + ok = memoryInst.WriteUint32Le(offset, uint32(hi>>((op.B2-2)*32))) + } + case 64: + if op.B2 == 0 { + ok = memoryInst.WriteUint64Le(offset, lo) + } else { + ok = memoryInst.WriteUint64Le(offset, hi) + } + } + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + frame.pc++ + case operationKindV128ReplaceLane: + v := ce.popValue() + hi, lo := ce.popValue(), ce.popValue() + switch op.B1 { + case shapeI8x16: + if op.B2 < 8 { + s := op.B2 << 3 + lo = (lo & ^(0xff << s)) | uint64(byte(v))<> (op.B2 * 8)) + } else { + u8 = byte(hi >> ((op.B2 - 8) * 8)) + } + if op.B3 { + // sign-extend. + v = uint64(uint32(int8(u8))) + } else { + v = uint64(u8) + } + case shapeI16x8: + var u16 uint16 + if op.B2 < 4 { + u16 = uint16(lo >> (op.B2 * 16)) + } else { + u16 = uint16(hi >> ((op.B2 - 4) * 16)) + } + if op.B3 { + // sign-extend. 
+ v = uint64(uint32(int16(u16))) + } else { + v = uint64(u16) + } + case shapeI32x4, shapeF32x4: + if op.B2 < 2 { + v = uint64(uint32(lo >> (op.B2 * 32))) + } else { + v = uint64(uint32(hi >> ((op.B2 - 2) * 32))) + } + case shapeI64x2, shapeF64x2: + if op.B2 == 0 { + v = lo + } else { + v = hi + } + } + ce.pushValue(v) + frame.pc++ + case operationKindV128Splat: + v := ce.popValue() + var hi, lo uint64 + switch op.B1 { + case shapeI8x16: + v8 := uint64(byte(v))<<56 | uint64(byte(v))<<48 | uint64(byte(v))<<40 | uint64(byte(v))<<32 | + uint64(byte(v))<<24 | uint64(byte(v))<<16 | uint64(byte(v))<<8 | uint64(byte(v)) + hi, lo = v8, v8 + case shapeI16x8: + v4 := uint64(uint16(v))<<48 | uint64(uint16(v))<<32 | uint64(uint16(v))<<16 | uint64(uint16(v)) + hi, lo = v4, v4 + case shapeI32x4, shapeF32x4: + v2 := uint64(uint32(v))<<32 | uint64(uint32(v)) + lo, hi = v2, v2 + case shapeI64x2, shapeF64x2: + lo, hi = v, v + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case operationKindV128Swizzle: + idxHi, idxLo := ce.popValue(), ce.popValue() + baseHi, baseLo := ce.popValue(), ce.popValue() + var newVal [16]byte + for i := 0; i < 16; i++ { + var id byte + if i < 8 { + id = byte(idxLo >> (i * 8)) + } else { + id = byte(idxHi >> ((i - 8) * 8)) + } + if id < 8 { + newVal[i] = byte(baseLo >> (id * 8)) + } else if id < 16 { + newVal[i] = byte(baseHi >> ((id - 8) * 8)) + } + } + ce.pushValue(binary.LittleEndian.Uint64(newVal[:8])) + ce.pushValue(binary.LittleEndian.Uint64(newVal[8:])) + frame.pc++ + case operationKindV128Shuffle: + xHi, xLo, yHi, yLo := ce.popValue(), ce.popValue(), ce.popValue(), ce.popValue() + var newVal [16]byte + for i, l := range op.Us { + if l < 8 { + newVal[i] = byte(yLo >> (l * 8)) + } else if l < 16 { + newVal[i] = byte(yHi >> ((l - 8) * 8)) + } else if l < 24 { + newVal[i] = byte(xLo >> ((l - 16) * 8)) + } else if l < 32 { + newVal[i] = byte(xHi >> ((l - 24) * 8)) + } + } + ce.pushValue(binary.LittleEndian.Uint64(newVal[:8])) + 
ce.pushValue(binary.LittleEndian.Uint64(newVal[8:])) + frame.pc++ + case operationKindV128AnyTrue: + hi, lo := ce.popValue(), ce.popValue() + if hi != 0 || lo != 0 { + ce.pushValue(1) + } else { + ce.pushValue(0) + } + frame.pc++ + case operationKindV128AllTrue: + hi, lo := ce.popValue(), ce.popValue() + var ret bool + switch op.B1 { + case shapeI8x16: + ret = (uint8(lo) != 0) && (uint8(lo>>8) != 0) && (uint8(lo>>16) != 0) && (uint8(lo>>24) != 0) && + (uint8(lo>>32) != 0) && (uint8(lo>>40) != 0) && (uint8(lo>>48) != 0) && (uint8(lo>>56) != 0) && + (uint8(hi) != 0) && (uint8(hi>>8) != 0) && (uint8(hi>>16) != 0) && (uint8(hi>>24) != 0) && + (uint8(hi>>32) != 0) && (uint8(hi>>40) != 0) && (uint8(hi>>48) != 0) && (uint8(hi>>56) != 0) + case shapeI16x8: + ret = (uint16(lo) != 0) && (uint16(lo>>16) != 0) && (uint16(lo>>32) != 0) && (uint16(lo>>48) != 0) && + (uint16(hi) != 0) && (uint16(hi>>16) != 0) && (uint16(hi>>32) != 0) && (uint16(hi>>48) != 0) + case shapeI32x4: + ret = (uint32(lo) != 0) && (uint32(lo>>32) != 0) && + (uint32(hi) != 0) && (uint32(hi>>32) != 0) + case shapeI64x2: + ret = (lo != 0) && + (hi != 0) + } + if ret { + ce.pushValue(1) + } else { + ce.pushValue(0) + } + frame.pc++ + case operationKindV128BitMask: + // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitmask-extraction + hi, lo := ce.popValue(), ce.popValue() + var res uint64 + switch op.B1 { + case shapeI8x16: + for i := 0; i < 8; i++ { + if int8(lo>>(i*8)) < 0 { + res |= 1 << i + } + } + for i := 0; i < 8; i++ { + if int8(hi>>(i*8)) < 0 { + res |= 1 << (i + 8) + } + } + case shapeI16x8: + for i := 0; i < 4; i++ { + if int16(lo>>(i*16)) < 0 { + res |= 1 << i + } + } + for i := 0; i < 4; i++ { + if int16(hi>>(i*16)) < 0 { + res |= 1 << (i + 4) + } + } + case shapeI32x4: + for i := 0; i < 2; i++ { + if int32(lo>>(i*32)) < 0 { + res |= 1 << i + } + } + for i := 0; i < 2; i++ { + if int32(hi>>(i*32)) < 0 { + res |= 1 << (i + 2) + } + } + case shapeI64x2: + if 
int64(lo) < 0 { + res |= 0b01 + } + if int(hi) < 0 { + res |= 0b10 + } + } + ce.pushValue(res) + frame.pc++ + case operationKindV128And: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + ce.pushValue(x1Lo & x2Lo) + ce.pushValue(x1Hi & x2Hi) + frame.pc++ + case operationKindV128Not: + hi, lo := ce.popValue(), ce.popValue() + ce.pushValue(^lo) + ce.pushValue(^hi) + frame.pc++ + case operationKindV128Or: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + ce.pushValue(x1Lo | x2Lo) + ce.pushValue(x1Hi | x2Hi) + frame.pc++ + case operationKindV128Xor: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + ce.pushValue(x1Lo ^ x2Lo) + ce.pushValue(x1Hi ^ x2Hi) + frame.pc++ + case operationKindV128Bitselect: + // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitwise-select + cHi, cLo := ce.popValue(), ce.popValue() + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + // v128.or(v128.and(v1, c), v128.and(v2, v128.not(c))) + ce.pushValue((x1Lo & cLo) | (x2Lo & (^cLo))) + ce.pushValue((x1Hi & cHi) | (x2Hi & (^cHi))) + frame.pc++ + case operationKindV128AndNot: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + ce.pushValue(x1Lo & (^x2Lo)) + ce.pushValue(x1Hi & (^x2Hi)) + frame.pc++ + case operationKindV128Shl: + s := ce.popValue() + hi, lo := ce.popValue(), ce.popValue() + switch op.B1 { + case shapeI8x16: + s = s % 8 + lo = uint64(uint8(lo<>8)<>16)<>24)<>32)<>40)<>48)<>56)<>8)<>16)<>24)<>32)<>40)<>48)<>56)<>16)<>32)<>48)<>16)<>32)<>48)<>32)<>32)<>s)) | + uint64(uint8(int8(lo>>8)>>s))<<8 | + uint64(uint8(int8(lo>>16)>>s))<<16 | + uint64(uint8(int8(lo>>24)>>s))<<24 | + uint64(uint8(int8(lo>>32)>>s))<<32 | + uint64(uint8(int8(lo>>40)>>s))<<40 | + uint64(uint8(int8(lo>>48)>>s))<<48 | + uint64(uint8(int8(lo>>56)>>s))<<56 + hi = 
uint64(uint8(int8(hi)>>s)) | + uint64(uint8(int8(hi>>8)>>s))<<8 | + uint64(uint8(int8(hi>>16)>>s))<<16 | + uint64(uint8(int8(hi>>24)>>s))<<24 | + uint64(uint8(int8(hi>>32)>>s))<<32 | + uint64(uint8(int8(hi>>40)>>s))<<40 | + uint64(uint8(int8(hi>>48)>>s))<<48 | + uint64(uint8(int8(hi>>56)>>s))<<56 + } else { + lo = uint64(uint8(lo)>>s) | + uint64(uint8(lo>>8)>>s)<<8 | + uint64(uint8(lo>>16)>>s)<<16 | + uint64(uint8(lo>>24)>>s)<<24 | + uint64(uint8(lo>>32)>>s)<<32 | + uint64(uint8(lo>>40)>>s)<<40 | + uint64(uint8(lo>>48)>>s)<<48 | + uint64(uint8(lo>>56)>>s)<<56 + hi = uint64(uint8(hi)>>s) | + uint64(uint8(hi>>8)>>s)<<8 | + uint64(uint8(hi>>16)>>s)<<16 | + uint64(uint8(hi>>24)>>s)<<24 | + uint64(uint8(hi>>32)>>s)<<32 | + uint64(uint8(hi>>40)>>s)<<40 | + uint64(uint8(hi>>48)>>s)<<48 | + uint64(uint8(hi>>56)>>s)<<56 + } + case shapeI16x8: + s = s % 16 + if op.B3 { // signed + lo = uint64(uint16(int16(lo)>>s)) | + uint64(uint16(int16(lo>>16)>>s))<<16 | + uint64(uint16(int16(lo>>32)>>s))<<32 | + uint64(uint16(int16(lo>>48)>>s))<<48 + hi = uint64(uint16(int16(hi)>>s)) | + uint64(uint16(int16(hi>>16)>>s))<<16 | + uint64(uint16(int16(hi>>32)>>s))<<32 | + uint64(uint16(int16(hi>>48)>>s))<<48 + } else { + lo = uint64(uint16(lo)>>s) | + uint64(uint16(lo>>16)>>s)<<16 | + uint64(uint16(lo>>32)>>s)<<32 | + uint64(uint16(lo>>48)>>s)<<48 + hi = uint64(uint16(hi)>>s) | + uint64(uint16(hi>>16)>>s)<<16 | + uint64(uint16(hi>>32)>>s)<<32 | + uint64(uint16(hi>>48)>>s)<<48 + } + case shapeI32x4: + s = s % 32 + if op.B3 { + lo = uint64(uint32(int32(lo)>>s)) | uint64(uint32(int32(lo>>32)>>s))<<32 + hi = uint64(uint32(int32(hi)>>s)) | uint64(uint32(int32(hi>>32)>>s))<<32 + } else { + lo = uint64(uint32(lo)>>s) | uint64(uint32(lo>>32)>>s)<<32 + hi = uint64(uint32(hi)>>s) | uint64(uint32(hi>>32)>>s)<<32 + } + case shapeI64x2: + s = s % 64 + if op.B3 { // signed + lo = uint64(int64(lo) >> s) + hi = uint64(int64(hi) >> s) + } else { + lo = lo >> s + hi = hi >> s + } + + } + ce.pushValue(lo) + 
ce.pushValue(hi) + frame.pc++ + case operationKindV128Cmp: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + var result []bool + switch op.B1 { + case v128CmpTypeI8x16Eq: + result = []bool{ + byte(x1Lo>>0) == byte(x2Lo>>0), byte(x1Lo>>8) == byte(x2Lo>>8), + byte(x1Lo>>16) == byte(x2Lo>>16), byte(x1Lo>>24) == byte(x2Lo>>24), + byte(x1Lo>>32) == byte(x2Lo>>32), byte(x1Lo>>40) == byte(x2Lo>>40), + byte(x1Lo>>48) == byte(x2Lo>>48), byte(x1Lo>>56) == byte(x2Lo>>56), + byte(x1Hi>>0) == byte(x2Hi>>0), byte(x1Hi>>8) == byte(x2Hi>>8), + byte(x1Hi>>16) == byte(x2Hi>>16), byte(x1Hi>>24) == byte(x2Hi>>24), + byte(x1Hi>>32) == byte(x2Hi>>32), byte(x1Hi>>40) == byte(x2Hi>>40), + byte(x1Hi>>48) == byte(x2Hi>>48), byte(x1Hi>>56) == byte(x2Hi>>56), + } + case v128CmpTypeI8x16Ne: + result = []bool{ + byte(x1Lo>>0) != byte(x2Lo>>0), byte(x1Lo>>8) != byte(x2Lo>>8), + byte(x1Lo>>16) != byte(x2Lo>>16), byte(x1Lo>>24) != byte(x2Lo>>24), + byte(x1Lo>>32) != byte(x2Lo>>32), byte(x1Lo>>40) != byte(x2Lo>>40), + byte(x1Lo>>48) != byte(x2Lo>>48), byte(x1Lo>>56) != byte(x2Lo>>56), + byte(x1Hi>>0) != byte(x2Hi>>0), byte(x1Hi>>8) != byte(x2Hi>>8), + byte(x1Hi>>16) != byte(x2Hi>>16), byte(x1Hi>>24) != byte(x2Hi>>24), + byte(x1Hi>>32) != byte(x2Hi>>32), byte(x1Hi>>40) != byte(x2Hi>>40), + byte(x1Hi>>48) != byte(x2Hi>>48), byte(x1Hi>>56) != byte(x2Hi>>56), + } + case v128CmpTypeI8x16LtS: + result = []bool{ + int8(x1Lo>>0) < int8(x2Lo>>0), int8(x1Lo>>8) < int8(x2Lo>>8), + int8(x1Lo>>16) < int8(x2Lo>>16), int8(x1Lo>>24) < int8(x2Lo>>24), + int8(x1Lo>>32) < int8(x2Lo>>32), int8(x1Lo>>40) < int8(x2Lo>>40), + int8(x1Lo>>48) < int8(x2Lo>>48), int8(x1Lo>>56) < int8(x2Lo>>56), + int8(x1Hi>>0) < int8(x2Hi>>0), int8(x1Hi>>8) < int8(x2Hi>>8), + int8(x1Hi>>16) < int8(x2Hi>>16), int8(x1Hi>>24) < int8(x2Hi>>24), + int8(x1Hi>>32) < int8(x2Hi>>32), int8(x1Hi>>40) < int8(x2Hi>>40), + int8(x1Hi>>48) < int8(x2Hi>>48), int8(x1Hi>>56) < int8(x2Hi>>56), + } + case 
v128CmpTypeI8x16LtU: + result = []bool{ + byte(x1Lo>>0) < byte(x2Lo>>0), byte(x1Lo>>8) < byte(x2Lo>>8), + byte(x1Lo>>16) < byte(x2Lo>>16), byte(x1Lo>>24) < byte(x2Lo>>24), + byte(x1Lo>>32) < byte(x2Lo>>32), byte(x1Lo>>40) < byte(x2Lo>>40), + byte(x1Lo>>48) < byte(x2Lo>>48), byte(x1Lo>>56) < byte(x2Lo>>56), + byte(x1Hi>>0) < byte(x2Hi>>0), byte(x1Hi>>8) < byte(x2Hi>>8), + byte(x1Hi>>16) < byte(x2Hi>>16), byte(x1Hi>>24) < byte(x2Hi>>24), + byte(x1Hi>>32) < byte(x2Hi>>32), byte(x1Hi>>40) < byte(x2Hi>>40), + byte(x1Hi>>48) < byte(x2Hi>>48), byte(x1Hi>>56) < byte(x2Hi>>56), + } + case v128CmpTypeI8x16GtS: + result = []bool{ + int8(x1Lo>>0) > int8(x2Lo>>0), int8(x1Lo>>8) > int8(x2Lo>>8), + int8(x1Lo>>16) > int8(x2Lo>>16), int8(x1Lo>>24) > int8(x2Lo>>24), + int8(x1Lo>>32) > int8(x2Lo>>32), int8(x1Lo>>40) > int8(x2Lo>>40), + int8(x1Lo>>48) > int8(x2Lo>>48), int8(x1Lo>>56) > int8(x2Lo>>56), + int8(x1Hi>>0) > int8(x2Hi>>0), int8(x1Hi>>8) > int8(x2Hi>>8), + int8(x1Hi>>16) > int8(x2Hi>>16), int8(x1Hi>>24) > int8(x2Hi>>24), + int8(x1Hi>>32) > int8(x2Hi>>32), int8(x1Hi>>40) > int8(x2Hi>>40), + int8(x1Hi>>48) > int8(x2Hi>>48), int8(x1Hi>>56) > int8(x2Hi>>56), + } + case v128CmpTypeI8x16GtU: + result = []bool{ + byte(x1Lo>>0) > byte(x2Lo>>0), byte(x1Lo>>8) > byte(x2Lo>>8), + byte(x1Lo>>16) > byte(x2Lo>>16), byte(x1Lo>>24) > byte(x2Lo>>24), + byte(x1Lo>>32) > byte(x2Lo>>32), byte(x1Lo>>40) > byte(x2Lo>>40), + byte(x1Lo>>48) > byte(x2Lo>>48), byte(x1Lo>>56) > byte(x2Lo>>56), + byte(x1Hi>>0) > byte(x2Hi>>0), byte(x1Hi>>8) > byte(x2Hi>>8), + byte(x1Hi>>16) > byte(x2Hi>>16), byte(x1Hi>>24) > byte(x2Hi>>24), + byte(x1Hi>>32) > byte(x2Hi>>32), byte(x1Hi>>40) > byte(x2Hi>>40), + byte(x1Hi>>48) > byte(x2Hi>>48), byte(x1Hi>>56) > byte(x2Hi>>56), + } + case v128CmpTypeI8x16LeS: + result = []bool{ + int8(x1Lo>>0) <= int8(x2Lo>>0), int8(x1Lo>>8) <= int8(x2Lo>>8), + int8(x1Lo>>16) <= int8(x2Lo>>16), int8(x1Lo>>24) <= int8(x2Lo>>24), + int8(x1Lo>>32) <= int8(x2Lo>>32), int8(x1Lo>>40) <= 
int8(x2Lo>>40), + int8(x1Lo>>48) <= int8(x2Lo>>48), int8(x1Lo>>56) <= int8(x2Lo>>56), + int8(x1Hi>>0) <= int8(x2Hi>>0), int8(x1Hi>>8) <= int8(x2Hi>>8), + int8(x1Hi>>16) <= int8(x2Hi>>16), int8(x1Hi>>24) <= int8(x2Hi>>24), + int8(x1Hi>>32) <= int8(x2Hi>>32), int8(x1Hi>>40) <= int8(x2Hi>>40), + int8(x1Hi>>48) <= int8(x2Hi>>48), int8(x1Hi>>56) <= int8(x2Hi>>56), + } + case v128CmpTypeI8x16LeU: + result = []bool{ + byte(x1Lo>>0) <= byte(x2Lo>>0), byte(x1Lo>>8) <= byte(x2Lo>>8), + byte(x1Lo>>16) <= byte(x2Lo>>16), byte(x1Lo>>24) <= byte(x2Lo>>24), + byte(x1Lo>>32) <= byte(x2Lo>>32), byte(x1Lo>>40) <= byte(x2Lo>>40), + byte(x1Lo>>48) <= byte(x2Lo>>48), byte(x1Lo>>56) <= byte(x2Lo>>56), + byte(x1Hi>>0) <= byte(x2Hi>>0), byte(x1Hi>>8) <= byte(x2Hi>>8), + byte(x1Hi>>16) <= byte(x2Hi>>16), byte(x1Hi>>24) <= byte(x2Hi>>24), + byte(x1Hi>>32) <= byte(x2Hi>>32), byte(x1Hi>>40) <= byte(x2Hi>>40), + byte(x1Hi>>48) <= byte(x2Hi>>48), byte(x1Hi>>56) <= byte(x2Hi>>56), + } + case v128CmpTypeI8x16GeS: + result = []bool{ + int8(x1Lo>>0) >= int8(x2Lo>>0), int8(x1Lo>>8) >= int8(x2Lo>>8), + int8(x1Lo>>16) >= int8(x2Lo>>16), int8(x1Lo>>24) >= int8(x2Lo>>24), + int8(x1Lo>>32) >= int8(x2Lo>>32), int8(x1Lo>>40) >= int8(x2Lo>>40), + int8(x1Lo>>48) >= int8(x2Lo>>48), int8(x1Lo>>56) >= int8(x2Lo>>56), + int8(x1Hi>>0) >= int8(x2Hi>>0), int8(x1Hi>>8) >= int8(x2Hi>>8), + int8(x1Hi>>16) >= int8(x2Hi>>16), int8(x1Hi>>24) >= int8(x2Hi>>24), + int8(x1Hi>>32) >= int8(x2Hi>>32), int8(x1Hi>>40) >= int8(x2Hi>>40), + int8(x1Hi>>48) >= int8(x2Hi>>48), int8(x1Hi>>56) >= int8(x2Hi>>56), + } + case v128CmpTypeI8x16GeU: + result = []bool{ + byte(x1Lo>>0) >= byte(x2Lo>>0), byte(x1Lo>>8) >= byte(x2Lo>>8), + byte(x1Lo>>16) >= byte(x2Lo>>16), byte(x1Lo>>24) >= byte(x2Lo>>24), + byte(x1Lo>>32) >= byte(x2Lo>>32), byte(x1Lo>>40) >= byte(x2Lo>>40), + byte(x1Lo>>48) >= byte(x2Lo>>48), byte(x1Lo>>56) >= byte(x2Lo>>56), + byte(x1Hi>>0) >= byte(x2Hi>>0), byte(x1Hi>>8) >= byte(x2Hi>>8), + byte(x1Hi>>16) >= byte(x2Hi>>16), 
byte(x1Hi>>24) >= byte(x2Hi>>24), + byte(x1Hi>>32) >= byte(x2Hi>>32), byte(x1Hi>>40) >= byte(x2Hi>>40), + byte(x1Hi>>48) >= byte(x2Hi>>48), byte(x1Hi>>56) >= byte(x2Hi>>56), + } + case v128CmpTypeI16x8Eq: + result = []bool{ + uint16(x1Lo>>0) == uint16(x2Lo>>0), uint16(x1Lo>>16) == uint16(x2Lo>>16), + uint16(x1Lo>>32) == uint16(x2Lo>>32), uint16(x1Lo>>48) == uint16(x2Lo>>48), + uint16(x1Hi>>0) == uint16(x2Hi>>0), uint16(x1Hi>>16) == uint16(x2Hi>>16), + uint16(x1Hi>>32) == uint16(x2Hi>>32), uint16(x1Hi>>48) == uint16(x2Hi>>48), + } + case v128CmpTypeI16x8Ne: + result = []bool{ + uint16(x1Lo>>0) != uint16(x2Lo>>0), uint16(x1Lo>>16) != uint16(x2Lo>>16), + uint16(x1Lo>>32) != uint16(x2Lo>>32), uint16(x1Lo>>48) != uint16(x2Lo>>48), + uint16(x1Hi>>0) != uint16(x2Hi>>0), uint16(x1Hi>>16) != uint16(x2Hi>>16), + uint16(x1Hi>>32) != uint16(x2Hi>>32), uint16(x1Hi>>48) != uint16(x2Hi>>48), + } + case v128CmpTypeI16x8LtS: + result = []bool{ + int16(x1Lo>>0) < int16(x2Lo>>0), int16(x1Lo>>16) < int16(x2Lo>>16), + int16(x1Lo>>32) < int16(x2Lo>>32), int16(x1Lo>>48) < int16(x2Lo>>48), + int16(x1Hi>>0) < int16(x2Hi>>0), int16(x1Hi>>16) < int16(x2Hi>>16), + int16(x1Hi>>32) < int16(x2Hi>>32), int16(x1Hi>>48) < int16(x2Hi>>48), + } + case v128CmpTypeI16x8LtU: + result = []bool{ + uint16(x1Lo>>0) < uint16(x2Lo>>0), uint16(x1Lo>>16) < uint16(x2Lo>>16), + uint16(x1Lo>>32) < uint16(x2Lo>>32), uint16(x1Lo>>48) < uint16(x2Lo>>48), + uint16(x1Hi>>0) < uint16(x2Hi>>0), uint16(x1Hi>>16) < uint16(x2Hi>>16), + uint16(x1Hi>>32) < uint16(x2Hi>>32), uint16(x1Hi>>48) < uint16(x2Hi>>48), + } + case v128CmpTypeI16x8GtS: + result = []bool{ + int16(x1Lo>>0) > int16(x2Lo>>0), int16(x1Lo>>16) > int16(x2Lo>>16), + int16(x1Lo>>32) > int16(x2Lo>>32), int16(x1Lo>>48) > int16(x2Lo>>48), + int16(x1Hi>>0) > int16(x2Hi>>0), int16(x1Hi>>16) > int16(x2Hi>>16), + int16(x1Hi>>32) > int16(x2Hi>>32), int16(x1Hi>>48) > int16(x2Hi>>48), + } + case v128CmpTypeI16x8GtU: + result = []bool{ + uint16(x1Lo>>0) > uint16(x2Lo>>0), 
uint16(x1Lo>>16) > uint16(x2Lo>>16), + uint16(x1Lo>>32) > uint16(x2Lo>>32), uint16(x1Lo>>48) > uint16(x2Lo>>48), + uint16(x1Hi>>0) > uint16(x2Hi>>0), uint16(x1Hi>>16) > uint16(x2Hi>>16), + uint16(x1Hi>>32) > uint16(x2Hi>>32), uint16(x1Hi>>48) > uint16(x2Hi>>48), + } + case v128CmpTypeI16x8LeS: + result = []bool{ + int16(x1Lo>>0) <= int16(x2Lo>>0), int16(x1Lo>>16) <= int16(x2Lo>>16), + int16(x1Lo>>32) <= int16(x2Lo>>32), int16(x1Lo>>48) <= int16(x2Lo>>48), + int16(x1Hi>>0) <= int16(x2Hi>>0), int16(x1Hi>>16) <= int16(x2Hi>>16), + int16(x1Hi>>32) <= int16(x2Hi>>32), int16(x1Hi>>48) <= int16(x2Hi>>48), + } + case v128CmpTypeI16x8LeU: + result = []bool{ + uint16(x1Lo>>0) <= uint16(x2Lo>>0), uint16(x1Lo>>16) <= uint16(x2Lo>>16), + uint16(x1Lo>>32) <= uint16(x2Lo>>32), uint16(x1Lo>>48) <= uint16(x2Lo>>48), + uint16(x1Hi>>0) <= uint16(x2Hi>>0), uint16(x1Hi>>16) <= uint16(x2Hi>>16), + uint16(x1Hi>>32) <= uint16(x2Hi>>32), uint16(x1Hi>>48) <= uint16(x2Hi>>48), + } + case v128CmpTypeI16x8GeS: + result = []bool{ + int16(x1Lo>>0) >= int16(x2Lo>>0), int16(x1Lo>>16) >= int16(x2Lo>>16), + int16(x1Lo>>32) >= int16(x2Lo>>32), int16(x1Lo>>48) >= int16(x2Lo>>48), + int16(x1Hi>>0) >= int16(x2Hi>>0), int16(x1Hi>>16) >= int16(x2Hi>>16), + int16(x1Hi>>32) >= int16(x2Hi>>32), int16(x1Hi>>48) >= int16(x2Hi>>48), + } + case v128CmpTypeI16x8GeU: + result = []bool{ + uint16(x1Lo>>0) >= uint16(x2Lo>>0), uint16(x1Lo>>16) >= uint16(x2Lo>>16), + uint16(x1Lo>>32) >= uint16(x2Lo>>32), uint16(x1Lo>>48) >= uint16(x2Lo>>48), + uint16(x1Hi>>0) >= uint16(x2Hi>>0), uint16(x1Hi>>16) >= uint16(x2Hi>>16), + uint16(x1Hi>>32) >= uint16(x2Hi>>32), uint16(x1Hi>>48) >= uint16(x2Hi>>48), + } + case v128CmpTypeI32x4Eq: + result = []bool{ + uint32(x1Lo>>0) == uint32(x2Lo>>0), uint32(x1Lo>>32) == uint32(x2Lo>>32), + uint32(x1Hi>>0) == uint32(x2Hi>>0), uint32(x1Hi>>32) == uint32(x2Hi>>32), + } + case v128CmpTypeI32x4Ne: + result = []bool{ + uint32(x1Lo>>0) != uint32(x2Lo>>0), uint32(x1Lo>>32) != uint32(x2Lo>>32), + 
uint32(x1Hi>>0) != uint32(x2Hi>>0), uint32(x1Hi>>32) != uint32(x2Hi>>32), + } + case v128CmpTypeI32x4LtS: + result = []bool{ + int32(x1Lo>>0) < int32(x2Lo>>0), int32(x1Lo>>32) < int32(x2Lo>>32), + int32(x1Hi>>0) < int32(x2Hi>>0), int32(x1Hi>>32) < int32(x2Hi>>32), + } + case v128CmpTypeI32x4LtU: + result = []bool{ + uint32(x1Lo>>0) < uint32(x2Lo>>0), uint32(x1Lo>>32) < uint32(x2Lo>>32), + uint32(x1Hi>>0) < uint32(x2Hi>>0), uint32(x1Hi>>32) < uint32(x2Hi>>32), + } + case v128CmpTypeI32x4GtS: + result = []bool{ + int32(x1Lo>>0) > int32(x2Lo>>0), int32(x1Lo>>32) > int32(x2Lo>>32), + int32(x1Hi>>0) > int32(x2Hi>>0), int32(x1Hi>>32) > int32(x2Hi>>32), + } + case v128CmpTypeI32x4GtU: + result = []bool{ + uint32(x1Lo>>0) > uint32(x2Lo>>0), uint32(x1Lo>>32) > uint32(x2Lo>>32), + uint32(x1Hi>>0) > uint32(x2Hi>>0), uint32(x1Hi>>32) > uint32(x2Hi>>32), + } + case v128CmpTypeI32x4LeS: + result = []bool{ + int32(x1Lo>>0) <= int32(x2Lo>>0), int32(x1Lo>>32) <= int32(x2Lo>>32), + int32(x1Hi>>0) <= int32(x2Hi>>0), int32(x1Hi>>32) <= int32(x2Hi>>32), + } + case v128CmpTypeI32x4LeU: + result = []bool{ + uint32(x1Lo>>0) <= uint32(x2Lo>>0), uint32(x1Lo>>32) <= uint32(x2Lo>>32), + uint32(x1Hi>>0) <= uint32(x2Hi>>0), uint32(x1Hi>>32) <= uint32(x2Hi>>32), + } + case v128CmpTypeI32x4GeS: + result = []bool{ + int32(x1Lo>>0) >= int32(x2Lo>>0), int32(x1Lo>>32) >= int32(x2Lo>>32), + int32(x1Hi>>0) >= int32(x2Hi>>0), int32(x1Hi>>32) >= int32(x2Hi>>32), + } + case v128CmpTypeI32x4GeU: + result = []bool{ + uint32(x1Lo>>0) >= uint32(x2Lo>>0), uint32(x1Lo>>32) >= uint32(x2Lo>>32), + uint32(x1Hi>>0) >= uint32(x2Hi>>0), uint32(x1Hi>>32) >= uint32(x2Hi>>32), + } + case v128CmpTypeI64x2Eq: + result = []bool{x1Lo == x2Lo, x1Hi == x2Hi} + case v128CmpTypeI64x2Ne: + result = []bool{x1Lo != x2Lo, x1Hi != x2Hi} + case v128CmpTypeI64x2LtS: + result = []bool{int64(x1Lo) < int64(x2Lo), int64(x1Hi) < int64(x2Hi)} + case v128CmpTypeI64x2GtS: + result = []bool{int64(x1Lo) > int64(x2Lo), int64(x1Hi) > int64(x2Hi)} 
+ case v128CmpTypeI64x2LeS: + result = []bool{int64(x1Lo) <= int64(x2Lo), int64(x1Hi) <= int64(x2Hi)} + case v128CmpTypeI64x2GeS: + result = []bool{int64(x1Lo) >= int64(x2Lo), int64(x1Hi) >= int64(x2Hi)} + case v128CmpTypeF32x4Eq: + result = []bool{ + math.Float32frombits(uint32(x1Lo>>0)) == math.Float32frombits(uint32(x2Lo>>0)), + math.Float32frombits(uint32(x1Lo>>32)) == math.Float32frombits(uint32(x2Lo>>32)), + math.Float32frombits(uint32(x1Hi>>0)) == math.Float32frombits(uint32(x2Hi>>0)), + math.Float32frombits(uint32(x1Hi>>32)) == math.Float32frombits(uint32(x2Hi>>32)), + } + case v128CmpTypeF32x4Ne: + result = []bool{ + math.Float32frombits(uint32(x1Lo>>0)) != math.Float32frombits(uint32(x2Lo>>0)), + math.Float32frombits(uint32(x1Lo>>32)) != math.Float32frombits(uint32(x2Lo>>32)), + math.Float32frombits(uint32(x1Hi>>0)) != math.Float32frombits(uint32(x2Hi>>0)), + math.Float32frombits(uint32(x1Hi>>32)) != math.Float32frombits(uint32(x2Hi>>32)), + } + case v128CmpTypeF32x4Lt: + result = []bool{ + math.Float32frombits(uint32(x1Lo>>0)) < math.Float32frombits(uint32(x2Lo>>0)), + math.Float32frombits(uint32(x1Lo>>32)) < math.Float32frombits(uint32(x2Lo>>32)), + math.Float32frombits(uint32(x1Hi>>0)) < math.Float32frombits(uint32(x2Hi>>0)), + math.Float32frombits(uint32(x1Hi>>32)) < math.Float32frombits(uint32(x2Hi>>32)), + } + case v128CmpTypeF32x4Gt: + result = []bool{ + math.Float32frombits(uint32(x1Lo>>0)) > math.Float32frombits(uint32(x2Lo>>0)), + math.Float32frombits(uint32(x1Lo>>32)) > math.Float32frombits(uint32(x2Lo>>32)), + math.Float32frombits(uint32(x1Hi>>0)) > math.Float32frombits(uint32(x2Hi>>0)), + math.Float32frombits(uint32(x1Hi>>32)) > math.Float32frombits(uint32(x2Hi>>32)), + } + case v128CmpTypeF32x4Le: + result = []bool{ + math.Float32frombits(uint32(x1Lo>>0)) <= math.Float32frombits(uint32(x2Lo>>0)), + math.Float32frombits(uint32(x1Lo>>32)) <= math.Float32frombits(uint32(x2Lo>>32)), + math.Float32frombits(uint32(x1Hi>>0)) <= 
math.Float32frombits(uint32(x2Hi>>0)), + math.Float32frombits(uint32(x1Hi>>32)) <= math.Float32frombits(uint32(x2Hi>>32)), + } + case v128CmpTypeF32x4Ge: + result = []bool{ + math.Float32frombits(uint32(x1Lo>>0)) >= math.Float32frombits(uint32(x2Lo>>0)), + math.Float32frombits(uint32(x1Lo>>32)) >= math.Float32frombits(uint32(x2Lo>>32)), + math.Float32frombits(uint32(x1Hi>>0)) >= math.Float32frombits(uint32(x2Hi>>0)), + math.Float32frombits(uint32(x1Hi>>32)) >= math.Float32frombits(uint32(x2Hi>>32)), + } + case v128CmpTypeF64x2Eq: + result = []bool{ + math.Float64frombits(x1Lo) == math.Float64frombits(x2Lo), + math.Float64frombits(x1Hi) == math.Float64frombits(x2Hi), + } + case v128CmpTypeF64x2Ne: + result = []bool{ + math.Float64frombits(x1Lo) != math.Float64frombits(x2Lo), + math.Float64frombits(x1Hi) != math.Float64frombits(x2Hi), + } + case v128CmpTypeF64x2Lt: + result = []bool{ + math.Float64frombits(x1Lo) < math.Float64frombits(x2Lo), + math.Float64frombits(x1Hi) < math.Float64frombits(x2Hi), + } + case v128CmpTypeF64x2Gt: + result = []bool{ + math.Float64frombits(x1Lo) > math.Float64frombits(x2Lo), + math.Float64frombits(x1Hi) > math.Float64frombits(x2Hi), + } + case v128CmpTypeF64x2Le: + result = []bool{ + math.Float64frombits(x1Lo) <= math.Float64frombits(x2Lo), + math.Float64frombits(x1Hi) <= math.Float64frombits(x2Hi), + } + case v128CmpTypeF64x2Ge: + result = []bool{ + math.Float64frombits(x1Lo) >= math.Float64frombits(x2Lo), + math.Float64frombits(x1Hi) >= math.Float64frombits(x2Hi), + } + } + + var retLo, retHi uint64 + laneNum := len(result) + switch laneNum { + case 16: + for i, b := range result { + if b { + if i < 8 { + retLo |= 0xff << (i * 8) + } else { + retHi |= 0xff << ((i - 8) * 8) + } + } + } + case 8: + for i, b := range result { + if b { + if i < 4 { + retLo |= 0xffff << (i * 16) + } else { + retHi |= 0xffff << ((i - 4) * 16) + } + } + } + case 4: + for i, b := range result { + if b { + if i < 2 { + retLo |= 0xffff_ffff << (i * 32) + } 
else { + retHi |= 0xffff_ffff << ((i - 2) * 32) + } + } + } + case 2: + if result[0] { + retLo = ^uint64(0) + } + if result[1] { + retHi = ^uint64(0) + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128AddSat: + x2hi, x2Lo := ce.popValue(), ce.popValue() + x1hi, x1Lo := ce.popValue(), ce.popValue() + + var retLo, retHi uint64 + + // Lane-wise addition while saturating the overflowing values. + // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-addition + switch op.B1 { + case shapeI8x16: + for i := 0; i < 16; i++ { + var v, w byte + if i < 8 { + v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8)) + } else { + v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8)) + } + + var uv uint64 + if op.B3 { // signed + if subbed := int64(int8(v)) + int64(int8(w)); subbed < math.MinInt8 { + uv = uint64(byte(0x80)) + } else if subbed > math.MaxInt8 { + uv = uint64(byte(0x7f)) + } else { + uv = uint64(byte(int8(subbed))) + } + } else { + if subbed := int64(v) + int64(w); subbed < 0 { + uv = uint64(byte(0)) + } else if subbed > math.MaxUint8 { + uv = uint64(byte(0xff)) + } else { + uv = uint64(byte(subbed)) + } + } + + if i < 8 { // first 8 lanes are on lower 64bits. 
+ retLo |= uv << (i * 8) + } else { + retHi |= uv << ((i - 8) * 8) + } + } + case shapeI16x8: + for i := 0; i < 8; i++ { + var v, w uint16 + if i < 4 { + v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16)) + } else { + v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16)) + } + + var uv uint64 + if op.B3 { // signed + if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 { + uv = uint64(uint16(0x8000)) + } else if added > math.MaxInt16 { + uv = uint64(uint16(0x7fff)) + } else { + uv = uint64(uint16(int16(added))) + } + } else { + if added := int64(v) + int64(w); added < 0 { + uv = uint64(uint16(0)) + } else if added > math.MaxUint16 { + uv = uint64(uint16(0xffff)) + } else { + uv = uint64(uint16(added)) + } + } + + if i < 4 { // first 4 lanes are on lower 64bits. + retLo |= uv << (i * 16) + } else { + retHi |= uv << ((i - 4) * 16) + } + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128SubSat: + x2hi, x2Lo := ce.popValue(), ce.popValue() + x1hi, x1Lo := ce.popValue(), ce.popValue() + + var retLo, retHi uint64 + + // Lane-wise subtraction while saturating the overflowing values. 
+ // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-subtraction + switch op.B1 { + case shapeI8x16: + for i := 0; i < 16; i++ { + var v, w byte + if i < 8 { + v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8)) + } else { + v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8)) + } + + var uv uint64 + if op.B3 { // signed + if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 { + uv = uint64(byte(0x80)) + } else if subbed > math.MaxInt8 { + uv = uint64(byte(0x7f)) + } else { + uv = uint64(byte(int8(subbed))) + } + } else { + if subbed := int64(v) - int64(w); subbed < 0 { + uv = uint64(byte(0)) + } else if subbed > math.MaxUint8 { + uv = uint64(byte(0xff)) + } else { + uv = uint64(byte(subbed)) + } + } + + if i < 8 { + retLo |= uv << (i * 8) + } else { + retHi |= uv << ((i - 8) * 8) + } + } + case shapeI16x8: + for i := 0; i < 8; i++ { + var v, w uint16 + if i < 4 { + v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16)) + } else { + v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16)) + } + + var uv uint64 + if op.B3 { // signed + if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 { + uv = uint64(uint16(0x8000)) + } else if subbed > math.MaxInt16 { + uv = uint64(uint16(0x7fff)) + } else { + uv = uint64(uint16(int16(subbed))) + } + } else { + if subbed := int64(v) - int64(w); subbed < 0 { + uv = uint64(uint16(0)) + } else if subbed > math.MaxUint16 { + uv = uint64(uint16(0xffff)) + } else { + uv = uint64(uint16(subbed)) + } + } + + if i < 4 { + retLo |= uv << (i * 16) + } else { + retHi |= uv << ((i - 4) * 16) + } + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Mul: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + switch op.B1 { + case shapeI16x8: + retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) | + 
(uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48) + retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) | + (uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48) + case shapeI32x4: + retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32) + retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32) + case shapeI64x2: + retHi = x1hi * x2hi + retLo = x1lo * x2lo + case shapeF32x4: + retHi = mulFloat32bits(uint32(x1hi), uint32(x2hi)) | mulFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32 + retLo = mulFloat32bits(uint32(x1lo), uint32(x2lo)) | mulFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32 + case shapeF64x2: + retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi)) + retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo)) + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Div: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + if op.B1 == shapeF64x2 { + retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi)) + retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo)) + } else { + retHi = divFloat32bits(uint32(x1hi), uint32(x2hi)) | divFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32 + retLo = divFloat32bits(uint32(x1lo), uint32(x2lo)) | divFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32 + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Neg: + hi, lo := ce.popValue(), ce.popValue() + switch op.B1 { + case shapeI8x16: + lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) | + (uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) | + (uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) | + 
(uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56) + hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) | + (uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) | + (uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) | + (uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56) + case shapeI16x8: + hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) | + (uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48) + lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) | + (uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48) + case shapeI32x4: + hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32) + lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32) + case shapeI64x2: + hi = -hi + lo = -lo + case shapeF32x4: + hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) | + (uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32) + lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) | + (uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32) + case shapeF64x2: + hi = math.Float64bits(-math.Float64frombits(hi)) + lo = math.Float64bits(-math.Float64frombits(lo)) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case operationKindV128Sqrt: + hi, lo := ce.popValue(), ce.popValue() + if op.B1 == shapeF64x2 { + hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi))) + lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo))) + } else { + hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) | + (uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32) + lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) | + (uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case operationKindV128Abs: + hi, lo := ce.popValue(), 
ce.popValue() + switch op.B1 { + case shapeI8x16: + lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) | + (uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) | + (uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) | + (uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56) + hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) | + (uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) | + (uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) | + (uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56) + case shapeI16x8: + hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) | + (uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48) + lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) | + (uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48) + case shapeI32x4: + hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32) + lo = uint64(i32Abs(uint32(lo))) | (uint64(i32Abs(uint32(lo>>32))) << 32) + case shapeI64x2: + if int64(hi) < 0 { + hi = -hi + } + if int64(lo) < 0 { + lo = -lo + } + case shapeF32x4: + hi = hi &^ (1<<31 | 1<<63) + lo = lo &^ (1<<31 | 1<<63) + case shapeF64x2: + hi = hi &^ (1 << 63) + lo = lo &^ (1 << 63) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case operationKindV128Popcnt: + hi, lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + for i := 0; i < 16; i++ { + var v byte + if i < 8 { + v = byte(lo >> (i * 8)) + } else { + v = byte(hi >> ((i - 8) * 8)) + } + + var cnt uint64 + for i := 0; i < 8; i++ { + if (v>>i)&0b1 != 0 { + cnt++ + } + } + + if i < 8 { + retLo |= cnt << (i * 8) + } else { + retHi |= cnt << ((i - 8) * 8) + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Min: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := 
ce.popValue(), ce.popValue() + var retLo, retHi uint64 + switch op.B1 { + case shapeI8x16: + if op.B3 { // signed + retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) | + uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) | + uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + } else { + retLo = uint64(i8MinU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) | + uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) | + uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + } + case shapeI16x8: + if op.B3 { // signed + retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) | + 
uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) | + uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } else { + retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) | + uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16MinU(uint16(x1hi), uint16(x2hi))) | + uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } + case shapeI32x4: + if op.B3 { // signed + retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) | + uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 + retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) | + uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 + } else { + retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) | + uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 + retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) | + uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 + } + case shapeF32x4: + retHi = wasmCompatMin32bits(uint32(x1hi), uint32(x2hi)) | + wasmCompatMin32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32 + retLo = wasmCompatMin32bits(uint32(x1lo), uint32(x2lo)) | + wasmCompatMin32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32 + case shapeF64x2: + retHi = math.Float64bits(moremath.WasmCompatMin64( + math.Float64frombits(x1hi), + math.Float64frombits(x2hi), + )) + retLo = math.Float64bits(moremath.WasmCompatMin64( + math.Float64frombits(x1lo), + math.Float64frombits(x2lo), + )) + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case 
operationKindV128Max: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + switch op.B1 { + case shapeI8x16: + if op.B3 { // signed + retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) | + uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) | + uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + } else { + retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) | + uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) | + uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + } + case shapeI16x8: + if op.B3 { 
// signed + retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) | + uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16MaxS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) | + uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } else { + retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) | + uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) | + uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } + case shapeI32x4: + if op.B3 { // signed + retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) | + uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 + retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) | + uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 + } else { + retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) | + uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 + retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) | + uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 + } + case shapeF32x4: + retHi = wasmCompatMax32bits(uint32(x1hi), uint32(x2hi)) | + wasmCompatMax32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32 + retLo = wasmCompatMax32bits(uint32(x1lo), uint32(x2lo)) | + wasmCompatMax32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32 + case shapeF64x2: + retHi = math.Float64bits(moremath.WasmCompatMax64( + math.Float64frombits(x1hi), + math.Float64frombits(x2hi), + )) + retLo = math.Float64bits(moremath.WasmCompatMax64( + math.Float64frombits(x1lo), + math.Float64frombits(x2lo), + )) + } + 
ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128AvgrU: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + switch op.B1 { + case shapeI8x16: + retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) | + uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | + uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | + uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 + retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) | + uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | + uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | + uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 + case shapeI16x8: + retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) | + uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | + uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | + uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 + retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) | + uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | + uint64(i16RoundingAverage(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | + uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Pmin: + x2hi, x2lo := ce.popValue(), 
ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + if op.B1 == shapeF32x4 { + if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) { + retLo = x2lo & 0x00000000_ffffffff + } else { + retLo = x1lo & 0x00000000_ffffffff + } + if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) { + retLo |= x2lo & 0xffffffff_00000000 + } else { + retLo |= x1lo & 0xffffffff_00000000 + } + if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) { + retHi = x2hi & 0x00000000_ffffffff + } else { + retHi = x1hi & 0x00000000_ffffffff + } + if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) { + retHi |= x2hi & 0xffffffff_00000000 + } else { + retHi |= x1hi & 0xffffffff_00000000 + } + } else { + if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) { + retLo = x2lo + } else { + retLo = x1lo + } + if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) { + retHi = x2hi + } else { + retHi = x1hi + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Pmax: + x2hi, x2lo := ce.popValue(), ce.popValue() + x1hi, x1lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + if op.B1 == shapeF32x4 { + if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) { + retLo = x2lo & 0x00000000_ffffffff + } else { + retLo = x1lo & 0x00000000_ffffffff + } + if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) { + retLo |= x2lo & 0xffffffff_00000000 + } else { + retLo |= x1lo & 0xffffffff_00000000 + } + if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) { + retHi = x2hi & 0x00000000_ffffffff + } else { + retHi = x1hi & 0x00000000_ffffffff + } + if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) { + retHi |= x2hi & 0xffffffff_00000000 + } else { + retHi |= x1hi & 
0xffffffff_00000000 + } + } else { + if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) { + retLo = x2lo + } else { + retLo = x1lo + } + if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) { + retHi = x2hi + } else { + retHi = x1hi + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Ceil: + hi, lo := ce.popValue(), ce.popValue() + if op.B1 == shapeF32x4 { + lo = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo))))) | + (uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo>>32))))) << 32) + hi = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi))))) | + (uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi>>32))))) << 32) + } else { + lo = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(lo))) + hi = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(hi))) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case operationKindV128Floor: + hi, lo := ce.popValue(), ce.popValue() + if op.B1 == shapeF32x4 { + lo = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo))))) | + (uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo>>32))))) << 32) + hi = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi))))) | + (uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi>>32))))) << 32) + } else { + lo = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(lo))) + hi = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(hi))) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case operationKindV128Trunc: + hi, lo := ce.popValue(), ce.popValue() + if op.B1 == shapeF32x4 { + lo = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo))))) | + 
(uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo>>32))))) << 32) + hi = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi))))) | + (uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi>>32))))) << 32) + } else { + lo = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(lo))) + hi = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(hi))) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case operationKindV128Nearest: + hi, lo := ce.popValue(), ce.popValue() + if op.B1 == shapeF32x4 { + lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) | + (uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32) + hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) | + (uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32) + } else { + lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo))) + hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi))) + } + ce.pushValue(lo) + ce.pushValue(hi) + frame.pc++ + case operationKindV128Extend: + hi, lo := ce.popValue(), ce.popValue() + var origin uint64 + if op.B3 { // use lower 64 bits + origin = lo + } else { + origin = hi + } + + signed := op.B2 == 1 + + var retHi, retLo uint64 + switch op.B1 { + case shapeI8x16: + for i := 0; i < 8; i++ { + v8 := byte(origin >> (i * 8)) + + var v16 uint16 + if signed { + v16 = uint16(int8(v8)) + } else { + v16 = uint16(v8) + } + + if i < 4 { + retLo |= uint64(v16) << (i * 16) + } else { + retHi |= uint64(v16) << ((i - 4) * 16) + } + } + case shapeI16x8: + for i := 0; i < 4; i++ { + v16 := uint16(origin >> (i * 16)) + + var v32 uint32 + if signed { + v32 = uint32(int16(v16)) + } else { + v32 = uint32(v16) + } + + if i < 2 { + retLo |= uint64(v32) << (i 
* 32) + } else { + retHi |= uint64(v32) << ((i - 2) * 32) + } + } + case shapeI32x4: + v32Lo := uint32(origin) + v32Hi := uint32(origin >> 32) + if signed { + retLo = uint64(int32(v32Lo)) + retHi = uint64(int32(v32Hi)) + } else { + retLo = uint64(v32Lo) + retHi = uint64(v32Hi) + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128ExtMul: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + var x1, x2 uint64 + if op.B3 { // use lower 64 bits + x1, x2 = x1Lo, x2Lo + } else { + x1, x2 = x1Hi, x2Hi + } + + signed := op.B2 == 1 + + var retLo, retHi uint64 + switch op.B1 { + case shapeI8x16: + for i := 0; i < 8; i++ { + v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8)) + + var v16 uint16 + if signed { + v16 = uint16(int16(int8(v1)) * int16(int8(v2))) + } else { + v16 = uint16(v1) * uint16(v2) + } + + if i < 4 { + retLo |= uint64(v16) << (i * 16) + } else { + retHi |= uint64(v16) << ((i - 4) * 16) + } + } + case shapeI16x8: + for i := 0; i < 4; i++ { + v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16)) + + var v32 uint32 + if signed { + v32 = uint32(int32(int16(v1)) * int32(int16(v2))) + } else { + v32 = uint32(v1) * uint32(v2) + } + + if i < 2 { + retLo |= uint64(v32) << (i * 32) + } else { + retHi |= uint64(v32) << ((i - 2) * 32) + } + } + case shapeI32x4: + v1Lo, v2Lo := uint32(x1), uint32(x2) + v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32) + if signed { + retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo))) + retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi))) + } else { + retLo = uint64(v1Lo) * uint64(v2Lo) + retHi = uint64(v1Hi) * uint64(v2Hi) + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Q15mulrSatS: + x2hi, x2Lo := ce.popValue(), ce.popValue() + x1hi, x1Lo := ce.popValue(), ce.popValue() + var retLo, retHi uint64 + for i := 0; i < 8; i++ { + var v, w int16 + if i < 4 { + v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16))) + } else { + v, 
w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16))) + } + + var uv uint64 + // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication + if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 { + uv = uint64(uint16(0x8000)) + } else if calc > math.MaxInt16 { + uv = uint64(uint16(0x7fff)) + } else { + uv = uint64(uint16(int16(calc))) + } + + if i < 4 { + retLo |= uv << (i * 16) + } else { + retHi |= uv << ((i - 4) * 16) + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128ExtAddPairwise: + hi, lo := ce.popValue(), ce.popValue() + + signed := op.B3 + + var retLo, retHi uint64 + switch op.B1 { + case shapeI8x16: + for i := 0; i < 8; i++ { + var v1, v2 byte + if i < 4 { + v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8)) + } else { + v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8)) + } + + var v16 uint16 + if signed { + v16 = uint16(int16(int8(v1)) + int16(int8(v2))) + } else { + v16 = uint16(v1) + uint16(v2) + } + + if i < 4 { + retLo |= uint64(v16) << (i * 16) + } else { + retHi |= uint64(v16) << ((i - 4) * 16) + } + } + case shapeI16x8: + for i := 0; i < 4; i++ { + var v1, v2 uint16 + if i < 2 { + v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16)) + } else { + v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16)) + } + + var v32 uint32 + if signed { + v32 = uint32(int32(int16(v1)) + int32(int16(v2))) + } else { + v32 = uint32(v1) + uint32(v2) + } + + if i < 2 { + retLo |= uint64(v32) << (i * 32) + } else { + retHi |= uint64(v32) << ((i - 2) * 32) + } + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128FloatPromote: + _, toPromote := ce.popValue(), ce.popValue() + ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote))))) + ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote >> 32))))) + frame.pc++ + case 
operationKindV128FloatDemote: + hi, lo := ce.popValue(), ce.popValue() + ce.pushValue( + uint64(math.Float32bits(float32(math.Float64frombits(lo)))) | + (uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32), + ) + ce.pushValue(0) + frame.pc++ + case operationKindV128FConvertFromI: + hi, lo := ce.popValue(), ce.popValue() + v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32) + signed := op.B3 + + var retLo, retHi uint64 + switch op.B1 { // Destination shape. + case shapeF32x4: // f32x4 from signed/unsigned i32x4 + if signed { + retLo = uint64(math.Float32bits(float32(int32(v1)))) | + (uint64(math.Float32bits(float32(int32(v2)))) << 32) + retHi = uint64(math.Float32bits(float32(int32(v3)))) | + (uint64(math.Float32bits(float32(int32(v4)))) << 32) + } else { + retLo = uint64(math.Float32bits(float32(v1))) | + (uint64(math.Float32bits(float32(v2))) << 32) + retHi = uint64(math.Float32bits(float32(v3))) | + (uint64(math.Float32bits(float32(v4))) << 32) + } + case shapeF64x2: // f64x2 from signed/unsigned i32x4 + if signed { + retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2))) + } else { + retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2)) + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Narrow: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + signed := op.B3 + + var retLo, retHi uint64 + switch op.B1 { + case shapeI16x8: // signed/unsigned i16x8 to i8x16 + for i := 0; i < 8; i++ { + var v16 uint16 + if i < 4 { + v16 = uint16(x1Lo >> (i * 16)) + } else { + v16 = uint16(x1Hi >> ((i - 4) * 16)) + } + + var v byte + if signed { + if s := int16(v16); s > math.MaxInt8 { + v = math.MaxInt8 + } else if s < math.MinInt8 { + s = math.MinInt8 + v = byte(s) + } else { + v = byte(v16) + } + } else { + if s := int16(v16); s > math.MaxUint8 { + v = math.MaxUint8 + } else if s < 0 { + v = 0 + } else { + v 
= byte(v16) + } + } + retLo |= uint64(v) << (i * 8) + } + for i := 0; i < 8; i++ { + var v16 uint16 + if i < 4 { + v16 = uint16(x2Lo >> (i * 16)) + } else { + v16 = uint16(x2Hi >> ((i - 4) * 16)) + } + + var v byte + if signed { + if s := int16(v16); s > math.MaxInt8 { + v = math.MaxInt8 + } else if s < math.MinInt8 { + s = math.MinInt8 + v = byte(s) + } else { + v = byte(v16) + } + } else { + if s := int16(v16); s > math.MaxUint8 { + v = math.MaxUint8 + } else if s < 0 { + v = 0 + } else { + v = byte(v16) + } + } + retHi |= uint64(v) << (i * 8) + } + case shapeI32x4: // signed/unsigned i32x4 to i16x8 + for i := 0; i < 4; i++ { + var v32 uint32 + if i < 2 { + v32 = uint32(x1Lo >> (i * 32)) + } else { + v32 = uint32(x1Hi >> ((i - 2) * 32)) + } + + var v uint16 + if signed { + if s := int32(v32); s > math.MaxInt16 { + v = math.MaxInt16 + } else if s < math.MinInt16 { + s = math.MinInt16 + v = uint16(s) + } else { + v = uint16(v32) + } + } else { + if s := int32(v32); s > math.MaxUint16 { + v = math.MaxUint16 + } else if s < 0 { + v = 0 + } else { + v = uint16(v32) + } + } + retLo |= uint64(v) << (i * 16) + } + + for i := 0; i < 4; i++ { + var v32 uint32 + if i < 2 { + v32 = uint32(x2Lo >> (i * 32)) + } else { + v32 = uint32(x2Hi >> ((i - 2) * 32)) + } + + var v uint16 + if signed { + if s := int32(v32); s > math.MaxInt16 { + v = math.MaxInt16 + } else if s < math.MinInt16 { + s = math.MinInt16 + v = uint16(s) + } else { + v = uint16(v32) + } + } else { + if s := int32(v32); s > math.MaxUint16 { + v = math.MaxUint16 + } else if s < 0 { + v = 0 + } else { + v = uint16(v32) + } + } + retHi |= uint64(v) << (i * 16) + } + } + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindV128Dot: + x2Hi, x2Lo := ce.popValue(), ce.popValue() + x1Hi, x1Lo := ce.popValue(), ce.popValue() + ce.pushValue( + uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) | + 
(uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32), + ) + ce.pushValue( + uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) | + (uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32), + ) + frame.pc++ + case operationKindV128ITruncSatFromF: + hi, lo := ce.popValue(), ce.popValue() + signed := op.B3 + var retLo, retHi uint64 + + switch op.B1 { + case shapeF32x4: // f32x4 to i32x4 + for i, f64 := range [4]float64{ + math.Trunc(float64(math.Float32frombits(uint32(lo)))), + math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))), + math.Trunc(float64(math.Float32frombits(uint32(hi)))), + math.Trunc(float64(math.Float32frombits(uint32(hi >> 32)))), + } { + + var v uint32 + if math.IsNaN(f64) { + v = 0 + } else if signed { + if f64 < math.MinInt32 { + f64 = math.MinInt32 + } else if f64 > math.MaxInt32 { + f64 = math.MaxInt32 + } + v = uint32(int32(f64)) + } else { + if f64 < 0 { + f64 = 0 + } else if f64 > math.MaxUint32 { + f64 = math.MaxUint32 + } + v = uint32(f64) + } + + if i < 2 { + retLo |= uint64(v) << (i * 32) + } else { + retHi |= uint64(v) << ((i - 2) * 32) + } + } + + case shapeF64x2: // f64x2 to i32x4 + for i, f := range [2]float64{ + math.Trunc(math.Float64frombits(lo)), + math.Trunc(math.Float64frombits(hi)), + } { + var v uint32 + if math.IsNaN(f) { + v = 0 + } else if signed { + if f < math.MinInt32 { + f = math.MinInt32 + } else if f > math.MaxInt32 { + f = math.MaxInt32 + } + v = uint32(int32(f)) + } else { + if f < 0 { + f = 0 + } else if f > math.MaxUint32 { + f = math.MaxUint32 + } + v = uint32(f) + } + + retLo |= uint64(v) << (i * 32) + } + } + + ce.pushValue(retLo) + ce.pushValue(retHi) + frame.pc++ + case operationKindAtomicMemoryWait: + timeout := int64(ce.popValue()) + exp := ce.popValue() + offset := ce.popMemoryOffset(op) + // Runtime instead of validation 
error because the spec intends to allow binaries to include + // such instructions as long as they are not executed. + if !memoryInst.Shared { + panic(wasmruntime.ErrRuntimeExpectedSharedMemory) + } + + switch unsignedType(op.B1) { + case unsignedTypeI32: + if offset%4 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + if int(offset) > len(memoryInst.Buffer)-4 { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(memoryInst.Wait32(offset, uint32(exp), timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 { + mem.Mux.Lock() + defer mem.Mux.Unlock() + value, _ := mem.ReadUint32Le(offset) + return value + })) + case unsignedTypeI64: + if offset%8 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + if int(offset) > len(memoryInst.Buffer)-8 { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(memoryInst.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 { + mem.Mux.Lock() + defer mem.Mux.Unlock() + value, _ := mem.ReadUint64Le(offset) + return value + })) + } + frame.pc++ + case operationKindAtomicMemoryNotify: + count := ce.popValue() + offset := ce.popMemoryOffset(op) + if offset%4 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + // Just a bounds check + if offset >= memoryInst.Size() { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + res := memoryInst.Notify(offset, uint32(count)) + ce.pushValue(uint64(res)) + frame.pc++ + case operationKindAtomicFence: + // Memory not required for fence only + if memoryInst != nil { + // An empty critical section can be used as a synchronization primitive, which is what + // fence is. Probably, there are no spectests or defined behavior to confirm this yet. 
+ memoryInst.Mux.Lock() + memoryInst.Mux.Unlock() //nolint:staticcheck + } + frame.pc++ + case operationKindAtomicLoad: + offset := ce.popMemoryOffset(op) + switch unsignedType(op.B1) { + case unsignedTypeI32: + if offset%4 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + val, ok := memoryInst.ReadUint32Le(offset) + memoryInst.Mux.Unlock() + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(uint64(val)) + case unsignedTypeI64: + if offset%8 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + val, ok := memoryInst.ReadUint64Le(offset) + memoryInst.Mux.Unlock() + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(val) + } + frame.pc++ + case operationKindAtomicLoad8: + offset := ce.popMemoryOffset(op) + memoryInst.Mux.Lock() + val, ok := memoryInst.ReadByte(offset) + memoryInst.Mux.Unlock() + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(uint64(val)) + frame.pc++ + case operationKindAtomicLoad16: + offset := ce.popMemoryOffset(op) + if offset%2 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + val, ok := memoryInst.ReadUint16Le(offset) + memoryInst.Mux.Unlock() + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + ce.pushValue(uint64(val)) + frame.pc++ + case operationKindAtomicStore: + val := ce.popValue() + offset := ce.popMemoryOffset(op) + switch unsignedType(op.B1) { + case unsignedTypeI32: + if offset%4 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + ok := memoryInst.WriteUint32Le(offset, uint32(val)) + memoryInst.Mux.Unlock() + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + case unsignedTypeI64: + if offset%8 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + ok := memoryInst.WriteUint64Le(offset, val) + memoryInst.Mux.Unlock() + if !ok { + 
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + } + frame.pc++ + case operationKindAtomicStore8: + val := byte(ce.popValue()) + offset := ce.popMemoryOffset(op) + memoryInst.Mux.Lock() + ok := memoryInst.WriteByte(offset, val) + memoryInst.Mux.Unlock() + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + frame.pc++ + case operationKindAtomicStore16: + val := uint16(ce.popValue()) + offset := ce.popMemoryOffset(op) + if offset%2 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + ok := memoryInst.WriteUint16Le(offset, val) + memoryInst.Mux.Unlock() + if !ok { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + frame.pc++ + case operationKindAtomicRMW: + val := ce.popValue() + offset := ce.popMemoryOffset(op) + switch unsignedType(op.B1) { + case unsignedTypeI32: + if offset%4 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + old, ok := memoryInst.ReadUint32Le(offset) + if !ok { + memoryInst.Mux.Unlock() + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + var newVal uint32 + switch atomicArithmeticOp(op.B2) { + case atomicArithmeticOpAdd: + newVal = old + uint32(val) + case atomicArithmeticOpSub: + newVal = old - uint32(val) + case atomicArithmeticOpAnd: + newVal = old & uint32(val) + case atomicArithmeticOpOr: + newVal = old | uint32(val) + case atomicArithmeticOpXor: + newVal = old ^ uint32(val) + case atomicArithmeticOpNop: + newVal = uint32(val) + } + memoryInst.WriteUint32Le(offset, newVal) + memoryInst.Mux.Unlock() + ce.pushValue(uint64(old)) + case unsignedTypeI64: + if offset%8 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + old, ok := memoryInst.ReadUint64Le(offset) + if !ok { + memoryInst.Mux.Unlock() + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + var newVal uint64 + switch atomicArithmeticOp(op.B2) { + case atomicArithmeticOpAdd: + newVal = old + val + case atomicArithmeticOpSub: + newVal = old 
- val + case atomicArithmeticOpAnd: + newVal = old & val + case atomicArithmeticOpOr: + newVal = old | val + case atomicArithmeticOpXor: + newVal = old ^ val + case atomicArithmeticOpNop: + newVal = val + } + memoryInst.WriteUint64Le(offset, newVal) + memoryInst.Mux.Unlock() + ce.pushValue(old) + } + frame.pc++ + case operationKindAtomicRMW8: + val := ce.popValue() + offset := ce.popMemoryOffset(op) + memoryInst.Mux.Lock() + old, ok := memoryInst.ReadByte(offset) + if !ok { + memoryInst.Mux.Unlock() + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + arg := byte(val) + var newVal byte + switch atomicArithmeticOp(op.B2) { + case atomicArithmeticOpAdd: + newVal = old + arg + case atomicArithmeticOpSub: + newVal = old - arg + case atomicArithmeticOpAnd: + newVal = old & arg + case atomicArithmeticOpOr: + newVal = old | arg + case atomicArithmeticOpXor: + newVal = old ^ arg + case atomicArithmeticOpNop: + newVal = arg + } + memoryInst.WriteByte(offset, newVal) + memoryInst.Mux.Unlock() + ce.pushValue(uint64(old)) + frame.pc++ + case operationKindAtomicRMW16: + val := ce.popValue() + offset := ce.popMemoryOffset(op) + if offset%2 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + old, ok := memoryInst.ReadUint16Le(offset) + if !ok { + memoryInst.Mux.Unlock() + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + arg := uint16(val) + var newVal uint16 + switch atomicArithmeticOp(op.B2) { + case atomicArithmeticOpAdd: + newVal = old + arg + case atomicArithmeticOpSub: + newVal = old - arg + case atomicArithmeticOpAnd: + newVal = old & arg + case atomicArithmeticOpOr: + newVal = old | arg + case atomicArithmeticOpXor: + newVal = old ^ arg + case atomicArithmeticOpNop: + newVal = arg + } + memoryInst.WriteUint16Le(offset, newVal) + memoryInst.Mux.Unlock() + ce.pushValue(uint64(old)) + frame.pc++ + case operationKindAtomicRMWCmpxchg: + rep := ce.popValue() + exp := ce.popValue() + offset := ce.popMemoryOffset(op) + switch 
unsignedType(op.B1) { + case unsignedTypeI32: + if offset%4 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + old, ok := memoryInst.ReadUint32Le(offset) + if !ok { + memoryInst.Mux.Unlock() + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + if old == uint32(exp) { + memoryInst.WriteUint32Le(offset, uint32(rep)) + } + memoryInst.Mux.Unlock() + ce.pushValue(uint64(old)) + case unsignedTypeI64: + if offset%8 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + old, ok := memoryInst.ReadUint64Le(offset) + if !ok { + memoryInst.Mux.Unlock() + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + if old == exp { + memoryInst.WriteUint64Le(offset, rep) + } + memoryInst.Mux.Unlock() + ce.pushValue(old) + } + frame.pc++ + case operationKindAtomicRMW8Cmpxchg: + rep := byte(ce.popValue()) + exp := byte(ce.popValue()) + offset := ce.popMemoryOffset(op) + memoryInst.Mux.Lock() + old, ok := memoryInst.ReadByte(offset) + if !ok { + memoryInst.Mux.Unlock() + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + if old == exp { + memoryInst.WriteByte(offset, rep) + } + memoryInst.Mux.Unlock() + ce.pushValue(uint64(old)) + frame.pc++ + case operationKindAtomicRMW16Cmpxchg: + rep := uint16(ce.popValue()) + exp := uint16(ce.popValue()) + offset := ce.popMemoryOffset(op) + if offset%2 != 0 { + panic(wasmruntime.ErrRuntimeUnalignedAtomic) + } + memoryInst.Mux.Lock() + old, ok := memoryInst.ReadUint16Le(offset) + if !ok { + memoryInst.Mux.Unlock() + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + if old == exp { + memoryInst.WriteUint16Le(offset, rep) + } + memoryInst.Mux.Unlock() + ce.pushValue(uint64(old)) + frame.pc++ + default: + frame.pc++ + } + } + ce.popFrame() +} + +func wasmCompatMax32bits(v1, v2 uint32) uint64 { + return uint64(math.Float32bits(moremath.WasmCompatMax32( + math.Float32frombits(v1), + math.Float32frombits(v2), + ))) +} + +func wasmCompatMin32bits(v1, v2 uint32) uint64 
{ + return uint64(math.Float32bits(moremath.WasmCompatMin32( + math.Float32frombits(v1), + math.Float32frombits(v2), + ))) +} + +func addFloat32bits(v1, v2 uint32) uint64 { + return uint64(math.Float32bits(math.Float32frombits(v1) + math.Float32frombits(v2))) +} + +func subFloat32bits(v1, v2 uint32) uint64 { + return uint64(math.Float32bits(math.Float32frombits(v1) - math.Float32frombits(v2))) +} + +func mulFloat32bits(v1, v2 uint32) uint64 { + return uint64(math.Float32bits(math.Float32frombits(v1) * math.Float32frombits(v2))) +} + +func divFloat32bits(v1, v2 uint32) uint64 { + return uint64(math.Float32bits(math.Float32frombits(v1) / math.Float32frombits(v2))) +} + +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2 +func flt32(z1, z2 float32) bool { + if z1 != z1 || z2 != z2 { + return false + } else if z1 == z2 { + return false + } else if math.IsInf(float64(z1), 1) { + return false + } else if math.IsInf(float64(z1), -1) { + return true + } else if math.IsInf(float64(z2), 1) { + return true + } else if math.IsInf(float64(z2), -1) { + return false + } + return z1 < z2 +} + +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2 +func flt64(z1, z2 float64) bool { + if z1 != z1 || z2 != z2 { + return false + } else if z1 == z2 { + return false + } else if math.IsInf(z1, 1) { + return false + } else if math.IsInf(z1, -1) { + return true + } else if math.IsInf(z2, 1) { + return true + } else if math.IsInf(z2, -1) { + return false + } + return z1 < z2 +} + +func i8RoundingAverage(v1, v2 byte) byte { + // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average + return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2) +} + +func i16RoundingAverage(v1, v2 uint16) uint16 { + // 
https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average + return uint16((uint32(v1) + uint32(v2) + 1) / 2) +} + +func i8Abs(v byte) byte { + if i := int8(v); i < 0 { + return byte(-i) + } else { + return byte(i) + } +} + +func i8MaxU(v1, v2 byte) byte { + if v1 < v2 { + return v2 + } else { + return v1 + } +} + +func i8MinU(v1, v2 byte) byte { + if v1 > v2 { + return v2 + } else { + return v1 + } +} + +func i8MaxS(v1, v2 byte) byte { + if int8(v1) < int8(v2) { + return v2 + } else { + return v1 + } +} + +func i8MinS(v1, v2 byte) byte { + if int8(v1) > int8(v2) { + return v2 + } else { + return v1 + } +} + +func i16MaxU(v1, v2 uint16) uint16 { + if v1 < v2 { + return v2 + } else { + return v1 + } +} + +func i16MinU(v1, v2 uint16) uint16 { + if v1 > v2 { + return v2 + } else { + return v1 + } +} + +func i16MaxS(v1, v2 uint16) uint16 { + if int16(v1) < int16(v2) { + return v2 + } else { + return v1 + } +} + +func i16MinS(v1, v2 uint16) uint16 { + if int16(v1) > int16(v2) { + return v2 + } else { + return v1 + } +} + +func i32MaxU(v1, v2 uint32) uint32 { + if v1 < v2 { + return v2 + } else { + return v1 + } +} + +func i32MinU(v1, v2 uint32) uint32 { + if v1 > v2 { + return v2 + } else { + return v1 + } +} + +func i32MaxS(v1, v2 uint32) uint32 { + if int32(v1) < int32(v2) { + return v2 + } else { + return v1 + } +} + +func i32MinS(v1, v2 uint32) uint32 { + if int32(v1) > int32(v2) { + return v2 + } else { + return v1 + } +} + +func i16Abs(v uint16) uint16 { + if i := int16(v); i < 0 { + return uint16(-i) + } else { + return uint16(i) + } +} + +func i32Abs(v uint32) uint32 { + if i := int32(v); i < 0 { + return uint32(-i) + } else { + return uint32(i) + } +} + +func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, m *wasm.ModuleInstance, f *function, fnl experimental.FunctionListener) context.Context { + def, typ := f.definition(), f.funcType + + ce.stackIterator.reset(ce.stack, ce.frames, f) + 
fnl.Before(ctx, m, def, ce.peekValues(typ.ParamNumInUint64), &ce.stackIterator) + ce.stackIterator.clear() + ce.callNativeFunc(ctx, m, f) + fnl.After(ctx, m, def, ce.peekValues(typ.ResultNumInUint64)) + return ctx +} + +// popMemoryOffset takes a memory offset off the stack for use in load and store instructions. +// As the top of stack value is 64-bit, this ensures it is in range before returning it. +func (ce *callEngine) popMemoryOffset(op *unionOperation) uint32 { + offset := op.U2 + ce.popValue() + if offset > math.MaxUint32 { + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + } + return uint32(offset) +} + +func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleInstance, f *function) { + typ := f.funcType + paramLen := typ.ParamNumInUint64 + resultLen := typ.ResultNumInUint64 + stackLen := paramLen + + // In the interpreter engine, ce.stack may only have capacity to store + // parameters. Grow when there are more results than parameters. + if growLen := resultLen - paramLen; growLen > 0 { + for i := 0; i < growLen; i++ { + ce.stack = append(ce.stack, 0) + } + stackLen += growLen + } + + // Pass the stack elements to the go function. + stack := ce.stack[len(ce.stack)-stackLen:] + ce.callGoFunc(ctx, m, f, stack) + + // Shrink the stack when there were more parameters than results. + if shrinkLen := paramLen - resultLen; shrinkLen > 0 { + ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen] + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go new file mode 100644 index 000000000..3087a718f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go @@ -0,0 +1,2812 @@ +package interpreter + +import ( + "fmt" + "math" + "strings" +) + +// unsignedInt represents unsigned 32-bit or 64-bit integers. 
// unsignedInt distinguishes the two unsigned integer widths (i32/i64)
// used by the interpreter's intermediate representation.
type unsignedInt byte

const (
	unsignedInt32 unsignedInt = iota
	unsignedInt64
)

// String implements fmt.Stringer.
func (s unsignedInt) String() string {
	switch s {
	case unsignedInt32:
		return "i32"
	case unsignedInt64:
		return "i64"
	}
	return ""
}

// signedInt distinguishes integer width and signedness for operations
// whose semantics depend on both (division, remainder, comparisons, ...).
type signedInt byte

const (
	signedInt32 signedInt = iota
	signedInt64
	signedUint32
	signedUint64
)

// String implements fmt.Stringer.
func (s signedInt) String() string {
	switch s {
	case signedUint32:
		return "u32"
	case signedUint64:
		return "u64"
	case signedInt32:
		return "s32"
	case signedInt64:
		return "s64"
	}
	return ""
}

// float distinguishes single (f32) from double (f64) precision scalars.
type float byte

const (
	f32 float = iota
	f64
)

// String implements fmt.Stringer.
func (s float) String() string {
	switch s {
	case f32:
		return "f32"
	case f64:
		return "f64"
	}
	return ""
}

// unsignedType is the union of unsignedInt, float and the V128 vector type.
type unsignedType byte

const (
	unsignedTypeI32 unsignedType = iota
	unsignedTypeI64
	unsignedTypeF32
	unsignedTypeF64
	unsignedTypeV128
	unsignedTypeUnknown
)

// String implements fmt.Stringer.
func (s unsignedType) String() string {
	switch s {
	case unsignedTypeI32:
		return "i32"
	case unsignedTypeI64:
		return "i64"
	case unsignedTypeF32:
		return "f32"
	case unsignedTypeF64:
		return "f64"
	case unsignedTypeV128:
		return "v128"
	case unsignedTypeUnknown:
		return "unknown"
	}
	return ""
}

// signedType is the union of signedInt and float types.
type signedType byte

const (
	signedTypeInt32 signedType = iota
	signedTypeUint32
	signedTypeInt64
	signedTypeUint64
	signedTypeFloat32
	signedTypeFloat64
)

// String implements fmt.Stringer.
func (s signedType) String() string {
	switch s {
	case signedTypeInt32:
		return "s32"
	case signedTypeUint32:
		return "u32"
	case signedTypeInt64:
		return "s64"
	case signedTypeUint64:
		return "u64"
	case signedTypeFloat32:
		return "f32"
	case signedTypeFloat64:
		return "f64"
	}
	return ""
}

// operationKind identifies each implementation of the Operation interface.
// The numeric values are significant: the interpreter dispatches on them,
// so the declaration order below must not change.
type operationKind uint16

// String implements fmt.Stringer.
//
// It panics on a value with no name so that a kind added without a matching
// case here is caught immediately in tests rather than printed blank.
func (o operationKind) String() string {
	switch o {
	case operationKindUnreachable: return "Unreachable"
	case operationKindLabel: return "label"
	case operationKindBr: return "Br"
	case operationKindBrIf: return "BrIf"
	case operationKindBrTable: return "BrTable"
	case operationKindCall: return "Call"
	case operationKindCallIndirect: return "CallIndirect"
	case operationKindDrop: return "Drop"
	case operationKindSelect: return "Select"
	case operationKindPick: return "Pick"
	case operationKindSet: return "Swap"
	case operationKindGlobalGet: return "GlobalGet"
	case operationKindGlobalSet: return "GlobalSet"
	case operationKindLoad: return "Load"
	case operationKindLoad8: return "Load8"
	case operationKindLoad16: return "Load16"
	case operationKindLoad32: return "Load32"
	case operationKindStore: return "Store"
	case operationKindStore8: return "Store8"
	case operationKindStore16: return "Store16"
	case operationKindStore32: return "Store32"
	case operationKindMemorySize: return "MemorySize"
	case operationKindMemoryGrow: return "MemoryGrow"
	case operationKindConstI32: return "ConstI32"
	case operationKindConstI64: return "ConstI64"
	case operationKindConstF32: return "ConstF32"
	case operationKindConstF64: return "ConstF64"
	case operationKindEq: return "Eq"
	case operationKindNe: return "Ne"
	case operationKindEqz: return "Eqz"
	case operationKindLt: return "Lt"
	case operationKindGt: return "Gt"
	case operationKindLe: return "Le"
	case operationKindGe: return "Ge"
	case operationKindAdd: return "Add"
	case operationKindSub: return "Sub"
	case operationKindMul: return "Mul"
	case operationKindClz: return "Clz"
	case operationKindCtz: return "Ctz"
	case operationKindPopcnt: return "Popcnt"
	case operationKindDiv: return "Div"
	case operationKindRem: return "Rem"
	case operationKindAnd: return "And"
	case operationKindOr: return "Or"
	case operationKindXor: return "Xor"
	case operationKindShl: return "Shl"
	case operationKindShr: return "Shr"
	case operationKindRotl: return "Rotl"
	case operationKindRotr: return "Rotr"
	case operationKindAbs: return "Abs"
	case operationKindNeg: return "Neg"
	case operationKindCeil: return "Ceil"
	case operationKindFloor: return "Floor"
	case operationKindTrunc: return "Trunc"
	case operationKindNearest: return "Nearest"
	case operationKindSqrt: return "Sqrt"
	case operationKindMin: return "Min"
	case operationKindMax: return "Max"
	case operationKindCopysign: return "Copysign"
	case operationKindI32WrapFromI64: return "I32WrapFromI64"
	case operationKindITruncFromF: return "ITruncFromF"
	case operationKindFConvertFromI: return "FConvertFromI"
	case operationKindF32DemoteFromF64: return "F32DemoteFromF64"
	case operationKindF64PromoteFromF32: return "F64PromoteFromF32"
	case operationKindI32ReinterpretFromF32: return "I32ReinterpretFromF32"
	case operationKindI64ReinterpretFromF64: return "I64ReinterpretFromF64"
	case operationKindF32ReinterpretFromI32: return "F32ReinterpretFromI32"
	case operationKindF64ReinterpretFromI64: return "F64ReinterpretFromI64"
	case operationKindExtend: return "Extend"
	case operationKindMemoryInit: return "MemoryInit"
	case operationKindDataDrop: return "DataDrop"
	case operationKindMemoryCopy: return "MemoryCopy"
	case operationKindMemoryFill: return "MemoryFill"
	case operationKindTableInit: return "TableInit"
	case operationKindElemDrop: return "ElemDrop"
	case operationKindTableCopy: return "TableCopy"
	case operationKindRefFunc: return "RefFunc"
	case operationKindTableGet: return "TableGet"
	case operationKindTableSet: return "TableSet"
	case operationKindTableSize: return "TableSize"
	case operationKindTableGrow: return "TableGrow"
	case operationKindTableFill: return "TableFill"
	case operationKindV128Const: return "ConstV128"
	case operationKindV128Add: return "V128Add"
	case operationKindV128Sub: return "V128Sub"
	case operationKindV128Load: return "V128Load"
	case operationKindV128LoadLane: return "V128LoadLane"
	case operationKindV128Store: return "V128Store"
	case operationKindV128StoreLane: return "V128StoreLane"
	case operationKindV128ExtractLane: return "V128ExtractLane"
	case operationKindV128ReplaceLane: return "V128ReplaceLane"
	case operationKindV128Splat: return "V128Splat"
	case operationKindV128Shuffle: return "V128Shuffle"
	case operationKindV128Swizzle: return "V128Swizzle"
	case operationKindV128AnyTrue: return "V128AnyTrue"
	case operationKindV128AllTrue: return "V128AllTrue"
	case operationKindV128And: return "V128And"
	case operationKindV128Not: return "V128Not"
	case operationKindV128Or: return "V128Or"
	case operationKindV128Xor: return "V128Xor"
	case operationKindV128Bitselect: return "V128Bitselect"
	case operationKindV128AndNot: return "V128AndNot"
	case operationKindV128BitMask: return "V128BitMask"
	case operationKindV128Shl: return "V128Shl"
	case operationKindV128Shr: return "V128Shr"
	case operationKindV128Cmp: return "V128Cmp"
	case operationKindSignExtend32From8: return "SignExtend32From8"
	case operationKindSignExtend32From16: return "SignExtend32From16"
	case operationKindSignExtend64From8: return "SignExtend64From8"
	case operationKindSignExtend64From16: return "SignExtend64From16"
	case operationKindSignExtend64From32: return "SignExtend64From32"
	case operationKindV128AddSat: return "V128AddSat"
	case operationKindV128SubSat: return "V128SubSat"
	case operationKindV128Mul: return "V128Mul"
	case operationKindV128Div: return "V128Div"
	case operationKindV128Neg: return "V128Neg"
	case operationKindV128Sqrt: return "V128Sqrt"
	case operationKindV128Abs: return "V128Abs"
	case operationKindV128Popcnt: return "V128Popcnt"
	case operationKindV128Min: return "V128Min"
	case operationKindV128Max: return "V128Max"
	case operationKindV128AvgrU: return "V128AvgrU"
	case operationKindV128Ceil: return "V128Ceil"
	case operationKindV128Floor: return "V128Floor"
	case operationKindV128Trunc: return "V128Trunc"
	case operationKindV128Nearest: return "V128Nearest"
	case operationKindV128Pmin: return "V128Pmin"
	case operationKindV128Pmax: return "V128Pmax"
	case operationKindV128Extend: return "V128Extend"
	case operationKindV128ExtMul: return "V128ExtMul"
	case operationKindV128Q15mulrSatS: return "V128Q15mulrSatS"
	case operationKindV128ExtAddPairwise: return "V128ExtAddPairwise"
	case operationKindV128FloatPromote: return "V128FloatPromote"
	case operationKindV128FloatDemote: return "V128FloatDemote"
	case operationKindV128FConvertFromI: return "V128FConvertFromI"
	case operationKindV128Dot: return "V128Dot"
	case operationKindV128Narrow: return "V128Narrow"
	case operationKindV128ITruncSatFromF: return "V128ITruncSatFromF"
	case operationKindBuiltinFunctionCheckExitCode: return "BuiltinFunctionCheckExitCode"
	case operationKindAtomicMemoryWait: return "operationKindAtomicMemoryWait"
	case operationKindAtomicMemoryNotify: return "operationKindAtomicMemoryNotify"
	case operationKindAtomicFence: return "operationKindAtomicFence"
	case operationKindAtomicLoad: return "operationKindAtomicLoad"
	case operationKindAtomicLoad8: return "operationKindAtomicLoad8"
	case operationKindAtomicLoad16: return "operationKindAtomicLoad16"
	case operationKindAtomicStore: return "operationKindAtomicStore"
	case operationKindAtomicStore8: return "operationKindAtomicStore8"
	case operationKindAtomicStore16: return "operationKindAtomicStore16"
	case operationKindAtomicRMW: return "operationKindAtomicRMW"
	case operationKindAtomicRMW8: return "operationKindAtomicRMW8"
	case operationKindAtomicRMW16: return "operationKindAtomicRMW16"
	case operationKindAtomicRMWCmpxchg: return "operationKindAtomicRMWCmpxchg"
	case operationKindAtomicRMW8Cmpxchg: return "operationKindAtomicRMW8Cmpxchg"
	case operationKindAtomicRMW16Cmpxchg: return "operationKindAtomicRMW16Cmpxchg"
	default:
		panic(fmt.Errorf("unknown operation %d", o))
	}
}

// Each constant below names the Kind produced by the corresponding
// newOperationXxx constructor. The iota order is load-bearing; append new
// kinds before operationKindEnd rather than inserting in the middle.
const (
	// Control flow and stack manipulation.
	operationKindUnreachable operationKind = iota
	operationKindLabel
	operationKindBr
	operationKindBrIf
	operationKindBrTable
	operationKindCall
	operationKindCallIndirect
	operationKindDrop
	operationKindSelect
	operationKindPick
	operationKindSet
	operationKindGlobalGet
	operationKindGlobalSet

	// Scalar memory access.
	operationKindLoad
	operationKindLoad8
	operationKindLoad16
	operationKindLoad32
	operationKindStore
	operationKindStore8
	operationKindStore16
	operationKindStore32
	operationKindMemorySize
	operationKindMemoryGrow

	// Scalar constants.
	operationKindConstI32
	operationKindConstI64
	operationKindConstF32
	operationKindConstF64

	// Comparisons.
	operationKindEq
	operationKindNe
	operationKindEqz
	operationKindLt
	operationKindGt
	operationKindLe
	operationKindGe

	// Integer arithmetic and bit manipulation.
	operationKindAdd
	operationKindSub
	operationKindMul
	operationKindClz
	operationKindCtz
	operationKindPopcnt
	operationKindDiv
	operationKindRem
	operationKindAnd
	operationKindOr
	operationKindXor
	operationKindShl
	operationKindShr
	operationKindRotl
	operationKindRotr

	// Floating-point operations.
	operationKindAbs
	operationKindNeg
	operationKindCeil
	operationKindFloor
	operationKindTrunc
	operationKindNearest
	operationKindSqrt
	operationKindMin
	operationKindMax
	operationKindCopysign

	// Numeric conversions and reinterpretations.
	operationKindI32WrapFromI64
	operationKindITruncFromF
	operationKindFConvertFromI
	operationKindF32DemoteFromF64
	operationKindF64PromoteFromF32
	operationKindI32ReinterpretFromF32
	operationKindI64ReinterpretFromF64
	operationKindF32ReinterpretFromI32
	operationKindF64ReinterpretFromI64
	operationKindExtend
	operationKindSignExtend32From8
	operationKindSignExtend32From16
	operationKindSignExtend64From8
	operationKindSignExtend64From16
	operationKindSignExtend64From32

	// Bulk memory and table operations.
	operationKindMemoryInit
	operationKindDataDrop
	operationKindMemoryCopy
	operationKindMemoryFill
	operationKindTableInit
	operationKindElemDrop
	operationKindTableCopy
	operationKindRefFunc
	operationKindTableGet
	operationKindTableSet
	operationKindTableSize
	operationKindTableGrow
	operationKindTableFill

	// Vector value related instructions are prefixed by V128.
	operationKindV128Const
	operationKindV128Add
	operationKindV128Sub
	operationKindV128Load
	operationKindV128LoadLane
	operationKindV128Store
	operationKindV128StoreLane
	operationKindV128ExtractLane
	operationKindV128ReplaceLane
	operationKindV128Splat
	operationKindV128Shuffle
	operationKindV128Swizzle
	operationKindV128AnyTrue
	operationKindV128AllTrue
	operationKindV128BitMask
	operationKindV128And
	operationKindV128Not
	operationKindV128Or
	operationKindV128Xor
	operationKindV128Bitselect
	operationKindV128AndNot
	operationKindV128Shl
	operationKindV128Shr
	operationKindV128Cmp
	operationKindV128AddSat
	operationKindV128SubSat
	operationKindV128Mul
	operationKindV128Div
	operationKindV128Neg
	operationKindV128Sqrt
	operationKindV128Abs
	operationKindV128Popcnt
	operationKindV128Min
	operationKindV128Max
	operationKindV128AvgrU
	operationKindV128Pmin
	operationKindV128Pmax
	operationKindV128Ceil
	operationKindV128Floor
	operationKindV128Trunc
	operationKindV128Nearest
	operationKindV128Extend
	operationKindV128ExtMul
	operationKindV128Q15mulrSatS
	operationKindV128ExtAddPairwise
	operationKindV128FloatPromote
	operationKindV128FloatDemote
	operationKindV128FConvertFromI
	operationKindV128Dot
	operationKindV128Narrow
	operationKindV128ITruncSatFromF

	// operationKindBuiltinFunctionCheckExitCode checks whether the module
	// was closed (deadline, cancellation, or explicit CloseWithExitCode).
	operationKindBuiltinFunctionCheckExitCode

	// Threads-proposal atomic operations.
	operationKindAtomicMemoryWait
	operationKindAtomicMemoryNotify
	operationKindAtomicFence
	operationKindAtomicLoad
	operationKindAtomicLoad8
	operationKindAtomicLoad16
	operationKindAtomicStore
	operationKindAtomicStore8
	operationKindAtomicStore16
	operationKindAtomicRMW
	operationKindAtomicRMW8
	operationKindAtomicRMW16
	operationKindAtomicRMWCmpxchg
	operationKindAtomicRMW8Cmpxchg
	operationKindAtomicRMW16Cmpxchg

	// operationKindEnd is always placed at the bottom of this iota definition to be used in the test.
	operationKindEnd
)

// NewOperationBuiltinFunctionCheckExitCode is a constructor for unionOperation with Kind operationKindBuiltinFunctionCheckExitCode.
//
// OperationBuiltinFunctionCheckExitCode corresponds to the instruction to check the api.Module is already closed due to
// context.DeadlineExceeded, context.Canceled, or the explicit call of CloseWithExitCode on api.Module.
+func newOperationBuiltinFunctionCheckExitCode() unionOperation { + return unionOperation{Kind: operationKindBuiltinFunctionCheckExitCode} +} + +// label is the unique identifier for each block in a single function in interpreterir +// where "block" consists of multiple operations, and must End with branching operations +// (e.g. operationKindBr or operationKindBrIf). +type label uint64 + +// Kind returns the labelKind encoded in this label. +func (l label) Kind() labelKind { + return labelKind(uint32(l)) +} + +// FrameID returns the frame id encoded in this label. +func (l label) FrameID() int { + return int(uint32(l >> 32)) +} + +// NewLabel is a constructor for a label. +func newLabel(kind labelKind, frameID uint32) label { + return label(kind) | label(frameID)<<32 +} + +// String implements fmt.Stringer. +func (l label) String() (ret string) { + frameID := l.FrameID() + switch l.Kind() { + case labelKindHeader: + ret = fmt.Sprintf(".L%d", frameID) + case labelKindElse: + ret = fmt.Sprintf(".L%d_else", frameID) + case labelKindContinuation: + ret = fmt.Sprintf(".L%d_cont", frameID) + case labelKindReturn: + return ".return" + } + return +} + +func (l label) IsReturnTarget() bool { + return l.Kind() == labelKindReturn +} + +// labelKind is the Kind of the label. +type labelKind = byte + +const ( + // labelKindHeader is the header for various blocks. For example, the "then" block of + // wasm.OpcodeIfName in Wasm has the label of this Kind. + labelKindHeader labelKind = iota + // labelKindElse is the Kind of label for "else" block of wasm.OpcodeIfName in Wasm. + labelKindElse + // labelKindContinuation is the Kind of label which is the continuation of blocks. + // For example, for wasm text like + // (func + // .... + // (if (local.get 0) (then (nop)) (else (nop))) + // return + // ) + // we have the continuation block (of if-block) corresponding to "return" opcode. 
+ labelKindContinuation + labelKindReturn + labelKindNum +) + +// unionOperation implements Operation and is the compilation (engine.lowerIR) result of a interpreterir.Operation. +// +// Not all operations result in a unionOperation, e.g. interpreterir.OperationI32ReinterpretFromF32, and some operations are +// more complex than others, e.g. interpreterir.NewOperationBrTable. +// +// Note: This is a form of union type as it can store fields needed for any operation. Hence, most fields are opaque and +// only relevant when in context of its kind. +type unionOperation struct { + // Kind determines how to interpret the other fields in this struct. + Kind operationKind + B1, B2 byte + B3 bool + U1, U2 uint64 + U3 uint64 + Us []uint64 +} + +// String implements fmt.Stringer. +func (o unionOperation) String() string { + switch o.Kind { + case operationKindUnreachable, + operationKindSelect, + operationKindMemorySize, + operationKindMemoryGrow, + operationKindI32WrapFromI64, + operationKindF32DemoteFromF64, + operationKindF64PromoteFromF32, + operationKindI32ReinterpretFromF32, + operationKindI64ReinterpretFromF64, + operationKindF32ReinterpretFromI32, + operationKindF64ReinterpretFromI64, + operationKindSignExtend32From8, + operationKindSignExtend32From16, + operationKindSignExtend64From8, + operationKindSignExtend64From16, + operationKindSignExtend64From32, + operationKindMemoryInit, + operationKindDataDrop, + operationKindMemoryCopy, + operationKindMemoryFill, + operationKindTableInit, + operationKindElemDrop, + operationKindTableCopy, + operationKindRefFunc, + operationKindTableGet, + operationKindTableSet, + operationKindTableSize, + operationKindTableGrow, + operationKindTableFill, + operationKindBuiltinFunctionCheckExitCode: + return o.Kind.String() + + case operationKindCall, + operationKindGlobalGet, + operationKindGlobalSet: + return fmt.Sprintf("%s %d", o.Kind, o.B1) + + case operationKindLabel: + return label(o.U1).String() + + case operationKindBr: + return 
fmt.Sprintf("%s %s", o.Kind, label(o.U1).String()) + + case operationKindBrIf: + thenTarget := label(o.U1) + elseTarget := label(o.U2) + return fmt.Sprintf("%s %s, %s", o.Kind, thenTarget, elseTarget) + + case operationKindBrTable: + var targets []string + var defaultLabel label + if len(o.Us) > 0 { + targets = make([]string, len(o.Us)-1) + for i, t := range o.Us[1:] { + targets[i] = label(t).String() + } + defaultLabel = label(o.Us[0]) + } + return fmt.Sprintf("%s [%s] %s", o.Kind, strings.Join(targets, ","), defaultLabel) + + case operationKindCallIndirect: + return fmt.Sprintf("%s: type=%d, table=%d", o.Kind, o.U1, o.U2) + + case operationKindDrop: + start := int64(o.U1) + end := int64(o.U2) + return fmt.Sprintf("%s %d..%d", o.Kind, start, end) + + case operationKindPick, operationKindSet: + return fmt.Sprintf("%s %d (is_vector=%v)", o.Kind, o.U1, o.B3) + + case operationKindLoad, operationKindStore: + return fmt.Sprintf("%s.%s (align=%d, offset=%d)", unsignedType(o.B1), o.Kind, o.U1, o.U2) + + case operationKindLoad8, + operationKindLoad16: + return fmt.Sprintf("%s.%s (align=%d, offset=%d)", signedType(o.B1), o.Kind, o.U1, o.U2) + + case operationKindStore8, + operationKindStore16, + operationKindStore32: + return fmt.Sprintf("%s (align=%d, offset=%d)", o.Kind, o.U1, o.U2) + + case operationKindLoad32: + var t string + if o.B1 == 1 { + t = "i64" + } else { + t = "u64" + } + return fmt.Sprintf("%s.%s (align=%d, offset=%d)", t, o.Kind, o.U1, o.U2) + + case operationKindEq, + operationKindNe, + operationKindAdd, + operationKindSub, + operationKindMul: + return fmt.Sprintf("%s.%s", unsignedType(o.B1), o.Kind) + + case operationKindEqz, + operationKindClz, + operationKindCtz, + operationKindPopcnt, + operationKindAnd, + operationKindOr, + operationKindXor, + operationKindShl, + operationKindRotl, + operationKindRotr: + return fmt.Sprintf("%s.%s", unsignedInt(o.B1), o.Kind) + + case operationKindRem, operationKindShr: + return fmt.Sprintf("%s.%s", signedInt(o.B1), 
o.Kind) + + case operationKindLt, + operationKindGt, + operationKindLe, + operationKindGe, + operationKindDiv: + return fmt.Sprintf("%s.%s", signedType(o.B1), o.Kind) + + case operationKindAbs, + operationKindNeg, + operationKindCeil, + operationKindFloor, + operationKindTrunc, + operationKindNearest, + operationKindSqrt, + operationKindMin, + operationKindMax, + operationKindCopysign: + return fmt.Sprintf("%s.%s", float(o.B1), o.Kind) + + case operationKindConstI32, + operationKindConstI64: + return fmt.Sprintf("%s %#x", o.Kind, o.U1) + + case operationKindConstF32: + return fmt.Sprintf("%s %f", o.Kind, math.Float32frombits(uint32(o.U1))) + case operationKindConstF64: + return fmt.Sprintf("%s %f", o.Kind, math.Float64frombits(o.U1)) + + case operationKindITruncFromF: + return fmt.Sprintf("%s.%s.%s (non_trapping=%v)", signedInt(o.B2), o.Kind, float(o.B1), o.B3) + case operationKindFConvertFromI: + return fmt.Sprintf("%s.%s.%s", float(o.B2), o.Kind, signedInt(o.B1)) + case operationKindExtend: + var in, out string + if o.B3 { + in = "i32" + out = "i64" + } else { + in = "u32" + out = "u64" + } + return fmt.Sprintf("%s.%s.%s", out, o.Kind, in) + + case operationKindV128Const: + return fmt.Sprintf("%s [%#x, %#x]", o.Kind, o.U1, o.U2) + case operationKindV128Add, + operationKindV128Sub: + return fmt.Sprintf("%s (shape=%s)", o.Kind, shapeName(o.B1)) + case operationKindV128Load, + operationKindV128LoadLane, + operationKindV128Store, + operationKindV128StoreLane, + operationKindV128ExtractLane, + operationKindV128ReplaceLane, + operationKindV128Splat, + operationKindV128Shuffle, + operationKindV128Swizzle, + operationKindV128AnyTrue, + operationKindV128AllTrue, + operationKindV128BitMask, + operationKindV128And, + operationKindV128Not, + operationKindV128Or, + operationKindV128Xor, + operationKindV128Bitselect, + operationKindV128AndNot, + operationKindV128Shl, + operationKindV128Shr, + operationKindV128Cmp, + operationKindV128AddSat, + operationKindV128SubSat, + 
operationKindV128Mul, + operationKindV128Div, + operationKindV128Neg, + operationKindV128Sqrt, + operationKindV128Abs, + operationKindV128Popcnt, + operationKindV128Min, + operationKindV128Max, + operationKindV128AvgrU, + operationKindV128Pmin, + operationKindV128Pmax, + operationKindV128Ceil, + operationKindV128Floor, + operationKindV128Trunc, + operationKindV128Nearest, + operationKindV128Extend, + operationKindV128ExtMul, + operationKindV128Q15mulrSatS, + operationKindV128ExtAddPairwise, + operationKindV128FloatPromote, + operationKindV128FloatDemote, + operationKindV128FConvertFromI, + operationKindV128Dot, + operationKindV128Narrow: + return o.Kind.String() + + case operationKindV128ITruncSatFromF: + if o.B3 { + return fmt.Sprintf("%s.%sS", o.Kind, shapeName(o.B1)) + } else { + return fmt.Sprintf("%s.%sU", o.Kind, shapeName(o.B1)) + } + + case operationKindAtomicMemoryWait, + operationKindAtomicMemoryNotify, + operationKindAtomicFence, + operationKindAtomicLoad, + operationKindAtomicLoad8, + operationKindAtomicLoad16, + operationKindAtomicStore, + operationKindAtomicStore8, + operationKindAtomicStore16, + operationKindAtomicRMW, + operationKindAtomicRMW8, + operationKindAtomicRMW16, + operationKindAtomicRMWCmpxchg, + operationKindAtomicRMW8Cmpxchg, + operationKindAtomicRMW16Cmpxchg: + return o.Kind.String() + + default: + panic(fmt.Sprintf("TODO: %v", o.Kind)) + } +} + +// NewOperationUnreachable is a constructor for unionOperation with operationKindUnreachable +// +// This corresponds to wasm.OpcodeUnreachable. +// +// The engines are expected to exit the execution with wasmruntime.ErrRuntimeUnreachable error. +func newOperationUnreachable() unionOperation { + return unionOperation{Kind: operationKindUnreachable} +} + +// NewOperationLabel is a constructor for unionOperation with operationKindLabel. +// +// This is used to inform the engines of the beginning of a label. 
+func newOperationLabel(label label) unionOperation { + return unionOperation{Kind: operationKindLabel, U1: uint64(label)} +} + +// NewOperationBr is a constructor for unionOperation with operationKindBr. +// +// The engines are expected to branch into U1 label. +func newOperationBr(target label) unionOperation { + return unionOperation{Kind: operationKindBr, U1: uint64(target)} +} + +// NewOperationBrIf is a constructor for unionOperation with operationKindBrIf. +// +// The engines are expected to pop a value and branch into U1 label if the value equals 1. +// Otherwise, the code branches into U2 label. +func newOperationBrIf(thenTarget, elseTarget label, thenDrop inclusiveRange) unionOperation { + return unionOperation{ + Kind: operationKindBrIf, + U1: uint64(thenTarget), + U2: uint64(elseTarget), + U3: thenDrop.AsU64(), + } +} + +// NewOperationBrTable is a constructor for unionOperation with operationKindBrTable. +// +// This corresponds to wasm.OpcodeBrTableName except that the label +// here means the interpreterir level, not the ones of Wasm. +// +// The engines are expected to do the br_table operation based on the default (Us[len(Us)-1], Us[len(Us)-2]) and +// targets (Us[:len(Us)-1], Rs[:len(Us)-1]). More precisely, this pops a value from the stack (called "index") +// and decides which branch we go into next based on the value. +// +// For example, assume we have operations like {default: L_DEFAULT, targets: [L0, L1, L2]}. +// If "index" >= len(defaults), then branch into the L_DEFAULT label. +// Otherwise, we enter label of targets[index]. +func newOperationBrTable(targetLabelsAndRanges []uint64) unionOperation { + return unionOperation{ + Kind: operationKindBrTable, + Us: targetLabelsAndRanges, + } +} + +// NewOperationCall is a constructor for unionOperation with operationKindCall. +// +// This corresponds to wasm.OpcodeCallName, and engines are expected to +// enter into a function whose index equals OperationCall.FunctionIndex. 
+func newOperationCall(functionIndex uint32) unionOperation { + return unionOperation{Kind: operationKindCall, U1: uint64(functionIndex)} +} + +// NewOperationCallIndirect implements Operation. +// +// This corresponds to wasm.OpcodeCallIndirectName, and engines are expected to +// consume the one value from the top of stack (called "offset"), +// and make a function call against the function whose function address equals +// Tables[OperationCallIndirect.TableIndex][offset]. +// +// Note: This is called indirect function call in the sense that the target function is indirectly +// determined by the current state (top value) of the stack. +// Therefore, two checks are performed at runtime before entering the target function: +// 1) whether "offset" exceeds the length of table Tables[OperationCallIndirect.TableIndex]. +// 2) whether the type of the function table[offset] matches the function type specified by OperationCallIndirect.TypeIndex. +func newOperationCallIndirect(typeIndex, tableIndex uint32) unionOperation { + return unionOperation{Kind: operationKindCallIndirect, U1: uint64(typeIndex), U2: uint64(tableIndex)} +} + +// inclusiveRange is the range which spans across the value stack starting from the top to the bottom, and +// both boundary are included in the range. +type inclusiveRange struct { + Start, End int32 +} + +// AsU64 is be used to convert inclusiveRange to uint64 so that it can be stored in unionOperation. +func (i inclusiveRange) AsU64() uint64 { + return uint64(uint32(i.Start))<<32 | uint64(uint32(i.End)) +} + +// inclusiveRangeFromU64 retrieves inclusiveRange from the given uint64 which is stored in unionOperation. +func inclusiveRangeFromU64(v uint64) inclusiveRange { + return inclusiveRange{ + Start: int32(uint32(v >> 32)), + End: int32(uint32(v)), + } +} + +// nopinclusiveRange is inclusiveRange which corresponds to no-operation. 
+var nopinclusiveRange = inclusiveRange{Start: -1, End: -1} + +// NewOperationDrop is a constructor for unionOperation with operationKindDrop. +// +// The engines are expected to discard the values selected by NewOperationDrop.Depth which +// starts from the top of the stack to the bottom. +// +// depth spans across the uint64 value stack at runtime to be dropped by this operation. +func newOperationDrop(depth inclusiveRange) unionOperation { + return unionOperation{Kind: operationKindDrop, U1: depth.AsU64()} +} + +// NewOperationSelect is a constructor for unionOperation with operationKindSelect. +// +// This corresponds to wasm.OpcodeSelect. +// +// The engines are expected to pop three values, say [..., x2, x1, c], then if the value "c" equals zero, +// "x1" is pushed back onto the stack and, otherwise "x2" is pushed back. +// +// isTargetVector true if the selection target value's type is wasm.ValueTypeV128. +func newOperationSelect(isTargetVector bool) unionOperation { + return unionOperation{Kind: operationKindSelect, B3: isTargetVector} +} + +// NewOperationPick is a constructor for unionOperation with operationKindPick. +// +// The engines are expected to copy a value pointed by depth, and push the +// copied value onto the top of the stack. +// +// depth is the location of the pick target in the uint64 value stack at runtime. +// If isTargetVector=true, this points to the location of the lower 64-bits of the vector. +func newOperationPick(depth int, isTargetVector bool) unionOperation { + return unionOperation{Kind: operationKindPick, U1: uint64(depth), B3: isTargetVector} +} + +// NewOperationSet is a constructor for unionOperation with operationKindSet. +// +// The engines are expected to set the top value of the stack to the location specified by +// depth. +// +// depth is the location of the set target in the uint64 value stack at runtime. +// If isTargetVector=true, this points the location of the lower 64-bits of the vector. 
+func newOperationSet(depth int, isTargetVector bool) unionOperation { + return unionOperation{Kind: operationKindSet, U1: uint64(depth), B3: isTargetVector} +} + +// NewOperationGlobalGet is a constructor for unionOperation with operationKindGlobalGet. +// +// The engines are expected to read the global value specified by OperationGlobalGet.Index, +// and push the copy of the value onto the stack. +// +// See wasm.OpcodeGlobalGet. +func newOperationGlobalGet(index uint32) unionOperation { + return unionOperation{Kind: operationKindGlobalGet, U1: uint64(index)} +} + +// NewOperationGlobalSet is a constructor for unionOperation with operationKindGlobalSet. +// +// The engines are expected to consume the value from the top of the stack, +// and write the value into the global specified by OperationGlobalSet.Index. +// +// See wasm.OpcodeGlobalSet. +func newOperationGlobalSet(index uint32) unionOperation { + return unionOperation{Kind: operationKindGlobalSet, U1: uint64(index)} +} + +// memoryArg is the "memarg" to all memory instructions. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-instructions%E2%91%A0 +type memoryArg struct { + // Alignment the expected alignment (expressed as the exponent of a power of 2). Default to the natural alignment. + // + // "Natural alignment" is defined here as the smallest power of two that can hold the size of the value type. Ex + // wasm.ValueTypeI64 is encoded in 8 little-endian bytes. 2^3 = 8, so the natural alignment is three. + Alignment uint32 + + // Offset is the address offset added to the instruction's dynamic address operand, yielding a 33-bit effective + // address that is the zero-based index at which the memory is accessed. Default to zero. + Offset uint32 +} + +// NewOperationLoad is a constructor for unionOperation with operationKindLoad. +// +// This corresponds to wasm.OpcodeI32LoadName wasm.OpcodeI64LoadName wasm.OpcodeF32LoadName and wasm.OpcodeF64LoadName. 
+// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. +func newOperationLoad(unsignedType unsignedType, arg memoryArg) unionOperation { + return unionOperation{Kind: operationKindLoad, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationLoad8 is a constructor for unionOperation with operationKindLoad8. +// +// This corresponds to wasm.OpcodeI32Load8SName wasm.OpcodeI32Load8UName wasm.OpcodeI64Load8SName wasm.OpcodeI64Load8UName. +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. +func newOperationLoad8(signedInt signedInt, arg memoryArg) unionOperation { + return unionOperation{Kind: operationKindLoad8, B1: byte(signedInt), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationLoad16 is a constructor for unionOperation with operationKindLoad16. +// +// This corresponds to wasm.OpcodeI32Load16SName wasm.OpcodeI32Load16UName wasm.OpcodeI64Load16SName wasm.OpcodeI64Load16UName. +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. +func newOperationLoad16(signedInt signedInt, arg memoryArg) unionOperation { + return unionOperation{Kind: operationKindLoad16, B1: byte(signedInt), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationLoad32 is a constructor for unionOperation with operationKindLoad32. +// +// This corresponds to wasm.OpcodeI64Load32SName wasm.OpcodeI64Load32UName. 
+// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. +func newOperationLoad32(signed bool, arg memoryArg) unionOperation { + sigB := byte(0) + if signed { + sigB = 1 + } + return unionOperation{Kind: operationKindLoad32, B1: sigB, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationStore is a constructor for unionOperation with operationKindStore. +// +// # This corresponds to wasm.OpcodeI32StoreName wasm.OpcodeI64StoreName wasm.OpcodeF32StoreName wasm.OpcodeF64StoreName +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. +func newOperationStore(unsignedType unsignedType, arg memoryArg) unionOperation { + return unionOperation{Kind: operationKindStore, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationStore8 is a constructor for unionOperation with operationKindStore8. +// +// # This corresponds to wasm.OpcodeI32Store8Name wasm.OpcodeI64Store8Name +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. +func newOperationStore8(arg memoryArg) unionOperation { + return unionOperation{Kind: operationKindStore8, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationStore16 is a constructor for unionOperation with operationKindStore16. 
+// +// # This corresponds to wasm.OpcodeI32Store16Name wasm.OpcodeI64Store16Name +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. +func newOperationStore16(arg memoryArg) unionOperation { + return unionOperation{Kind: operationKindStore16, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationStore32 is a constructor for unionOperation with operationKindStore32. +// +// # This corresponds to wasm.OpcodeI64Store32Name +// +// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, +// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. +func newOperationStore32(arg memoryArg) unionOperation { + return unionOperation{Kind: operationKindStore32, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationMemorySize is a constructor for unionOperation with operationKindMemorySize. +// +// This corresponds to wasm.OpcodeMemorySize. +// +// The engines are expected to push the current page size of the memory onto the stack. +func newOperationMemorySize() unionOperation { + return unionOperation{Kind: operationKindMemorySize} +} + +// NewOperationMemoryGrow is a constructor for unionOperation with operationKindMemoryGrow. +// +// This corresponds to wasm.OpcodeMemoryGrow. +// +// The engines are expected to pop one value from the top of the stack, then +// execute wasm.MemoryInstance Grow with the value, and push the previous +// page size of the memory onto the stack. +func newOperationMemoryGrow() unionOperation { + return unionOperation{Kind: operationKindMemoryGrow} +} + +// NewOperationConstI32 is a constructor for unionOperation with OperationConstI32. +// +// This corresponds to wasm.OpcodeI32Const. 
+func newOperationConstI32(value uint32) unionOperation { + return unionOperation{Kind: operationKindConstI32, U1: uint64(value)} +} + +// NewOperationConstI64 is a constructor for unionOperation with OperationConstI64. +// +// This corresponds to wasm.OpcodeI64Const. +func newOperationConstI64(value uint64) unionOperation { + return unionOperation{Kind: operationKindConstI64, U1: value} +} + +// NewOperationConstF32 is a constructor for unionOperation with OperationConstF32. +// +// This corresponds to wasm.OpcodeF32Const. +func newOperationConstF32(value float32) unionOperation { + return unionOperation{Kind: operationKindConstF32, U1: uint64(math.Float32bits(value))} +} + +// NewOperationConstF64 is a constructor for unionOperation with OperationConstF64. +// +// This corresponds to wasm.OpcodeF64Const. +func newOperationConstF64(value float64) unionOperation { + return unionOperation{Kind: operationKindConstF64, U1: math.Float64bits(value)} +} + +// NewOperationEq is a constructor for unionOperation with operationKindEq. +// +// This corresponds to wasm.OpcodeI32EqName wasm.OpcodeI64EqName wasm.OpcodeF32EqName wasm.OpcodeF64EqName +func newOperationEq(b unsignedType) unionOperation { + return unionOperation{Kind: operationKindEq, B1: byte(b)} +} + +// NewOperationNe is a constructor for unionOperation with operationKindNe. +// +// This corresponds to wasm.OpcodeI32NeName wasm.OpcodeI64NeName wasm.OpcodeF32NeName wasm.OpcodeF64NeName +func newOperationNe(b unsignedType) unionOperation { + return unionOperation{Kind: operationKindNe, B1: byte(b)} +} + +// NewOperationEqz is a constructor for unionOperation with operationKindEqz. +// +// This corresponds to wasm.OpcodeI32EqzName wasm.OpcodeI64EqzName +func newOperationEqz(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindEqz, B1: byte(b)} +} + +// NewOperationLt is a constructor for unionOperation with operationKindLt. 
+// +// This corresponds to wasm.OpcodeI32LtS wasm.OpcodeI32LtU wasm.OpcodeI64LtS wasm.OpcodeI64LtU wasm.OpcodeF32Lt wasm.OpcodeF64Lt +func newOperationLt(b signedType) unionOperation { + return unionOperation{Kind: operationKindLt, B1: byte(b)} +} + +// NewOperationGt is a constructor for unionOperation with operationKindGt. +// +// This corresponds to wasm.OpcodeI32GtS wasm.OpcodeI32GtU wasm.OpcodeI64GtS wasm.OpcodeI64GtU wasm.OpcodeF32Gt wasm.OpcodeF64Gt +func newOperationGt(b signedType) unionOperation { + return unionOperation{Kind: operationKindGt, B1: byte(b)} +} + +// NewOperationLe is a constructor for unionOperation with operationKindLe. +// +// This corresponds to wasm.OpcodeI32LeS wasm.OpcodeI32LeU wasm.OpcodeI64LeS wasm.OpcodeI64LeU wasm.OpcodeF32Le wasm.OpcodeF64Le +func newOperationLe(b signedType) unionOperation { + return unionOperation{Kind: operationKindLe, B1: byte(b)} +} + +// NewOperationGe is a constructor for unionOperation with operationKindGe. +// +// This corresponds to wasm.OpcodeI32GeS wasm.OpcodeI32GeU wasm.OpcodeI64GeS wasm.OpcodeI64GeU wasm.OpcodeF32Ge wasm.OpcodeF64Ge +// NewOperationGe is the constructor for OperationGe +func newOperationGe(b signedType) unionOperation { + return unionOperation{Kind: operationKindGe, B1: byte(b)} +} + +// NewOperationAdd is a constructor for unionOperation with operationKindAdd. +// +// This corresponds to wasm.OpcodeI32AddName wasm.OpcodeI64AddName wasm.OpcodeF32AddName wasm.OpcodeF64AddName. +func newOperationAdd(b unsignedType) unionOperation { + return unionOperation{Kind: operationKindAdd, B1: byte(b)} +} + +// NewOperationSub is a constructor for unionOperation with operationKindSub. +// +// This corresponds to wasm.OpcodeI32SubName wasm.OpcodeI64SubName wasm.OpcodeF32SubName wasm.OpcodeF64SubName. 
+func newOperationSub(b unsignedType) unionOperation { + return unionOperation{Kind: operationKindSub, B1: byte(b)} +} + +// NewOperationMul is a constructor for unionOperation with wperationKindMul. +// +// This corresponds to wasm.OpcodeI32MulName wasm.OpcodeI64MulName wasm.OpcodeF32MulName wasm.OpcodeF64MulName. +// NewOperationMul is the constructor for OperationMul +func newOperationMul(b unsignedType) unionOperation { + return unionOperation{Kind: operationKindMul, B1: byte(b)} +} + +// NewOperationClz is a constructor for unionOperation with operationKindClz. +// +// This corresponds to wasm.OpcodeI32ClzName wasm.OpcodeI64ClzName. +// +// The engines are expected to count up the leading zeros in the +// current top of the stack, and push the count result. +// For example, stack of [..., 0x00_ff_ff_ff] results in [..., 8]. +// See wasm.OpcodeI32Clz wasm.OpcodeI64Clz +func newOperationClz(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindClz, B1: byte(b)} +} + +// NewOperationCtz is a constructor for unionOperation with operationKindCtz. +// +// This corresponds to wasm.OpcodeI32CtzName wasm.OpcodeI64CtzName. +// +// The engines are expected to count up the trailing zeros in the +// current top of the stack, and push the count result. +// For example, stack of [..., 0xff_ff_ff_00] results in [..., 8]. +func newOperationCtz(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindCtz, B1: byte(b)} +} + +// NewOperationPopcnt is a constructor for unionOperation with operationKindPopcnt. +// +// This corresponds to wasm.OpcodeI32PopcntName wasm.OpcodeI64PopcntName. +// +// The engines are expected to count up the number of set bits in the +// current top of the stack, and push the count result. +// For example, stack of [..., 0b00_00_00_11] results in [..., 2]. 
+func newOperationPopcnt(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindPopcnt, B1: byte(b)} +} + +// NewOperationDiv is a constructor for unionOperation with operationKindDiv. +// +// This corresponds to wasm.OpcodeI32DivS wasm.OpcodeI32DivU wasm.OpcodeI64DivS +// +// wasm.OpcodeI64DivU wasm.OpcodeF32Div wasm.OpcodeF64Div. +func newOperationDiv(b signedType) unionOperation { + return unionOperation{Kind: operationKindDiv, B1: byte(b)} +} + +// NewOperationRem is a constructor for unionOperation with operationKindRem. +// +// This corresponds to wasm.OpcodeI32RemS wasm.OpcodeI32RemU wasm.OpcodeI64RemS wasm.OpcodeI64RemU. +// +// The engines are expected to perform division on the top +// two values of integer type on the stack and puts the remainder of the result +// onto the stack. For example, stack [..., 10, 3] results in [..., 1] where +// the quotient is discarded. +// NewOperationRem is the constructor for OperationRem +func newOperationRem(b signedInt) unionOperation { + return unionOperation{Kind: operationKindRem, B1: byte(b)} +} + +// NewOperationAnd is a constructor for unionOperation with operationKindAnd. +// +// # This corresponds to wasm.OpcodeI32AndName wasm.OpcodeI64AndName +// +// The engines are expected to perform "And" operation on +// top two values on the stack, and pushes the result. +func newOperationAnd(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindAnd, B1: byte(b)} +} + +// NewOperationOr is a constructor for unionOperation with operationKindOr. +// +// # This corresponds to wasm.OpcodeI32OrName wasm.OpcodeI64OrName +// +// The engines are expected to perform "Or" operation on +// top two values on the stack, and pushes the result. +func newOperationOr(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindOr, B1: byte(b)} +} + +// NewOperationXor is a constructor for unionOperation with operationKindXor. 
+// +// # This corresponds to wasm.OpcodeI32XorName wasm.OpcodeI64XorName +// +// The engines are expected to perform "Xor" operation on +// top two values on the stack, and pushes the result. +func newOperationXor(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindXor, B1: byte(b)} +} + +// NewOperationShl is a constructor for unionOperation with operationKindShl. +// +// # This corresponds to wasm.OpcodeI32ShlName wasm.OpcodeI64ShlName +// +// The engines are expected to perform "Shl" operation on +// top two values on the stack, and pushes the result. +func newOperationShl(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindShl, B1: byte(b)} +} + +// NewOperationShr is a constructor for unionOperation with operationKindShr. +// +// # This corresponds to wasm.OpcodeI32ShrSName wasm.OpcodeI32ShrUName wasm.OpcodeI64ShrSName wasm.OpcodeI64ShrUName +// +// If OperationShr.Type is signed integer, then, the engines are expected to perform arithmetic right shift on the two +// top values on the stack, otherwise do the logical right shift. +func newOperationShr(b signedInt) unionOperation { + return unionOperation{Kind: operationKindShr, B1: byte(b)} +} + +// NewOperationRotl is a constructor for unionOperation with operationKindRotl. +// +// # This corresponds to wasm.OpcodeI32RotlName wasm.OpcodeI64RotlName +// +// The engines are expected to perform "Rotl" operation on +// top two values on the stack, and pushes the result. +func newOperationRotl(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindRotl, B1: byte(b)} +} + +// NewOperationRotr is a constructor for unionOperation with operationKindRotr. +// +// # This corresponds to wasm.OpcodeI32RotrName wasm.OpcodeI64RotrName +// +// The engines are expected to perform "Rotr" operation on +// top two values on the stack, and pushes the result. 
+func newOperationRotr(b unsignedInt) unionOperation { + return unionOperation{Kind: operationKindRotr, B1: byte(b)} +} + +// NewOperationAbs is a constructor for unionOperation with operationKindAbs. +// +// This corresponds to wasm.OpcodeF32Abs wasm.OpcodeF64Abs +func newOperationAbs(b float) unionOperation { + return unionOperation{Kind: operationKindAbs, B1: byte(b)} +} + +// NewOperationNeg is a constructor for unionOperation with operationKindNeg. +// +// This corresponds to wasm.OpcodeF32Neg wasm.OpcodeF64Neg +func newOperationNeg(b float) unionOperation { + return unionOperation{Kind: operationKindNeg, B1: byte(b)} +} + +// NewOperationCeil is a constructor for unionOperation with operationKindCeil. +// +// This corresponds to wasm.OpcodeF32CeilName wasm.OpcodeF64CeilName +func newOperationCeil(b float) unionOperation { + return unionOperation{Kind: operationKindCeil, B1: byte(b)} +} + +// NewOperationFloor is a constructor for unionOperation with operationKindFloor. +// +// This corresponds to wasm.OpcodeF32FloorName wasm.OpcodeF64FloorName +func newOperationFloor(b float) unionOperation { + return unionOperation{Kind: operationKindFloor, B1: byte(b)} +} + +// NewOperationTrunc is a constructor for unionOperation with operationKindTrunc. +// +// This corresponds to wasm.OpcodeF32TruncName wasm.OpcodeF64TruncName +func newOperationTrunc(b float) unionOperation { + return unionOperation{Kind: operationKindTrunc, B1: byte(b)} +} + +// NewOperationNearest is a constructor for unionOperation with operationKindNearest. +// +// # This corresponds to wasm.OpcodeF32NearestName wasm.OpcodeF64NearestName +// +// Note: this is *not* equivalent to math.Round and instead has the same +// the semantics of LLVM's rint intrinsic. See https://llvm.org/docs/LangRef.html#llvm-rint-intrinsic. +// For example, math.Round(-4.5) produces -5 while we want to produce -4. 
+func newOperationNearest(b float) unionOperation { + return unionOperation{Kind: operationKindNearest, B1: byte(b)} +} + +// NewOperationSqrt is a constructor for unionOperation with operationKindSqrt. +// +// This corresponds to wasm.OpcodeF32SqrtName wasm.OpcodeF64SqrtName +func newOperationSqrt(b float) unionOperation { + return unionOperation{Kind: operationKindSqrt, B1: byte(b)} +} + +// NewOperationMin is a constructor for unionOperation with operationKindMin. +// +// # This corresponds to wasm.OpcodeF32MinName wasm.OpcodeF64MinName +// +// The engines are expected to pop two values from the stack, and push back the maximum of +// these two values onto the stack. For example, stack [..., 100.1, 1.9] results in [..., 1.9]. +// +// Note: WebAssembly specifies that min/max must always return NaN if one of values is NaN, +// which is a different behavior different from math.Min. +func newOperationMin(b float) unionOperation { + return unionOperation{Kind: operationKindMin, B1: byte(b)} +} + +// NewOperationMax is a constructor for unionOperation with operationKindMax. +// +// # This corresponds to wasm.OpcodeF32MaxName wasm.OpcodeF64MaxName +// +// The engines are expected to pop two values from the stack, and push back the maximum of +// these two values onto the stack. For example, stack [..., 100.1, 1.9] results in [..., 100.1]. +// +// Note: WebAssembly specifies that min/max must always return NaN if one of values is NaN, +// which is a different behavior different from math.Max. +func newOperationMax(b float) unionOperation { + return unionOperation{Kind: operationKindMax, B1: byte(b)} +} + +// NewOperationCopysign is a constructor for unionOperation with operationKindCopysign. +// +// # This corresponds to wasm.OpcodeF32CopysignName wasm.OpcodeF64CopysignName +// +// The engines are expected to pop two float values from the stack, and copy the signbit of +// the first-popped value to the last one. 
+// For example, stack [..., 1.213, -5.0] results in [..., -1.213]. +func newOperationCopysign(b float) unionOperation { + return unionOperation{Kind: operationKindCopysign, B1: byte(b)} +} + +// NewOperationI32WrapFromI64 is a constructor for unionOperation with operationKindI32WrapFromI64. +// +// This corresponds to wasm.OpcodeI32WrapI64 and equivalent to uint64(uint32(v)) in Go. +// +// The engines are expected to replace the 64-bit int on top of the stack +// with the corresponding 32-bit integer. +func newOperationI32WrapFromI64() unionOperation { + return unionOperation{Kind: operationKindI32WrapFromI64} +} + +// NewOperationITruncFromF is a constructor for unionOperation with operationKindITruncFromF. +// +// This corresponds to +// +// wasm.OpcodeI32TruncF32SName wasm.OpcodeI32TruncF32UName wasm.OpcodeI32TruncF64SName +// wasm.OpcodeI32TruncF64UName wasm.OpcodeI64TruncF32SName wasm.OpcodeI64TruncF32UName wasm.OpcodeI64TruncF64SName +// wasm.OpcodeI64TruncF64UName. wasm.OpcodeI32TruncSatF32SName wasm.OpcodeI32TruncSatF32UName +// wasm.OpcodeI32TruncSatF64SName wasm.OpcodeI32TruncSatF64UName wasm.OpcodeI64TruncSatF32SName +// wasm.OpcodeI64TruncSatF32UName wasm.OpcodeI64TruncSatF64SName wasm.OpcodeI64TruncSatF64UName +// +// See [1] and [2] for when we encounter undefined behavior in the WebAssembly specification if NewOperationITruncFromF.NonTrapping == false. +// To summarize, if the source float value is NaN or doesn't fit in the destination range of integers (incl. +=Inf), +// then the runtime behavior is undefined. In wazero, the engines are expected to exit the execution in these undefined cases with +// wasmruntime.ErrRuntimeInvalidConversionToInteger error. +// +// [1] https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefop-trunc-umathrmtruncmathsfu_m-n-z for unsigned integers. +// [2] https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefop-trunc-smathrmtruncmathsfs_m-n-z for signed integers. 
+// +// nonTrapping true if this conversion is "nontrapping" in the sense of the +// https://github.com/WebAssembly/spec/blob/ce4b6c4d47eb06098cc7ab2e81f24748da822f20/proposals/nontrapping-float-to-int-conversion/Overview.md +func newOperationITruncFromF(inputType float, outputType signedInt, nonTrapping bool) unionOperation { + return unionOperation{ + Kind: operationKindITruncFromF, + B1: byte(inputType), + B2: byte(outputType), + B3: nonTrapping, + } +} + +// NewOperationFConvertFromI is a constructor for unionOperation with operationKindFConvertFromI. +// +// This corresponds to +// +// wasm.OpcodeF32ConvertI32SName wasm.OpcodeF32ConvertI32UName wasm.OpcodeF32ConvertI64SName wasm.OpcodeF32ConvertI64UName +// wasm.OpcodeF64ConvertI32SName wasm.OpcodeF64ConvertI32UName wasm.OpcodeF64ConvertI64SName wasm.OpcodeF64ConvertI64UName +// +// and equivalent to float32(uint32(x)), float32(int32(x)), etc in Go. +func newOperationFConvertFromI(inputType signedInt, outputType float) unionOperation { + return unionOperation{ + Kind: operationKindFConvertFromI, + B1: byte(inputType), + B2: byte(outputType), + } +} + +// NewOperationF32DemoteFromF64 is a constructor for unionOperation with operationKindF32DemoteFromF64. +// +// This corresponds to wasm.OpcodeF32DemoteF64 and is equivalent float32(float64(v)). +func newOperationF32DemoteFromF64() unionOperation { + return unionOperation{Kind: operationKindF32DemoteFromF64} +} + +// NewOperationF64PromoteFromF32 is a constructor for unionOperation with operationKindF64PromoteFromF32. +// +// This corresponds to wasm.OpcodeF64PromoteF32 and is equivalent float64(float32(v)). +func newOperationF64PromoteFromF32() unionOperation { + return unionOperation{Kind: operationKindF64PromoteFromF32} +} + +// NewOperationI32ReinterpretFromF32 is a constructor for unionOperation with operationKindI32ReinterpretFromF32. +// +// This corresponds to wasm.OpcodeI32ReinterpretF32Name. 
+func newOperationI32ReinterpretFromF32() unionOperation { + return unionOperation{Kind: operationKindI32ReinterpretFromF32} +} + +// NewOperationI64ReinterpretFromF64 is a constructor for unionOperation with operationKindI64ReinterpretFromF64. +// +// This corresponds to wasm.OpcodeI64ReinterpretF64Name. +func newOperationI64ReinterpretFromF64() unionOperation { + return unionOperation{Kind: operationKindI64ReinterpretFromF64} +} + +// NewOperationF32ReinterpretFromI32 is a constructor for unionOperation with operationKindF32ReinterpretFromI32. +// +// This corresponds to wasm.OpcodeF32ReinterpretI32Name. +func newOperationF32ReinterpretFromI32() unionOperation { + return unionOperation{Kind: operationKindF32ReinterpretFromI32} +} + +// NewOperationF64ReinterpretFromI64 is a constructor for unionOperation with operationKindF64ReinterpretFromI64. +// +// This corresponds to wasm.OpcodeF64ReinterpretI64Name. +func newOperationF64ReinterpretFromI64() unionOperation { + return unionOperation{Kind: operationKindF64ReinterpretFromI64} +} + +// NewOperationExtend is a constructor for unionOperation with operationKindExtend. +// +// # This corresponds to wasm.OpcodeI64ExtendI32SName wasm.OpcodeI64ExtendI32UName +// +// The engines are expected to extend the 32-bit signed or unsigned int on top of the stack +// as a 64-bit integer of corresponding signedness. For unsigned case, this is just reinterpreting the +// underlying bit pattern as 64-bit integer. For signed case, this is sign-extension which preserves the +// original integer's sign. +func newOperationExtend(signed bool) unionOperation { + op := unionOperation{Kind: operationKindExtend} + if signed { + op.B1 = 1 + } + return op +} + +// NewOperationSignExtend32From8 is a constructor for unionOperation with operationKindSignExtend32From8. +// +// This corresponds to wasm.OpcodeI32Extend8SName. +// +// The engines are expected to sign-extend the first 8-bits of 32-bit in as signed 32-bit int. 
+func newOperationSignExtend32From8() unionOperation { + return unionOperation{Kind: operationKindSignExtend32From8} +} + +// NewOperationSignExtend32From16 is a constructor for unionOperation with operationKindSignExtend32From16. +// +// This corresponds to wasm.OpcodeI32Extend16SName. +// +// The engines are expected to sign-extend the first 16-bits of 32-bit in as signed 32-bit int. +func newOperationSignExtend32From16() unionOperation { + return unionOperation{Kind: operationKindSignExtend32From16} +} + +// NewOperationSignExtend64From8 is a constructor for unionOperation with operationKindSignExtend64From8. +// +// This corresponds to wasm.OpcodeI64Extend8SName. +// +// The engines are expected to sign-extend the first 8-bits of 64-bit in as signed 32-bit int. +func newOperationSignExtend64From8() unionOperation { + return unionOperation{Kind: operationKindSignExtend64From8} +} + +// NewOperationSignExtend64From16 is a constructor for unionOperation with operationKindSignExtend64From16. +// +// This corresponds to wasm.OpcodeI64Extend16SName. +// +// The engines are expected to sign-extend the first 16-bits of 64-bit in as signed 32-bit int. +func newOperationSignExtend64From16() unionOperation { + return unionOperation{Kind: operationKindSignExtend64From16} +} + +// NewOperationSignExtend64From32 is a constructor for unionOperation with operationKindSignExtend64From32. +// +// This corresponds to wasm.OpcodeI64Extend32SName. +// +// The engines are expected to sign-extend the first 32-bits of 64-bit in as signed 32-bit int. +func newOperationSignExtend64From32() unionOperation { + return unionOperation{Kind: operationKindSignExtend64From32} +} + +// NewOperationMemoryInit is a constructor for unionOperation with operationKindMemoryInit. +// +// This corresponds to wasm.OpcodeMemoryInitName. +// +// dataIndex is the index of the data instance in ModuleInstance.DataInstances +// by which this operation instantiates a part of the memory. 
+func newOperationMemoryInit(dataIndex uint32) unionOperation { + return unionOperation{Kind: operationKindMemoryInit, U1: uint64(dataIndex)} +} + +// NewOperationDataDrop implements Operation. +// +// This corresponds to wasm.OpcodeDataDropName. +// +// dataIndex is the index of the data instance in ModuleInstance.DataInstances +// which this operation drops. +func newOperationDataDrop(dataIndex uint32) unionOperation { + return unionOperation{Kind: operationKindDataDrop, U1: uint64(dataIndex)} +} + +// NewOperationMemoryCopy is a consuctor for unionOperation with operationKindMemoryCopy. +// +// This corresponds to wasm.OpcodeMemoryCopyName. +func newOperationMemoryCopy() unionOperation { + return unionOperation{Kind: operationKindMemoryCopy} +} + +// NewOperationMemoryFill is a consuctor for unionOperation with operationKindMemoryFill. +func newOperationMemoryFill() unionOperation { + return unionOperation{Kind: operationKindMemoryFill} +} + +// NewOperationTableInit is a constructor for unionOperation with operationKindTableInit. +// +// This corresponds to wasm.OpcodeTableInitName. +// +// elemIndex is the index of the element by which this operation initializes a part of the table. +// tableIndex is the index of the table on which this operation initialize by the target element. +func newOperationTableInit(elemIndex, tableIndex uint32) unionOperation { + return unionOperation{Kind: operationKindTableInit, U1: uint64(elemIndex), U2: uint64(tableIndex)} +} + +// NewOperationElemDrop is a constructor for unionOperation with operationKindElemDrop. +// +// This corresponds to wasm.OpcodeElemDropName. +// +// elemIndex is the index of the element which this operation drops. +func newOperationElemDrop(elemIndex uint32) unionOperation { + return unionOperation{Kind: operationKindElemDrop, U1: uint64(elemIndex)} +} + +// NewOperationTableCopy implements Operation. +// +// This corresponds to wasm.OpcodeTableCopyName. 
+func newOperationTableCopy(srcTableIndex, dstTableIndex uint32) unionOperation { + return unionOperation{Kind: operationKindTableCopy, U1: uint64(srcTableIndex), U2: uint64(dstTableIndex)} +} + +// NewOperationRefFunc constructor for unionOperation with operationKindRefFunc. +// +// This corresponds to wasm.OpcodeRefFuncName, and engines are expected to +// push the opaque pointer value of engine specific func for the given FunctionIndex. +// +// Note: in wazero, we express any reference types (funcref or externref) as opaque pointers which is uint64. +// Therefore, the engine implementations emit instructions to push the address of *function onto the stack. +func newOperationRefFunc(functionIndex uint32) unionOperation { + return unionOperation{Kind: operationKindRefFunc, U1: uint64(functionIndex)} +} + +// NewOperationTableGet constructor for unionOperation with operationKindTableGet. +// +// This corresponds to wasm.OpcodeTableGetName. +func newOperationTableGet(tableIndex uint32) unionOperation { + return unionOperation{Kind: operationKindTableGet, U1: uint64(tableIndex)} +} + +// NewOperationTableSet constructor for unionOperation with operationKindTableSet. +// +// This corresponds to wasm.OpcodeTableSetName. +func newOperationTableSet(tableIndex uint32) unionOperation { + return unionOperation{Kind: operationKindTableSet, U1: uint64(tableIndex)} +} + +// NewOperationTableSize constructor for unionOperation with operationKindTableSize. +// +// This corresponds to wasm.OpcodeTableSizeName. +func newOperationTableSize(tableIndex uint32) unionOperation { + return unionOperation{Kind: operationKindTableSize, U1: uint64(tableIndex)} +} + +// NewOperationTableGrow constructor for unionOperation with operationKindTableGrow. +// +// This corresponds to wasm.OpcodeTableGrowName. 
+func newOperationTableGrow(tableIndex uint32) unionOperation { + return unionOperation{Kind: operationKindTableGrow, U1: uint64(tableIndex)} +} + +// NewOperationTableFill constructor for unionOperation with operationKindTableFill. +// +// This corresponds to wasm.OpcodeTableFillName. +func newOperationTableFill(tableIndex uint32) unionOperation { + return unionOperation{Kind: operationKindTableFill, U1: uint64(tableIndex)} +} + +// NewOperationV128Const constructor for unionOperation with operationKindV128Const +func newOperationV128Const(lo, hi uint64) unionOperation { + return unionOperation{Kind: operationKindV128Const, U1: lo, U2: hi} +} + +// shape corresponds to a shape of v128 values. +// https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-shape +type shape = byte + +const ( + shapeI8x16 shape = iota + shapeI16x8 + shapeI32x4 + shapeI64x2 + shapeF32x4 + shapeF64x2 +) + +func shapeName(s shape) (ret string) { + switch s { + case shapeI8x16: + ret = "I8x16" + case shapeI16x8: + ret = "I16x8" + case shapeI32x4: + ret = "I32x4" + case shapeI64x2: + ret = "I64x2" + case shapeF32x4: + ret = "F32x4" + case shapeF64x2: + ret = "F64x2" + } + return +} + +// NewOperationV128Add constructor for unionOperation with operationKindV128Add. +// +// This corresponds to wasm.OpcodeVecI8x16AddName wasm.OpcodeVecI16x8AddName wasm.OpcodeVecI32x4AddName +// +// wasm.OpcodeVecI64x2AddName wasm.OpcodeVecF32x4AddName wasm.OpcodeVecF64x2AddName +func newOperationV128Add(shape shape) unionOperation { + return unionOperation{Kind: operationKindV128Add, B1: shape} +} + +// NewOperationV128Sub constructor for unionOperation with operationKindV128Sub. 
+// +// This corresponds to wasm.OpcodeVecI8x16SubName wasm.OpcodeVecI16x8SubName wasm.OpcodeVecI32x4SubName +// +// wasm.OpcodeVecI64x2SubName wasm.OpcodeVecF32x4SubName wasm.OpcodeVecF64x2SubName +func newOperationV128Sub(shape shape) unionOperation { + return unionOperation{Kind: operationKindV128Sub, B1: shape} +} + +// v128LoadType represents a type of wasm.OpcodeVecV128Load* instructions. +type v128LoadType = byte + +const ( + // v128LoadType128 corresponds to wasm.OpcodeVecV128LoadName. + v128LoadType128 v128LoadType = iota + // v128LoadType8x8s corresponds to wasm.OpcodeVecV128Load8x8SName. + v128LoadType8x8s + // v128LoadType8x8u corresponds to wasm.OpcodeVecV128Load8x8UName. + v128LoadType8x8u + // v128LoadType16x4s corresponds to wasm.OpcodeVecV128Load16x4SName + v128LoadType16x4s + // v128LoadType16x4u corresponds to wasm.OpcodeVecV128Load16x4UName + v128LoadType16x4u + // v128LoadType32x2s corresponds to wasm.OpcodeVecV128Load32x2SName + v128LoadType32x2s + // v128LoadType32x2u corresponds to wasm.OpcodeVecV128Load32x2UName + v128LoadType32x2u + // v128LoadType8Splat corresponds to wasm.OpcodeVecV128Load8SplatName + v128LoadType8Splat + // v128LoadType16Splat corresponds to wasm.OpcodeVecV128Load16SplatName + v128LoadType16Splat + // v128LoadType32Splat corresponds to wasm.OpcodeVecV128Load32SplatName + v128LoadType32Splat + // v128LoadType64Splat corresponds to wasm.OpcodeVecV128Load64SplatName + v128LoadType64Splat + // v128LoadType32zero corresponds to wasm.OpcodeVecV128Load32zeroName + v128LoadType32zero + // v128LoadType64zero corresponds to wasm.OpcodeVecV128Load64zeroName + v128LoadType64zero +) + +// NewOperationV128Load is a constructor for unionOperation with operationKindV128Load. 
+// +// This corresponds to +// +// wasm.OpcodeVecV128LoadName wasm.OpcodeVecV128Load8x8SName wasm.OpcodeVecV128Load8x8UName +// wasm.OpcodeVecV128Load16x4SName wasm.OpcodeVecV128Load16x4UName wasm.OpcodeVecV128Load32x2SName +// wasm.OpcodeVecV128Load32x2UName wasm.OpcodeVecV128Load8SplatName wasm.OpcodeVecV128Load16SplatName +// wasm.OpcodeVecV128Load32SplatName wasm.OpcodeVecV128Load64SplatName wasm.OpcodeVecV128Load32zeroName +// wasm.OpcodeVecV128Load64zeroName +func newOperationV128Load(loadType v128LoadType, arg memoryArg) unionOperation { + return unionOperation{Kind: operationKindV128Load, B1: loadType, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationV128LoadLane is a constructor for unionOperation with operationKindV128LoadLane. +// +// This corresponds to wasm.OpcodeVecV128Load8LaneName wasm.OpcodeVecV128Load16LaneName +// +// wasm.OpcodeVecV128Load32LaneName wasm.OpcodeVecV128Load64LaneName. +// +// laneIndex is >=0 && <(128/LaneSize). +// laneSize is either 8, 16, 32, or 64. +func newOperationV128LoadLane(laneIndex, laneSize byte, arg memoryArg) unionOperation { + return unionOperation{Kind: operationKindV128LoadLane, B1: laneSize, B2: laneIndex, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} +} + +// NewOperationV128Store is a constructor for unionOperation with operationKindV128Store. +// +// This corresponds to wasm.OpcodeVecV128Load8LaneName wasm.OpcodeVecV128Load16LaneName +// +// wasm.OpcodeVecV128Load32LaneName wasm.OpcodeVecV128Load64LaneName. +func newOperationV128Store(arg memoryArg) unionOperation { + return unionOperation{ + Kind: operationKindV128Store, + U1: uint64(arg.Alignment), + U2: uint64(arg.Offset), + } +} + +// NewOperationV128StoreLane implements Operation. +// +// This corresponds to wasm.OpcodeVecV128Load8LaneName wasm.OpcodeVecV128Load16LaneName +// +// wasm.OpcodeVecV128Load32LaneName wasm.OpcodeVecV128Load64LaneName. +// +// laneIndex is >=0 && <(128/LaneSize). 
+// laneSize is either 8, 16, 32, or 64. +func newOperationV128StoreLane(laneIndex byte, laneSize byte, arg memoryArg) unionOperation { + return unionOperation{ + Kind: operationKindV128StoreLane, + B1: laneSize, + B2: laneIndex, + U1: uint64(arg.Alignment), + U2: uint64(arg.Offset), + } +} + +// NewOperationV128ExtractLane is a constructor for unionOperation with operationKindV128ExtractLane. +// +// This corresponds to +// +// wasm.OpcodeVecI8x16ExtractLaneSName wasm.OpcodeVecI8x16ExtractLaneUName +// wasm.OpcodeVecI16x8ExtractLaneSName wasm.OpcodeVecI16x8ExtractLaneUName +// wasm.OpcodeVecI32x4ExtractLaneName wasm.OpcodeVecI64x2ExtractLaneName +// wasm.OpcodeVecF32x4ExtractLaneName wasm.OpcodeVecF64x2ExtractLaneName. +// +// laneIndex is >=0 && =0 && = l { + return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l) + } + var t wasm.ValueType + if index < inputLen { + t = c.sig.Params[index] + } else { + t = c.localTypes[index-inputLen] + } + return wasmValueTypeToUnsignedOutSignature(t), nil + case wasm.OpcodeLocalSet: + inputLen := uint32(len(c.sig.Params)) + if l := uint32(len(c.localTypes)) + inputLen; index >= l { + return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l) + } + var t wasm.ValueType + if index < inputLen { + t = c.sig.Params[index] + } else { + t = c.localTypes[index-inputLen] + } + return wasmValueTypeToUnsignedInSignature(t), nil + case wasm.OpcodeLocalTee: + inputLen := uint32(len(c.sig.Params)) + if l := uint32(len(c.localTypes)) + inputLen; index >= l { + return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l) + } + var t wasm.ValueType + if index < inputLen { + t = c.sig.Params[index] + } else { + t = c.localTypes[index-inputLen] + } + return wasmValueTypeToUnsignedInOutSignature(t), nil + case wasm.OpcodeGlobalGet: + if len(c.globals) <= int(index) { + return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals)) + } + return 
wasmValueTypeToUnsignedOutSignature(c.globals[index].ValType), nil + case wasm.OpcodeGlobalSet: + if len(c.globals) <= int(index) { + return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals)) + } + return wasmValueTypeToUnsignedInSignature(c.globals[index].ValType), nil + case wasm.OpcodeI32Load: + return signature_I32_I32, nil + case wasm.OpcodeI64Load: + return signature_I32_I64, nil + case wasm.OpcodeF32Load: + return signature_I32_F32, nil + case wasm.OpcodeF64Load: + return signature_I32_F64, nil + case wasm.OpcodeI32Load8S, wasm.OpcodeI32Load8U, wasm.OpcodeI32Load16S, wasm.OpcodeI32Load16U: + return signature_I32_I32, nil + case wasm.OpcodeI64Load8S, wasm.OpcodeI64Load8U, wasm.OpcodeI64Load16S, wasm.OpcodeI64Load16U, + wasm.OpcodeI64Load32S, wasm.OpcodeI64Load32U: + return signature_I32_I64, nil + case wasm.OpcodeI32Store: + return signature_I32I32_None, nil + case wasm.OpcodeI64Store: + return signature_I32I64_None, nil + case wasm.OpcodeF32Store: + return signature_I32F32_None, nil + case wasm.OpcodeF64Store: + return signature_I32F64_None, nil + case wasm.OpcodeI32Store8: + return signature_I32I32_None, nil + case wasm.OpcodeI32Store16: + return signature_I32I32_None, nil + case wasm.OpcodeI64Store8: + return signature_I32I64_None, nil + case wasm.OpcodeI64Store16: + return signature_I32I64_None, nil + case wasm.OpcodeI64Store32: + return signature_I32I64_None, nil + case wasm.OpcodeMemorySize: + return signature_None_I32, nil + case wasm.OpcodeMemoryGrow: + return signature_I32_I32, nil + case wasm.OpcodeI32Const: + return signature_None_I32, nil + case wasm.OpcodeI64Const: + return signature_None_I64, nil + case wasm.OpcodeF32Const: + return signature_None_F32, nil + case wasm.OpcodeF64Const: + return signature_None_F64, nil + case wasm.OpcodeI32Eqz: + return signature_I32_I32, nil + case wasm.OpcodeI32Eq, wasm.OpcodeI32Ne, wasm.OpcodeI32LtS, + wasm.OpcodeI32LtU, wasm.OpcodeI32GtS, wasm.OpcodeI32GtU, + 
wasm.OpcodeI32LeS, wasm.OpcodeI32LeU, wasm.OpcodeI32GeS, + wasm.OpcodeI32GeU: + return signature_I32I32_I32, nil + case wasm.OpcodeI64Eqz: + return signature_I64_I32, nil + case wasm.OpcodeI64Eq, wasm.OpcodeI64Ne, wasm.OpcodeI64LtS, + wasm.OpcodeI64LtU, wasm.OpcodeI64GtS, wasm.OpcodeI64GtU, + wasm.OpcodeI64LeS, wasm.OpcodeI64LeU, wasm.OpcodeI64GeS, + wasm.OpcodeI64GeU: + return signature_I64I64_I32, nil + case wasm.OpcodeF32Eq, wasm.OpcodeF32Ne, wasm.OpcodeF32Lt, + wasm.OpcodeF32Gt, wasm.OpcodeF32Le, wasm.OpcodeF32Ge: + return signature_F32F32_I32, nil + case wasm.OpcodeF64Eq, wasm.OpcodeF64Ne, wasm.OpcodeF64Lt, + wasm.OpcodeF64Gt, wasm.OpcodeF64Le, wasm.OpcodeF64Ge: + return signature_F64F64_I32, nil + case wasm.OpcodeI32Clz, wasm.OpcodeI32Ctz, wasm.OpcodeI32Popcnt: + return signature_I32_I32, nil + case wasm.OpcodeI32Add, wasm.OpcodeI32Sub, wasm.OpcodeI32Mul, + wasm.OpcodeI32DivS, wasm.OpcodeI32DivU, wasm.OpcodeI32RemS, + wasm.OpcodeI32RemU, wasm.OpcodeI32And, wasm.OpcodeI32Or, + wasm.OpcodeI32Xor, wasm.OpcodeI32Shl, wasm.OpcodeI32ShrS, + wasm.OpcodeI32ShrU, wasm.OpcodeI32Rotl, wasm.OpcodeI32Rotr: + return signature_I32I32_I32, nil + case wasm.OpcodeI64Clz, wasm.OpcodeI64Ctz, wasm.OpcodeI64Popcnt: + return signature_I64_I64, nil + case wasm.OpcodeI64Add, wasm.OpcodeI64Sub, wasm.OpcodeI64Mul, + wasm.OpcodeI64DivS, wasm.OpcodeI64DivU, wasm.OpcodeI64RemS, + wasm.OpcodeI64RemU, wasm.OpcodeI64And, wasm.OpcodeI64Or, + wasm.OpcodeI64Xor, wasm.OpcodeI64Shl, wasm.OpcodeI64ShrS, + wasm.OpcodeI64ShrU, wasm.OpcodeI64Rotl, wasm.OpcodeI64Rotr: + return signature_I64I64_I64, nil + case wasm.OpcodeF32Abs, wasm.OpcodeF32Neg, wasm.OpcodeF32Ceil, + wasm.OpcodeF32Floor, wasm.OpcodeF32Trunc, wasm.OpcodeF32Nearest, + wasm.OpcodeF32Sqrt: + return signature_F32_F32, nil + case wasm.OpcodeF32Add, wasm.OpcodeF32Sub, wasm.OpcodeF32Mul, + wasm.OpcodeF32Div, wasm.OpcodeF32Min, wasm.OpcodeF32Max, + wasm.OpcodeF32Copysign: + return signature_F32F32_F32, nil + case wasm.OpcodeF64Abs, 
wasm.OpcodeF64Neg, wasm.OpcodeF64Ceil, + wasm.OpcodeF64Floor, wasm.OpcodeF64Trunc, wasm.OpcodeF64Nearest, + wasm.OpcodeF64Sqrt: + return signature_F64_F64, nil + case wasm.OpcodeF64Add, wasm.OpcodeF64Sub, wasm.OpcodeF64Mul, + wasm.OpcodeF64Div, wasm.OpcodeF64Min, wasm.OpcodeF64Max, + wasm.OpcodeF64Copysign: + return signature_F64F64_F64, nil + case wasm.OpcodeI32WrapI64: + return signature_I64_I32, nil + case wasm.OpcodeI32TruncF32S, wasm.OpcodeI32TruncF32U: + return signature_F32_I32, nil + case wasm.OpcodeI32TruncF64S, wasm.OpcodeI32TruncF64U: + return signature_F64_I32, nil + case wasm.OpcodeI64ExtendI32S, wasm.OpcodeI64ExtendI32U: + return signature_I32_I64, nil + case wasm.OpcodeI64TruncF32S, wasm.OpcodeI64TruncF32U: + return signature_F32_I64, nil + case wasm.OpcodeI64TruncF64S, wasm.OpcodeI64TruncF64U: + return signature_F64_I64, nil + case wasm.OpcodeF32ConvertI32S, wasm.OpcodeF32ConvertI32U: + return signature_I32_F32, nil + case wasm.OpcodeF32ConvertI64S, wasm.OpcodeF32ConvertI64U: + return signature_I64_F32, nil + case wasm.OpcodeF32DemoteF64: + return signature_F64_F32, nil + case wasm.OpcodeF64ConvertI32S, wasm.OpcodeF64ConvertI32U: + return signature_I32_F64, nil + case wasm.OpcodeF64ConvertI64S, wasm.OpcodeF64ConvertI64U: + return signature_I64_F64, nil + case wasm.OpcodeF64PromoteF32: + return signature_F32_F64, nil + case wasm.OpcodeI32ReinterpretF32: + return signature_F32_I32, nil + case wasm.OpcodeI64ReinterpretF64: + return signature_F64_I64, nil + case wasm.OpcodeF32ReinterpretI32: + return signature_I32_F32, nil + case wasm.OpcodeF64ReinterpretI64: + return signature_I64_F64, nil + case wasm.OpcodeI32Extend8S, wasm.OpcodeI32Extend16S: + return signature_I32_I32, nil + case wasm.OpcodeI64Extend8S, wasm.OpcodeI64Extend16S, wasm.OpcodeI64Extend32S: + return signature_I64_I64, nil + case wasm.OpcodeTableGet: + // table.get takes table's offset and pushes the ref type value of opaque pointer as i64 value onto the stack. 
+ return signature_I32_I64, nil + case wasm.OpcodeTableSet: + // table.set takes table's offset and the ref type value of opaque pointer as i64 value. + return signature_I32I64_None, nil + case wasm.OpcodeRefFunc: + // ref.func is translated as pushing the compiled function's opaque pointer (uint64) at interpreterir layer. + return signature_None_I64, nil + case wasm.OpcodeRefIsNull: + // ref.is_null is translated as checking if the uint64 on the top of the stack (opaque pointer) is zero or not. + return signature_I64_I32, nil + case wasm.OpcodeRefNull: + // ref.null is translated as i64.const 0. + return signature_None_I64, nil + case wasm.OpcodeMiscPrefix: + switch miscOp := c.body[c.pc+1]; miscOp { + case wasm.OpcodeMiscI32TruncSatF32S, wasm.OpcodeMiscI32TruncSatF32U: + return signature_F32_I32, nil + case wasm.OpcodeMiscI32TruncSatF64S, wasm.OpcodeMiscI32TruncSatF64U: + return signature_F64_I32, nil + case wasm.OpcodeMiscI64TruncSatF32S, wasm.OpcodeMiscI64TruncSatF32U: + return signature_F32_I64, nil + case wasm.OpcodeMiscI64TruncSatF64S, wasm.OpcodeMiscI64TruncSatF64U: + return signature_F64_I64, nil + case wasm.OpcodeMiscMemoryInit, wasm.OpcodeMiscMemoryCopy, wasm.OpcodeMiscMemoryFill, + wasm.OpcodeMiscTableInit, wasm.OpcodeMiscTableCopy: + return signature_I32I32I32_None, nil + case wasm.OpcodeMiscDataDrop, wasm.OpcodeMiscElemDrop: + return signature_None_None, nil + case wasm.OpcodeMiscTableGrow: + return signature_I64I32_I32, nil + case wasm.OpcodeMiscTableSize: + return signature_None_I32, nil + case wasm.OpcodeMiscTableFill: + return signature_I32I64I32_None, nil + default: + return nil, fmt.Errorf("unsupported misc instruction in interpreterir: 0x%x", op) + } + case wasm.OpcodeVecPrefix: + switch vecOp := c.body[c.pc+1]; vecOp { + case wasm.OpcodeVecV128Const: + return signature_None_V128, nil + case wasm.OpcodeVecV128Load, wasm.OpcodeVecV128Load8x8s, wasm.OpcodeVecV128Load8x8u, + wasm.OpcodeVecV128Load16x4s, wasm.OpcodeVecV128Load16x4u, 
wasm.OpcodeVecV128Load32x2s, + wasm.OpcodeVecV128Load32x2u, wasm.OpcodeVecV128Load8Splat, wasm.OpcodeVecV128Load16Splat, + wasm.OpcodeVecV128Load32Splat, wasm.OpcodeVecV128Load64Splat, wasm.OpcodeVecV128Load32zero, + wasm.OpcodeVecV128Load64zero: + return signature_I32_V128, nil + case wasm.OpcodeVecV128Load8Lane, wasm.OpcodeVecV128Load16Lane, + wasm.OpcodeVecV128Load32Lane, wasm.OpcodeVecV128Load64Lane: + return signature_I32V128_V128, nil + case wasm.OpcodeVecV128Store, + wasm.OpcodeVecV128Store8Lane, + wasm.OpcodeVecV128Store16Lane, + wasm.OpcodeVecV128Store32Lane, + wasm.OpcodeVecV128Store64Lane: + return signature_I32V128_None, nil + case wasm.OpcodeVecI8x16ExtractLaneS, + wasm.OpcodeVecI8x16ExtractLaneU, + wasm.OpcodeVecI16x8ExtractLaneS, + wasm.OpcodeVecI16x8ExtractLaneU, + wasm.OpcodeVecI32x4ExtractLane: + return signature_V128_I32, nil + case wasm.OpcodeVecI64x2ExtractLane: + return signature_V128_I64, nil + case wasm.OpcodeVecF32x4ExtractLane: + return signature_V128_F32, nil + case wasm.OpcodeVecF64x2ExtractLane: + return signature_V128_F64, nil + case wasm.OpcodeVecI8x16ReplaceLane, wasm.OpcodeVecI16x8ReplaceLane, wasm.OpcodeVecI32x4ReplaceLane, + wasm.OpcodeVecI8x16Shl, wasm.OpcodeVecI8x16ShrS, wasm.OpcodeVecI8x16ShrU, + wasm.OpcodeVecI16x8Shl, wasm.OpcodeVecI16x8ShrS, wasm.OpcodeVecI16x8ShrU, + wasm.OpcodeVecI32x4Shl, wasm.OpcodeVecI32x4ShrS, wasm.OpcodeVecI32x4ShrU, + wasm.OpcodeVecI64x2Shl, wasm.OpcodeVecI64x2ShrS, wasm.OpcodeVecI64x2ShrU: + return signature_V128I32_V128, nil + case wasm.OpcodeVecI64x2ReplaceLane: + return signature_V128I64_V128, nil + case wasm.OpcodeVecF32x4ReplaceLane: + return signature_V128F32_V128, nil + case wasm.OpcodeVecF64x2ReplaceLane: + return signature_V128F64_V128, nil + case wasm.OpcodeVecI8x16Splat, + wasm.OpcodeVecI16x8Splat, + wasm.OpcodeVecI32x4Splat: + return signature_I32_V128, nil + case wasm.OpcodeVecI64x2Splat: + return signature_I64_V128, nil + case wasm.OpcodeVecF32x4Splat: + return signature_F32_V128, nil 
+ case wasm.OpcodeVecF64x2Splat: + return signature_F64_V128, nil + case wasm.OpcodeVecV128i8x16Shuffle, wasm.OpcodeVecI8x16Swizzle, wasm.OpcodeVecV128And, wasm.OpcodeVecV128Or, wasm.OpcodeVecV128Xor, wasm.OpcodeVecV128AndNot: + return signature_V128V128_V128, nil + case wasm.OpcodeVecI8x16AllTrue, wasm.OpcodeVecI16x8AllTrue, wasm.OpcodeVecI32x4AllTrue, wasm.OpcodeVecI64x2AllTrue, + wasm.OpcodeVecV128AnyTrue, + wasm.OpcodeVecI8x16BitMask, wasm.OpcodeVecI16x8BitMask, wasm.OpcodeVecI32x4BitMask, wasm.OpcodeVecI64x2BitMask: + return signature_V128_I32, nil + case wasm.OpcodeVecV128Not, wasm.OpcodeVecI8x16Neg, wasm.OpcodeVecI16x8Neg, wasm.OpcodeVecI32x4Neg, wasm.OpcodeVecI64x2Neg, + wasm.OpcodeVecF32x4Neg, wasm.OpcodeVecF64x2Neg, wasm.OpcodeVecF32x4Sqrt, wasm.OpcodeVecF64x2Sqrt, + wasm.OpcodeVecI8x16Abs, wasm.OpcodeVecI8x16Popcnt, wasm.OpcodeVecI16x8Abs, wasm.OpcodeVecI32x4Abs, wasm.OpcodeVecI64x2Abs, + wasm.OpcodeVecF32x4Abs, wasm.OpcodeVecF64x2Abs, + wasm.OpcodeVecF32x4Ceil, wasm.OpcodeVecF32x4Floor, wasm.OpcodeVecF32x4Trunc, wasm.OpcodeVecF32x4Nearest, + wasm.OpcodeVecF64x2Ceil, wasm.OpcodeVecF64x2Floor, wasm.OpcodeVecF64x2Trunc, wasm.OpcodeVecF64x2Nearest, + wasm.OpcodeVecI16x8ExtendLowI8x16S, wasm.OpcodeVecI16x8ExtendHighI8x16S, wasm.OpcodeVecI16x8ExtendLowI8x16U, wasm.OpcodeVecI16x8ExtendHighI8x16U, + wasm.OpcodeVecI32x4ExtendLowI16x8S, wasm.OpcodeVecI32x4ExtendHighI16x8S, wasm.OpcodeVecI32x4ExtendLowI16x8U, wasm.OpcodeVecI32x4ExtendHighI16x8U, + wasm.OpcodeVecI64x2ExtendLowI32x4S, wasm.OpcodeVecI64x2ExtendHighI32x4S, wasm.OpcodeVecI64x2ExtendLowI32x4U, wasm.OpcodeVecI64x2ExtendHighI32x4U, + wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S, wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U, + wasm.OpcodeVecF64x2PromoteLowF32x4Zero, wasm.OpcodeVecF32x4DemoteF64x2Zero, + wasm.OpcodeVecF32x4ConvertI32x4S, wasm.OpcodeVecF32x4ConvertI32x4U, + wasm.OpcodeVecF64x2ConvertLowI32x4S, 
wasm.OpcodeVecF64x2ConvertLowI32x4U, + wasm.OpcodeVecI32x4TruncSatF32x4S, wasm.OpcodeVecI32x4TruncSatF32x4U, + wasm.OpcodeVecI32x4TruncSatF64x2SZero, wasm.OpcodeVecI32x4TruncSatF64x2UZero: + return signature_V128_V128, nil + case wasm.OpcodeVecV128Bitselect: + return signature_V128V128V128_V32, nil + case wasm.OpcodeVecI8x16Eq, wasm.OpcodeVecI8x16Ne, wasm.OpcodeVecI8x16LtS, wasm.OpcodeVecI8x16LtU, wasm.OpcodeVecI8x16GtS, + wasm.OpcodeVecI8x16GtU, wasm.OpcodeVecI8x16LeS, wasm.OpcodeVecI8x16LeU, wasm.OpcodeVecI8x16GeS, wasm.OpcodeVecI8x16GeU, + wasm.OpcodeVecI16x8Eq, wasm.OpcodeVecI16x8Ne, wasm.OpcodeVecI16x8LtS, wasm.OpcodeVecI16x8LtU, wasm.OpcodeVecI16x8GtS, + wasm.OpcodeVecI16x8GtU, wasm.OpcodeVecI16x8LeS, wasm.OpcodeVecI16x8LeU, wasm.OpcodeVecI16x8GeS, wasm.OpcodeVecI16x8GeU, + wasm.OpcodeVecI32x4Eq, wasm.OpcodeVecI32x4Ne, wasm.OpcodeVecI32x4LtS, wasm.OpcodeVecI32x4LtU, wasm.OpcodeVecI32x4GtS, + wasm.OpcodeVecI32x4GtU, wasm.OpcodeVecI32x4LeS, wasm.OpcodeVecI32x4LeU, wasm.OpcodeVecI32x4GeS, wasm.OpcodeVecI32x4GeU, + wasm.OpcodeVecI64x2Eq, wasm.OpcodeVecI64x2Ne, wasm.OpcodeVecI64x2LtS, wasm.OpcodeVecI64x2GtS, wasm.OpcodeVecI64x2LeS, + wasm.OpcodeVecI64x2GeS, wasm.OpcodeVecF32x4Eq, wasm.OpcodeVecF32x4Ne, wasm.OpcodeVecF32x4Lt, wasm.OpcodeVecF32x4Gt, + wasm.OpcodeVecF32x4Le, wasm.OpcodeVecF32x4Ge, wasm.OpcodeVecF64x2Eq, wasm.OpcodeVecF64x2Ne, wasm.OpcodeVecF64x2Lt, + wasm.OpcodeVecF64x2Gt, wasm.OpcodeVecF64x2Le, wasm.OpcodeVecF64x2Ge, + wasm.OpcodeVecI8x16Add, wasm.OpcodeVecI8x16AddSatS, wasm.OpcodeVecI8x16AddSatU, wasm.OpcodeVecI8x16Sub, + wasm.OpcodeVecI8x16SubSatS, wasm.OpcodeVecI8x16SubSatU, + wasm.OpcodeVecI16x8Add, wasm.OpcodeVecI16x8AddSatS, wasm.OpcodeVecI16x8AddSatU, wasm.OpcodeVecI16x8Sub, + wasm.OpcodeVecI16x8SubSatS, wasm.OpcodeVecI16x8SubSatU, wasm.OpcodeVecI16x8Mul, + wasm.OpcodeVecI32x4Add, wasm.OpcodeVecI32x4Sub, wasm.OpcodeVecI32x4Mul, + wasm.OpcodeVecI64x2Add, wasm.OpcodeVecI64x2Sub, wasm.OpcodeVecI64x2Mul, + wasm.OpcodeVecF32x4Add, 
wasm.OpcodeVecF32x4Sub, wasm.OpcodeVecF32x4Mul, wasm.OpcodeVecF32x4Div, + wasm.OpcodeVecF64x2Add, wasm.OpcodeVecF64x2Sub, wasm.OpcodeVecF64x2Mul, wasm.OpcodeVecF64x2Div, + wasm.OpcodeVecI8x16MinS, wasm.OpcodeVecI8x16MinU, wasm.OpcodeVecI8x16MaxS, wasm.OpcodeVecI8x16MaxU, wasm.OpcodeVecI8x16AvgrU, + wasm.OpcodeVecI16x8MinS, wasm.OpcodeVecI16x8MinU, wasm.OpcodeVecI16x8MaxS, wasm.OpcodeVecI16x8MaxU, wasm.OpcodeVecI16x8AvgrU, + wasm.OpcodeVecI32x4MinS, wasm.OpcodeVecI32x4MinU, wasm.OpcodeVecI32x4MaxS, wasm.OpcodeVecI32x4MaxU, + wasm.OpcodeVecF32x4Min, wasm.OpcodeVecF32x4Max, wasm.OpcodeVecF64x2Min, wasm.OpcodeVecF64x2Max, + wasm.OpcodeVecF32x4Pmin, wasm.OpcodeVecF32x4Pmax, wasm.OpcodeVecF64x2Pmin, wasm.OpcodeVecF64x2Pmax, + wasm.OpcodeVecI16x8Q15mulrSatS, + wasm.OpcodeVecI16x8ExtMulLowI8x16S, wasm.OpcodeVecI16x8ExtMulHighI8x16S, wasm.OpcodeVecI16x8ExtMulLowI8x16U, wasm.OpcodeVecI16x8ExtMulHighI8x16U, + wasm.OpcodeVecI32x4ExtMulLowI16x8S, wasm.OpcodeVecI32x4ExtMulHighI16x8S, wasm.OpcodeVecI32x4ExtMulLowI16x8U, wasm.OpcodeVecI32x4ExtMulHighI16x8U, + wasm.OpcodeVecI64x2ExtMulLowI32x4S, wasm.OpcodeVecI64x2ExtMulHighI32x4S, wasm.OpcodeVecI64x2ExtMulLowI32x4U, wasm.OpcodeVecI64x2ExtMulHighI32x4U, + wasm.OpcodeVecI32x4DotI16x8S, + wasm.OpcodeVecI8x16NarrowI16x8S, wasm.OpcodeVecI8x16NarrowI16x8U, wasm.OpcodeVecI16x8NarrowI32x4S, wasm.OpcodeVecI16x8NarrowI32x4U: + return signature_V128V128_V128, nil + default: + return nil, fmt.Errorf("unsupported vector instruction in interpreterir: %s", wasm.VectorInstructionName(vecOp)) + } + case wasm.OpcodeAtomicPrefix: + switch atomicOp := c.body[c.pc+1]; atomicOp { + case wasm.OpcodeAtomicMemoryNotify: + return signature_I32I32_I32, nil + case wasm.OpcodeAtomicMemoryWait32: + return signature_I32I32I64_I32, nil + case wasm.OpcodeAtomicMemoryWait64: + return signature_I32I64I64_I32, nil + case wasm.OpcodeAtomicFence: + return signature_None_None, nil + case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI32Load8U, wasm.OpcodeAtomicI32Load16U: 
+ return signature_I32_I32, nil + case wasm.OpcodeAtomicI64Load, wasm.OpcodeAtomicI64Load8U, wasm.OpcodeAtomicI64Load16U, wasm.OpcodeAtomicI64Load32U: + return signature_I32_I64, nil + case wasm.OpcodeAtomicI32Store, wasm.OpcodeAtomicI32Store8, wasm.OpcodeAtomicI32Store16: + return signature_I32I32_None, nil + case wasm.OpcodeAtomicI64Store, wasm.OpcodeAtomicI64Store8, wasm.OpcodeAtomicI64Store16, wasm.OpcodeAtomicI64Store32: + return signature_I32I64_None, nil + case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI32RmwXchg, + wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI32Rmw8XchgU, + wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI32Rmw16XchgU: + return signature_I32I32_I32, nil + case wasm.OpcodeAtomicI64RmwAdd, wasm.OpcodeAtomicI64RmwSub, wasm.OpcodeAtomicI64RmwAnd, wasm.OpcodeAtomicI64RmwOr, wasm.OpcodeAtomicI64RmwXor, wasm.OpcodeAtomicI64RmwXchg, + wasm.OpcodeAtomicI64Rmw8AddU, wasm.OpcodeAtomicI64Rmw8SubU, wasm.OpcodeAtomicI64Rmw8AndU, wasm.OpcodeAtomicI64Rmw8OrU, wasm.OpcodeAtomicI64Rmw8XorU, wasm.OpcodeAtomicI64Rmw8XchgU, + wasm.OpcodeAtomicI64Rmw16AddU, wasm.OpcodeAtomicI64Rmw16SubU, wasm.OpcodeAtomicI64Rmw16AndU, wasm.OpcodeAtomicI64Rmw16OrU, wasm.OpcodeAtomicI64Rmw16XorU, wasm.OpcodeAtomicI64Rmw16XchgU, + wasm.OpcodeAtomicI64Rmw32AddU, wasm.OpcodeAtomicI64Rmw32SubU, wasm.OpcodeAtomicI64Rmw32AndU, wasm.OpcodeAtomicI64Rmw32OrU, wasm.OpcodeAtomicI64Rmw32XorU, wasm.OpcodeAtomicI64Rmw32XchgU: + return signature_I32I64_I64, nil + case wasm.OpcodeAtomicI32RmwCmpxchg, wasm.OpcodeAtomicI32Rmw8CmpxchgU, wasm.OpcodeAtomicI32Rmw16CmpxchgU: + return signature_I32I32I32_I32, nil + case wasm.OpcodeAtomicI64RmwCmpxchg, 
wasm.OpcodeAtomicI64Rmw8CmpxchgU, wasm.OpcodeAtomicI64Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw32CmpxchgU: + return signature_I32I64I64_I64, nil + default: + return nil, fmt.Errorf("unsupported atomic instruction in interpreterir: %s", wasm.AtomicInstructionName(atomicOp)) + } + default: + return nil, fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op) + } +} + +// funcTypeToIRSignatures is the central cache for a module to get the *signature +// for function calls. +type funcTypeToIRSignatures struct { + directCalls []*signature + indirectCalls []*signature + wasmTypes []wasm.FunctionType +} + +// get returns the *signature for the direct or indirect function call against functions whose type is at `typeIndex`. +func (f *funcTypeToIRSignatures) get(typeIndex wasm.Index, indirect bool) *signature { + var sig *signature + if indirect { + sig = f.indirectCalls[typeIndex] + } else { + sig = f.directCalls[typeIndex] + } + if sig != nil { + return sig + } + + tp := &f.wasmTypes[typeIndex] + if indirect { + sig = &signature{ + in: make([]unsignedType, 0, len(tp.Params)+1), // +1 to reserve space for call indirect index. + out: make([]unsignedType, 0, len(tp.Results)), + } + } else { + sig = &signature{ + in: make([]unsignedType, 0, len(tp.Params)), + out: make([]unsignedType, 0, len(tp.Results)), + } + } + + for _, vt := range tp.Params { + sig.in = append(sig.in, wasmValueTypeTounsignedType(vt)) + } + for _, vt := range tp.Results { + sig.out = append(sig.out, wasmValueTypeTounsignedType(vt)) + } + + if indirect { + sig.in = append(sig.in, unsignedTypeI32) + f.indirectCalls[typeIndex] = sig + } else { + f.directCalls[typeIndex] = sig + } + return sig +} + +func wasmValueTypeTounsignedType(vt wasm.ValueType) unsignedType { + switch vt { + case wasm.ValueTypeI32: + return unsignedTypeI32 + case wasm.ValueTypeI64, + // From interpreterir layer, ref type values are opaque 64-bit pointers. 
+ wasm.ValueTypeExternref, wasm.ValueTypeFuncref: + return unsignedTypeI64 + case wasm.ValueTypeF32: + return unsignedTypeF32 + case wasm.ValueTypeF64: + return unsignedTypeF64 + case wasm.ValueTypeV128: + return unsignedTypeV128 + } + panic("unreachable") +} + +func wasmValueTypeToUnsignedOutSignature(vt wasm.ValueType) *signature { + switch vt { + case wasm.ValueTypeI32: + return signature_None_I32 + case wasm.ValueTypeI64, + // From interpreterir layer, ref type values are opaque 64-bit pointers. + wasm.ValueTypeExternref, wasm.ValueTypeFuncref: + return signature_None_I64 + case wasm.ValueTypeF32: + return signature_None_F32 + case wasm.ValueTypeF64: + return signature_None_F64 + case wasm.ValueTypeV128: + return signature_None_V128 + } + panic("unreachable") +} + +func wasmValueTypeToUnsignedInSignature(vt wasm.ValueType) *signature { + switch vt { + case wasm.ValueTypeI32: + return signature_I32_None + case wasm.ValueTypeI64, + // From interpreterir layer, ref type values are opaque 64-bit pointers. + wasm.ValueTypeExternref, wasm.ValueTypeFuncref: + return signature_I64_None + case wasm.ValueTypeF32: + return signature_F32_None + case wasm.ValueTypeF64: + return signature_F64_None + case wasm.ValueTypeV128: + return signature_V128_None + } + panic("unreachable") +} + +func wasmValueTypeToUnsignedInOutSignature(vt wasm.ValueType) *signature { + switch vt { + case wasm.ValueTypeI32: + return signature_I32_I32 + case wasm.ValueTypeI64, + // At interpreterir layer, ref type values are opaque 64-bit pointers. 
+ wasm.ValueTypeExternref, wasm.ValueTypeFuncref: + return signature_I64_I64 + case wasm.ValueTypeF32: + return signature_F32_F32 + case wasm.ValueTypeF64: + return signature_F64_F64 + case wasm.ValueTypeV128: + return signature_V128_V128 + } + panic("unreachable") +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/abi.go new file mode 100644 index 000000000..cf91c6b7a --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/abi.go @@ -0,0 +1,170 @@ +package backend + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +type ( + // FunctionABI represents the ABI information for a function which corresponds to a ssa.Signature. + FunctionABI struct { + Initialized bool + + Args, Rets []ABIArg + ArgStackSize, RetStackSize int64 + + ArgIntRealRegs byte + ArgFloatRealRegs byte + RetIntRealRegs byte + RetFloatRealRegs byte + } + + // ABIArg represents either argument or return value's location. + ABIArg struct { + // Index is the index of the argument. + Index int + // Kind is the kind of the argument. + Kind ABIArgKind + // Reg is valid if Kind == ABIArgKindReg. + // This VReg must be based on RealReg. + Reg regalloc.VReg + // Offset is valid if Kind == ABIArgKindStack. + // This is the offset from the beginning of either arg or ret stack slot. + Offset int64 + // Type is the type of the argument. + Type ssa.Type + } + + // ABIArgKind is the kind of ABI argument. + ABIArgKind byte +) + +const ( + // ABIArgKindReg represents an argument passed in a register. + ABIArgKindReg = iota + // ABIArgKindStack represents an argument passed in the stack. + ABIArgKindStack +) + +// String implements fmt.Stringer. 
+func (a *ABIArg) String() string { + return fmt.Sprintf("args[%d]: %s", a.Index, a.Kind) +} + +// String implements fmt.Stringer. +func (a ABIArgKind) String() string { + switch a { + case ABIArgKindReg: + return "reg" + case ABIArgKindStack: + return "stack" + default: + panic("BUG") + } +} + +// Init initializes the abiImpl for the given signature. +func (a *FunctionABI) Init(sig *ssa.Signature, argResultInts, argResultFloats []regalloc.RealReg) { + if len(a.Rets) < len(sig.Results) { + a.Rets = make([]ABIArg, len(sig.Results)) + } + a.Rets = a.Rets[:len(sig.Results)] + a.RetStackSize = a.setABIArgs(a.Rets, sig.Results, argResultInts, argResultFloats) + if argsNum := len(sig.Params); len(a.Args) < argsNum { + a.Args = make([]ABIArg, argsNum) + } + a.Args = a.Args[:len(sig.Params)] + a.ArgStackSize = a.setABIArgs(a.Args, sig.Params, argResultInts, argResultFloats) + + // Gather the real registers usages in arg/return. + a.ArgIntRealRegs, a.ArgFloatRealRegs = 0, 0 + a.RetIntRealRegs, a.RetFloatRealRegs = 0, 0 + for i := range a.Rets { + r := &a.Rets[i] + if r.Kind == ABIArgKindReg { + if r.Type.IsInt() { + a.RetIntRealRegs++ + } else { + a.RetFloatRealRegs++ + } + } + } + for i := range a.Args { + arg := &a.Args[i] + if arg.Kind == ABIArgKindReg { + if arg.Type.IsInt() { + a.ArgIntRealRegs++ + } else { + a.ArgFloatRealRegs++ + } + } + } + + a.Initialized = true +} + +// setABIArgs sets the ABI arguments in the given slice. This assumes that len(s) >= len(types) +// where if len(s) > len(types), the last elements of s is for the multi-return slot. +func (a *FunctionABI) setABIArgs(s []ABIArg, types []ssa.Type, ints, floats []regalloc.RealReg) (stackSize int64) { + il, fl := len(ints), len(floats) + + var stackOffset int64 + intParamIndex, floatParamIndex := 0, 0 + for i, typ := range types { + arg := &s[i] + arg.Index = i + arg.Type = typ + if typ.IsInt() { + if intParamIndex >= il { + arg.Kind = ABIArgKindStack + const slotSize = 8 // Align 8 bytes. 
+ arg.Offset = stackOffset + stackOffset += slotSize + } else { + arg.Kind = ABIArgKindReg + arg.Reg = regalloc.FromRealReg(ints[intParamIndex], regalloc.RegTypeInt) + intParamIndex++ + } + } else { + if floatParamIndex >= fl { + arg.Kind = ABIArgKindStack + slotSize := int64(8) // Align at least 8 bytes. + if typ.Bits() == 128 { // Vector. + slotSize = 16 + } + arg.Offset = stackOffset + stackOffset += slotSize + } else { + arg.Kind = ABIArgKindReg + arg.Reg = regalloc.FromRealReg(floats[floatParamIndex], regalloc.RegTypeFloat) + floatParamIndex++ + } + } + } + return stackOffset +} + +func (a *FunctionABI) AlignedArgResultStackSlotSize() uint32 { + stackSlotSize := a.RetStackSize + a.ArgStackSize + // Align stackSlotSize to 16 bytes. + stackSlotSize = (stackSlotSize + 15) &^ 15 + // Check overflow 32-bit. + if stackSlotSize > 0xFFFFFFFF { + panic("ABI stack slot size overflow") + } + return uint32(stackSlotSize) +} + +func (a *FunctionABI) ABIInfoAsUint64() uint64 { + return uint64(a.ArgIntRealRegs)<<56 | + uint64(a.ArgFloatRealRegs)<<48 | + uint64(a.RetIntRealRegs)<<40 | + uint64(a.RetFloatRealRegs)<<32 | + uint64(a.AlignedArgResultStackSlotSize()) +} + +func ABIInfoFromUint64(info uint64) (argIntRealRegs, argFloatRealRegs, retIntRealRegs, retFloatRealRegs byte, stackSlotSize uint32) { + return byte(info >> 56), byte(info >> 48), byte(info >> 40), byte(info >> 32), uint32(info) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/backend.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/backend.go new file mode 100644 index 000000000..dd67da43e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/backend.go @@ -0,0 +1,3 @@ +// Package backend must be free of Wasm-specific concept. In other words, +// this package must not import internal/wasm package. 
+package backend diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go new file mode 100644 index 000000000..59bbfe02d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go @@ -0,0 +1,417 @@ +package backend + +import ( + "context" + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// NewCompiler returns a new Compiler that can generate a machine code. +func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler { + return newCompiler(ctx, mach, builder) +} + +func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler { + argResultInts, argResultFloats := mach.ArgsResultsRegs() + c := &compiler{ + mach: mach, ssaBuilder: builder, + nextVRegID: regalloc.VRegIDNonReservedBegin, + argResultInts: argResultInts, + argResultFloats: argResultFloats, + } + mach.SetCompiler(c) + return c +} + +// Compiler is the backend of wazevo which takes ssa.Builder and Machine, +// use the information there to emit the final machine code. +type Compiler interface { + // SSABuilder returns the ssa.Builder used by this compiler. + SSABuilder() ssa.Builder + + // Compile executes the following steps: + // 1. Lower() + // 2. RegAlloc() + // 3. Finalize() + // 4. Encode() + // + // Each step can be called individually for testing purpose, therefore they are exposed in this interface too. + // + // The returned byte slices are the machine code and the relocation information for the machine code. + // The caller is responsible for copying them immediately since the compiler may reuse the buffer. 
+ Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error) + + // Lower lowers the given ssa.Instruction to the machine-specific instructions. + Lower() + + // RegAlloc performs the register allocation after Lower is called. + RegAlloc() + + // Finalize performs the finalization of the compilation, including machine code emission. + // This must be called after RegAlloc. + Finalize(ctx context.Context) error + + // Buf returns the buffer of the encoded machine code. This is only used for testing purpose. + Buf() []byte + + BufPtr() *[]byte + + // Format returns the debug string of the current state of the compiler. + Format() string + + // Init initializes the internal state of the compiler for the next compilation. + Init() + + // AllocateVReg allocates a new virtual register of the given type. + AllocateVReg(typ ssa.Type) regalloc.VReg + + // ValueDefinition returns the definition of the given value. + ValueDefinition(ssa.Value) *SSAValueDefinition + + // VRegOf returns the virtual register of the given ssa.Value. + VRegOf(value ssa.Value) regalloc.VReg + + // TypeOf returns the ssa.Type of the given virtual register. + TypeOf(regalloc.VReg) ssa.Type + + // MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID, + // and a refcount of 1. That means, the instruction can be merged/swapped within the current instruction group. + MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool + + // MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode, + // this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid. + // + // Note: caller should be careful to avoid excessive allocation on opcodes slice. + MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode + + // AddRelocationInfo appends the relocation information for the function reference at the current buffer offset. 
+ AddRelocationInfo(funcRef ssa.FuncRef) + + // AddSourceOffsetInfo appends the source offset information for the given offset. + AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) + + // SourceOffsetInfo returns the source offset information for the current buffer offset. + SourceOffsetInfo() []SourceOffsetInfo + + // EmitByte appends a byte to the buffer. Used during the code emission. + EmitByte(b byte) + + // Emit4Bytes appends 4 bytes to the buffer. Used during the code emission. + Emit4Bytes(b uint32) + + // Emit8Bytes appends 8 bytes to the buffer. Used during the code emission. + Emit8Bytes(b uint64) + + // GetFunctionABI returns the ABI information for the given signature. + GetFunctionABI(sig *ssa.Signature) *FunctionABI +} + +// RelocationInfo represents the relocation information for a call instruction. +type RelocationInfo struct { + // Offset represents the offset from the beginning of the machine code of either a function or the entire module. + Offset int64 + // Target is the target function of the call instruction. + FuncRef ssa.FuncRef +} + +// compiler implements Compiler. +type compiler struct { + mach Machine + currentGID ssa.InstructionGroupID + ssaBuilder ssa.Builder + // nextVRegID is the next virtual register ID to be allocated. + nextVRegID regalloc.VRegID + // ssaValueToVRegs maps ssa.ValueID to regalloc.VReg. + ssaValueToVRegs [] /* VRegID to */ regalloc.VReg + // ssaValueDefinitions maps ssa.ValueID to its definition. + ssaValueDefinitions []SSAValueDefinition + // ssaValueRefCounts is a cached list obtained by ssa.Builder.ValueRefCounts(). + ssaValueRefCounts []int + // returnVRegs is the list of virtual registers that store the return values. 
+ returnVRegs []regalloc.VReg + varEdges [][2]regalloc.VReg + varEdgeTypes []ssa.Type + constEdges []struct { + cInst *ssa.Instruction + dst regalloc.VReg + } + vRegSet []bool + vRegIDs []regalloc.VRegID + tempRegs []regalloc.VReg + tmpVals []ssa.Value + ssaTypeOfVRegID [] /* VRegID to */ ssa.Type + buf []byte + relocations []RelocationInfo + sourceOffsets []SourceOffsetInfo + // abis maps ssa.SignatureID to the ABI implementation. + abis []FunctionABI + argResultInts, argResultFloats []regalloc.RealReg +} + +// SourceOffsetInfo is a data to associate the source offset with the executable offset. +type SourceOffsetInfo struct { + // SourceOffset is the source offset in the original source code. + SourceOffset ssa.SourceOffset + // ExecutableOffset is the offset in the compiled executable. + ExecutableOffset int64 +} + +// Compile implements Compiler.Compile. +func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) { + c.Lower() + if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) { + fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format()) + } + if wazevoapi.DeterministicCompilationVerifierEnabled { + wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format()) + } + c.RegAlloc() + if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) { + fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format()) + } + if wazevoapi.DeterministicCompilationVerifierEnabled { + wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format()) + } + if err := c.Finalize(ctx); err != nil { + return nil, nil, err + } + if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) { + fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format()) + } + if wazevoapi.DeterministicCompilationVerifierEnabled { + 
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format()) + } + return c.buf, c.relocations, nil +} + +// RegAlloc implements Compiler.RegAlloc. +func (c *compiler) RegAlloc() { + c.mach.RegAlloc() +} + +// Finalize implements Compiler.Finalize. +func (c *compiler) Finalize(ctx context.Context) error { + c.mach.PostRegAlloc() + return c.mach.Encode(ctx) +} + +// setCurrentGroupID sets the current instruction group ID. +func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) { + c.currentGID = gid +} + +// assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder. +func (c *compiler) assignVirtualRegisters() { + builder := c.ssaBuilder + refCounts := builder.ValueRefCounts() + c.ssaValueRefCounts = refCounts + + need := len(refCounts) + if need >= len(c.ssaValueToVRegs) { + c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, need+1)...) + } + if need >= len(c.ssaValueDefinitions) { + c.ssaValueDefinitions = append(c.ssaValueDefinitions, make([]SSAValueDefinition, need+1)...) + } + + for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() { + // First we assign a virtual register to each parameter. + for i := 0; i < blk.Params(); i++ { + p := blk.Param(i) + pid := p.ID() + typ := p.Type() + vreg := c.AllocateVReg(typ) + c.ssaValueToVRegs[pid] = vreg + c.ssaValueDefinitions[pid] = SSAValueDefinition{BlockParamValue: p, BlkParamVReg: vreg} + c.ssaTypeOfVRegID[vreg.ID()] = p.Type() + } + + // Assigns each value to a virtual register produced by instructions. 
+ for cur := blk.Root(); cur != nil; cur = cur.Next() { + r, rs := cur.Returns() + var N int + if r.Valid() { + id := r.ID() + ssaTyp := r.Type() + typ := r.Type() + vReg := c.AllocateVReg(typ) + c.ssaValueToVRegs[id] = vReg + c.ssaValueDefinitions[id] = SSAValueDefinition{ + Instr: cur, + N: 0, + RefCount: refCounts[id], + } + c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp + N++ + } + for _, r := range rs { + id := r.ID() + ssaTyp := r.Type() + vReg := c.AllocateVReg(ssaTyp) + c.ssaValueToVRegs[id] = vReg + c.ssaValueDefinitions[id] = SSAValueDefinition{ + Instr: cur, + N: N, + RefCount: refCounts[id], + } + c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp + N++ + } + } + } + + for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ { + typ := retBlk.Param(i).Type() + vReg := c.AllocateVReg(typ) + c.returnVRegs = append(c.returnVRegs, vReg) + c.ssaTypeOfVRegID[vReg.ID()] = typ + } +} + +// AllocateVReg implements Compiler.AllocateVReg. +func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg { + regType := regalloc.RegTypeOf(typ) + r := regalloc.VReg(c.nextVRegID).SetRegType(regType) + + id := r.ID() + if int(id) >= len(c.ssaTypeOfVRegID) { + c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...) + } + c.ssaTypeOfVRegID[id] = typ + c.nextVRegID++ + return r +} + +// Init implements Compiler.Init. +func (c *compiler) Init() { + c.currentGID = 0 + c.nextVRegID = regalloc.VRegIDNonReservedBegin + c.returnVRegs = c.returnVRegs[:0] + c.mach.Reset() + c.varEdges = c.varEdges[:0] + c.constEdges = c.constEdges[:0] + c.buf = c.buf[:0] + c.sourceOffsets = c.sourceOffsets[:0] + c.relocations = c.relocations[:0] +} + +// ValueDefinition implements Compiler.ValueDefinition. +func (c *compiler) ValueDefinition(value ssa.Value) *SSAValueDefinition { + return &c.ssaValueDefinitions[value.ID()] +} + +// VRegOf implements Compiler.VRegOf. 
+func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg { + return c.ssaValueToVRegs[value.ID()] +} + +// Format implements Compiler.Format. +func (c *compiler) Format() string { + return c.mach.Format() +} + +// TypeOf implements Compiler.Format. +func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type { + return c.ssaTypeOfVRegID[v.ID()] +} + +// MatchInstr implements Compiler.MatchInstr. +func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool { + instr := def.Instr + return def.IsFromInstr() && + instr.Opcode() == opcode && + instr.GroupID() == c.currentGID && + def.RefCount < 2 +} + +// MatchInstrOneOf implements Compiler.MatchInstrOneOf. +func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode { + instr := def.Instr + if !def.IsFromInstr() { + return ssa.OpcodeInvalid + } + + if instr.GroupID() != c.currentGID { + return ssa.OpcodeInvalid + } + + if def.RefCount >= 2 { + return ssa.OpcodeInvalid + } + + opcode := instr.Opcode() + for _, op := range opcodes { + if opcode == op { + return opcode + } + } + return ssa.OpcodeInvalid +} + +// SSABuilder implements Compiler .SSABuilder. +func (c *compiler) SSABuilder() ssa.Builder { + return c.ssaBuilder +} + +// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo. +func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) { + c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{ + SourceOffset: sourceOffset, + ExecutableOffset: executableOffset, + }) +} + +// SourceOffsetInfo implements Compiler.SourceOffsetInfo. +func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo { + return c.sourceOffsets +} + +// AddRelocationInfo implements Compiler.AddRelocationInfo. +func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) { + c.relocations = append(c.relocations, RelocationInfo{ + Offset: int64(len(c.buf)), + FuncRef: funcRef, + }) +} + +// Emit8Bytes implements Compiler.Emit8Bytes. 
+func (c *compiler) Emit8Bytes(b uint64) { + c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24), byte(b>>32), byte(b>>40), byte(b>>48), byte(b>>56)) +} + +// Emit4Bytes implements Compiler.Emit4Bytes. +func (c *compiler) Emit4Bytes(b uint32) { + c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24)) +} + +// EmitByte implements Compiler.EmitByte. +func (c *compiler) EmitByte(b byte) { + c.buf = append(c.buf, b) +} + +// Buf implements Compiler.Buf. +func (c *compiler) Buf() []byte { + return c.buf +} + +// BufPtr implements Compiler.BufPtr. +func (c *compiler) BufPtr() *[]byte { + return &c.buf +} + +func (c *compiler) GetFunctionABI(sig *ssa.Signature) *FunctionABI { + if int(sig.ID) >= len(c.abis) { + c.abis = append(c.abis, make([]FunctionABI, int(sig.ID)+1)...) + } + + abi := &c.abis[sig.ID] + if abi.Initialized { + return abi + } + + abi.Init(sig, c.argResultInts, c.argResultFloats) + return abi +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go new file mode 100644 index 000000000..80e65668a --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go @@ -0,0 +1,226 @@ +package backend + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +// Lower implements Compiler.Lower. +func (c *compiler) Lower() { + c.assignVirtualRegisters() + c.mach.SetCurrentABI(c.GetFunctionABI(c.ssaBuilder.Signature())) + c.mach.ExecutableContext().StartLoweringFunction(c.ssaBuilder.BlockIDMax()) + c.lowerBlocks() +} + +// lowerBlocks lowers each block in the ssa.Builder. 
+func (c *compiler) lowerBlocks() { + builder := c.ssaBuilder + for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() { + c.lowerBlock(blk) + } + + ectx := c.mach.ExecutableContext() + // After lowering all blocks, we need to link adjacent blocks to layout one single instruction list. + var prev ssa.BasicBlock + for next := builder.BlockIteratorReversePostOrderBegin(); next != nil; next = builder.BlockIteratorReversePostOrderNext() { + if prev != nil { + ectx.LinkAdjacentBlocks(prev, next) + } + prev = next + } +} + +func (c *compiler) lowerBlock(blk ssa.BasicBlock) { + mach := c.mach + ectx := mach.ExecutableContext() + ectx.StartBlock(blk) + + // We traverse the instructions in reverse order because we might want to lower multiple + // instructions together. + cur := blk.Tail() + + // First gather the branching instructions at the end of the blocks. + var br0, br1 *ssa.Instruction + if cur.IsBranching() { + br0 = cur + cur = cur.Prev() + if cur != nil && cur.IsBranching() { + br1 = cur + cur = cur.Prev() + } + } + + if br0 != nil { + c.lowerBranches(br0, br1) + } + + if br1 != nil && br0 == nil { + panic("BUG? when a block has conditional branch but doesn't end with an unconditional branch?") + } + + // Now start lowering the non-branching instructions. + for ; cur != nil; cur = cur.Prev() { + c.setCurrentGroupID(cur.GroupID()) + if cur.Lowered() { + continue + } + + switch cur.Opcode() { + case ssa.OpcodeReturn: + rets := cur.ReturnVals() + if len(rets) > 0 { + c.mach.LowerReturns(rets) + } + c.mach.InsertReturn() + default: + mach.LowerInstr(cur) + } + ectx.FlushPendingInstructions() + } + + // Finally, if this is the entry block, we have to insert copies of arguments from the real location to the VReg. 
+ if blk.EntryBlock() { + c.lowerFunctionArguments(blk) + } + + ectx.EndBlock() +} + +// lowerBranches is called right after StartBlock and before any LowerInstr call if +// there are branches to the given block. br0 is the very end of the block and b1 is the before the br0 if it exists. +// At least br0 is not nil, but br1 can be nil if there's no branching before br0. +// +// See ssa.Instruction IsBranching, and the comment on ssa.BasicBlock. +func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) { + ectx := c.mach.ExecutableContext() + + c.setCurrentGroupID(br0.GroupID()) + c.mach.LowerSingleBranch(br0) + ectx.FlushPendingInstructions() + if br1 != nil { + c.setCurrentGroupID(br1.GroupID()) + c.mach.LowerConditionalBranch(br1) + ectx.FlushPendingInstructions() + } + + if br0.Opcode() == ssa.OpcodeJump { + _, args, target := br0.BranchData() + argExists := len(args) != 0 + if argExists && br1 != nil { + panic("BUG: critical edge split failed") + } + if argExists && target.ReturnBlock() { + if len(args) > 0 { + c.mach.LowerReturns(args) + } + } else if argExists { + c.lowerBlockArguments(args, target) + } + } + ectx.FlushPendingInstructions() +} + +func (c *compiler) lowerFunctionArguments(entry ssa.BasicBlock) { + ectx := c.mach.ExecutableContext() + + c.tmpVals = c.tmpVals[:0] + for i := 0; i < entry.Params(); i++ { + p := entry.Param(i) + if c.ssaValueRefCounts[p.ID()] > 0 { + c.tmpVals = append(c.tmpVals, p) + } else { + // If the argument is not used, we can just pass an invalid value. + c.tmpVals = append(c.tmpVals, ssa.ValueInvalid) + } + } + c.mach.LowerParams(c.tmpVals) + ectx.FlushPendingInstructions() +} + +// lowerBlockArguments lowers how to pass arguments to the given successor block. 
+func (c *compiler) lowerBlockArguments(args []ssa.Value, succ ssa.BasicBlock) { + if len(args) != succ.Params() { + panic("BUG: mismatched number of arguments") + } + + c.varEdges = c.varEdges[:0] + c.varEdgeTypes = c.varEdgeTypes[:0] + c.constEdges = c.constEdges[:0] + for i := 0; i < len(args); i++ { + dst := succ.Param(i) + src := args[i] + + dstReg := c.VRegOf(dst) + srcDef := c.ssaValueDefinitions[src.ID()] + if srcDef.IsFromInstr() && srcDef.Instr.Constant() { + c.constEdges = append(c.constEdges, struct { + cInst *ssa.Instruction + dst regalloc.VReg + }{cInst: srcDef.Instr, dst: dstReg}) + } else { + srcReg := c.VRegOf(src) + // Even when the src=dst, insert the move so that we can keep such registers keep-alive. + c.varEdges = append(c.varEdges, [2]regalloc.VReg{srcReg, dstReg}) + c.varEdgeTypes = append(c.varEdgeTypes, src.Type()) + } + } + + // Check if there's an overlap among the dsts and srcs in varEdges. + c.vRegIDs = c.vRegIDs[:0] + for _, edge := range c.varEdges { + src := edge[0].ID() + if int(src) >= len(c.vRegSet) { + c.vRegSet = append(c.vRegSet, make([]bool, src+1)...) + } + c.vRegSet[src] = true + c.vRegIDs = append(c.vRegIDs, src) + } + separated := true + for _, edge := range c.varEdges { + dst := edge[1].ID() + if int(dst) >= len(c.vRegSet) { + c.vRegSet = append(c.vRegSet, make([]bool, dst+1)...) + } else { + if c.vRegSet[dst] { + separated = false + break + } + } + } + for _, id := range c.vRegIDs { + c.vRegSet[id] = false // reset for the next use. + } + + if separated { + // If there's no overlap, we can simply move the source to destination. + for i, edge := range c.varEdges { + src, dst := edge[0], edge[1] + c.mach.InsertMove(dst, src, c.varEdgeTypes[i]) + } + } else { + // Otherwise, we allocate a temporary registers and move the source to the temporary register, + // + // First move all of them to temporary registers. 
+ c.tempRegs = c.tempRegs[:0] + for i, edge := range c.varEdges { + src := edge[0] + typ := c.varEdgeTypes[i] + temp := c.AllocateVReg(typ) + c.tempRegs = append(c.tempRegs, temp) + c.mach.InsertMove(temp, src, typ) + } + // Then move the temporary registers to the destination. + for i, edge := range c.varEdges { + temp := c.tempRegs[i] + dst := edge[1] + c.mach.InsertMove(dst, temp, c.varEdgeTypes[i]) + } + } + + // Finally, move the constants. + for _, edge := range c.constEdges { + cInst, dst := edge.cInst, edge.dst + c.mach.InsertLoadConstantBlockArg(cInst, dst) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go new file mode 100644 index 000000000..81c6a6b62 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go @@ -0,0 +1,219 @@ +package backend + +import ( + "fmt" + "math" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +type ExecutableContext interface { + // StartLoweringFunction is called when the lowering of the given function is started. + // maximumBlockID is the maximum value of ssa.BasicBlockID existing in the function. + StartLoweringFunction(maximumBlockID ssa.BasicBlockID) + + // LinkAdjacentBlocks is called after finished lowering all blocks in order to create one single instruction list. + LinkAdjacentBlocks(prev, next ssa.BasicBlock) + + // StartBlock is called when the compilation of the given block is started. + // The order of this being called is the reverse post order of the ssa.BasicBlock(s) as we iterate with + // ssa.Builder BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderEnd. + StartBlock(ssa.BasicBlock) + + // EndBlock is called when the compilation of the current block is finished. 
+ EndBlock() + + // FlushPendingInstructions flushes the pending instructions to the buffer. + // This will be called after the lowering of each SSA Instruction. + FlushPendingInstructions() +} + +type ExecutableContextT[Instr any] struct { + CurrentSSABlk ssa.BasicBlock + + // InstrPool is the InstructionPool of instructions. + InstructionPool wazevoapi.Pool[Instr] + asNop func(*Instr) + setNext func(*Instr, *Instr) + setPrev func(*Instr, *Instr) + + // RootInstr is the root instruction of the executable. + RootInstr *Instr + labelPositionPool wazevoapi.Pool[LabelPosition[Instr]] + NextLabel Label + // LabelPositions maps a label to the instructions of the region which the label represents. + LabelPositions map[Label]*LabelPosition[Instr] + OrderedBlockLabels []*LabelPosition[Instr] + + // PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. + PerBlockHead, PerBlockEnd *Instr + // PendingInstructions are the instructions which are not yet emitted into the instruction list. + PendingInstructions []*Instr + + // SsaBlockIDToLabels maps an SSA block ID to the label. + SsaBlockIDToLabels []Label +} + +func NewExecutableContextT[Instr any]( + resetInstruction func(*Instr), + setNext func(*Instr, *Instr), + setPrev func(*Instr, *Instr), + asNop func(*Instr), +) *ExecutableContextT[Instr] { + return &ExecutableContextT[Instr]{ + InstructionPool: wazevoapi.NewPool[Instr](resetInstruction), + asNop: asNop, + setNext: setNext, + setPrev: setPrev, + labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]), + LabelPositions: make(map[Label]*LabelPosition[Instr]), + NextLabel: LabelInvalid, + } +} + +func resetLabelPosition[T any](l *LabelPosition[T]) { + *l = LabelPosition[T]{} +} + +// StartLoweringFunction implements ExecutableContext. 
+func (e *ExecutableContextT[Instr]) StartLoweringFunction(max ssa.BasicBlockID) { + imax := int(max) + if len(e.SsaBlockIDToLabels) <= imax { + // Eagerly allocate labels for the blocks since the underlying slice will be used for the next iteration. + e.SsaBlockIDToLabels = append(e.SsaBlockIDToLabels, make([]Label, imax+1)...) + } +} + +func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) { + e.CurrentSSABlk = blk + + l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()] + if l == LabelInvalid { + l = e.AllocateLabel() + e.SsaBlockIDToLabels[blk.ID()] = l + } + + end := e.allocateNop0() + e.PerBlockHead, e.PerBlockEnd = end, end + + labelPos, ok := e.LabelPositions[l] + if !ok { + labelPos = e.AllocateLabelPosition(l) + e.LabelPositions[l] = labelPos + } + e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos) + labelPos.Begin, labelPos.End = end, end + labelPos.SB = blk +} + +// EndBlock implements ExecutableContext. +func (e *ExecutableContextT[T]) EndBlock() { + // Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. + e.insertAtPerBlockHead(e.allocateNop0()) + + l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()] + e.LabelPositions[l].Begin = e.PerBlockHead + + if e.CurrentSSABlk.EntryBlock() { + e.RootInstr = e.PerBlockHead + } +} + +func (e *ExecutableContextT[T]) insertAtPerBlockHead(i *T) { + if e.PerBlockHead == nil { + e.PerBlockHead = i + e.PerBlockEnd = i + return + } + e.setNext(i, e.PerBlockHead) + e.setPrev(e.PerBlockHead, i) + e.PerBlockHead = i +} + +// FlushPendingInstructions implements ExecutableContext. +func (e *ExecutableContextT[T]) FlushPendingInstructions() { + l := len(e.PendingInstructions) + if l == 0 { + return + } + for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. 
+ e.insertAtPerBlockHead(e.PendingInstructions[i]) + } + e.PendingInstructions = e.PendingInstructions[:0] +} + +func (e *ExecutableContextT[T]) Reset() { + e.labelPositionPool.Reset() + e.InstructionPool.Reset() + for l := Label(0); l <= e.NextLabel; l++ { + delete(e.LabelPositions, l) + } + e.PendingInstructions = e.PendingInstructions[:0] + e.OrderedBlockLabels = e.OrderedBlockLabels[:0] + e.RootInstr = nil + e.SsaBlockIDToLabels = e.SsaBlockIDToLabels[:0] + e.PerBlockHead, e.PerBlockEnd = nil, nil + e.NextLabel = LabelInvalid +} + +// AllocateLabel allocates an unused label. +func (e *ExecutableContextT[T]) AllocateLabel() Label { + e.NextLabel++ + return e.NextLabel +} + +func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] { + l := e.labelPositionPool.Allocate() + l.L = la + return l +} + +func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label { + if blk.ReturnBlock() { + return LabelReturn + } + l := e.SsaBlockIDToLabels[blk.ID()] + if l == LabelInvalid { + l = e.AllocateLabel() + e.SsaBlockIDToLabels[blk.ID()] = l + } + return l +} + +func (e *ExecutableContextT[T]) allocateNop0() *T { + i := e.InstructionPool.Allocate() + e.asNop(i) + return i +} + +// LinkAdjacentBlocks implements backend.Machine. +func (e *ExecutableContextT[T]) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { + prevLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(prev)] + nextLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(next)] + e.setNext(prevLabelPos.End, nextLabelPos.Begin) +} + +// LabelPosition represents the regions of the generated code which the label represents. +type LabelPosition[Instr any] struct { + SB ssa.BasicBlock + L Label + Begin, End *Instr + BinaryOffset int64 +} + +// Label represents a position in the generated code which is either +// a real instruction or the constant InstructionPool (e.g. jump tables). +// +// This is exactly the same as the traditional "label" in assembly code. 
+type Label uint32 + +const ( + LabelInvalid Label = 0 + LabelReturn Label = math.MaxUint32 +) + +// String implements backend.Machine. +func (l Label) String() string { + return fmt.Sprintf("L%d", l) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/go_call.go new file mode 100644 index 000000000..6fe6d7b3c --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/go_call.go @@ -0,0 +1,33 @@ +package backend + +import "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + +// GoFunctionCallRequiredStackSize returns the size of the stack required for the Go function call. +// argBegin is the index of the first argument in the signature which is not either execution context or module context. +func GoFunctionCallRequiredStackSize(sig *ssa.Signature, argBegin int) (ret, retUnaligned int64) { + var paramNeededInBytes, resultNeededInBytes int64 + for _, p := range sig.Params[argBegin:] { + s := int64(p.Size()) + if s < 8 { + s = 8 // We use uint64 for all basic types, except SIMD v128. + } + paramNeededInBytes += s + } + for _, r := range sig.Results { + s := int64(r.Size()) + if s < 8 { + s = 8 // We use uint64 for all basic types, except SIMD v128. + } + resultNeededInBytes += s + } + + if paramNeededInBytes > resultNeededInBytes { + ret = paramNeededInBytes + } else { + ret = resultNeededInBytes + } + retUnaligned = ret + // Align to 16 bytes. 
+ ret = (ret + 15) &^ 15 + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi.go new file mode 100644 index 000000000..130f8c621 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi.go @@ -0,0 +1,186 @@ +package amd64 + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +// For the details of the ABI, see: +// https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#amd64-architecture + +var ( + intArgResultRegs = []regalloc.RealReg{rax, rbx, rcx, rdi, rsi, r8, r9, r10, r11} + floatArgResultRegs = []regalloc.RealReg{xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7} +) + +var regInfo = ®alloc.RegisterInfo{ + AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{ + regalloc.RegTypeInt: { + rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15, + }, + regalloc.RegTypeFloat: { + xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, + }, + }, + CalleeSavedRegisters: regalloc.NewRegSet( + rdx, r12, r13, r14, r15, + xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, + ), + CallerSavedRegisters: regalloc.NewRegSet( + rax, rcx, rbx, rsi, rdi, r8, r9, r10, r11, + xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, + ), + RealRegToVReg: []regalloc.VReg{ + rax: raxVReg, rcx: rcxVReg, rdx: rdxVReg, rbx: rbxVReg, rsp: rspVReg, rbp: rbpVReg, rsi: rsiVReg, rdi: rdiVReg, + r8: r8VReg, r9: r9VReg, r10: r10VReg, r11: r11VReg, r12: r12VReg, r13: r13VReg, r14: r14VReg, r15: r15VReg, + xmm0: xmm0VReg, xmm1: xmm1VReg, xmm2: xmm2VReg, xmm3: xmm3VReg, xmm4: xmm4VReg, xmm5: xmm5VReg, xmm6: xmm6VReg, + xmm7: xmm7VReg, xmm8: 
xmm8VReg, xmm9: xmm9VReg, xmm10: xmm10VReg, xmm11: xmm11VReg, xmm12: xmm12VReg, + xmm13: xmm13VReg, xmm14: xmm14VReg, xmm15: xmm15VReg, + }, + RealRegName: func(r regalloc.RealReg) string { return regNames[r] }, + RealRegType: func(r regalloc.RealReg) regalloc.RegType { + if r < xmm0 { + return regalloc.RegTypeInt + } + return regalloc.RegTypeFloat + }, +} + +// ArgsResultsRegs implements backend.Machine. +func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) { + return intArgResultRegs, floatArgResultRegs +} + +// LowerParams implements backend.Machine. +func (m *machine) LowerParams(args []ssa.Value) { + a := m.currentABI + + for i, ssaArg := range args { + if !ssaArg.Valid() { + continue + } + reg := m.c.VRegOf(ssaArg) + arg := &a.Args[i] + if arg.Kind == backend.ABIArgKindReg { + m.InsertMove(reg, arg.Reg, arg.Type) + } else { + // + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | + // | ReturnAddress | + // | Caller_RBP | + // +-----------------+ <-- RBP + // | ........... | + // | clobbered M | + // | ............ | + // | clobbered 0 | + // | spill slot N | + // | ........... | + // | spill slot 0 | + // RSP--> +-----------------+ + // (low address) + + // Load the value from the arg stack slot above the current RBP. + load := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmRBPReg(uint32(arg.Offset + 16))) + switch arg.Type { + case ssa.TypeI32: + load.asMovzxRmR(extModeLQ, mem, reg) + case ssa.TypeI64: + load.asMov64MR(mem, reg) + case ssa.TypeF32: + load.asXmmUnaryRmR(sseOpcodeMovss, mem, reg) + case ssa.TypeF64: + load.asXmmUnaryRmR(sseOpcodeMovsd, mem, reg) + case ssa.TypeV128: + load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, reg) + default: + panic("BUG") + } + m.insert(load) + } + } +} + +// LowerReturns implements backend.Machine. 
+func (m *machine) LowerReturns(rets []ssa.Value) { + // Load the XMM registers first as it might need a temporary register to inline + // constant return. + a := m.currentABI + for i, ret := range rets { + r := &a.Rets[i] + if !r.Type.IsInt() { + m.LowerReturn(ret, r) + } + } + // Then load the GPR registers. + for i, ret := range rets { + r := &a.Rets[i] + if r.Type.IsInt() { + m.LowerReturn(ret, r) + } + } +} + +func (m *machine) LowerReturn(ret ssa.Value, r *backend.ABIArg) { + reg := m.c.VRegOf(ret) + if def := m.c.ValueDefinition(ret); def.IsFromInstr() { + // Constant instructions are inlined. + if inst := def.Instr; inst.Constant() { + m.insertLoadConstant(inst, reg) + } + } + if r.Kind == backend.ABIArgKindReg { + m.InsertMove(r.Reg, reg, ret.Type()) + } else { + // + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | + // | ReturnAddress | + // | Caller_RBP | + // +-----------------+ <-- RBP + // | ........... | + // | clobbered M | + // | ............ | + // | clobbered 0 | + // | spill slot N | + // | ........... | + // | spill slot 0 | + // RSP--> +-----------------+ + // (low address) + + // Store the value to the return stack slot above the current RBP. 
+ store := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmRBPReg(uint32(m.currentABI.ArgStackSize + 16 + r.Offset))) + switch r.Type { + case ssa.TypeI32: + store.asMovRM(reg, mem, 4) + case ssa.TypeI64: + store.asMovRM(reg, mem, 8) + case ssa.TypeF32: + store.asXmmMovRM(sseOpcodeMovss, reg, mem) + case ssa.TypeF64: + store.asXmmMovRM(sseOpcodeMovsd, reg, mem) + case ssa.TypeV128: + store.asXmmMovRM(sseOpcodeMovdqu, reg, mem) + } + m.insert(store) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.go new file mode 100644 index 000000000..cbf1cfdc5 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.go @@ -0,0 +1,9 @@ +package amd64 + +// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below. +// This implements wazevo.entrypoint, and see the comments there for detail. +func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr) + +// afterGoFunctionCallEntrypoint enters the machine code after growing the stack. +// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail. 
+func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.s b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.s new file mode 100644 index 000000000..e9cb131d1 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.s @@ -0,0 +1,29 @@ +#include "funcdata.h" +#include "textflag.h" + +// entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr +TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48 + MOVQ preambleExecutable+0(FP), R11 + MOVQ functionExectuable+8(FP), R14 + MOVQ executionContextPtr+16(FP), AX // First argument is passed in AX. + MOVQ moduleContextPtr+24(FP), BX // Second argument is passed in BX. + MOVQ paramResultSlicePtr+32(FP), R12 + MOVQ goAllocatedStackSlicePtr+40(FP), R13 + JMP R11 + +// afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) +TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32 + MOVQ executable+0(FP), CX + MOVQ executionContextPtr+8(FP), AX // First argument is passed in AX. + + // Save the stack pointer and frame pointer. + MOVQ BP, 16(AX) // 16 == ExecutionContextOffsetOriginalFramePointer + MOVQ SP, 24(AX) // 24 == ExecutionContextOffsetOriginalStackPointer + + // Then set the stack pointer and frame pointer to the values we got from the Go runtime. + MOVQ framePointer+24(FP), BP + + // WARNING: do not update SP before BP, because the Go translates (FP) as (SP) + 8. 
+ MOVQ stackPointer+16(FP), SP + + JMP CX diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go new file mode 100644 index 000000000..882d06c06 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go @@ -0,0 +1,248 @@ +package amd64 + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +var ( + executionContextPtrReg = raxVReg + + // Followings are callee saved registers. They can be used freely in the entry preamble + // since the preamble is called via Go assembly function which has stack-based ABI. + + // savedExecutionContextPtr also must be a callee-saved reg so that they can be used in the prologue and epilogue. + savedExecutionContextPtr = rdxVReg + // paramResultSlicePtr must match with entrypoint function in abi_entry_amd64.s. + paramResultSlicePtr = r12VReg + // goAllocatedStackPtr must match with entrypoint function in abi_entry_amd64.s. + goAllocatedStackPtr = r13VReg + // functionExecutable must match with entrypoint function in abi_entry_amd64.s. + functionExecutable = r14VReg + tmpIntReg = r15VReg + tmpXmmReg = xmm15VReg +) + +// CompileEntryPreamble implements backend.Machine. 
+func (m *machine) CompileEntryPreamble(sig *ssa.Signature) []byte { + root := m.compileEntryPreamble(sig) + m.encodeWithoutSSA(root) + buf := m.c.Buf() + return buf +} + +func (m *machine) compileEntryPreamble(sig *ssa.Signature) *instruction { + abi := backend.FunctionABI{} + abi.Init(sig, intArgResultRegs, floatArgResultRegs) + + root := m.allocateNop() + + //// ----------------------------------- prologue ----------------------------------- //// + + // First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well. + // mov %executionContextPtrReg, %savedExecutionContextPtr + cur := m.move64(executionContextPtrReg, savedExecutionContextPtr, root) + + // Next is to save the original RBP and RSP into the execution context. + cur = m.saveOriginalRSPRBP(cur) + + // Now set the RSP to the Go-allocated stack pointer. + // mov %goAllocatedStackPtr, %rsp + cur = m.move64(goAllocatedStackPtr, rspVReg, cur) + + if stackSlotSize := abi.AlignedArgResultStackSlotSize(); stackSlotSize > 0 { + // Allocate stack slots for the arguments and return values. + // sub $stackSlotSize, %rsp + spDec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(stackSlotSize)), rspVReg, true) + cur = linkInstr(cur, spDec) + } + + var offset uint32 + for i := range abi.Args { + if i < 2 { + // module context ptr and execution context ptr are passed in rax and rbx by the Go assembly function. + continue + } + arg := &abi.Args[i] + cur = m.goEntryPreamblePassArg(cur, paramResultSlicePtr, offset, arg) + if arg.Type == ssa.TypeV128 { + offset += 16 + } else { + offset += 8 + } + } + + // Zero out RBP so that the unwind/stack growth code can correctly detect the end of the stack. + zerosRbp := m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(rbpVReg), rbpVReg, true) + cur = linkInstr(cur, zerosRbp) + + // Now ready to call the real function. 
Note that at this point stack pointer is already set to the Go-allocated, + // which is aligned to 16 bytes. + call := m.allocateInstr().asCallIndirect(newOperandReg(functionExecutable), &abi) + cur = linkInstr(cur, call) + + //// ----------------------------------- epilogue ----------------------------------- //// + + // Read the results from regs and the stack, and set them correctly into the paramResultSlicePtr. + offset = 0 + for i := range abi.Rets { + r := &abi.Rets[i] + cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, offset, r, uint32(abi.ArgStackSize)) + if r.Type == ssa.TypeV128 { + offset += 16 + } else { + offset += 8 + } + } + + // Finally, restore the original RBP and RSP. + cur = m.restoreOriginalRSPRBP(cur) + + ret := m.allocateInstr().asRet() + linkInstr(cur, ret) + return root +} + +// saveOriginalRSPRBP saves the original RSP and RBP into the execution context. +func (m *machine) saveOriginalRSPRBP(cur *instruction) *instruction { + // mov %rbp, wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg) + // mov %rsp, wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg) + cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, true, cur) + cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, true, cur) + return cur +} + +// restoreOriginalRSPRBP restores the original RSP and RBP from the execution context. 
+func (m *machine) restoreOriginalRSPRBP(cur *instruction) *instruction { + // mov wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg), %rbp + // mov wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg), %rsp + cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, false, cur) + cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, false, cur) + return cur +} + +func (m *machine) move64(src, dst regalloc.VReg, prev *instruction) *instruction { + mov := m.allocateInstr().asMovRR(src, dst, true) + return linkInstr(prev, mov) +} + +func (m *machine) loadOrStore64AtExecutionCtx(execCtx regalloc.VReg, offset wazevoapi.Offset, r regalloc.VReg, store bool, prev *instruction) *instruction { + mem := newOperandMem(m.newAmodeImmReg(offset.U32(), execCtx)) + instr := m.allocateInstr() + if store { + instr.asMovRM(r, mem, 8) + } else { + instr.asMov64MR(mem, r) + } + return linkInstr(prev, instr) +} + +// This is for debugging. 
+func (m *machine) linkUD2(cur *instruction) *instruction { //nolint + return linkInstr(cur, m.allocateInstr().asUD2()) +} + +func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, offsetInParamSlice uint32, arg *backend.ABIArg) *instruction { + var dst regalloc.VReg + argTyp := arg.Type + if arg.Kind == backend.ABIArgKindStack { + // Caller saved registers ca + switch argTyp { + case ssa.TypeI32, ssa.TypeI64: + dst = tmpIntReg + case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: + dst = tmpXmmReg + default: + panic("BUG") + } + } else { + dst = arg.Reg + } + + load := m.allocateInstr() + a := newOperandMem(m.newAmodeImmReg(offsetInParamSlice, paramSlicePtr)) + switch arg.Type { + case ssa.TypeI32: + load.asMovzxRmR(extModeLQ, a, dst) + case ssa.TypeI64: + load.asMov64MR(a, dst) + case ssa.TypeF32: + load.asXmmUnaryRmR(sseOpcodeMovss, a, dst) + case ssa.TypeF64: + load.asXmmUnaryRmR(sseOpcodeMovsd, a, dst) + case ssa.TypeV128: + load.asXmmUnaryRmR(sseOpcodeMovdqu, a, dst) + } + + cur = linkInstr(cur, load) + if arg.Kind == backend.ABIArgKindStack { + // Store back to the stack. + store := m.allocateInstr() + a := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset), rspVReg)) + switch arg.Type { + case ssa.TypeI32: + store.asMovRM(dst, a, 4) + case ssa.TypeI64: + store.asMovRM(dst, a, 8) + case ssa.TypeF32: + store.asXmmMovRM(sseOpcodeMovss, dst, a) + case ssa.TypeF64: + store.asXmmMovRM(sseOpcodeMovsd, dst, a) + case ssa.TypeV128: + store.asXmmMovRM(sseOpcodeMovdqu, dst, a) + } + cur = linkInstr(cur, store) + } + return cur +} + +func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, offsetInResultSlice uint32, result *backend.ABIArg, resultStackSlotBeginOffset uint32) *instruction { + var r regalloc.VReg + if result.Kind == backend.ABIArgKindStack { + // Load the value to the temporary. 
+ load := m.allocateInstr() + offset := resultStackSlotBeginOffset + uint32(result.Offset) + a := newOperandMem(m.newAmodeImmReg(offset, rspVReg)) + switch result.Type { + case ssa.TypeI32: + r = tmpIntReg + load.asMovzxRmR(extModeLQ, a, r) + case ssa.TypeI64: + r = tmpIntReg + load.asMov64MR(a, r) + case ssa.TypeF32: + r = tmpXmmReg + load.asXmmUnaryRmR(sseOpcodeMovss, a, r) + case ssa.TypeF64: + r = tmpXmmReg + load.asXmmUnaryRmR(sseOpcodeMovsd, a, r) + case ssa.TypeV128: + r = tmpXmmReg + load.asXmmUnaryRmR(sseOpcodeMovdqu, a, r) + default: + panic("BUG") + } + cur = linkInstr(cur, load) + } else { + r = result.Reg + } + + store := m.allocateInstr() + a := newOperandMem(m.newAmodeImmReg(offsetInResultSlice, resultSlicePtr)) + switch result.Type { + case ssa.TypeI32: + store.asMovRM(r, a, 4) + case ssa.TypeI64: + store.asMovRM(r, a, 8) + case ssa.TypeF32: + store.asXmmMovRM(sseOpcodeMovss, r, a) + case ssa.TypeF64: + store.asXmmMovRM(sseOpcodeMovsd, r, a) + case ssa.TypeV128: + store.asXmmMovRM(sseOpcodeMovdqu, r, a) + } + + return linkInstr(cur, store) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go new file mode 100644 index 000000000..751050aff --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go @@ -0,0 +1,443 @@ +package amd64 + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +var calleeSavedVRegs = []regalloc.VReg{ + rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg, + xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg, +} + +// CompileGoFunctionTrampoline implements backend.Machine. 
+func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { + ectx := m.ectx + argBegin := 1 // Skips exec context by default. + if needModuleContextPtr { + argBegin++ + } + + abi := &backend.FunctionABI{} + abi.Init(sig, intArgResultRegs, floatArgResultRegs) + m.currentABI = abi + + cur := m.allocateNop() + ectx.RootInstr = cur + + // Execution context is always the first argument. + execCtrPtr := raxVReg + + // First we update RBP and RSP just like the normal prologue. + // + // (high address) (high address) + // RBP ----> +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | ====> | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | Return Addr | | Return Addr | + // RSP ----> +-----------------+ | Caller_RBP | + // (low address) +-----------------+ <----- RSP, RBP + // + cur = m.setupRBPRSP(cur) + + goSliceSizeAligned, goSliceSizeAlignedUnaligned := backend.GoFunctionCallRequiredStackSize(sig, argBegin) + cur = m.insertStackBoundsCheck(goSliceSizeAligned+8 /* size of the Go slice */, cur) + + // Save the callee saved registers. + cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs) + + if needModuleContextPtr { + moduleCtrPtr := rbxVReg // Module context is always the second argument. + mem := m.newAmodeImmReg( + wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.U32(), + execCtrPtr) + store := m.allocateInstr().asMovRM(moduleCtrPtr, newOperandMem(mem), 8) + cur = linkInstr(cur, store) + } + + // Now let's advance the RSP to the stack slot for the arguments. + // + // (high address) (high address) + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... 
| =======> | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | Return Addr | | Return Addr | + // | Caller_RBP | | Caller_RBP | + // RBP,RSP --> +-----------------+ +-----------------+ <----- RBP + // (low address) | arg[N]/ret[M] | + // | .......... | + // | arg[1]/ret[1] | + // | arg[0]/ret[0] | + // +-----------------+ <----- RSP + // (low address) + // + // where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions, + // therefore will be accessed as the usual []uint64. So that's where we need to pass/receive + // the arguments/return values to/from Go function. + cur = m.addRSP(-int32(goSliceSizeAligned), cur) + + // Next, we need to store all the arguments to the stack in the typical Wasm stack style. + var offsetInGoSlice int32 + for i := range abi.Args[argBegin:] { + arg := &abi.Args[argBegin+i] + var v regalloc.VReg + if arg.Kind == backend.ABIArgKindReg { + v = arg.Reg + } else { + // We have saved callee saved registers, so we can use them. + if arg.Type.IsInt() { + v = r15VReg + } else { + v = xmm15VReg + } + mem := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg)) + load := m.allocateInstr() + switch arg.Type { + case ssa.TypeI32: + load.asMovzxRmR(extModeLQ, mem, v) + case ssa.TypeI64: + load.asMov64MR(mem, v) + case ssa.TypeF32: + load.asXmmUnaryRmR(sseOpcodeMovss, mem, v) + case ssa.TypeF64: + load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v) + case ssa.TypeV128: + load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v) + default: + panic("BUG") + } + cur = linkInstr(cur, load) + } + + store := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg)) + switch arg.Type { + case ssa.TypeI32: + store.asMovRM(v, mem, 4) + offsetInGoSlice += 8 // always uint64 rep. 
+ case ssa.TypeI64: + store.asMovRM(v, mem, 8) + offsetInGoSlice += 8 + case ssa.TypeF32: + store.asXmmMovRM(sseOpcodeMovss, v, mem) + offsetInGoSlice += 8 // always uint64 rep. + case ssa.TypeF64: + store.asXmmMovRM(sseOpcodeMovsd, v, mem) + offsetInGoSlice += 8 + case ssa.TypeV128: + store.asXmmMovRM(sseOpcodeMovdqu, v, mem) + offsetInGoSlice += 16 + default: + panic("BUG") + } + cur = linkInstr(cur, store) + } + + // Finally we push the size of the slice to the stack so the stack looks like: + // + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | + // | Return Addr | + // | Caller_RBP | + // +-----------------+ <----- RBP + // | arg[N]/ret[M] | + // | .......... | + // | arg[1]/ret[1] | + // | arg[0]/ret[0] | + // | slice size | + // +-----------------+ <----- RSP + // (low address) + // + // push $sliceSize + cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandImm32(uint32(goSliceSizeAlignedUnaligned)))) + + // Load the exitCode to the register. + exitCodeReg := r12VReg // Callee saved which is already saved. + cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(exitCode), false)) + + saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg) + cur = linkInstr(cur, setExitCode) + cur = linkInstr(cur, saveRsp) + cur = linkInstr(cur, saveRbp) + + // Ready to exit the execution. + cur = m.storeReturnAddressAndExit(cur, execCtrPtr) + + // We don't need the slice size anymore, so pop it. + cur = m.addRSP(8, cur) + + // Ready to set up the results. + offsetInGoSlice = 0 + // To avoid overwriting with the execution context pointer by the result, we need to track the offset, + // and defer the restoration of the result to the end of this function. 
+ var argOverlapWithExecCtxOffset int32 = -1 + for i := range abi.Rets { + r := &abi.Rets[i] + var v regalloc.VReg + isRegResult := r.Kind == backend.ABIArgKindReg + if isRegResult { + v = r.Reg + if v.RealReg() == execCtrPtr.RealReg() { + argOverlapWithExecCtxOffset = offsetInGoSlice + offsetInGoSlice += 8 // always uint64 rep. + continue + } + } else { + if r.Type.IsInt() { + v = r15VReg + } else { + v = xmm15VReg + } + } + + load := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg)) + switch r.Type { + case ssa.TypeI32: + load.asMovzxRmR(extModeLQ, mem, v) + offsetInGoSlice += 8 // always uint64 rep. + case ssa.TypeI64: + load.asMov64MR(mem, v) + offsetInGoSlice += 8 + case ssa.TypeF32: + load.asXmmUnaryRmR(sseOpcodeMovss, mem, v) + offsetInGoSlice += 8 // always uint64 rep. + case ssa.TypeF64: + load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v) + offsetInGoSlice += 8 + case ssa.TypeV128: + load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v) + offsetInGoSlice += 16 + default: + panic("BUG") + } + cur = linkInstr(cur, load) + + if !isRegResult { + // We need to store it back to the result slot above rbp. + store := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmReg(uint32(abi.ArgStackSize+r.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg)) + switch r.Type { + case ssa.TypeI32: + store.asMovRM(v, mem, 4) + case ssa.TypeI64: + store.asMovRM(v, mem, 8) + case ssa.TypeF32: + store.asXmmMovRM(sseOpcodeMovss, v, mem) + case ssa.TypeF64: + store.asXmmMovRM(sseOpcodeMovsd, v, mem) + case ssa.TypeV128: + store.asXmmMovRM(sseOpcodeMovdqu, v, mem) + default: + panic("BUG") + } + cur = linkInstr(cur, store) + } + } + + // Before return, we need to restore the callee saved registers. 
+ cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs) + + if argOverlapWithExecCtxOffset >= 0 { + // At this point execCtt is not used anymore, so we can finally store the + // result to the register which overlaps with the execution context pointer. + mem := newOperandMem(m.newAmodeImmReg(uint32(argOverlapWithExecCtxOffset), rspVReg)) + load := m.allocateInstr().asMov64MR(mem, execCtrPtr) + cur = linkInstr(cur, load) + } + + // Finally ready to return. + cur = m.revertRBPRSP(cur) + linkInstr(cur, m.allocateInstr().asRet()) + + m.encodeWithoutSSA(ectx.RootInstr) + return m.c.Buf() +} + +func (m *machine) saveRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction { + offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() + for _, v := range regs { + store := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx)) + switch v.RegType() { + case regalloc.RegTypeInt: + store.asMovRM(v, mem, 8) + case regalloc.RegTypeFloat: + store.asXmmMovRM(sseOpcodeMovdqu, v, mem) + default: + panic("BUG") + } + cur = linkInstr(cur, store) + offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally. + } + return cur +} + +func (m *machine) restoreRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction { + offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() + for _, v := range regs { + load := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx)) + switch v.RegType() { + case regalloc.RegTypeInt: + load.asMov64MR(mem, v) + case regalloc.RegTypeFloat: + load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v) + default: + panic("BUG") + } + cur = linkInstr(cur, load) + offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally. 
+ } + return cur +} + +func (m *machine) storeReturnAddressAndExit(cur *instruction, execCtx regalloc.VReg) *instruction { + readRip := m.allocateInstr() + cur = linkInstr(cur, readRip) + + ripReg := r12VReg // Callee saved which is already saved. + saveRip := m.allocateInstr().asMovRM( + ripReg, + newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)), + 8, + ) + cur = linkInstr(cur, saveRip) + + exit := m.allocateExitSeq(execCtx) + cur = linkInstr(cur, exit) + + nop, l := m.allocateBrTarget() + cur = linkInstr(cur, nop) + readRip.asLEA(newOperandLabel(l), ripReg) + return cur +} + +// saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient +// stack space left. Basically this is the all allocatable registers except for RSP and RBP, and RAX which contains the +// execution context pointer. ExecCtx pointer is always the first argument so we don't need to save it. +var stackGrowSaveVRegs = []regalloc.VReg{ + rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg, + rcxVReg, rbxVReg, rsiVReg, rdiVReg, r8VReg, r9VReg, r10VReg, r11VReg, + xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg, + xmm0VReg, xmm1VReg, xmm2VReg, xmm3VReg, xmm4VReg, xmm5VReg, xmm6VReg, xmm7VReg, +} + +// CompileStackGrowCallSequence implements backend.Machine. +func (m *machine) CompileStackGrowCallSequence() []byte { + ectx := m.ectx + + cur := m.allocateNop() + ectx.RootInstr = cur + + cur = m.setupRBPRSP(cur) + + // Execution context is always the first argument. + execCtrPtr := raxVReg + + // Save the callee saved and argument registers. + cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs) + + // Load the exitCode to the register. + exitCodeReg := r12VReg // Already saved. 
+ cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(wazevoapi.ExitCodeGrowStack), false)) + + saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg) + cur = linkInstr(cur, setExitCode) + cur = linkInstr(cur, saveRsp) + cur = linkInstr(cur, saveRbp) + + // Ready to exit the execution. + cur = m.storeReturnAddressAndExit(cur, execCtrPtr) + + // After the exit, restore the saved registers. + cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs) + + // Finally ready to return. + cur = m.revertRBPRSP(cur) + linkInstr(cur, m.allocateInstr().asRet()) + + m.encodeWithoutSSA(ectx.RootInstr) + return m.c.Buf() +} + +// insertStackBoundsCheck will insert the instructions after `cur` to check the +// stack bounds, and if there's no sufficient spaces required for the function, +// exit the execution and try growing it in Go world. +func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction { + // add $requiredStackSize, %rsp ;; Temporarily update the sp. + // cmp ExecutionContextOffsetStackBottomPtr(%rax), %rsp ;; Compare the stack bottom and the sp. + // ja .ok + // sub $requiredStackSize, %rsp ;; Reverse the temporary update. + // pushq r15 ;; save the temporary. + // mov $requiredStackSize, %r15 + // mov %15, ExecutionContextOffsetStackGrowRequiredSize(%rax) ;; Set the required size in the execution context. + // popq r15 ;; restore the temporary. + // callq *ExecutionContextOffsetStackGrowCallTrampolineAddress(%rax) ;; Call the Go function to grow the stack. + // jmp .cont + // .ok: + // sub $requiredStackSize, %rsp ;; Reverse the temporary update. 
+ // .cont: + cur = m.addRSP(-int32(requiredStackSize), cur) + cur = linkInstr(cur, m.allocateInstr().asCmpRmiR(true, + newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackBottomPtr.U32(), raxVReg)), + rspVReg, true)) + + ja := m.allocateInstr() + cur = linkInstr(cur, ja) + + cur = m.addRSP(int32(requiredStackSize), cur) + + // Save the temporary. + + cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r15VReg))) + // Load the required size to the temporary. + cur = linkInstr(cur, m.allocateInstr().asImm(r15VReg, uint64(requiredStackSize), true)) + // Set the required size in the execution context. + cur = linkInstr(cur, m.allocateInstr().asMovRM(r15VReg, + newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.U32(), raxVReg)), 8)) + // Restore the temporary. + cur = linkInstr(cur, m.allocateInstr().asPop64(r15VReg)) + // Call the Go function to grow the stack. + cur = linkInstr(cur, m.allocateInstr().asCallIndirect(newOperandMem(m.newAmodeImmReg( + wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.U32(), raxVReg)), nil)) + // Jump to the continuation. + jmpToCont := m.allocateInstr() + cur = linkInstr(cur, jmpToCont) + + // .ok: + okInstr, ok := m.allocateBrTarget() + cur = linkInstr(cur, okInstr) + ja.asJmpIf(condNBE, newOperandLabel(ok)) + // On the ok path, we only need to reverse the temporary update. 
+ cur = m.addRSP(int32(requiredStackSize), cur) + + // .cont: + contInstr, cont := m.allocateBrTarget() + cur = linkInstr(cur, contInstr) + jmpToCont.asJmp(newOperandLabel(cont)) + + return cur +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/cond.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/cond.go new file mode 100644 index 000000000..75cbeab75 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/cond.go @@ -0,0 +1,168 @@ +package amd64 + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +type cond byte + +const ( + // condO represents (overflow) condition. + condO cond = iota + // condNO represents (no overflow) condition. + condNO + // condB represents (< unsigned) condition. + condB + // condNB represents (>= unsigned) condition. + condNB + // condZ represents (zero) condition. + condZ + // condNZ represents (not-zero) condition. + condNZ + // condBE represents (<= unsigned) condition. + condBE + // condNBE represents (> unsigned) condition. + condNBE + // condS represents (negative) condition. + condS + // condNS represents (not-negative) condition. + condNS + // condP represents (parity) condition. + condP + // condNP represents (not parity) condition. + condNP + // condL represents (< signed) condition. + condL + // condNL represents (>= signed) condition. + condNL + // condLE represents (<= signed) condition. + condLE + // condNLE represents (> signed) condition. 
+ condNLE + + condInvalid +) + +func (c cond) String() string { + switch c { + case condO: + return "o" + case condNO: + return "no" + case condB: + return "b" + case condNB: + return "nb" + case condZ: + return "z" + case condNZ: + return "nz" + case condBE: + return "be" + case condNBE: + return "nbe" + case condS: + return "s" + case condNS: + return "ns" + case condL: + return "l" + case condNL: + return "nl" + case condLE: + return "le" + case condNLE: + return "nle" + case condP: + return "p" + case condNP: + return "np" + default: + panic("unreachable") + } +} + +func condFromSSAIntCmpCond(origin ssa.IntegerCmpCond) cond { + switch origin { + case ssa.IntegerCmpCondEqual: + return condZ + case ssa.IntegerCmpCondNotEqual: + return condNZ + case ssa.IntegerCmpCondSignedLessThan: + return condL + case ssa.IntegerCmpCondSignedGreaterThanOrEqual: + return condNL + case ssa.IntegerCmpCondSignedGreaterThan: + return condNLE + case ssa.IntegerCmpCondSignedLessThanOrEqual: + return condLE + case ssa.IntegerCmpCondUnsignedLessThan: + return condB + case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual: + return condNB + case ssa.IntegerCmpCondUnsignedGreaterThan: + return condNBE + case ssa.IntegerCmpCondUnsignedLessThanOrEqual: + return condBE + default: + panic("unreachable") + } +} + +func condFromSSAFloatCmpCond(origin ssa.FloatCmpCond) cond { + switch origin { + case ssa.FloatCmpCondGreaterThanOrEqual: + return condNB + case ssa.FloatCmpCondGreaterThan: + return condNBE + case ssa.FloatCmpCondEqual, ssa.FloatCmpCondNotEqual, ssa.FloatCmpCondLessThan, ssa.FloatCmpCondLessThanOrEqual: + panic(fmt.Sprintf("cond %s must be treated as a special case", origin)) + default: + panic("unreachable") + } +} + +func (c cond) encoding() byte { + return byte(c) +} + +func (c cond) invert() cond { + switch c { + case condO: + return condNO + case condNO: + return condO + case condB: + return condNB + case condNB: + return condB + case condZ: + return condNZ + case condNZ: + return 
condZ + case condBE: + return condNBE + case condNBE: + return condBE + case condS: + return condNS + case condNS: + return condS + case condP: + return condNP + case condNP: + return condP + case condL: + return condNL + case condNL: + return condL + case condLE: + return condNLE + case condNLE: + return condLE + default: + panic("unreachable") + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/ext.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/ext.go new file mode 100644 index 000000000..5e731e822 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/ext.go @@ -0,0 +1,35 @@ +package amd64 + +// extMode represents the mode of extension in movzx/movsx. +type extMode byte + +const ( + // extModeBL represents Byte -> Longword. + extModeBL extMode = iota + // extModeBQ represents Byte -> Quadword. + extModeBQ + // extModeWL represents Word -> Longword. + extModeWL + // extModeWQ represents Word -> Quadword. + extModeWQ + // extModeLQ represents Longword -> Quadword. + extModeLQ +) + +// String implements fmt.Stringer. 
+func (e extMode) String() string { + switch e { + case extModeBL: + return "bl" + case extModeBQ: + return "bq" + case extModeWL: + return "wl" + case extModeWQ: + return "wq" + case extModeLQ: + return "lq" + default: + panic("BUG: invalid ext mode") + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go new file mode 100644 index 000000000..d27e79c0e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go @@ -0,0 +1,2472 @@ +package amd64 + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +type instruction struct { + prev, next *instruction + op1, op2 operand + u1, u2 uint64 + b1 bool + addedBeforeRegAlloc bool + kind instructionKind +} + +// Next implements regalloc.Instr. +func (i *instruction) Next() regalloc.Instr { + return i.next +} + +// Prev implements regalloc.Instr. +func (i *instruction) Prev() regalloc.Instr { + return i.prev +} + +// IsCall implements regalloc.Instr. +func (i *instruction) IsCall() bool { return i.kind == call } + +// IsIndirectCall implements regalloc.Instr. +func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect } + +// IsReturn implements regalloc.Instr. +func (i *instruction) IsReturn() bool { return i.kind == ret } + +// AddedBeforeRegAlloc implements regalloc.Instr. +func (i *instruction) AddedBeforeRegAlloc() bool { return i.addedBeforeRegAlloc } + +// String implements regalloc.Instr. 
+func (i *instruction) String() string { + switch i.kind { + case nop0: + return "nop" + case sourceOffsetInfo: + return fmt.Sprintf("source_offset_info %d", i.u1) + case ret: + return "ret" + case imm: + if i.b1 { + return fmt.Sprintf("movabsq $%d, %s", int64(i.u1), i.op2.format(true)) + } else { + return fmt.Sprintf("movl $%d, %s", int32(i.u1), i.op2.format(false)) + } + case aluRmiR: + return fmt.Sprintf("%s %s, %s", aluRmiROpcode(i.u1), i.op1.format(i.b1), i.op2.format(i.b1)) + case movRR: + if i.b1 { + return fmt.Sprintf("movq %s, %s", i.op1.format(true), i.op2.format(true)) + } else { + return fmt.Sprintf("movl %s, %s", i.op1.format(false), i.op2.format(false)) + } + case xmmRmR: + return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(false), i.op2.format(false)) + case gprToXmm: + return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(i.b1), i.op2.format(i.b1)) + case xmmUnaryRmR: + return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(false), i.op2.format(false)) + case xmmUnaryRmRImm: + return fmt.Sprintf("%s $%d, %s, %s", sseOpcode(i.u1), roundingMode(i.u2), i.op1.format(false), i.op2.format(false)) + case unaryRmR: + var suffix string + if i.b1 { + suffix = "q" + } else { + suffix = "l" + } + return fmt.Sprintf("%s%s %s, %s", unaryRmROpcode(i.u1), suffix, i.op1.format(i.b1), i.op2.format(i.b1)) + case not: + var op string + if i.b1 { + op = "notq" + } else { + op = "notl" + } + return fmt.Sprintf("%s %s", op, i.op1.format(i.b1)) + case neg: + var op string + if i.b1 { + op = "negq" + } else { + op = "negl" + } + return fmt.Sprintf("%s %s", op, i.op1.format(i.b1)) + case div: + var prefix string + var op string + if i.b1 { + op = "divq" + } else { + op = "divl" + } + if i.u1 != 0 { + prefix = "i" + } + return fmt.Sprintf("%s%s %s", prefix, op, i.op1.format(i.b1)) + case mulHi: + signed, _64 := i.u1 != 0, i.b1 + var op string + switch { + case signed && _64: + op = "imulq" + case !signed && _64: + op = "mulq" + case signed && !_64: + 
op = "imull" + case !signed && !_64: + op = "mull" + } + return fmt.Sprintf("%s %s", op, i.op1.format(i.b1)) + case signExtendData: + var op string + if i.b1 { + op = "cqo" + } else { + op = "cdq" + } + return op + case movzxRmR: + return fmt.Sprintf("movzx.%s %s, %s", extMode(i.u1), i.op1.format(true), i.op2.format(true)) + case mov64MR: + return fmt.Sprintf("movq %s, %s", i.op1.format(true), i.op2.format(true)) + case lea: + return fmt.Sprintf("lea %s, %s", i.op1.format(true), i.op2.format(true)) + case movsxRmR: + return fmt.Sprintf("movsx.%s %s, %s", extMode(i.u1), i.op1.format(true), i.op2.format(true)) + case movRM: + var suffix string + switch i.u1 { + case 1: + suffix = "b" + case 2: + suffix = "w" + case 4: + suffix = "l" + case 8: + suffix = "q" + } + return fmt.Sprintf("mov.%s %s, %s", suffix, i.op1.format(true), i.op2.format(true)) + case shiftR: + var suffix string + if i.b1 { + suffix = "q" + } else { + suffix = "l" + } + return fmt.Sprintf("%s%s %s, %s", shiftROp(i.u1), suffix, i.op1.format(false), i.op2.format(i.b1)) + case xmmRmiReg: + return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(true), i.op2.format(true)) + case cmpRmiR: + var op, suffix string + if i.u1 != 0 { + op = "cmp" + } else { + op = "test" + } + if i.b1 { + suffix = "q" + } else { + suffix = "l" + } + if op == "test" && i.op1.kind == operandKindMem { + // Print consistently with AT&T syntax. 
+ return fmt.Sprintf("%s%s %s, %s", op, suffix, i.op2.format(i.b1), i.op1.format(i.b1)) + } + return fmt.Sprintf("%s%s %s, %s", op, suffix, i.op1.format(i.b1), i.op2.format(i.b1)) + case setcc: + return fmt.Sprintf("set%s %s", cond(i.u1), i.op2.format(true)) + case cmove: + var suffix string + if i.b1 { + suffix = "q" + } else { + suffix = "l" + } + return fmt.Sprintf("cmov%s%s %s, %s", cond(i.u1), suffix, i.op1.format(i.b1), i.op2.format(i.b1)) + case push64: + return fmt.Sprintf("pushq %s", i.op1.format(true)) + case pop64: + return fmt.Sprintf("popq %s", i.op1.format(true)) + case xmmMovRM: + return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(true), i.op2.format(true)) + case xmmLoadConst: + panic("TODO") + case xmmToGpr: + return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(i.b1), i.op2.format(i.b1)) + case cvtUint64ToFloatSeq: + panic("TODO") + case cvtFloatToSintSeq: + panic("TODO") + case cvtFloatToUintSeq: + panic("TODO") + case xmmMinMaxSeq: + panic("TODO") + case xmmCmpRmR: + return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(false), i.op2.format(false)) + case xmmRmRImm: + op := sseOpcode(i.u1) + r1, r2 := i.op1.format(op == sseOpcodePextrq || op == sseOpcodePinsrq), + i.op2.format(op == sseOpcodePextrq || op == sseOpcodePinsrq) + return fmt.Sprintf("%s $%d, %s, %s", op, i.u2, r1, r2) + case jmp: + return fmt.Sprintf("jmp %s", i.op1.format(true)) + case jmpIf: + return fmt.Sprintf("j%s %s", cond(i.u1), i.op1.format(true)) + case jmpTableIsland: + return fmt.Sprintf("jump_table_island: jmp_table_index=%d", i.u1) + case exitSequence: + return fmt.Sprintf("exit_sequence %s", i.op1.format(true)) + case ud2: + return "ud2" + case call: + return fmt.Sprintf("call %s", ssa.FuncRef(i.u1)) + case callIndirect: + return fmt.Sprintf("callq *%s", i.op1.format(true)) + case xchg: + var suffix string + switch i.u1 { + case 1: + suffix = "b" + case 2: + suffix = "w" + case 4: + suffix = "l" + case 8: + suffix = "q" + } + return 
fmt.Sprintf("xchg.%s %s, %s", suffix, i.op1.format(true), i.op2.format(true)) + case zeros: + return fmt.Sprintf("xor %s, %s", i.op2.format(true), i.op2.format(true)) + case fcvtToSintSequence: + execCtx, src, tmpGp, tmpGp2, tmpXmm, src64, dst64, sat := i.fcvtToSintSequenceData() + return fmt.Sprintf( + "fcvtToSintSequence execCtx=%s, src=%s, tmpGp=%s, tmpGp2=%s, tmpXmm=%s, src64=%v, dst64=%v, sat=%v", + formatVRegSized(execCtx, true), + formatVRegSized(src, true), + formatVRegSized(tmpGp, true), + formatVRegSized(tmpGp2, true), + formatVRegSized(tmpXmm, true), src64, dst64, sat) + case fcvtToUintSequence: + execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2, src64, dst64, sat := i.fcvtToUintSequenceData() + return fmt.Sprintf( + "fcvtToUintSequence execCtx=%s, src=%s, tmpGp=%s, tmpGp2=%s, tmpXmm=%s, tmpXmm2=%s, src64=%v, dst64=%v, sat=%v", + formatVRegSized(execCtx, true), + formatVRegSized(src, true), + formatVRegSized(tmpGp, true), + formatVRegSized(tmpGp2, true), + formatVRegSized(tmpXmm, true), + formatVRegSized(tmpXmm2, true), src64, dst64, sat) + case idivRemSequence: + execCtx, divisor, tmpGp, isDiv, signed, _64 := i.idivRemSequenceData() + return fmt.Sprintf("idivRemSequence execCtx=%s, divisor=%s, tmpGp=%s, isDiv=%v, signed=%v, _64=%v", + formatVRegSized(execCtx, true), formatVRegSized(divisor, _64), formatVRegSized(tmpGp, _64), isDiv, signed, _64) + case defineUninitializedReg: + return fmt.Sprintf("defineUninitializedReg %s", i.op2.format(true)) + case xmmCMov: + return fmt.Sprintf("xmmcmov%s %s, %s", cond(i.u1), i.op1.format(true), i.op2.format(true)) + case blendvpd: + return fmt.Sprintf("blendvpd %s, %s, %%xmm0", i.op1.format(false), i.op2.format(false)) + case mfence: + return "mfence" + case lockcmpxchg: + var suffix string + switch i.u1 { + case 1: + suffix = "b" + case 2: + suffix = "w" + case 4: + suffix = "l" + case 8: + suffix = "q" + } + return fmt.Sprintf("lock cmpxchg.%s %s, %s", suffix, i.op1.format(true), i.op2.format(true)) + case lockxadd: + 
		var suffix string
		// Operand-size suffix is selected by the byte width stored in u1.
		switch i.u1 {
		case 1:
			suffix = "b"
		case 2:
			suffix = "w"
		case 4:
			suffix = "l"
		case 8:
			suffix = "q"
		}
		return fmt.Sprintf("lock xadd.%s %s, %s", suffix, i.op1.format(true), i.op2.format(true))

	case nopUseReg:
		return fmt.Sprintf("nop_use_reg %s", i.op1.format(true))

	default:
		panic(fmt.Sprintf("BUG: %d", int(i.kind)))
	}
}

// Defs implements regalloc.Instr.
//
// It truncates *regs to zero length and appends every virtual register this
// instruction defines, as dictated by the defKinds table indexed by kind.
// The (possibly re-grown) slice is returned so callers can use it directly.
func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg {
	*regs = (*regs)[:0]
	switch dk := defKinds[i.kind]; dk {
	case defKindNone:
		// This instruction defines no register.
	case defKindOp2:
		*regs = append(*regs, i.op2.reg())
	case defKindCall:
		// Calls define the ABI return registers; the return-register counts
		// are packed into u2 alongside the rest of the ABI info.
		_, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2)
		for i := byte(0); i < retIntRealRegs; i++ { // NOTE: loop var shadows the receiver; only used to index the table.
			*regs = append(*regs, regInfo.RealRegToVReg[intArgResultRegs[i]])
		}
		for i := byte(0); i < retFloatRealRegs; i++ {
			*regs = append(*regs, regInfo.RealRegToVReg[floatArgResultRegs[i]])
		}
	case defKindDivRem:
		// Division defines the quotient in rax; remainder defines rdx.
		_, _, _, isDiv, _, _ := i.idivRemSequenceData()
		if isDiv {
			*regs = append(*regs, raxVReg)
		} else {
			*regs = append(*regs, rdxVReg)
		}
	default:
		panic(fmt.Sprintf("BUG: invalid defKind \"%s\" for %s", dk, i))
	}
	return *regs
}

// Uses implements regalloc.Instr.
//
// It truncates *regs to zero length and appends every virtual register this
// instruction reads, as dictated by the useKinds table indexed by kind.
// The append order here must match the index order expected by AssignUse.
func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
	*regs = (*regs)[:0]
	switch uk := useKinds[i.kind]; uk {
	case useKindNone:
		// This instruction uses no register.
	case useKindOp1Op2Reg, useKindOp1RegOp2:
		// One operand may be reg/mem/imm32 ("any"); the other must be a reg.
		// Which is which depends on the use kind.
		opAny, opReg := &i.op1, &i.op2
		if uk == useKindOp1RegOp2 {
			opAny, opReg = opReg, opAny
		}
		// The destination operand (op2) can be only reg,
		// the source operand (op1) can be imm32, reg or mem.
		switch opAny.kind {
		case operandKindReg:
			*regs = append(*regs, opAny.reg())
		case operandKindMem:
			// A memory operand uses its address-mode registers (base/index).
			opAny.addressMode().uses(regs)
		case operandKindImm32:
			// Immediates use no register.
		default:
			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
		}
		if opReg.kind != operandKindReg {
			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
		}
		*regs = append(*regs, opReg.reg())
	case useKindOp1:
		op := i.op1
		switch op.kind {
		case operandKindReg:
			*regs = append(*regs, op.reg())
		case operandKindMem:
			op.addressMode().uses(regs)
		case operandKindImm32, operandKindLabel:
			// Neither uses a register.
		default:
			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
		}
	case useKindCallInd:
		// Indirect call: first the callee pointer operand ...
		op := i.op1
		switch op.kind {
		case operandKindReg:
			*regs = append(*regs, op.reg())
		case operandKindMem:
			op.addressMode().uses(regs)
		default:
			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
		}
		// ... then fall through to add the ABI argument registers too.
		fallthrough
	case useKindCall:
		// Calls use the ABI argument registers; counts are packed into u2.
		argIntRealRegs, argFloatRealRegs, _, _, _ := backend.ABIInfoFromUint64(i.u2)
		for i := byte(0); i < argIntRealRegs; i++ { // NOTE: loop var shadows the receiver.
			*regs = append(*regs, regInfo.RealRegToVReg[intArgResultRegs[i]])
		}
		for i := byte(0); i < argFloatRealRegs; i++ {
			*regs = append(*regs, regInfo.RealRegToVReg[floatArgResultRegs[i]])
		}
	case useKindFcvtToSintSequence:
		execCtx, src, tmpGp, tmpGp2, tmpXmm, _, _, _ := i.fcvtToSintSequenceData()
		*regs = append(*regs, execCtx, src, tmpGp, tmpGp2, tmpXmm)
	case useKindFcvtToUintSequence:
		execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2, _, _, _ := i.fcvtToUintSequenceData()
		*regs = append(*regs, execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2)
	case useKindDivRem:
		execCtx, divisor, tmpGp, _, _, _ := i.idivRemSequenceData()
		// idiv uses rax and rdx as implicit operands.
		*regs = append(*regs, raxVReg, rdxVReg, execCtx, divisor, tmpGp)
	case useKindBlendvpd:
		// blendvpd implicitly reads xmm0 as the mask (index 0), then the
		// "any" operand, then the register operand.
		*regs = append(*regs, xmm0VReg)

		opAny, opReg := &i.op1, &i.op2
		switch opAny.kind {
		case operandKindReg:
			*regs = append(*regs, opAny.reg())
		case operandKindMem:
			opAny.addressMode().uses(regs)
		default:
			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
		}
		if opReg.kind != operandKindReg {
			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
		}
		*regs = append(*regs, opReg.reg())

	case useKindRaxOp1RegOp2:
		// Implicit rax (index 0), then op1 which must be a register
		// (index 1), then op2 which may be reg or mem.
		opReg, opAny := &i.op1, &i.op2
		*regs = append(*regs, raxVReg, opReg.reg())
		switch opAny.kind {
		case operandKindReg:
			*regs = append(*regs, opAny.reg())
		case operandKindMem:
			opAny.addressMode().uses(regs)
		default:
			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
		}
		if opReg.kind != operandKindReg {
			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
		}

	default:
		panic(fmt.Sprintf("BUG: invalid useKind %s for %s", uk, i))
	}
	return *regs
}

// AssignUse implements regalloc.Instr.
func (i *instruction) AssignUse(index int, v regalloc.VReg) {
	// index is the position of the use in the order produced by Uses;
	// the switch arms below must mirror that append order exactly.
	switch uk := useKinds[i.kind]; uk {
	case useKindNone:
		// No uses, nothing to assign.
	case useKindCallInd:
		if index != 0 {
			panic("BUG")
		}
		op := &i.op1
		switch op.kind {
		case operandKindReg:
			op.setReg(v)
		case operandKindMem:
			op.addressMode().assignUses(index, v)
		default:
			panic("BUG")
		}
	case useKindOp1Op2Reg, useKindOp1RegOp2:
		op, opMustBeReg := &i.op1, &i.op2
		if uk == useKindOp1RegOp2 {
			op, opMustBeReg = opMustBeReg, op
		}
		switch op.kind {
		case operandKindReg:
			// Index 0 is the "any" operand, index 1 the mandatory register.
			if index == 0 {
				op.setReg(v)
			} else if index == 1 {
				opMustBeReg.setReg(v)
			} else {
				panic("BUG")
			}
		case operandKindMem:
			// Memory operand consumes the first nregs indices for its
			// address-mode registers; the next index is the register operand.
			nregs := op.addressMode().nregs()
			if index < nregs {
				op.addressMode().assignUses(index, v)
			} else if index == nregs {
				opMustBeReg.setReg(v)
			} else {
				panic("BUG")
			}
		case operandKindImm32:
			// Immediate uses no register, so index 0 is the register operand.
			if index == 0 {
				opMustBeReg.setReg(v)
			} else {
				panic("BUG")
			}
		default:
			panic(fmt.Sprintf("BUG: invalid operand pair: %s", i))
		}
	case useKindOp1:
		op := &i.op1
		switch op.kind {
		case operandKindReg:
			if index != 0 {
				panic("BUG")
			}
			op.setReg(v)
		case operandKindMem:
			op.addressMode().assignUses(index, v)
		default:
			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
		}
	case useKindFcvtToSintSequence:
		// The sequence smuggles its registers through two address modes and
		// u1 (see allocateFcvtToSintSequence): op1={execCtx,src},
		// op2={tmpGp,tmpGp2}, u1=tmpXmm stored as raw VReg bits.
		switch index {
		case 0:
			i.op1.addressMode().base = v
		case 1:
			i.op1.addressMode().index = v
		case 2:
			i.op2.addressMode().base = v
		case 3:
			i.op2.addressMode().index = v
		case 4:
			i.u1 = uint64(v)
		default:
			panic("BUG")
		}
	case useKindFcvtToUintSequence:
		// Same layout as the sint sequence, plus a second temp xmm in u2.
		switch index {
		case 0:
			i.op1.addressMode().base = v
		case 1:
			i.op1.addressMode().index = v
		case 2:
			i.op2.addressMode().base = v
		case 3:
			i.op2.addressMode().index = v
		case 4:
			i.u1 = uint64(v)
		case 5:
			i.u2 = uint64(v)
		default:
			panic("BUG")
		}
	case useKindDivRem:
		// Indices 0/1 are the implicit rax/rdx: only sanity-checked, never
		// reassigned. 2=execCtx(op1), 3=divisor(op2), 4=tmpGp(u1).
		switch index {
		case 0:
			if v != raxVReg {
				panic("BUG")
			}
		case 1:
			if v != rdxVReg {
				panic("BUG")
			}
		case 2:
			i.op1.setReg(v)
		case 3:
			i.op2.setReg(v)
		case 4:
			i.u1 = uint64(v)
		default:
			panic("BUG")
		}
	case useKindBlendvpd:
		op, opMustBeReg := &i.op1, &i.op2
		if index == 0 {
			// Index 0 is the implicit xmm0 mask; must already be xmm0.
			if v.RealReg() != xmm0 {
				panic("BUG")
			}
		} else {
			switch op.kind {
			case operandKindReg:
				switch index {
				case 1:
					op.setReg(v)
				case 2:
					opMustBeReg.setReg(v)
				default:
					panic("BUG")
				}
			case operandKindMem:
				// Shift past the implicit xmm0 use before indexing the amode.
				nregs := op.addressMode().nregs()
				index--
				if index < nregs {
					op.addressMode().assignUses(index, v)
				} else if index == nregs {
					opMustBeReg.setReg(v)
				} else {
					panic("BUG")
				}
			default:
				panic(fmt.Sprintf("BUG: invalid operand pair: %s", i))
			}
		}

	case useKindRaxOp1RegOp2:
		switch index {
		case 0:
			// Implicit rax use; sanity check only.
			if v.RealReg() != rax {
				panic("BUG")
			}
		case 1:
			i.op1.setReg(v)
		default:
			op := &i.op2
			switch op.kind {
			case operandKindReg:
				switch index {
				case 1:
					// NOTE(review): this arm appears unreachable — index 1 is
					// already handled by the outer switch above; confirm
					// against upstream before removing.
					op.setReg(v)
				case 2:
					op.setReg(v)
				default:
					panic("BUG")
				}
			case operandKindMem:
				// Shift past the two implicit/explicit register uses.
				nregs := op.addressMode().nregs()
				index -= 2
				if index < nregs {
					op.addressMode().assignUses(index, v)
				} else if index == nregs {
					op.setReg(v)
				} else {
					panic("BUG")
				}
			default:
				panic(fmt.Sprintf("BUG: invalid operand pair: %s", i))
			}
		}
	default:
		panic(fmt.Sprintf("BUG: invalid useKind %s for %s", uk, i))
	}
}

// AssignDef implements regalloc.Instr.
//
// Instructions handled here define at most one register (op2); multi-def
// kinds (calls, div/rem) are resolved elsewhere and must not reach this.
func (i *instruction) AssignDef(reg regalloc.VReg) {
	switch dk := defKinds[i.kind]; dk {
	case defKindNone:
		// Nothing defined, nothing to assign.
	case defKindOp2:
		i.op2.setReg(reg)
	default:
		panic(fmt.Sprintf("BUG: invalid defKind \"%s\" for %s", dk, i))
	}
}

// IsCopy implements regalloc.Instr.
+func (i *instruction) IsCopy() bool { + k := i.kind + if k == movRR { + return true + } + if k == xmmUnaryRmR { + if i.op1.kind == operandKindReg { + sse := sseOpcode(i.u1) + return sse == sseOpcodeMovss || sse == sseOpcodeMovsd || sse == sseOpcodeMovdqu + } + } + return false +} + +func resetInstruction(i *instruction) { + *i = instruction{} +} + +func setNext(i *instruction, next *instruction) { + i.next = next +} + +func setPrev(i *instruction, prev *instruction) { + i.prev = prev +} + +func asNop(i *instruction) { + i.kind = nop0 +} + +func (i *instruction) asNop0WithLabel(label backend.Label) *instruction { //nolint + i.kind = nop0 + i.u1 = uint64(label) + return i +} + +func (i *instruction) nop0Label() backend.Label { + return backend.Label(i.u1) +} + +type instructionKind byte + +const ( + nop0 instructionKind = iota + 1 + + // Integer arithmetic/bit-twiddling: (add sub and or xor mul, etc.) (32 64) (reg addr imm) reg + aluRmiR + + // Instructions on GPR that only read src and defines dst (dst is not modified): bsr, etc. + unaryRmR + + // Bitwise not + not + + // Integer negation + neg + + // Integer quotient and remainder: (div idiv) $rax $rdx (reg addr) + div + + // The high bits (RDX) of a (un)signed multiply: RDX:RAX := RAX * rhs. + mulHi + + // Do a sign-extend based on the sign of the value in rax into rdx: (cwd cdq cqo) + // or al into ah: (cbw) + signExtendData + + // Constant materialization: (imm32 imm64) reg. + // Either: movl $imm32, %reg32 or movabsq $imm64, %reg64. + imm + + // GPR to GPR move: mov (64 32) reg reg. + movRR + + // movzxRmR is zero-extended loads or move (R to R), except for 64 bits: movz (bl bq wl wq lq) addr reg. + // Note that the lq variant doesn't really exist since the default zero-extend rule makes it + // unnecessary. For that case we emit the equivalent "movl AM, reg32". + movzxRmR + + // mov64MR is a plain 64-bit integer load, since movzxRmR can't represent that. 
+ mov64MR + + // Loads the memory address of addr into dst. + lea + + // Sign-extended loads and moves: movs (bl bq wl wq lq) addr reg. + movsxRmR + + // Integer stores: mov (b w l q) reg addr. + movRM + + // Arithmetic shifts: (shl shr sar) (b w l q) imm reg. + shiftR + + // Arithmetic SIMD shifts. + xmmRmiReg + + // Integer comparisons/tests: cmp or test (b w l q) (reg addr imm) reg. + cmpRmiR + + // Materializes the requested condition code in the destination reg. + setcc + + // Integer conditional move. + // Overwrites the destination register. + cmove + + // pushq (reg addr imm) + push64 + + // popq reg + pop64 + + // XMM (scalar or vector) binary op: (add sub and or xor mul adc? sbb?) (32 64) (reg addr) reg + xmmRmR + + // XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg. + // + // This differs from xmmRmR in that the dst register of xmmUnaryRmR is not used in the + // computation of the instruction dst value and so does not have to be a previously valid + // value. This is characteristic of mov instructions. + xmmUnaryRmR + + // XMM (scalar or vector) unary op with immediate: roundss, roundsd, etc. + // + // This differs from XMM_RM_R_IMM in that the dst register of + // XmmUnaryRmRImm is not used in the computation of the instruction dst + // value and so does not have to be a previously valid value. + xmmUnaryRmRImm + + // XMM (scalar or vector) unary op (from xmm to mem): stores, movd, movq + xmmMovRM + + // XMM (vector) unary op (to move a constant value into an xmm register): movups + xmmLoadConst + + // XMM (scalar) unary op (from xmm to integer reg): movd, movq, cvtts{s,d}2si + xmmToGpr + + // XMM (scalar) unary op (from integer to float reg): movd, movq, cvtsi2s{s,d} + gprToXmm + + // Converts an unsigned int64 to a float32/float64. + cvtUint64ToFloatSeq + + // Converts a scalar xmm to a signed int32/int64. + cvtFloatToSintSeq + + // Converts a scalar xmm to an unsigned int32/int64. 
+ cvtFloatToUintSeq + + // A sequence to compute min/max with the proper NaN semantics for xmm registers. + xmmMinMaxSeq + + // Float comparisons/tests: cmp (b w l q) (reg addr imm) reg. + xmmCmpRmR + + // A binary XMM instruction with an 8-bit immediate: e.g. cmp (ps pd) imm (reg addr) reg + xmmRmRImm + + // Direct call: call simm32. + // Note that the offset is the relative to the *current RIP*, which points to the first byte of the next instruction. + call + + // Indirect call: callq (reg mem). + callIndirect + + // Return. + ret + + // Jump: jmp (reg, mem, imm32 or label) + jmp + + // Jump conditionally: jcond cond label. + jmpIf + + // jmpTableIsland is to emit the jump table. + jmpTableIsland + + // exitSequence exits the execution and go back to the Go world. + exitSequence + + // An instruction that will always trigger the illegal instruction exception. + ud2 + + // xchg is described in https://www.felixcloutier.com/x86/xchg. + // This instruction uses two operands, where one of them can be a memory address, and swaps their values. + // If the dst is a memory address, the execution is atomic. + xchg + + // lockcmpxchg is the cmpxchg instruction https://www.felixcloutier.com/x86/cmpxchg with a lock prefix. + lockcmpxchg + + // zeros puts zeros into the destination register. This is implemented as xor reg, reg for + // either integer or XMM registers. The reason why we have this instruction instead of using aluRmiR + // is that it requires the already-defined registers. From reg alloc's perspective, this defines + // the destination register and takes no inputs. + zeros + + // sourceOffsetInfo is a dummy instruction to emit source offset info. + // The existence of this instruction does not affect the execution. + sourceOffsetInfo + + // defineUninitializedReg is a no-op instruction that defines a register without a defining instruction. + defineUninitializedReg + + // fcvtToSintSequence is a sequence of instructions to convert a float to a signed integer. 
+ fcvtToSintSequence + + // fcvtToUintSequence is a sequence of instructions to convert a float to an unsigned integer. + fcvtToUintSequence + + // xmmCMov is a conditional move instruction for XMM registers. Lowered after register allocation. + xmmCMov + + // idivRemSequence is a sequence of instructions to compute both the quotient and remainder of a division. + idivRemSequence + + // blendvpd is https://www.felixcloutier.com/x86/blendvpd. + blendvpd + + // mfence is https://www.felixcloutier.com/x86/mfence + mfence + + // lockxadd is xadd https://www.felixcloutier.com/x86/xadd with a lock prefix. + lockxadd + + // nopUseReg is a meta instruction that uses one register and does nothing. + nopUseReg + + instrMax +) + +func (i *instruction) asMFence() *instruction { + i.kind = mfence + return i +} + +func (i *instruction) asNopUseReg(r regalloc.VReg) *instruction { + i.kind = nopUseReg + i.op1 = newOperandReg(r) + return i +} + +func (i *instruction) asIdivRemSequence(execCtx, divisor, tmpGp regalloc.VReg, isDiv, signed, _64 bool) *instruction { + i.kind = idivRemSequence + i.op1 = newOperandReg(execCtx) + i.op2 = newOperandReg(divisor) + i.u1 = uint64(tmpGp) + if isDiv { + i.u2 |= 1 + } + if signed { + i.u2 |= 2 + } + if _64 { + i.u2 |= 4 + } + return i +} + +func (i *instruction) idivRemSequenceData() ( + execCtx, divisor, tmpGp regalloc.VReg, isDiv, signed, _64 bool, +) { + if i.kind != idivRemSequence { + panic("BUG") + } + return i.op1.reg(), i.op2.reg(), regalloc.VReg(i.u1), i.u2&1 != 0, i.u2&2 != 0, i.u2&4 != 0 +} + +func (i *instruction) asXmmCMov(cc cond, x operand, rd regalloc.VReg, size byte) *instruction { + i.kind = xmmCMov + i.op1 = x + i.op2 = newOperandReg(rd) + i.u1 = uint64(cc) + i.u2 = uint64(size) + return i +} + +func (i *instruction) asDefineUninitializedReg(r regalloc.VReg) *instruction { + i.kind = defineUninitializedReg + i.op2 = newOperandReg(r) + return i +} + +func (m *machine) allocateFcvtToUintSequence( + execCtx, src, tmpGp, tmpGp2, 
tmpXmm, tmpXmm2 regalloc.VReg, + src64, dst64, sat bool, +) *instruction { + i := m.allocateInstr() + i.kind = fcvtToUintSequence + op1a := m.amodePool.Allocate() + op2a := m.amodePool.Allocate() + i.op1 = newOperandMem(op1a) + i.op2 = newOperandMem(op2a) + if src64 { + op1a.imm32 = 1 + } else { + op1a.imm32 = 0 + } + if dst64 { + op1a.imm32 |= 2 + } + if sat { + op1a.imm32 |= 4 + } + + op1a.base = execCtx + op1a.index = src + op2a.base = tmpGp + op2a.index = tmpGp2 + i.u1 = uint64(tmpXmm) + i.u2 = uint64(tmpXmm2) + return i +} + +func (i *instruction) fcvtToUintSequenceData() ( + execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2 regalloc.VReg, src64, dst64, sat bool, +) { + if i.kind != fcvtToUintSequence { + panic("BUG") + } + op1a := i.op1.addressMode() + op2a := i.op2.addressMode() + return op1a.base, op1a.index, op2a.base, op2a.index, regalloc.VReg(i.u1), regalloc.VReg(i.u2), + op1a.imm32&1 != 0, op1a.imm32&2 != 0, op1a.imm32&4 != 0 +} + +func (m *machine) allocateFcvtToSintSequence( + execCtx, src, tmpGp, tmpGp2, tmpXmm regalloc.VReg, + src64, dst64, sat bool, +) *instruction { + i := m.allocateInstr() + i.kind = fcvtToSintSequence + op1a := m.amodePool.Allocate() + op2a := m.amodePool.Allocate() + i.op1 = newOperandMem(op1a) + i.op2 = newOperandMem(op2a) + op1a.base = execCtx + op1a.index = src + op2a.base = tmpGp + op2a.index = tmpGp2 + i.u1 = uint64(tmpXmm) + if src64 { + i.u2 = 1 + } else { + i.u2 = 0 + } + if dst64 { + i.u2 |= 2 + } + if sat { + i.u2 |= 4 + } + return i +} + +func (i *instruction) fcvtToSintSequenceData() ( + execCtx, src, tmpGp, tmpGp2, tmpXmm regalloc.VReg, src64, dst64, sat bool, +) { + if i.kind != fcvtToSintSequence { + panic("BUG") + } + op1a := i.op1.addressMode() + op2a := i.op2.addressMode() + return op1a.base, op1a.index, op2a.base, op2a.index, regalloc.VReg(i.u1), + i.u2&1 != 0, i.u2&2 != 0, i.u2&4 != 0 +} + +func (k instructionKind) String() string { + switch k { + case nop0: + return "nop" + case ret: + return "ret" + case imm: + 
return "imm" + case aluRmiR: + return "aluRmiR" + case movRR: + return "movRR" + case xmmRmR: + return "xmmRmR" + case gprToXmm: + return "gprToXmm" + case xmmUnaryRmR: + return "xmmUnaryRmR" + case xmmUnaryRmRImm: + return "xmmUnaryRmRImm" + case unaryRmR: + return "unaryRmR" + case not: + return "not" + case neg: + return "neg" + case div: + return "div" + case mulHi: + return "mulHi" + case signExtendData: + return "signExtendData" + case movzxRmR: + return "movzxRmR" + case mov64MR: + return "mov64MR" + case lea: + return "lea" + case movsxRmR: + return "movsxRmR" + case movRM: + return "movRM" + case shiftR: + return "shiftR" + case xmmRmiReg: + return "xmmRmiReg" + case cmpRmiR: + return "cmpRmiR" + case setcc: + return "setcc" + case cmove: + return "cmove" + case push64: + return "push64" + case pop64: + return "pop64" + case xmmMovRM: + return "xmmMovRM" + case xmmLoadConst: + return "xmmLoadConst" + case xmmToGpr: + return "xmmToGpr" + case cvtUint64ToFloatSeq: + return "cvtUint64ToFloatSeq" + case cvtFloatToSintSeq: + return "cvtFloatToSintSeq" + case cvtFloatToUintSeq: + return "cvtFloatToUintSeq" + case xmmMinMaxSeq: + return "xmmMinMaxSeq" + case xmmCmpRmR: + return "xmmCmpRmR" + case xmmRmRImm: + return "xmmRmRImm" + case jmpIf: + return "jmpIf" + case jmp: + return "jmp" + case jmpTableIsland: + return "jmpTableIsland" + case exitSequence: + return "exit_sequence" + case ud2: + return "ud2" + case xchg: + return "xchg" + case zeros: + return "zeros" + case fcvtToSintSequence: + return "fcvtToSintSequence" + case fcvtToUintSequence: + return "fcvtToUintSequence" + case xmmCMov: + return "xmmCMov" + case idivRemSequence: + return "idivRemSequence" + case mfence: + return "mfence" + case lockcmpxchg: + return "lockcmpxchg" + case lockxadd: + return "lockxadd" + default: + panic("BUG") + } +} + +type aluRmiROpcode byte + +const ( + aluRmiROpcodeAdd aluRmiROpcode = iota + 1 + aluRmiROpcodeSub + aluRmiROpcodeAnd + aluRmiROpcodeOr + aluRmiROpcodeXor + 
aluRmiROpcodeMul +) + +func (a aluRmiROpcode) String() string { + switch a { + case aluRmiROpcodeAdd: + return "add" + case aluRmiROpcodeSub: + return "sub" + case aluRmiROpcodeAnd: + return "and" + case aluRmiROpcodeOr: + return "or" + case aluRmiROpcodeXor: + return "xor" + case aluRmiROpcodeMul: + return "imul" + default: + panic("BUG") + } +} + +func (i *instruction) asJmpIf(cond cond, target operand) *instruction { + i.kind = jmpIf + i.u1 = uint64(cond) + i.op1 = target + return i +} + +// asJmpTableSequence is used to emit the jump table. +// targetSliceIndex is the index of the target slice in machine.jmpTableTargets. +func (i *instruction) asJmpTableSequence(targetSliceIndex int, targetCount int) *instruction { + i.kind = jmpTableIsland + i.u1 = uint64(targetSliceIndex) + i.u2 = uint64(targetCount) + return i +} + +func (i *instruction) asJmp(target operand) *instruction { + i.kind = jmp + i.op1 = target + return i +} + +func (i *instruction) jmpLabel() backend.Label { + switch i.kind { + case jmp, jmpIf, lea, xmmUnaryRmR: + return i.op1.label() + default: + panic("BUG") + } +} + +func (i *instruction) asLEA(target operand, rd regalloc.VReg) *instruction { + i.kind = lea + i.op1 = target + i.op2 = newOperandReg(rd) + return i +} + +func (i *instruction) asCall(ref ssa.FuncRef, abi *backend.FunctionABI) *instruction { + i.kind = call + i.u1 = uint64(ref) + if abi != nil { + i.u2 = abi.ABIInfoAsUint64() + } + return i +} + +func (i *instruction) asCallIndirect(ptr operand, abi *backend.FunctionABI) *instruction { + if ptr.kind != operandKindReg && ptr.kind != operandKindMem { + panic("BUG") + } + i.kind = callIndirect + i.op1 = ptr + if abi != nil { + i.u2 = abi.ABIInfoAsUint64() + } + return i +} + +func (i *instruction) asRet() *instruction { + i.kind = ret + return i +} + +func (i *instruction) asImm(dst regalloc.VReg, value uint64, _64 bool) *instruction { + i.kind = imm + i.op2 = newOperandReg(dst) + i.u1 = value + i.b1 = _64 + return i +} + +func (i 
*instruction) asAluRmiR(op aluRmiROpcode, rm operand, rd regalloc.VReg, _64 bool) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem && rm.kind != operandKindImm32 { + panic("BUG") + } + i.kind = aluRmiR + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + i.b1 = _64 + return i +} + +func (i *instruction) asZeros(dst regalloc.VReg) *instruction { + i.kind = zeros + i.op2 = newOperandReg(dst) + return i +} + +func (i *instruction) asBlendvpd(rm operand, rd regalloc.VReg) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = blendvpd + i.op1 = rm + i.op2 = newOperandReg(rd) + return i +} + +func (i *instruction) asXmmRmR(op sseOpcode, rm operand, rd regalloc.VReg) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = xmmRmR + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + return i +} + +func (i *instruction) asXmmRmRImm(op sseOpcode, imm uint8, rm operand, rd regalloc.VReg) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = xmmRmRImm + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + i.u2 = uint64(imm) + return i +} + +func (i *instruction) asGprToXmm(op sseOpcode, rm operand, rd regalloc.VReg, _64 bool) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = gprToXmm + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + i.b1 = _64 + return i +} + +func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { + i.kind = sourceOffsetInfo + i.u1 = uint64(l) + return i +} + +func (i *instruction) sourceOffsetInfo() ssa.SourceOffset { + return ssa.SourceOffset(i.u1) +} + +func (i *instruction) asXmmToGpr(op sseOpcode, rm, rd regalloc.VReg, _64 bool) *instruction { + i.kind = xmmToGpr + i.op1 = newOperandReg(rm) + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + i.b1 = _64 + 
return i +} + +func (i *instruction) asMovRM(rm regalloc.VReg, rd operand, size byte) *instruction { + if rd.kind != operandKindMem { + panic("BUG") + } + i.kind = movRM + i.op1 = newOperandReg(rm) + i.op2 = rd + i.u1 = uint64(size) + return i +} + +func (i *instruction) asMovsxRmR(ext extMode, src operand, rd regalloc.VReg) *instruction { + if src.kind != operandKindReg && src.kind != operandKindMem { + panic("BUG") + } + i.kind = movsxRmR + i.op1 = src + i.op2 = newOperandReg(rd) + i.u1 = uint64(ext) + return i +} + +func (i *instruction) asMovzxRmR(ext extMode, src operand, rd regalloc.VReg) *instruction { + if src.kind != operandKindReg && src.kind != operandKindMem { + panic("BUG") + } + i.kind = movzxRmR + i.op1 = src + i.op2 = newOperandReg(rd) + i.u1 = uint64(ext) + return i +} + +func (i *instruction) asSignExtendData(_64 bool) *instruction { + i.kind = signExtendData + i.b1 = _64 + return i +} + +func (i *instruction) asUD2() *instruction { + i.kind = ud2 + return i +} + +func (i *instruction) asDiv(rn operand, signed bool, _64 bool) *instruction { + i.kind = div + i.op1 = rn + i.b1 = _64 + if signed { + i.u1 = 1 + } + return i +} + +func (i *instruction) asMov64MR(rm operand, rd regalloc.VReg) *instruction { + if rm.kind != operandKindMem { + panic("BUG") + } + i.kind = mov64MR + i.op1 = rm + i.op2 = newOperandReg(rd) + return i +} + +func (i *instruction) asMovRR(rm, rd regalloc.VReg, _64 bool) *instruction { + i.kind = movRR + i.op1 = newOperandReg(rm) + i.op2 = newOperandReg(rd) + i.b1 = _64 + return i +} + +func (i *instruction) asNot(rm operand, _64 bool) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = not + i.op1 = rm + i.b1 = _64 + return i +} + +func (i *instruction) asNeg(rm operand, _64 bool) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = neg + i.op1 = rm + i.b1 = _64 + return i +} + +func (i *instruction) asMulHi(rm operand, 
signed, _64 bool) *instruction { + if rm.kind != operandKindReg && (rm.kind != operandKindMem) { + panic("BUG") + } + i.kind = mulHi + i.op1 = rm + i.b1 = _64 + if signed { + i.u1 = 1 + } + return i +} + +func (i *instruction) asUnaryRmR(op unaryRmROpcode, rm operand, rd regalloc.VReg, _64 bool) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = unaryRmR + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + i.b1 = _64 + return i +} + +func (i *instruction) asShiftR(op shiftROp, amount operand, rd regalloc.VReg, _64 bool) *instruction { + if amount.kind != operandKindReg && amount.kind != operandKindImm32 { + panic("BUG") + } + i.kind = shiftR + i.op1 = amount + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + i.b1 = _64 + return i +} + +func (i *instruction) asXmmRmiReg(op sseOpcode, rm operand, rd regalloc.VReg) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindImm32 && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = xmmRmiReg + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + return i +} + +func (i *instruction) asCmpRmiR(cmp bool, rm operand, rn regalloc.VReg, _64 bool) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindImm32 && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = cmpRmiR + i.op1 = rm + i.op2 = newOperandReg(rn) + if cmp { + i.u1 = 1 + } + i.b1 = _64 + return i +} + +func (i *instruction) asSetcc(c cond, rd regalloc.VReg) *instruction { + i.kind = setcc + i.op2 = newOperandReg(rd) + i.u1 = uint64(c) + return i +} + +func (i *instruction) asCmove(c cond, rm operand, rd regalloc.VReg, _64 bool) *instruction { + i.kind = cmove + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(c) + i.b1 = _64 + return i +} + +func (m *machine) allocateExitSeq(execCtx regalloc.VReg) *instruction { + i := m.allocateInstr() + i.kind = exitSequence + i.op1 = newOperandReg(execCtx) + // Allocate the address mode that will be used in 
encoding the exit sequence. + i.op2 = newOperandMem(m.amodePool.Allocate()) + return i +} + +func (i *instruction) asXmmUnaryRmR(op sseOpcode, rm operand, rd regalloc.VReg) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = xmmUnaryRmR + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + return i +} + +func (i *instruction) asXmmUnaryRmRImm(op sseOpcode, imm byte, rm operand, rd regalloc.VReg) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = xmmUnaryRmRImm + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + i.u2 = uint64(imm) + return i +} + +func (i *instruction) asXmmCmpRmR(op sseOpcode, rm operand, rd regalloc.VReg) *instruction { + if rm.kind != operandKindReg && rm.kind != operandKindMem { + panic("BUG") + } + i.kind = xmmCmpRmR + i.op1 = rm + i.op2 = newOperandReg(rd) + i.u1 = uint64(op) + return i +} + +func (i *instruction) asXmmMovRM(op sseOpcode, rm regalloc.VReg, rd operand) *instruction { + if rd.kind != operandKindMem { + panic("BUG") + } + i.kind = xmmMovRM + i.op1 = newOperandReg(rm) + i.op2 = rd + i.u1 = uint64(op) + return i +} + +func (i *instruction) asPop64(rm regalloc.VReg) *instruction { + i.kind = pop64 + i.op1 = newOperandReg(rm) + return i +} + +func (i *instruction) asPush64(op operand) *instruction { + if op.kind != operandKindReg && op.kind != operandKindMem && op.kind != operandKindImm32 { + panic("BUG") + } + i.kind = push64 + i.op1 = op + return i +} + +func (i *instruction) asXCHG(rm regalloc.VReg, rd operand, size byte) *instruction { + i.kind = xchg + i.op1 = newOperandReg(rm) + i.op2 = rd + i.u1 = uint64(size) + return i +} + +func (i *instruction) asLockCmpXCHG(rm regalloc.VReg, rd *amode, size byte) *instruction { + i.kind = lockcmpxchg + i.op1 = newOperandReg(rm) + i.op2 = newOperandMem(rd) + i.u1 = uint64(size) + return i +} + +func (i *instruction) asLockXAdd(rm regalloc.VReg, rd *amode, size 
byte) *instruction { + i.kind = lockxadd + i.op1 = newOperandReg(rm) + i.op2 = newOperandMem(rd) + i.u1 = uint64(size) + return i +} + +type unaryRmROpcode byte + +const ( + unaryRmROpcodeBsr unaryRmROpcode = iota + unaryRmROpcodeBsf + unaryRmROpcodeLzcnt + unaryRmROpcodeTzcnt + unaryRmROpcodePopcnt +) + +func (u unaryRmROpcode) String() string { + switch u { + case unaryRmROpcodeBsr: + return "bsr" + case unaryRmROpcodeBsf: + return "bsf" + case unaryRmROpcodeLzcnt: + return "lzcnt" + case unaryRmROpcodeTzcnt: + return "tzcnt" + case unaryRmROpcodePopcnt: + return "popcnt" + default: + panic("BUG") + } +} + +type shiftROp byte + +const ( + shiftROpRotateLeft shiftROp = 0 + shiftROpRotateRight shiftROp = 1 + shiftROpShiftLeft shiftROp = 4 + shiftROpShiftRightLogical shiftROp = 5 + shiftROpShiftRightArithmetic shiftROp = 7 +) + +func (s shiftROp) String() string { + switch s { + case shiftROpRotateLeft: + return "rol" + case shiftROpRotateRight: + return "ror" + case shiftROpShiftLeft: + return "shl" + case shiftROpShiftRightLogical: + return "shr" + case shiftROpShiftRightArithmetic: + return "sar" + default: + panic("BUG") + } +} + +type sseOpcode byte + +const ( + sseOpcodeInvalid sseOpcode = iota + sseOpcodeAddps + sseOpcodeAddpd + sseOpcodeAddss + sseOpcodeAddsd + sseOpcodeAndps + sseOpcodeAndpd + sseOpcodeAndnps + sseOpcodeAndnpd + sseOpcodeBlendvps + sseOpcodeBlendvpd + sseOpcodeComiss + sseOpcodeComisd + sseOpcodeCmpps + sseOpcodeCmppd + sseOpcodeCmpss + sseOpcodeCmpsd + sseOpcodeCvtdq2ps + sseOpcodeCvtdq2pd + sseOpcodeCvtsd2ss + sseOpcodeCvtsd2si + sseOpcodeCvtsi2ss + sseOpcodeCvtsi2sd + sseOpcodeCvtss2si + sseOpcodeCvtss2sd + sseOpcodeCvttps2dq + sseOpcodeCvttss2si + sseOpcodeCvttsd2si + sseOpcodeDivps + sseOpcodeDivpd + sseOpcodeDivss + sseOpcodeDivsd + sseOpcodeInsertps + sseOpcodeMaxps + sseOpcodeMaxpd + sseOpcodeMaxss + sseOpcodeMaxsd + sseOpcodeMinps + sseOpcodeMinpd + sseOpcodeMinss + sseOpcodeMinsd + sseOpcodeMovaps + sseOpcodeMovapd + sseOpcodeMovd 
	// Moves and mask extraction.
	sseOpcodeMovdqa
	sseOpcodeMovdqu
	sseOpcodeMovlhps
	sseOpcodeMovmskps
	sseOpcodeMovmskpd
	sseOpcodeMovq
	sseOpcodeMovss
	sseOpcodeMovsd
	sseOpcodeMovups
	sseOpcodeMovupd
	// Floating-point multiply, scalar and packed.
	sseOpcodeMulps
	sseOpcodeMulpd
	sseOpcodeMulss
	sseOpcodeMulsd
	sseOpcodeOrps
	sseOpcodeOrpd
	// Packed-integer ops (p-prefixed).
	sseOpcodePabsb
	sseOpcodePabsw
	sseOpcodePabsd
	sseOpcodePackssdw
	sseOpcodePacksswb
	sseOpcodePackusdw
	sseOpcodePackuswb
	sseOpcodePaddb
	sseOpcodePaddd
	sseOpcodePaddq
	sseOpcodePaddw
	sseOpcodePaddsb
	sseOpcodePaddsw
	sseOpcodePaddusb
	sseOpcodePaddusw
	sseOpcodePalignr
	sseOpcodePand
	sseOpcodePandn
	sseOpcodePavgb
	sseOpcodePavgw
	sseOpcodePcmpeqb
	sseOpcodePcmpeqw
	sseOpcodePcmpeqd
	sseOpcodePcmpeqq
	sseOpcodePcmpgtb
	sseOpcodePcmpgtw
	sseOpcodePcmpgtd
	sseOpcodePcmpgtq
	sseOpcodePextrb
	sseOpcodePextrw
	sseOpcodePextrd
	sseOpcodePextrq
	sseOpcodePinsrb
	sseOpcodePinsrw
	sseOpcodePinsrd
	sseOpcodePinsrq
	sseOpcodePmaddwd
	sseOpcodePmaxsb
	sseOpcodePmaxsw
	sseOpcodePmaxsd
	sseOpcodePmaxub
	sseOpcodePmaxuw
	sseOpcodePmaxud
	sseOpcodePminsb
	sseOpcodePminsw
	sseOpcodePminsd
	sseOpcodePminub
	sseOpcodePminuw
	sseOpcodePminud
	sseOpcodePmovmskb
	sseOpcodePmovsxbd
	sseOpcodePmovsxbw
	sseOpcodePmovsxbq
	sseOpcodePmovsxwd
	sseOpcodePmovsxwq
	sseOpcodePmovsxdq
	sseOpcodePmovzxbd
	sseOpcodePmovzxbw
	sseOpcodePmovzxbq
	sseOpcodePmovzxwd
	sseOpcodePmovzxwq
	sseOpcodePmovzxdq
	sseOpcodePmulld
	sseOpcodePmullw
	sseOpcodePmuludq
	sseOpcodePor
	sseOpcodePshufb
	sseOpcodePshufd
	sseOpcodePsllw
	sseOpcodePslld
	sseOpcodePsllq
	sseOpcodePsraw
	sseOpcodePsrad
	sseOpcodePsrlw
	sseOpcodePsrld
	sseOpcodePsrlq
	sseOpcodePsubb
	sseOpcodePsubd
	sseOpcodePsubq
	sseOpcodePsubw
	sseOpcodePsubsb
	sseOpcodePsubsw
	sseOpcodePsubusb
	sseOpcodePsubusw
	sseOpcodePtest
	sseOpcodePunpckhbw
	sseOpcodePunpcklbw
	sseOpcodePxor
	// Reciprocal / rounding.
	sseOpcodeRcpss
	sseOpcodeRoundps
	sseOpcodeRoundpd
	sseOpcodeRoundss
	sseOpcodeRoundsd
sseOpcodeRsqrtss + sseOpcodeSqrtps + sseOpcodeSqrtpd + sseOpcodeSqrtss + sseOpcodeSqrtsd + sseOpcodeSubps + sseOpcodeSubpd + sseOpcodeSubss + sseOpcodeSubsd + sseOpcodeUcomiss + sseOpcodeUcomisd + sseOpcodeXorps + sseOpcodeXorpd + sseOpcodePmulhrsw + sseOpcodeUnpcklps + sseOpcodeCvtps2pd + sseOpcodeCvtpd2ps + sseOpcodeCvttpd2dq + sseOpcodeShufps + sseOpcodePmaddubsw +) + +func (s sseOpcode) String() string { + switch s { + case sseOpcodeInvalid: + return "invalid" + case sseOpcodeAddps: + return "addps" + case sseOpcodeAddpd: + return "addpd" + case sseOpcodeAddss: + return "addss" + case sseOpcodeAddsd: + return "addsd" + case sseOpcodeAndps: + return "andps" + case sseOpcodeAndpd: + return "andpd" + case sseOpcodeAndnps: + return "andnps" + case sseOpcodeAndnpd: + return "andnpd" + case sseOpcodeBlendvps: + return "blendvps" + case sseOpcodeBlendvpd: + return "blendvpd" + case sseOpcodeComiss: + return "comiss" + case sseOpcodeComisd: + return "comisd" + case sseOpcodeCmpps: + return "cmpps" + case sseOpcodeCmppd: + return "cmppd" + case sseOpcodeCmpss: + return "cmpss" + case sseOpcodeCmpsd: + return "cmpsd" + case sseOpcodeCvtdq2ps: + return "cvtdq2ps" + case sseOpcodeCvtdq2pd: + return "cvtdq2pd" + case sseOpcodeCvtsd2ss: + return "cvtsd2ss" + case sseOpcodeCvtsd2si: + return "cvtsd2si" + case sseOpcodeCvtsi2ss: + return "cvtsi2ss" + case sseOpcodeCvtsi2sd: + return "cvtsi2sd" + case sseOpcodeCvtss2si: + return "cvtss2si" + case sseOpcodeCvtss2sd: + return "cvtss2sd" + case sseOpcodeCvttps2dq: + return "cvttps2dq" + case sseOpcodeCvttss2si: + return "cvttss2si" + case sseOpcodeCvttsd2si: + return "cvttsd2si" + case sseOpcodeDivps: + return "divps" + case sseOpcodeDivpd: + return "divpd" + case sseOpcodeDivss: + return "divss" + case sseOpcodeDivsd: + return "divsd" + case sseOpcodeInsertps: + return "insertps" + case sseOpcodeMaxps: + return "maxps" + case sseOpcodeMaxpd: + return "maxpd" + case sseOpcodeMaxss: + return "maxss" + case sseOpcodeMaxsd: + return 
"maxsd" + case sseOpcodeMinps: + return "minps" + case sseOpcodeMinpd: + return "minpd" + case sseOpcodeMinss: + return "minss" + case sseOpcodeMinsd: + return "minsd" + case sseOpcodeMovaps: + return "movaps" + case sseOpcodeMovapd: + return "movapd" + case sseOpcodeMovd: + return "movd" + case sseOpcodeMovdqa: + return "movdqa" + case sseOpcodeMovdqu: + return "movdqu" + case sseOpcodeMovlhps: + return "movlhps" + case sseOpcodeMovmskps: + return "movmskps" + case sseOpcodeMovmskpd: + return "movmskpd" + case sseOpcodeMovq: + return "movq" + case sseOpcodeMovss: + return "movss" + case sseOpcodeMovsd: + return "movsd" + case sseOpcodeMovups: + return "movups" + case sseOpcodeMovupd: + return "movupd" + case sseOpcodeMulps: + return "mulps" + case sseOpcodeMulpd: + return "mulpd" + case sseOpcodeMulss: + return "mulss" + case sseOpcodeMulsd: + return "mulsd" + case sseOpcodeOrps: + return "orps" + case sseOpcodeOrpd: + return "orpd" + case sseOpcodePabsb: + return "pabsb" + case sseOpcodePabsw: + return "pabsw" + case sseOpcodePabsd: + return "pabsd" + case sseOpcodePackssdw: + return "packssdw" + case sseOpcodePacksswb: + return "packsswb" + case sseOpcodePackusdw: + return "packusdw" + case sseOpcodePackuswb: + return "packuswb" + case sseOpcodePaddb: + return "paddb" + case sseOpcodePaddd: + return "paddd" + case sseOpcodePaddq: + return "paddq" + case sseOpcodePaddw: + return "paddw" + case sseOpcodePaddsb: + return "paddsb" + case sseOpcodePaddsw: + return "paddsw" + case sseOpcodePaddusb: + return "paddusb" + case sseOpcodePaddusw: + return "paddusw" + case sseOpcodePalignr: + return "palignr" + case sseOpcodePand: + return "pand" + case sseOpcodePandn: + return "pandn" + case sseOpcodePavgb: + return "pavgb" + case sseOpcodePavgw: + return "pavgw" + case sseOpcodePcmpeqb: + return "pcmpeqb" + case sseOpcodePcmpeqw: + return "pcmpeqw" + case sseOpcodePcmpeqd: + return "pcmpeqd" + case sseOpcodePcmpeqq: + return "pcmpeqq" + case sseOpcodePcmpgtb: + return 
"pcmpgtb" + case sseOpcodePcmpgtw: + return "pcmpgtw" + case sseOpcodePcmpgtd: + return "pcmpgtd" + case sseOpcodePcmpgtq: + return "pcmpgtq" + case sseOpcodePextrb: + return "pextrb" + case sseOpcodePextrw: + return "pextrw" + case sseOpcodePextrd: + return "pextrd" + case sseOpcodePextrq: + return "pextrq" + case sseOpcodePinsrb: + return "pinsrb" + case sseOpcodePinsrw: + return "pinsrw" + case sseOpcodePinsrd: + return "pinsrd" + case sseOpcodePinsrq: + return "pinsrq" + case sseOpcodePmaddwd: + return "pmaddwd" + case sseOpcodePmaxsb: + return "pmaxsb" + case sseOpcodePmaxsw: + return "pmaxsw" + case sseOpcodePmaxsd: + return "pmaxsd" + case sseOpcodePmaxub: + return "pmaxub" + case sseOpcodePmaxuw: + return "pmaxuw" + case sseOpcodePmaxud: + return "pmaxud" + case sseOpcodePminsb: + return "pminsb" + case sseOpcodePminsw: + return "pminsw" + case sseOpcodePminsd: + return "pminsd" + case sseOpcodePminub: + return "pminub" + case sseOpcodePminuw: + return "pminuw" + case sseOpcodePminud: + return "pminud" + case sseOpcodePmovmskb: + return "pmovmskb" + case sseOpcodePmovsxbd: + return "pmovsxbd" + case sseOpcodePmovsxbw: + return "pmovsxbw" + case sseOpcodePmovsxbq: + return "pmovsxbq" + case sseOpcodePmovsxwd: + return "pmovsxwd" + case sseOpcodePmovsxwq: + return "pmovsxwq" + case sseOpcodePmovsxdq: + return "pmovsxdq" + case sseOpcodePmovzxbd: + return "pmovzxbd" + case sseOpcodePmovzxbw: + return "pmovzxbw" + case sseOpcodePmovzxbq: + return "pmovzxbq" + case sseOpcodePmovzxwd: + return "pmovzxwd" + case sseOpcodePmovzxwq: + return "pmovzxwq" + case sseOpcodePmovzxdq: + return "pmovzxdq" + case sseOpcodePmulld: + return "pmulld" + case sseOpcodePmullw: + return "pmullw" + case sseOpcodePmuludq: + return "pmuludq" + case sseOpcodePor: + return "por" + case sseOpcodePshufb: + return "pshufb" + case sseOpcodePshufd: + return "pshufd" + case sseOpcodePsllw: + return "psllw" + case sseOpcodePslld: + return "pslld" + case sseOpcodePsllq: + return "psllq" + case 
sseOpcodePsraw: + return "psraw" + case sseOpcodePsrad: + return "psrad" + case sseOpcodePsrlw: + return "psrlw" + case sseOpcodePsrld: + return "psrld" + case sseOpcodePsrlq: + return "psrlq" + case sseOpcodePsubb: + return "psubb" + case sseOpcodePsubd: + return "psubd" + case sseOpcodePsubq: + return "psubq" + case sseOpcodePsubw: + return "psubw" + case sseOpcodePsubsb: + return "psubsb" + case sseOpcodePsubsw: + return "psubsw" + case sseOpcodePsubusb: + return "psubusb" + case sseOpcodePsubusw: + return "psubusw" + case sseOpcodePtest: + return "ptest" + case sseOpcodePunpckhbw: + return "punpckhbw" + case sseOpcodePunpcklbw: + return "punpcklbw" + case sseOpcodePxor: + return "pxor" + case sseOpcodeRcpss: + return "rcpss" + case sseOpcodeRoundps: + return "roundps" + case sseOpcodeRoundpd: + return "roundpd" + case sseOpcodeRoundss: + return "roundss" + case sseOpcodeRoundsd: + return "roundsd" + case sseOpcodeRsqrtss: + return "rsqrtss" + case sseOpcodeSqrtps: + return "sqrtps" + case sseOpcodeSqrtpd: + return "sqrtpd" + case sseOpcodeSqrtss: + return "sqrtss" + case sseOpcodeSqrtsd: + return "sqrtsd" + case sseOpcodeSubps: + return "subps" + case sseOpcodeSubpd: + return "subpd" + case sseOpcodeSubss: + return "subss" + case sseOpcodeSubsd: + return "subsd" + case sseOpcodeUcomiss: + return "ucomiss" + case sseOpcodeUcomisd: + return "ucomisd" + case sseOpcodeXorps: + return "xorps" + case sseOpcodeXorpd: + return "xorpd" + case sseOpcodePmulhrsw: + return "pmulhrsw" + case sseOpcodeUnpcklps: + return "unpcklps" + case sseOpcodeCvtps2pd: + return "cvtps2pd" + case sseOpcodeCvtpd2ps: + return "cvtpd2ps" + case sseOpcodeCvttpd2dq: + return "cvttpd2dq" + case sseOpcodeShufps: + return "shufps" + case sseOpcodePmaddubsw: + return "pmaddubsw" + default: + panic("BUG") + } +} + +type roundingMode uint8 + +const ( + roundingModeNearest roundingMode = iota + roundingModeDown + roundingModeUp + roundingModeZero +) + +func (r roundingMode) String() string { + switch 
r { + case roundingModeNearest: + return "nearest" + case roundingModeDown: + return "down" + case roundingModeUp: + return "up" + case roundingModeZero: + return "zero" + default: + panic("BUG") + } +} + +// cmpPred is the immediate value for a comparison operation in xmmRmRImm. +type cmpPred uint8 + +const ( + // cmpPredEQ_OQ is Equal (ordered, non-signaling) + cmpPredEQ_OQ cmpPred = iota + // cmpPredLT_OS is Less-than (ordered, signaling) + cmpPredLT_OS + // cmpPredLE_OS is Less-than-or-equal (ordered, signaling) + cmpPredLE_OS + // cmpPredUNORD_Q is Unordered (non-signaling) + cmpPredUNORD_Q + // cmpPredNEQ_UQ is Not-equal (unordered, non-signaling) + cmpPredNEQ_UQ + // cmpPredNLT_US is Not-less-than (unordered, signaling) + cmpPredNLT_US + // cmpPredNLE_US is Not-less-than-or-equal (unordered, signaling) + cmpPredNLE_US + // cmpPredORD_Q is Ordered (non-signaling) + cmpPredORD_Q + // cmpPredEQ_UQ is Equal (unordered, non-signaling) + cmpPredEQ_UQ + // cmpPredNGE_US is Not-greater-than-or-equal (unordered, signaling) + cmpPredNGE_US + // cmpPredNGT_US is Not-greater-than (unordered, signaling) + cmpPredNGT_US + // cmpPredFALSE_OQ is False (ordered, non-signaling) + cmpPredFALSE_OQ + // cmpPredNEQ_OQ is Not-equal (ordered, non-signaling) + cmpPredNEQ_OQ + // cmpPredGE_OS is Greater-than-or-equal (ordered, signaling) + cmpPredGE_OS + // cmpPredGT_OS is Greater-than (ordered, signaling) + cmpPredGT_OS + // cmpPredTRUE_UQ is True (unordered, non-signaling) + cmpPredTRUE_UQ + // Equal (ordered, signaling) + cmpPredEQ_OS + // Less-than (ordered, nonsignaling) + cmpPredLT_OQ + // Less-than-or-equal (ordered, nonsignaling) + cmpPredLE_OQ + // Unordered (signaling) + cmpPredUNORD_S + // Not-equal (unordered, signaling) + cmpPredNEQ_US + // Not-less-than (unordered, nonsignaling) + cmpPredNLT_UQ + // Not-less-than-or-equal (unordered, nonsignaling) + cmpPredNLE_UQ + // Ordered (signaling) + cmpPredORD_S + // Equal (unordered, signaling) + cmpPredEQ_US + // 
Not-greater-than-or-equal (unordered, non-signaling) + cmpPredNGE_UQ + // Not-greater-than (unordered, nonsignaling) + cmpPredNGT_UQ + // False (ordered, signaling) + cmpPredFALSE_OS + // Not-equal (ordered, signaling) + cmpPredNEQ_OS + // Greater-than-or-equal (ordered, nonsignaling) + cmpPredGE_OQ + // Greater-than (ordered, nonsignaling) + cmpPredGT_OQ + // True (unordered, signaling) + cmpPredTRUE_US +) + +func (r cmpPred) String() string { + switch r { + case cmpPredEQ_OQ: + return "eq_oq" + case cmpPredLT_OS: + return "lt_os" + case cmpPredLE_OS: + return "le_os" + case cmpPredUNORD_Q: + return "unord_q" + case cmpPredNEQ_UQ: + return "neq_uq" + case cmpPredNLT_US: + return "nlt_us" + case cmpPredNLE_US: + return "nle_us" + case cmpPredORD_Q: + return "ord_q" + case cmpPredEQ_UQ: + return "eq_uq" + case cmpPredNGE_US: + return "nge_us" + case cmpPredNGT_US: + return "ngt_us" + case cmpPredFALSE_OQ: + return "false_oq" + case cmpPredNEQ_OQ: + return "neq_oq" + case cmpPredGE_OS: + return "ge_os" + case cmpPredGT_OS: + return "gt_os" + case cmpPredTRUE_UQ: + return "true_uq" + case cmpPredEQ_OS: + return "eq_os" + case cmpPredLT_OQ: + return "lt_oq" + case cmpPredLE_OQ: + return "le_oq" + case cmpPredUNORD_S: + return "unord_s" + case cmpPredNEQ_US: + return "neq_us" + case cmpPredNLT_UQ: + return "nlt_uq" + case cmpPredNLE_UQ: + return "nle_uq" + case cmpPredORD_S: + return "ord_s" + case cmpPredEQ_US: + return "eq_us" + case cmpPredNGE_UQ: + return "nge_uq" + case cmpPredNGT_UQ: + return "ngt_uq" + case cmpPredFALSE_OS: + return "false_os" + case cmpPredNEQ_OS: + return "neq_os" + case cmpPredGE_OQ: + return "ge_oq" + case cmpPredGT_OQ: + return "gt_oq" + case cmpPredTRUE_US: + return "true_us" + default: + panic("BUG") + } +} + +func linkInstr(prev, next *instruction) *instruction { + prev.next = next + next.prev = prev + return next +} + +type defKind byte + +const ( + defKindNone defKind = iota + 1 + defKindOp2 + defKindCall + defKindDivRem +) + +var 
defKinds = [instrMax]defKind{ + nop0: defKindNone, + ret: defKindNone, + movRR: defKindOp2, + movRM: defKindNone, + xmmMovRM: defKindNone, + aluRmiR: defKindNone, + shiftR: defKindNone, + imm: defKindOp2, + unaryRmR: defKindOp2, + xmmRmiReg: defKindNone, + xmmUnaryRmR: defKindOp2, + xmmUnaryRmRImm: defKindOp2, + xmmCmpRmR: defKindNone, + xmmRmR: defKindNone, + xmmRmRImm: defKindNone, + mov64MR: defKindOp2, + movsxRmR: defKindOp2, + movzxRmR: defKindOp2, + gprToXmm: defKindOp2, + xmmToGpr: defKindOp2, + cmove: defKindNone, + call: defKindCall, + callIndirect: defKindCall, + ud2: defKindNone, + jmp: defKindNone, + jmpIf: defKindNone, + jmpTableIsland: defKindNone, + cmpRmiR: defKindNone, + exitSequence: defKindNone, + lea: defKindOp2, + setcc: defKindOp2, + zeros: defKindOp2, + sourceOffsetInfo: defKindNone, + fcvtToSintSequence: defKindNone, + defineUninitializedReg: defKindOp2, + fcvtToUintSequence: defKindNone, + xmmCMov: defKindOp2, + idivRemSequence: defKindDivRem, + blendvpd: defKindNone, + mfence: defKindNone, + xchg: defKindNone, + lockcmpxchg: defKindNone, + lockxadd: defKindNone, + neg: defKindNone, + nopUseReg: defKindNone, +} + +// String implements fmt.Stringer. +func (d defKind) String() string { + switch d { + case defKindNone: + return "none" + case defKindOp2: + return "op2" + case defKindCall: + return "call" + case defKindDivRem: + return "divrem" + default: + return "invalid" + } +} + +type useKind byte + +const ( + useKindNone useKind = iota + 1 + useKindOp1 + // useKindOp1Op2Reg is Op1 can be any operand, Op2 must be a register. + useKindOp1Op2Reg + // useKindOp1RegOp2 is Op1 must be a register, Op2 can be any operand. + useKindOp1RegOp2 + // useKindRaxOp1RegOp2 is Op1 must be a register, Op2 can be any operand, and RAX is used. 
+ useKindRaxOp1RegOp2 + useKindDivRem + useKindBlendvpd + useKindCall + useKindCallInd + useKindFcvtToSintSequence + useKindFcvtToUintSequence +) + +var useKinds = [instrMax]useKind{ + nop0: useKindNone, + ret: useKindNone, + movRR: useKindOp1, + movRM: useKindOp1RegOp2, + xmmMovRM: useKindOp1RegOp2, + cmove: useKindOp1Op2Reg, + aluRmiR: useKindOp1Op2Reg, + shiftR: useKindOp1Op2Reg, + imm: useKindNone, + unaryRmR: useKindOp1, + xmmRmiReg: useKindOp1Op2Reg, + xmmUnaryRmR: useKindOp1, + xmmUnaryRmRImm: useKindOp1, + xmmCmpRmR: useKindOp1Op2Reg, + xmmRmR: useKindOp1Op2Reg, + xmmRmRImm: useKindOp1Op2Reg, + mov64MR: useKindOp1, + movzxRmR: useKindOp1, + movsxRmR: useKindOp1, + gprToXmm: useKindOp1, + xmmToGpr: useKindOp1, + call: useKindCall, + callIndirect: useKindCallInd, + ud2: useKindNone, + jmpIf: useKindOp1, + jmp: useKindOp1, + cmpRmiR: useKindOp1Op2Reg, + exitSequence: useKindOp1, + lea: useKindOp1, + jmpTableIsland: useKindNone, + setcc: useKindNone, + zeros: useKindNone, + sourceOffsetInfo: useKindNone, + fcvtToSintSequence: useKindFcvtToSintSequence, + defineUninitializedReg: useKindNone, + fcvtToUintSequence: useKindFcvtToUintSequence, + xmmCMov: useKindOp1, + idivRemSequence: useKindDivRem, + blendvpd: useKindBlendvpd, + mfence: useKindNone, + xchg: useKindOp1RegOp2, + lockcmpxchg: useKindRaxOp1RegOp2, + lockxadd: useKindOp1RegOp2, + neg: useKindOp1, + nopUseReg: useKindOp1, +} + +func (u useKind) String() string { + switch u { + case useKindNone: + return "none" + case useKindOp1: + return "op1" + case useKindOp1Op2Reg: + return "op1op2Reg" + case useKindOp1RegOp2: + return "op1RegOp2" + case useKindCall: + return "call" + case useKindCallInd: + return "callInd" + default: + return "invalid" + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go new file mode 100644 index 000000000..6637b428c --- 
/dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go @@ -0,0 +1,1683 @@ +package amd64 + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) { + switch kind := i.kind; kind { + case nop0, sourceOffsetInfo, defineUninitializedReg, fcvtToSintSequence, fcvtToUintSequence, nopUseReg: + case ret: + encodeRet(c) + case imm: + dst := regEncodings[i.op2.reg().RealReg()] + con := i.u1 + if i.b1 { // 64 bit. + if lower32willSignExtendTo64(con) { + // Sign extend mov(imm32). + encodeRegReg(c, + legacyPrefixesNone, + 0xc7, 1, + 0, + dst, + rexInfo(0).setW(), + ) + c.Emit4Bytes(uint32(con)) + } else { + c.EmitByte(rexEncodingW | dst.rexBit()) + c.EmitByte(0xb8 | dst.encoding()) + c.Emit8Bytes(con) + } + } else { + if dst.rexBit() > 0 { + c.EmitByte(rexEncodingDefault | 0x1) + } + c.EmitByte(0xb8 | dst.encoding()) + c.Emit4Bytes(uint32(con)) + } + + case aluRmiR: + var rex rexInfo + if i.b1 { + rex = rex.setW() + } else { + rex = rex.clearW() + } + + dst := regEncodings[i.op2.reg().RealReg()] + + aluOp := aluRmiROpcode(i.u1) + if aluOp == aluRmiROpcodeMul { + op1 := i.op1 + const regMemOpc, regMemOpcNum = 0x0FAF, 2 + switch op1.kind { + case operandKindReg: + src := regEncodings[op1.reg().RealReg()] + encodeRegReg(c, legacyPrefixesNone, regMemOpc, regMemOpcNum, dst, src, rex) + case operandKindMem: + m := i.op1.addressMode() + encodeRegMem(c, legacyPrefixesNone, regMemOpc, regMemOpcNum, dst, m, rex) + case operandKindImm32: + imm8 := lower8willSignExtendTo32(op1.imm32()) + var opc uint32 + if imm8 { + opc = 0x6b + } else { + opc = 0x69 + } + encodeRegReg(c, legacyPrefixesNone, opc, 1, dst, dst, rex) + if imm8 
{ + c.EmitByte(byte(op1.imm32())) + } else { + c.Emit4Bytes(op1.imm32()) + } + default: + panic("BUG: invalid operand kind") + } + } else { + const opcodeNum = 1 + var opcR, opcM, subOpcImm uint32 + switch aluOp { + case aluRmiROpcodeAdd: + opcR, opcM, subOpcImm = 0x01, 0x03, 0x0 + case aluRmiROpcodeSub: + opcR, opcM, subOpcImm = 0x29, 0x2b, 0x5 + case aluRmiROpcodeAnd: + opcR, opcM, subOpcImm = 0x21, 0x23, 0x4 + case aluRmiROpcodeOr: + opcR, opcM, subOpcImm = 0x09, 0x0b, 0x1 + case aluRmiROpcodeXor: + opcR, opcM, subOpcImm = 0x31, 0x33, 0x6 + default: + panic("BUG: invalid aluRmiROpcode") + } + + op1 := i.op1 + switch op1.kind { + case operandKindReg: + src := regEncodings[op1.reg().RealReg()] + encodeRegReg(c, legacyPrefixesNone, opcR, opcodeNum, src, dst, rex) + case operandKindMem: + m := i.op1.addressMode() + encodeRegMem(c, legacyPrefixesNone, opcM, opcodeNum, dst, m, rex) + case operandKindImm32: + imm8 := lower8willSignExtendTo32(op1.imm32()) + var opc uint32 + if imm8 { + opc = 0x83 + } else { + opc = 0x81 + } + encodeRegReg(c, legacyPrefixesNone, opc, opcodeNum, regEnc(subOpcImm), dst, rex) + if imm8 { + c.EmitByte(byte(op1.imm32())) + } else { + c.Emit4Bytes(op1.imm32()) + } + default: + panic("BUG: invalid operand kind") + } + } + + case movRR: + src := regEncodings[i.op1.reg().RealReg()] + dst := regEncodings[i.op2.reg().RealReg()] + var rex rexInfo + if i.b1 { + rex = rex.setW() + } else { + rex = rex.clearW() + } + encodeRegReg(c, legacyPrefixesNone, 0x89, 1, src, dst, rex) + + case xmmRmR, blendvpd: + op := sseOpcode(i.u1) + var legPrex legacyPrefixes + var opcode uint32 + var opcodeNum uint32 + switch op { + case sseOpcodeAddps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F58, 2 + case sseOpcodeAddpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F58, 2 + case sseOpcodeAddss: + legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F58, 2 + case sseOpcodeAddsd: + legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F58, 2 + case 
sseOpcodeAndps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F54, 2 + case sseOpcodeAndpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F54, 2 + case sseOpcodeAndnps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F55, 2 + case sseOpcodeAndnpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F55, 2 + case sseOpcodeBlendvps: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3814, 3 + case sseOpcodeBlendvpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3815, 3 + case sseOpcodeDivps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5E, 2 + case sseOpcodeDivpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5E, 2 + case sseOpcodeDivss: + legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5E, 2 + case sseOpcodeDivsd: + legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5E, 2 + case sseOpcodeMaxps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5F, 2 + case sseOpcodeMaxpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5F, 2 + case sseOpcodeMaxss: + legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5F, 2 + case sseOpcodeMaxsd: + legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5F, 2 + case sseOpcodeMinps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5D, 2 + case sseOpcodeMinpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5D, 2 + case sseOpcodeMinss: + legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5D, 2 + case sseOpcodeMinsd: + legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5D, 2 + case sseOpcodeMovlhps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F16, 2 + case sseOpcodeMovsd: + legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F10, 2 + case sseOpcodeMulps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F59, 2 + case sseOpcodeMulpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F59, 2 + case sseOpcodeMulss: + legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F59, 2 + case sseOpcodeMulsd: + legPrex, opcode, opcodeNum = 
legacyPrefixes0xF2, 0x0F59, 2 + case sseOpcodeOrpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F56, 2 + case sseOpcodeOrps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F56, 2 + case sseOpcodePackssdw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F6B, 2 + case sseOpcodePacksswb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F63, 2 + case sseOpcodePackusdw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F382B, 3 + case sseOpcodePackuswb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F67, 2 + case sseOpcodePaddb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFC, 2 + case sseOpcodePaddd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFE, 2 + case sseOpcodePaddq: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD4, 2 + case sseOpcodePaddw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFD, 2 + case sseOpcodePaddsb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEC, 2 + case sseOpcodePaddsw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FED, 2 + case sseOpcodePaddusb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDC, 2 + case sseOpcodePaddusw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDD, 2 + case sseOpcodePand: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDB, 2 + case sseOpcodePandn: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDF, 2 + case sseOpcodePavgb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE0, 2 + case sseOpcodePavgw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE3, 2 + case sseOpcodePcmpeqb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F74, 2 + case sseOpcodePcmpeqw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F75, 2 + case sseOpcodePcmpeqd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F76, 2 + case sseOpcodePcmpeqq: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3829, 3 + case sseOpcodePcmpgtb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F64, 2 + case 
sseOpcodePcmpgtw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F65, 2 + case sseOpcodePcmpgtd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F66, 2 + case sseOpcodePcmpgtq: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3837, 3 + case sseOpcodePmaddwd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF5, 2 + case sseOpcodePmaxsb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383C, 3 + case sseOpcodePmaxsw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEE, 2 + case sseOpcodePmaxsd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383D, 3 + case sseOpcodePmaxub: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDE, 2 + case sseOpcodePmaxuw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383E, 3 + case sseOpcodePmaxud: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383F, 3 + case sseOpcodePminsb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3838, 3 + case sseOpcodePminsw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEA, 2 + case sseOpcodePminsd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3839, 3 + case sseOpcodePminub: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDA, 2 + case sseOpcodePminuw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383A, 3 + case sseOpcodePminud: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383B, 3 + case sseOpcodePmulld: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3840, 3 + case sseOpcodePmullw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD5, 2 + case sseOpcodePmuludq: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF4, 2 + case sseOpcodePor: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEB, 2 + case sseOpcodePshufb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3800, 3 + case sseOpcodePsubb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF8, 2 + case sseOpcodePsubd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFA, 2 + case sseOpcodePsubq: + 
legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFB, 2 + case sseOpcodePsubw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF9, 2 + case sseOpcodePsubsb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE8, 2 + case sseOpcodePsubsw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE9, 2 + case sseOpcodePsubusb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD8, 2 + case sseOpcodePsubusw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD9, 2 + case sseOpcodePunpckhbw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F68, 2 + case sseOpcodePunpcklbw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F60, 2 + case sseOpcodePxor: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEF, 2 + case sseOpcodeSubps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5C, 2 + case sseOpcodeSubpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5C, 2 + case sseOpcodeSubss: + legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5C, 2 + case sseOpcodeSubsd: + legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5C, 2 + case sseOpcodeXorps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F57, 2 + case sseOpcodeXorpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F57, 2 + case sseOpcodePmulhrsw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F380B, 3 + case sseOpcodeUnpcklps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F14, 2 + case sseOpcodePmaddubsw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3804, 3 + default: + if kind == blendvpd { + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3815, 3 + } else { + panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) + } + } + + dst := regEncodings[i.op2.reg().RealReg()] + + rex := rexInfo(0).clearW() + op1 := i.op1 + if op1.kind == operandKindReg { + src := regEncodings[op1.reg().RealReg()] + encodeRegReg(c, legPrex, opcode, opcodeNum, dst, src, rex) + } else if i.op1.kind == operandKindMem { + m := i.op1.addressMode() + 
encodeRegMem(c, legPrex, opcode, opcodeNum, dst, m, rex) + } else { + panic("BUG: invalid operand kind") + } + + case gprToXmm: + var legPrefix legacyPrefixes + var opcode uint32 + const opcodeNum = 2 + switch sseOpcode(i.u1) { + case sseOpcodeMovd, sseOpcodeMovq: + legPrefix, opcode = legacyPrefixes0x66, 0x0f6e + case sseOpcodeCvtsi2ss: + legPrefix, opcode = legacyPrefixes0xF3, 0x0f2a + case sseOpcodeCvtsi2sd: + legPrefix, opcode = legacyPrefixes0xF2, 0x0f2a + default: + panic(fmt.Sprintf("Unsupported sseOpcode: %s", sseOpcode(i.u1))) + } + + var rex rexInfo + if i.b1 { + rex = rex.setW() + } else { + rex = rex.clearW() + } + dst := regEncodings[i.op2.reg().RealReg()] + + op1 := i.op1 + if op1.kind == operandKindReg { + src := regEncodings[op1.reg().RealReg()] + encodeRegReg(c, legPrefix, opcode, opcodeNum, dst, src, rex) + } else if i.op1.kind == operandKindMem { + m := i.op1.addressMode() + encodeRegMem(c, legPrefix, opcode, opcodeNum, dst, m, rex) + } else { + panic("BUG: invalid operand kind") + } + + case xmmUnaryRmR: + var prefix legacyPrefixes + var opcode uint32 + var opcodeNum uint32 + op := sseOpcode(i.u1) + switch op { + case sseOpcodeCvtss2sd: + prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5A, 2 + case sseOpcodeCvtsd2ss: + prefix, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5A, 2 + case sseOpcodeMovaps: + prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F28, 2 + case sseOpcodeMovapd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F28, 2 + case sseOpcodeMovdqa: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F6F, 2 + case sseOpcodeMovdqu: + prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F6F, 2 + case sseOpcodeMovsd: + prefix, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F10, 2 + case sseOpcodeMovss: + prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F10, 2 + case sseOpcodeMovups: + prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F10, 2 + case sseOpcodeMovupd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F10, 2 + case 
sseOpcodePabsb: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F381C, 3 + case sseOpcodePabsw: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F381D, 3 + case sseOpcodePabsd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F381E, 3 + case sseOpcodePmovsxbd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3821, 3 + case sseOpcodePmovsxbw: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3820, 3 + case sseOpcodePmovsxbq: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3822, 3 + case sseOpcodePmovsxwd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3823, 3 + case sseOpcodePmovsxwq: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3824, 3 + case sseOpcodePmovsxdq: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3825, 3 + case sseOpcodePmovzxbd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3831, 3 + case sseOpcodePmovzxbw: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3830, 3 + case sseOpcodePmovzxbq: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3832, 3 + case sseOpcodePmovzxwd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3833, 3 + case sseOpcodePmovzxwq: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3834, 3 + case sseOpcodePmovzxdq: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3835, 3 + case sseOpcodeSqrtps: + prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F51, 2 + case sseOpcodeSqrtpd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F51, 2 + case sseOpcodeSqrtss: + prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F51, 2 + case sseOpcodeSqrtsd: + prefix, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F51, 2 + case sseOpcodeXorps: + prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F57, 2 + case sseOpcodeXorpd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F57, 2 + case sseOpcodeCvtdq2ps: + prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F5B, 2 + case sseOpcodeCvtdq2pd: + prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0FE6, 2 + case 
sseOpcodeCvtps2pd: + prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F5A, 2 + case sseOpcodeCvtpd2ps: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5A, 2 + case sseOpcodeCvttps2dq: + prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5B, 2 + case sseOpcodeCvttpd2dq: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE6, 2 + default: + panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) + } + + dst := regEncodings[i.op2.reg().RealReg()] + + rex := rexInfo(0).clearW() + op1 := i.op1 + if op1.kind == operandKindReg { + src := regEncodings[op1.reg().RealReg()] + encodeRegReg(c, prefix, opcode, opcodeNum, dst, src, rex) + } else if i.op1.kind == operandKindMem { + m := i.op1.addressMode() + needsLabelResolution = encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) + } else { + panic("BUG: invalid operand kind") + } + + case xmmUnaryRmRImm: + var prefix legacyPrefixes + var opcode uint32 + var opcodeNum uint32 + op := sseOpcode(i.u1) + switch op { + case sseOpcodeRoundps: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a08, 3 + case sseOpcodeRoundss: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a0a, 3 + case sseOpcodeRoundpd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a09, 3 + case sseOpcodeRoundsd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a0b, 3 + } + rex := rexInfo(0).clearW() + dst := regEncodings[i.op2.reg().RealReg()] + op1 := i.op1 + if op1.kind == operandKindReg { + src := regEncodings[op1.reg().RealReg()] + encodeRegReg(c, prefix, opcode, opcodeNum, dst, src, rex) + } else if i.op1.kind == operandKindMem { + m := i.op1.addressMode() + encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) + } else { + panic("BUG: invalid operand kind") + } + + c.EmitByte(byte(i.u2)) + + case unaryRmR: + var prefix legacyPrefixes + var opcode uint32 + var opcodeNum uint32 + op := unaryRmROpcode(i.u1) + // We assume size is either 32 or 64. 
+ switch op { + case unaryRmROpcodeBsr: + prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0fbd, 2 + case unaryRmROpcodeBsf: + prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0fbc, 2 + case unaryRmROpcodeLzcnt: + prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0fbd, 2 + case unaryRmROpcodeTzcnt: + prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0fbc, 2 + case unaryRmROpcodePopcnt: + prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0fb8, 2 + default: + panic(fmt.Sprintf("Unsupported unaryRmROpcode: %s", op)) + } + + dst := regEncodings[i.op2.reg().RealReg()] + + rex := rexInfo(0) + if i.b1 { // 64 bit. + rex = rexInfo(0).setW() + } else { + rex = rexInfo(0).clearW() + } + op1 := i.op1 + if op1.kind == operandKindReg { + src := regEncodings[op1.reg().RealReg()] + encodeRegReg(c, prefix, opcode, opcodeNum, dst, src, rex) + } else if i.op1.kind == operandKindMem { + m := i.op1.addressMode() + encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) + } else { + panic("BUG: invalid operand kind") + } + + case not: + var prefix legacyPrefixes + src := regEncodings[i.op1.reg().RealReg()] + rex := rexInfo(0) + if i.b1 { // 64 bit. + rex = rexInfo(0).setW() + } else { + rex = rexInfo(0).clearW() + } + subopcode := uint8(2) + encodeEncEnc(c, prefix, 0xf7, 1, subopcode, uint8(src), rex) + + case neg: + var prefix legacyPrefixes + src := regEncodings[i.op1.reg().RealReg()] + rex := rexInfo(0) + if i.b1 { // 64 bit. + rex = rexInfo(0).setW() + } else { + rex = rexInfo(0).clearW() + } + subopcode := uint8(3) + encodeEncEnc(c, prefix, 0xf7, 1, subopcode, uint8(src), rex) + + case div: + rex := rexInfo(0) + if i.b1 { // 64 bit. + rex = rexInfo(0).setW() + } else { + rex = rexInfo(0).clearW() + } + var subopcode uint8 + if i.u1 != 0 { // Signed. 
+ subopcode = 7 + } else { + subopcode = 6 + } + + divisor := i.op1 + if divisor.kind == operandKindReg { + src := regEncodings[divisor.reg().RealReg()] + encodeEncEnc(c, legacyPrefixesNone, 0xf7, 1, subopcode, uint8(src), rex) + } else if divisor.kind == operandKindMem { + m := divisor.addressMode() + encodeEncMem(c, legacyPrefixesNone, 0xf7, 1, subopcode, m, rex) + } else { + panic("BUG: invalid operand kind") + } + + case mulHi: + var prefix legacyPrefixes + rex := rexInfo(0) + if i.b1 { // 64 bit. + rex = rexInfo(0).setW() + } else { + rex = rexInfo(0).clearW() + } + + signed := i.u1 != 0 + var subopcode uint8 + if signed { + subopcode = 5 + } else { + subopcode = 4 + } + + // src1 is implicitly rax, + // dst_lo is implicitly rax, + // dst_hi is implicitly rdx. + src2 := i.op1 + if src2.kind == operandKindReg { + src := regEncodings[src2.reg().RealReg()] + encodeEncEnc(c, prefix, 0xf7, 1, subopcode, uint8(src), rex) + } else if src2.kind == operandKindMem { + m := src2.addressMode() + encodeEncMem(c, prefix, 0xf7, 1, subopcode, m, rex) + } else { + panic("BUG: invalid operand kind") + } + + case signExtendData: + if i.b1 { // 64 bit. 
+ c.EmitByte(0x48) + c.EmitByte(0x99) + } else { + c.EmitByte(0x99) + } + case movzxRmR, movsxRmR: + signed := i.kind == movsxRmR + + ext := extMode(i.u1) + var opcode uint32 + var opcodeNum uint32 + var rex rexInfo + switch ext { + case extModeBL: + if signed { + opcode, opcodeNum, rex = 0x0fbe, 2, rex.clearW() + } else { + opcode, opcodeNum, rex = 0x0fb6, 2, rex.clearW() + } + case extModeBQ: + if signed { + opcode, opcodeNum, rex = 0x0fbe, 2, rex.setW() + } else { + opcode, opcodeNum, rex = 0x0fb6, 2, rex.setW() + } + case extModeWL: + if signed { + opcode, opcodeNum, rex = 0x0fbf, 2, rex.clearW() + } else { + opcode, opcodeNum, rex = 0x0fb7, 2, rex.clearW() + } + case extModeWQ: + if signed { + opcode, opcodeNum, rex = 0x0fbf, 2, rex.setW() + } else { + opcode, opcodeNum, rex = 0x0fb7, 2, rex.setW() + } + case extModeLQ: + if signed { + opcode, opcodeNum, rex = 0x63, 1, rex.setW() + } else { + opcode, opcodeNum, rex = 0x8b, 1, rex.clearW() + } + default: + panic("BUG: invalid extMode") + } + + op := i.op1 + dst := regEncodings[i.op2.reg().RealReg()] + switch op.kind { + case operandKindReg: + src := regEncodings[op.reg().RealReg()] + if ext == extModeBL || ext == extModeBQ { + // Some destinations must be encoded with REX.R = 1. 
+ if e := src.encoding(); e >= 4 && e <= 7 { + rex = rex.always() + } + } + encodeRegReg(c, legacyPrefixesNone, opcode, opcodeNum, dst, src, rex) + case operandKindMem: + m := op.addressMode() + encodeRegMem(c, legacyPrefixesNone, opcode, opcodeNum, dst, m, rex) + default: + panic("BUG: invalid operand kind") + } + + case mov64MR: + m := i.op1.addressMode() + encodeLoad64(c, m, i.op2.reg().RealReg()) + + case lea: + needsLabelResolution = true + dst := regEncodings[i.op2.reg().RealReg()] + rex := rexInfo(0).setW() + const opcode, opcodeNum = 0x8d, 1 + switch i.op1.kind { + case operandKindMem: + a := i.op1.addressMode() + encodeRegMem(c, legacyPrefixesNone, opcode, opcodeNum, dst, a, rex) + case operandKindLabel: + rex.encode(c, regRexBit(byte(dst)), 0) + c.EmitByte(byte((opcode) & 0xff)) + + // Indicate "LEAQ [RIP + 32bit displacement]. + // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing + c.EmitByte(encodeModRM(0b00, dst.encoding(), 0b101)) + + // This will be resolved later, so we just emit a placeholder (0xffffffff for testing). + c.Emit4Bytes(0xffffffff) + default: + panic("BUG: invalid operand kind") + } + + case movRM: + m := i.op2.addressMode() + src := regEncodings[i.op1.reg().RealReg()] + + var rex rexInfo + switch i.u1 { + case 1: + if e := src.encoding(); e >= 4 && e <= 7 { + rex = rex.always() + } + encodeRegMem(c, legacyPrefixesNone, 0x88, 1, src, m, rex.clearW()) + case 2: + encodeRegMem(c, legacyPrefixes0x66, 0x89, 1, src, m, rex.clearW()) + case 4: + encodeRegMem(c, legacyPrefixesNone, 0x89, 1, src, m, rex.clearW()) + case 8: + encodeRegMem(c, legacyPrefixesNone, 0x89, 1, src, m, rex.setW()) + default: + panic(fmt.Sprintf("BUG: invalid size %d: %s", i.u1, i.String())) + } + + case shiftR: + src := regEncodings[i.op2.reg().RealReg()] + amount := i.op1 + + var opcode uint32 + var prefix legacyPrefixes + rex := rexInfo(0) + if i.b1 { // 64 bit. 
+ rex = rexInfo(0).setW() + } else { + rex = rexInfo(0).clearW() + } + + switch amount.kind { + case operandKindReg: + if amount.reg() != rcxVReg { + panic("BUG: invalid reg operand: must be rcx") + } + opcode, prefix = 0xd3, legacyPrefixesNone + encodeEncEnc(c, prefix, opcode, 1, uint8(i.u1), uint8(src), rex) + case operandKindImm32: + opcode, prefix = 0xc1, legacyPrefixesNone + encodeEncEnc(c, prefix, opcode, 1, uint8(i.u1), uint8(src), rex) + c.EmitByte(byte(amount.imm32())) + default: + panic("BUG: invalid operand kind") + } + case xmmRmiReg: + const legPrefix = legacyPrefixes0x66 + rex := rexInfo(0).clearW() + dst := regEncodings[i.op2.reg().RealReg()] + + var opcode uint32 + var regDigit uint8 + + op := sseOpcode(i.u1) + op1 := i.op1 + if i.op1.kind == operandKindImm32 { + switch op { + case sseOpcodePsllw: + opcode, regDigit = 0x0f71, 6 + case sseOpcodePslld: + opcode, regDigit = 0x0f72, 6 + case sseOpcodePsllq: + opcode, regDigit = 0x0f73, 6 + case sseOpcodePsraw: + opcode, regDigit = 0x0f71, 4 + case sseOpcodePsrad: + opcode, regDigit = 0x0f72, 4 + case sseOpcodePsrlw: + opcode, regDigit = 0x0f71, 2 + case sseOpcodePsrld: + opcode, regDigit = 0x0f72, 2 + case sseOpcodePsrlq: + opcode, regDigit = 0x0f73, 2 + default: + panic("invalid opcode") + } + + encodeEncEnc(c, legPrefix, opcode, 2, regDigit, uint8(dst), rex) + imm32 := op1.imm32() + if imm32 > 0xff&imm32 { + panic("immediate value does not fit 1 byte") + } + c.EmitByte(uint8(imm32)) + } else { + switch op { + case sseOpcodePsllw: + opcode = 0x0ff1 + case sseOpcodePslld: + opcode = 0x0ff2 + case sseOpcodePsllq: + opcode = 0x0ff3 + case sseOpcodePsraw: + opcode = 0x0fe1 + case sseOpcodePsrad: + opcode = 0x0fe2 + case sseOpcodePsrlw: + opcode = 0x0fd1 + case sseOpcodePsrld: + opcode = 0x0fd2 + case sseOpcodePsrlq: + opcode = 0x0fd3 + default: + panic("invalid opcode") + } + + if op1.kind == operandKindReg { + reg := regEncodings[op1.reg().RealReg()] + encodeRegReg(c, legPrefix, opcode, 2, dst, reg, rex) 
+ } else if op1.kind == operandKindMem { + m := op1.addressMode() + encodeRegMem(c, legPrefix, opcode, 2, dst, m, rex) + } else { + panic("BUG: invalid operand kind") + } + } + + case cmpRmiR: + var opcode uint32 + isCmp := i.u1 != 0 + rex := rexInfo(0) + _64 := i.b1 + if _64 { // 64 bit. + rex = rex.setW() + } else { + rex = rex.clearW() + } + dst := regEncodings[i.op2.reg().RealReg()] + op1 := i.op1 + switch op1.kind { + case operandKindReg: + reg := regEncodings[op1.reg().RealReg()] + if isCmp { + opcode = 0x39 + } else { + opcode = 0x85 + } + // Here we swap the encoding of the operands for CMP to be consistent with the output of LLVM/GCC. + encodeRegReg(c, legacyPrefixesNone, opcode, 1, reg, dst, rex) + + case operandKindMem: + if isCmp { + opcode = 0x3b + } else { + opcode = 0x85 + } + m := op1.addressMode() + encodeRegMem(c, legacyPrefixesNone, opcode, 1, dst, m, rex) + + case operandKindImm32: + imm32 := op1.imm32() + useImm8 := isCmp && lower8willSignExtendTo32(imm32) + var subopcode uint8 + + switch { + case isCmp && useImm8: + opcode, subopcode = 0x83, 7 + case isCmp && !useImm8: + opcode, subopcode = 0x81, 7 + default: + opcode, subopcode = 0xf7, 0 + } + encodeEncEnc(c, legacyPrefixesNone, opcode, 1, subopcode, uint8(dst), rex) + if useImm8 { + c.EmitByte(uint8(imm32)) + } else { + c.Emit4Bytes(imm32) + } + + default: + panic("BUG: invalid operand kind") + } + case setcc: + cc := cond(i.u1) + dst := regEncodings[i.op2.reg().RealReg()] + rex := rexInfo(0).clearW().always() + opcode := uint32(0x0f90) + uint32(cc) + encodeEncEnc(c, legacyPrefixesNone, opcode, 2, 0, uint8(dst), rex) + case cmove: + cc := cond(i.u1) + dst := regEncodings[i.op2.reg().RealReg()] + rex := rexInfo(0) + if i.b1 { // 64 bit. 
+ rex = rex.setW() + } else { + rex = rex.clearW() + } + opcode := uint32(0x0f40) + uint32(cc) + src := i.op1 + switch src.kind { + case operandKindReg: + srcReg := regEncodings[src.reg().RealReg()] + encodeRegReg(c, legacyPrefixesNone, opcode, 2, dst, srcReg, rex) + case operandKindMem: + m := src.addressMode() + encodeRegMem(c, legacyPrefixesNone, opcode, 2, dst, m, rex) + default: + panic("BUG: invalid operand kind") + } + case push64: + op := i.op1 + + switch op.kind { + case operandKindReg: + dst := regEncodings[op.reg().RealReg()] + if dst.rexBit() > 0 { + c.EmitByte(rexEncodingDefault | 0x1) + } + c.EmitByte(0x50 | dst.encoding()) + case operandKindMem: + m := op.addressMode() + encodeRegMem( + c, legacyPrefixesNone, 0xff, 1, regEnc(6), m, rexInfo(0).clearW(), + ) + case operandKindImm32: + c.EmitByte(0x68) + c.Emit4Bytes(op.imm32()) + default: + panic("BUG: invalid operand kind") + } + + case pop64: + dst := regEncodings[i.op1.reg().RealReg()] + if dst.rexBit() > 0 { + c.EmitByte(rexEncodingDefault | 0x1) + } + c.EmitByte(0x58 | dst.encoding()) + + case xmmMovRM: + var legPrefix legacyPrefixes + var opcode uint32 + const opcodeNum = 2 + switch sseOpcode(i.u1) { + case sseOpcodeMovaps: + legPrefix, opcode = legacyPrefixesNone, 0x0f29 + case sseOpcodeMovapd: + legPrefix, opcode = legacyPrefixes0x66, 0x0f29 + case sseOpcodeMovdqa: + legPrefix, opcode = legacyPrefixes0x66, 0x0f7f + case sseOpcodeMovdqu: + legPrefix, opcode = legacyPrefixes0xF3, 0x0f7f + case sseOpcodeMovss: + legPrefix, opcode = legacyPrefixes0xF3, 0x0f11 + case sseOpcodeMovsd: + legPrefix, opcode = legacyPrefixes0xF2, 0x0f11 + case sseOpcodeMovups: + legPrefix, opcode = legacyPrefixesNone, 0x0f11 + case sseOpcodeMovupd: + legPrefix, opcode = legacyPrefixes0x66, 0x0f11 + default: + panic(fmt.Sprintf("Unsupported sseOpcode: %s", sseOpcode(i.u1))) + } + + dst := regEncodings[i.op1.reg().RealReg()] + encodeRegMem(c, legPrefix, opcode, opcodeNum, dst, i.op2.addressMode(), rexInfo(0).clearW()) + 
case xmmLoadConst: + panic("TODO") + case xmmToGpr: + var legPrefix legacyPrefixes + var opcode uint32 + var argSwap bool + const opcodeNum = 2 + switch sseOpcode(i.u1) { + case sseOpcodeMovd, sseOpcodeMovq: + legPrefix, opcode, argSwap = legacyPrefixes0x66, 0x0f7e, false + case sseOpcodeMovmskps: + legPrefix, opcode, argSwap = legacyPrefixesNone, 0x0f50, true + case sseOpcodeMovmskpd: + legPrefix, opcode, argSwap = legacyPrefixes0x66, 0x0f50, true + case sseOpcodePmovmskb: + legPrefix, opcode, argSwap = legacyPrefixes0x66, 0x0fd7, true + case sseOpcodeCvttss2si: + legPrefix, opcode, argSwap = legacyPrefixes0xF3, 0x0f2c, true + case sseOpcodeCvttsd2si: + legPrefix, opcode, argSwap = legacyPrefixes0xF2, 0x0f2c, true + default: + panic(fmt.Sprintf("Unsupported sseOpcode: %s", sseOpcode(i.u1))) + } + + var rex rexInfo + if i.b1 { + rex = rex.setW() + } else { + rex = rex.clearW() + } + src := regEncodings[i.op1.reg().RealReg()] + dst := regEncodings[i.op2.reg().RealReg()] + if argSwap { + src, dst = dst, src + } + encodeRegReg(c, legPrefix, opcode, opcodeNum, src, dst, rex) + + case cvtUint64ToFloatSeq: + panic("TODO") + case cvtFloatToSintSeq: + panic("TODO") + case cvtFloatToUintSeq: + panic("TODO") + case xmmMinMaxSeq: + panic("TODO") + case xmmCmpRmR: + var prefix legacyPrefixes + var opcode uint32 + var opcodeNum uint32 + rex := rexInfo(0) + _64 := i.b1 + if _64 { // 64 bit. 
+ rex = rex.setW() + } else { + rex = rex.clearW() + } + + op := sseOpcode(i.u1) + switch op { + case sseOpcodePtest: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3817, 3 + case sseOpcodeUcomisd: + prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f2e, 2 + case sseOpcodeUcomiss: + prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0f2e, 2 + default: + panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) + } + + dst := regEncodings[i.op2.reg().RealReg()] + op1 := i.op1 + switch op1.kind { + case operandKindReg: + reg := regEncodings[op1.reg().RealReg()] + encodeRegReg(c, prefix, opcode, opcodeNum, dst, reg, rex) + + case operandKindMem: + m := op1.addressMode() + encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) + + default: + panic("BUG: invalid operand kind") + } + case xmmRmRImm: + op := sseOpcode(i.u1) + var legPrex legacyPrefixes + var opcode uint32 + var opcodeNum uint32 + var swap bool + switch op { + case sseOpcodeCmpps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0FC2, 2 + case sseOpcodeCmppd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FC2, 2 + case sseOpcodeCmpss: + legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0FC2, 2 + case sseOpcodeCmpsd: + legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0FC2, 2 + case sseOpcodeInsertps: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A21, 3 + case sseOpcodePalignr: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A0F, 3 + case sseOpcodePinsrb: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A20, 3 + case sseOpcodePinsrw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FC4, 2 + case sseOpcodePinsrd, sseOpcodePinsrq: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A22, 3 + case sseOpcodePextrb: + swap = true + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A14, 3 + case sseOpcodePextrw: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FC5, 2 + case sseOpcodePextrd, sseOpcodePextrq: + swap = true + legPrex, opcode, opcodeNum = 
legacyPrefixes0x66, 0x0F3A16, 3 + case sseOpcodePshufd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F70, 2 + case sseOpcodeRoundps: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A08, 3 + case sseOpcodeRoundpd: + legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A09, 3 + case sseOpcodeShufps: + legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0FC6, 2 + default: + panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) + } + + dst := regEncodings[i.op2.reg().RealReg()] + + var rex rexInfo + if op == sseOpcodePextrq || op == sseOpcodePinsrq { + rex = rexInfo(0).setW() + } else { + rex = rexInfo(0).clearW() + } + op1 := i.op1 + if op1.kind == operandKindReg { + src := regEncodings[op1.reg().RealReg()] + if swap { + src, dst = dst, src + } + encodeRegReg(c, legPrex, opcode, opcodeNum, dst, src, rex) + } else if i.op1.kind == operandKindMem { + if swap { + panic("BUG: this is not possible to encode") + } + m := i.op1.addressMode() + encodeRegMem(c, legPrex, opcode, opcodeNum, dst, m, rex) + } else { + panic("BUG: invalid operand kind") + } + + c.EmitByte(byte(i.u2)) + + case jmp: + const ( + regMemOpcode = 0xff + regMemOpcodeNum = 1 + regMemSubOpcode = 4 + ) + op := i.op1 + switch op.kind { + case operandKindLabel: + needsLabelResolution = true + fallthrough + case operandKindImm32: + c.EmitByte(0xe9) + c.Emit4Bytes(op.imm32()) + case operandKindMem: + m := op.addressMode() + encodeRegMem(c, + legacyPrefixesNone, + regMemOpcode, regMemOpcodeNum, + regMemSubOpcode, m, rexInfo(0).clearW(), + ) + case operandKindReg: + r := op.reg().RealReg() + encodeRegReg( + c, + legacyPrefixesNone, + regMemOpcode, regMemOpcodeNum, + regMemSubOpcode, + regEncodings[r], rexInfo(0).clearW(), + ) + default: + panic("BUG: invalid operand kind") + } + + case jmpIf: + op := i.op1 + switch op.kind { + case operandKindLabel: + needsLabelResolution = true + fallthrough + case operandKindImm32: + c.EmitByte(0x0f) + c.EmitByte(0x80 | cond(i.u1).encoding()) + 
c.Emit4Bytes(op.imm32()) + default: + panic("BUG: invalid operand kind") + } + + case jmpTableIsland: + needsLabelResolution = true + for tc := uint64(0); tc < i.u2; tc++ { + c.Emit8Bytes(0) + } + + case exitSequence: + execCtx := i.op1.reg() + allocatedAmode := i.op2.addressMode() + + // Restore the RBP, RSP, and return to the Go code: + *allocatedAmode = amode{ + kindWithShift: uint32(amodeImmReg), base: execCtx, + imm32: wazevoapi.ExecutionContextOffsetOriginalFramePointer.U32(), + } + encodeLoad64(c, allocatedAmode, rbp) + allocatedAmode.imm32 = wazevoapi.ExecutionContextOffsetOriginalStackPointer.U32() + encodeLoad64(c, allocatedAmode, rsp) + encodeRet(c) + + case ud2: + c.EmitByte(0x0f) + c.EmitByte(0x0b) + + case call: + c.EmitByte(0xe8) + // Meaning that the call target is a function value, and requires relocation. + c.AddRelocationInfo(ssa.FuncRef(i.u1)) + // Note that this is zero as a placeholder for the call target if it's a function value. + c.Emit4Bytes(uint32(i.u2)) + + case callIndirect: + op := i.op1 + + const opcodeNum = 1 + const opcode = 0xff + rex := rexInfo(0).clearW() + switch op.kind { + case operandKindReg: + dst := regEncodings[op.reg().RealReg()] + encodeRegReg(c, + legacyPrefixesNone, + opcode, opcodeNum, + regEnc(2), + dst, + rex, + ) + case operandKindMem: + m := op.addressMode() + encodeRegMem(c, + legacyPrefixesNone, + opcode, opcodeNum, + regEnc(2), + m, + rex, + ) + default: + panic("BUG: invalid operand kind") + } + + case xchg: + src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 + size := i.u1 + + var rex rexInfo + var opcode uint32 + lp := legacyPrefixesNone + switch size { + case 8: + opcode = 0x87 + rex = rexInfo(0).setW() + case 4: + opcode = 0x87 + rex = rexInfo(0).clearW() + case 2: + lp = legacyPrefixes0x66 + opcode = 0x87 + rex = rexInfo(0).clearW() + case 1: + opcode = 0x86 + if i.op2.kind == operandKindReg { + panic("TODO?: xchg on two 1-byte registers") + } + // Some destinations must be encoded with REX.R = 1. 
+ if e := src.encoding(); e >= 4 && e <= 7 { + rex = rexInfo(0).always() + } + default: + panic(fmt.Sprintf("BUG: invalid size %d: %s", size, i.String())) + } + + switch dst.kind { + case operandKindMem: + m := dst.addressMode() + encodeRegMem(c, lp, opcode, 1, src, m, rex) + case operandKindReg: + r := dst.reg().RealReg() + encodeRegReg(c, lp, opcode, 1, src, regEncodings[r], rex) + default: + panic("BUG: invalid operand kind") + } + + case lockcmpxchg: + src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 + size := i.u1 + + var rex rexInfo + var opcode uint32 + lp := legacyPrefixes0xF0 // Lock prefix. + switch size { + case 8: + opcode = 0x0FB1 + rex = rexInfo(0).setW() + case 4: + opcode = 0x0FB1 + rex = rexInfo(0).clearW() + case 2: + lp = legacyPrefixes0x660xF0 // Legacy prefix + Lock prefix. + opcode = 0x0FB1 + rex = rexInfo(0).clearW() + case 1: + opcode = 0x0FB0 + // Some destinations must be encoded with REX.R = 1. + if e := src.encoding(); e >= 4 && e <= 7 { + rex = rexInfo(0).always() + } + default: + panic(fmt.Sprintf("BUG: invalid size %d: %s", size, i.String())) + } + + switch dst.kind { + case operandKindMem: + m := dst.addressMode() + encodeRegMem(c, lp, opcode, 2, src, m, rex) + default: + panic("BUG: invalid operand kind") + } + + case lockxadd: + src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 + size := i.u1 + + var rex rexInfo + var opcode uint32 + lp := legacyPrefixes0xF0 // Lock prefix. + switch size { + case 8: + opcode = 0x0FC1 + rex = rexInfo(0).setW() + case 4: + opcode = 0x0FC1 + rex = rexInfo(0).clearW() + case 2: + lp = legacyPrefixes0x660xF0 // Legacy prefix + Lock prefix. + opcode = 0x0FC1 + rex = rexInfo(0).clearW() + case 1: + opcode = 0x0FC0 + // Some destinations must be encoded with REX.R = 1. 
+ if e := src.encoding(); e >= 4 && e <= 7 { + rex = rexInfo(0).always() + } + default: + panic(fmt.Sprintf("BUG: invalid size %d: %s", size, i.String())) + } + + switch dst.kind { + case operandKindMem: + m := dst.addressMode() + encodeRegMem(c, lp, opcode, 2, src, m, rex) + default: + panic("BUG: invalid operand kind") + } + + case zeros: + r := i.op2.reg() + if r.RegType() == regalloc.RegTypeInt { + i.asAluRmiR(aluRmiROpcodeXor, newOperandReg(r), r, true) + } else { + i.asXmmRmR(sseOpcodePxor, newOperandReg(r), r) + } + i.encode(c) + + case mfence: + // https://www.felixcloutier.com/x86/mfence + c.EmitByte(0x0f) + c.EmitByte(0xae) + c.EmitByte(0xf0) + + default: + panic(fmt.Sprintf("TODO: %v", i.kind)) + } + return +} + +func encodeLoad64(c backend.Compiler, m *amode, rd regalloc.RealReg) { + dst := regEncodings[rd] + encodeRegMem(c, legacyPrefixesNone, 0x8b, 1, dst, m, rexInfo(0).setW()) +} + +func encodeRet(c backend.Compiler) { + c.EmitByte(0xc3) +} + +func encodeEncEnc( + c backend.Compiler, + legPrefixes legacyPrefixes, + opcodes uint32, + opcodeNum uint32, + r uint8, + rm uint8, + rex rexInfo, +) { + legPrefixes.encode(c) + rex.encode(c, r>>3, rm>>3) + + for opcodeNum > 0 { + opcodeNum-- + c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) + } + c.EmitByte(encodeModRM(3, r&7, rm&7)) +} + +func encodeRegReg( + c backend.Compiler, + legPrefixes legacyPrefixes, + opcodes uint32, + opcodeNum uint32, + r regEnc, + rm regEnc, + rex rexInfo, +) { + encodeEncEnc(c, legPrefixes, opcodes, opcodeNum, uint8(r), uint8(rm), rex) +} + +func encodeModRM(mod byte, reg byte, rm byte) byte { + return mod<<6 | reg<<3 | rm +} + +func encodeSIB(shift byte, encIndex byte, encBase byte) byte { + return shift<<6 | encIndex<<3 | encBase +} + +func encodeRegMem( + c backend.Compiler, legPrefixes legacyPrefixes, opcodes uint32, opcodeNum uint32, r regEnc, m *amode, rex rexInfo, +) (needsLabelResolution bool) { + needsLabelResolution = encodeEncMem(c, legPrefixes, opcodes, 
opcodeNum, uint8(r), m, rex) + return +} + +func encodeEncMem( + c backend.Compiler, legPrefixes legacyPrefixes, opcodes uint32, opcodeNum uint32, r uint8, m *amode, rex rexInfo, +) (needsLabelResolution bool) { + legPrefixes.encode(c) + + const ( + modNoDisplacement = 0b00 + modShortDisplacement = 0b01 + modLongDisplacement = 0b10 + + useSBI = 4 // the encoding of rsp or r12 register. + ) + + switch m.kind() { + case amodeImmReg, amodeImmRBP: + base := m.base.RealReg() + baseEnc := regEncodings[base] + + rex.encode(c, regRexBit(r), baseEnc.rexBit()) + + for opcodeNum > 0 { + opcodeNum-- + c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) + } + + // SIB byte is the last byte of the memory encoding before the displacement + const sibByte = 0x24 // == encodeSIB(0, 4, 4) + + immZero, baseRbp, baseR13 := m.imm32 == 0, base == rbp, base == r13 + short := lower8willSignExtendTo32(m.imm32) + rspOrR12 := base == rsp || base == r12 + + if immZero && !baseRbp && !baseR13 { // rbp or r13 can't be used as base for without displacement encoding. + c.EmitByte(encodeModRM(modNoDisplacement, regEncoding(r), baseEnc.encoding())) + if rspOrR12 { + c.EmitByte(sibByte) + } + } else if short { // Note: this includes the case where m.imm32 == 0 && base == rbp || base == r13. 
+ c.EmitByte(encodeModRM(modShortDisplacement, regEncoding(r), baseEnc.encoding())) + if rspOrR12 { + c.EmitByte(sibByte) + } + c.EmitByte(byte(m.imm32)) + } else { + c.EmitByte(encodeModRM(modLongDisplacement, regEncoding(r), baseEnc.encoding())) + if rspOrR12 { + c.EmitByte(sibByte) + } + c.Emit4Bytes(m.imm32) + } + + case amodeRegRegShift: + base := m.base.RealReg() + baseEnc := regEncodings[base] + index := m.index.RealReg() + indexEnc := regEncodings[index] + + if index == rsp { + panic("BUG: rsp can't be used as index of addressing mode") + } + + rex.encodeForIndex(c, regEnc(r), indexEnc, baseEnc) + + for opcodeNum > 0 { + opcodeNum-- + c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) + } + + immZero, baseRbp, baseR13 := m.imm32 == 0, base == rbp, base == r13 + if immZero && !baseRbp && !baseR13 { // rbp or r13 can't be used as base for without displacement encoding. (curious why? because it's interpreted as RIP relative addressing). + c.EmitByte(encodeModRM(modNoDisplacement, regEncoding(r), useSBI)) + c.EmitByte(encodeSIB(m.shift(), indexEnc.encoding(), baseEnc.encoding())) + } else if lower8willSignExtendTo32(m.imm32) { + c.EmitByte(encodeModRM(modShortDisplacement, regEncoding(r), useSBI)) + c.EmitByte(encodeSIB(m.shift(), indexEnc.encoding(), baseEnc.encoding())) + c.EmitByte(byte(m.imm32)) + } else { + c.EmitByte(encodeModRM(modLongDisplacement, regEncoding(r), useSBI)) + c.EmitByte(encodeSIB(m.shift(), indexEnc.encoding(), baseEnc.encoding())) + c.Emit4Bytes(m.imm32) + } + + case amodeRipRel: + rex.encode(c, regRexBit(r), 0) + for opcodeNum > 0 { + opcodeNum-- + c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) + } + + // Indicate "LEAQ [RIP + 32bit displacement]. + // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing + c.EmitByte(encodeModRM(0b00, regEncoding(r), 0b101)) + + // This will be resolved later, so we just emit a placeholder. 
+ needsLabelResolution = true + c.Emit4Bytes(0) + + default: + panic("BUG: invalid addressing mode") + } + return +} + +const ( + rexEncodingDefault byte = 0x40 + rexEncodingW = rexEncodingDefault | 0x08 +) + +// rexInfo is a bit set to indicate: +// +// 0x01: W bit must be cleared. +// 0x02: REX prefix must be emitted. +type rexInfo byte + +func (ri rexInfo) setW() rexInfo { + return ri | 0x01 +} + +func (ri rexInfo) clearW() rexInfo { + return ri & 0x02 +} + +func (ri rexInfo) always() rexInfo { + return ri | 0x02 +} + +func (ri rexInfo) notAlways() rexInfo { //nolint + return ri & 0x01 +} + +func (ri rexInfo) encode(c backend.Compiler, r uint8, b uint8) { + var w byte = 0 + if ri&0x01 != 0 { + w = 0x01 + } + rex := rexEncodingDefault | w<<3 | r<<2 | b + if rex != rexEncodingDefault || ri&0x02 != 0 { + c.EmitByte(rex) + } +} + +func (ri rexInfo) encodeForIndex(c backend.Compiler, encR regEnc, encIndex regEnc, encBase regEnc) { + var w byte = 0 + if ri&0x01 != 0 { + w = 0x01 + } + r := encR.rexBit() + x := encIndex.rexBit() + b := encBase.rexBit() + rex := byte(0x40) | w<<3 | r<<2 | x<<1 | b + if rex != 0x40 || ri&0x02 != 0 { + c.EmitByte(rex) + } +} + +type regEnc byte + +func (r regEnc) rexBit() byte { + return regRexBit(byte(r)) +} + +func (r regEnc) encoding() byte { + return regEncoding(byte(r)) +} + +func regRexBit(r byte) byte { + return r >> 3 +} + +func regEncoding(r byte) byte { + return r & 0x07 +} + +var regEncodings = [...]regEnc{ + rax: 0b000, + rcx: 0b001, + rdx: 0b010, + rbx: 0b011, + rsp: 0b100, + rbp: 0b101, + rsi: 0b110, + rdi: 0b111, + r8: 0b1000, + r9: 0b1001, + r10: 0b1010, + r11: 0b1011, + r12: 0b1100, + r13: 0b1101, + r14: 0b1110, + r15: 0b1111, + xmm0: 0b000, + xmm1: 0b001, + xmm2: 0b010, + xmm3: 0b011, + xmm4: 0b100, + xmm5: 0b101, + xmm6: 0b110, + xmm7: 0b111, + xmm8: 0b1000, + xmm9: 0b1001, + xmm10: 0b1010, + xmm11: 0b1011, + xmm12: 0b1100, + xmm13: 0b1101, + xmm14: 0b1110, + xmm15: 0b1111, +} + +type legacyPrefixes byte + +const ( + 
legacyPrefixesNone legacyPrefixes = iota + legacyPrefixes0x66 + legacyPrefixes0xF0 + legacyPrefixes0x660xF0 + legacyPrefixes0xF2 + legacyPrefixes0xF3 +) + +func (p legacyPrefixes) encode(c backend.Compiler) { + switch p { + case legacyPrefixesNone: + case legacyPrefixes0x66: + c.EmitByte(0x66) + case legacyPrefixes0xF0: + c.EmitByte(0xf0) + case legacyPrefixes0x660xF0: + c.EmitByte(0x66) + c.EmitByte(0xf0) + case legacyPrefixes0xF2: + c.EmitByte(0xf2) + case legacyPrefixes0xF3: + c.EmitByte(0xf3) + default: + panic("BUG: invalid legacy prefix") + } +} + +func lower32willSignExtendTo64(x uint64) bool { + xs := int64(x) + return xs == int64(uint64(int32(xs))) +} + +func lower8willSignExtendTo32(x uint32) bool { + xs := int32(x) + return xs == ((xs << 24) >> 24) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_constant.go new file mode 100644 index 000000000..55d05ef63 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_constant.go @@ -0,0 +1,71 @@ +package amd64 + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +// lowerConstant allocates a new VReg and inserts the instruction to load the constant value. +func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) { + val := instr.Return() + valType := val.Type() + + vr = m.c.AllocateVReg(valType) + m.insertLoadConstant(instr, vr) + return +} + +// InsertLoadConstantBlockArg implements backend.Machine. 
+func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) { + m.insertLoadConstant(instr, vr) +} + +func (m *machine) insertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) { + val := instr.Return() + valType := val.Type() + v := instr.ConstantVal() + + bits := valType.Bits() + if bits < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc. + v = v & ((1 << valType.Bits()) - 1) + } + + switch valType { + case ssa.TypeF32, ssa.TypeF64: + m.lowerFconst(vr, v, bits == 64) + case ssa.TypeI32, ssa.TypeI64: + m.lowerIconst(vr, v, bits == 64) + default: + panic("BUG") + } +} + +func (m *machine) lowerFconst(dst regalloc.VReg, c uint64, _64 bool) { + if c == 0 { + xor := m.allocateInstr().asZeros(dst) + m.insert(xor) + } else { + var tmpType ssa.Type + if _64 { + tmpType = ssa.TypeI64 + } else { + tmpType = ssa.TypeI32 + } + tmpInt := m.c.AllocateVReg(tmpType) + loadToGP := m.allocateInstr().asImm(tmpInt, c, _64) + m.insert(loadToGP) + + movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpInt), dst, _64) + m.insert(movToXmm) + } +} + +func (m *machine) lowerIconst(dst regalloc.VReg, c uint64, _64 bool) { + i := m.allocateInstr() + if c == 0 { + i.asZeros(dst) + } else { + i.asImm(dst, c, _64) + } + m.insert(i) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go new file mode 100644 index 000000000..bee673d25 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go @@ -0,0 +1,187 @@ +package amd64 + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +var addendsMatchOpcodes = [...]ssa.Opcode{ssa.OpcodeUExtend, 
ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst, ssa.OpcodeIshl} + +type addend struct { + r regalloc.VReg + off int64 + shift byte +} + +func (a addend) String() string { + return fmt.Sprintf("addend{r=%s, off=%d, shift=%d}", a.r, a.off, a.shift) +} + +// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions. +func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32) (am *amode) { + def := m.c.ValueDefinition(ptr) + + if offsetBase&0x80000000 != 0 { + // Special casing the huge base offset whose MSB is set. In x64, the immediate is always + // sign-extended, but our IR semantics requires the offset base is always unsigned. + // Note that this should be extremely rare or even this shouldn't hit in the real application, + // therefore we don't need to optimize this case in my opinion. + + a := m.lowerAddend(def) + off64 := a.off + int64(offsetBase) + offsetBaseReg := m.c.AllocateVReg(ssa.TypeI64) + m.lowerIconst(offsetBaseReg, uint64(off64), true) + if a.r != regalloc.VRegInvalid { + return m.newAmodeRegRegShift(0, offsetBaseReg, a.r, a.shift) + } else { + return m.newAmodeImmReg(0, offsetBaseReg) + } + } + + if op := m.c.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op == ssa.OpcodeIadd { + add := def.Instr + x, y := add.Arg2() + xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) + ax := m.lowerAddend(xDef) + ay := m.lowerAddend(yDef) + add.MarkLowered() + return m.lowerAddendsToAmode(ax, ay, offsetBase) + } else { + // If it is not an Iadd, then we lower the one addend. + a := m.lowerAddend(def) + // off is always 0 if r is valid. 
+ if a.r != regalloc.VRegInvalid { + if a.shift != 0 { + tmpReg := m.c.AllocateVReg(ssa.TypeI64) + m.lowerIconst(tmpReg, 0, true) + return m.newAmodeRegRegShift(offsetBase, tmpReg, a.r, a.shift) + } + return m.newAmodeImmReg(offsetBase, a.r) + } else { + off64 := a.off + int64(offsetBase) + tmpReg := m.c.AllocateVReg(ssa.TypeI64) + m.lowerIconst(tmpReg, uint64(off64), true) + return m.newAmodeImmReg(0, tmpReg) + } + } +} + +func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode { + if x.r != regalloc.VRegInvalid && x.off != 0 || y.r != regalloc.VRegInvalid && y.off != 0 { + panic("invalid input") + } + + u64 := uint64(x.off+y.off) + uint64(offBase) + if u64 != 0 { + if _, ok := asImm32(u64, false); !ok { + tmpReg := m.c.AllocateVReg(ssa.TypeI64) + m.lowerIconst(tmpReg, u64, true) + // Blank u64 as it has been already lowered. + u64 = 0 + + if x.r == regalloc.VRegInvalid { + x.r = tmpReg + } else if y.r == regalloc.VRegInvalid { + y.r = tmpReg + } else { + // We already know that either rx or ry is invalid, + // so we overwrite it with the temporary register. + panic("BUG") + } + } + } + + u32 := uint32(u64) + switch { + // We assume rx, ry are valid iff offx, offy are 0. + case x.r != regalloc.VRegInvalid && y.r != regalloc.VRegInvalid: + switch { + case x.shift != 0 && y.shift != 0: + // Cannot absorb two shifted registers, must lower one to a shift instruction. + shifted := m.allocateInstr() + shifted.asShiftR(shiftROpShiftLeft, newOperandImm32(uint32(x.shift)), x.r, true) + m.insert(shifted) + + return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift) + case x.shift != 0 && y.shift == 0: + // Swap base and index. 
+ x, y = y, x + fallthrough + default: + return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift) + } + case x.r == regalloc.VRegInvalid && y.r != regalloc.VRegInvalid: + x, y = y, x + fallthrough + case x.r != regalloc.VRegInvalid && y.r == regalloc.VRegInvalid: + if x.shift != 0 { + zero := m.c.AllocateVReg(ssa.TypeI64) + m.lowerIconst(zero, 0, true) + return m.newAmodeRegRegShift(u32, zero, x.r, x.shift) + } + return m.newAmodeImmReg(u32, x.r) + default: // Both are invalid: use the offset. + tmpReg := m.c.AllocateVReg(ssa.TypeI64) + m.lowerIconst(tmpReg, u64, true) + return m.newAmodeImmReg(0, tmpReg) + } +} + +func (m *machine) lowerAddend(x *backend.SSAValueDefinition) addend { + if x.IsFromBlockParam() { + return addend{x.BlkParamVReg, 0, 0} + } + // Ensure the addend is not referenced in multiple places; we will discard nested Iadds. + op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:]) + if op != ssa.OpcodeInvalid && op != ssa.OpcodeIadd { + return m.lowerAddendFromInstr(x.Instr) + } + p := m.getOperand_Reg(x) + return addend{p.reg(), 0, 0} +} + +// lowerAddendFromInstr takes an instruction returns a Vreg and an offset that can be used in an address mode. +// The Vreg is regalloc.VRegInvalid if the addend cannot be lowered to a register. +// The offset is 0 if the addend can be lowered to a register. +func (m *machine) lowerAddendFromInstr(instr *ssa.Instruction) addend { + instr.MarkLowered() + switch op := instr.Opcode(); op { + case ssa.OpcodeIconst: + u64 := instr.ConstantVal() + if instr.Return().Type().Bits() == 32 { + return addend{regalloc.VRegInvalid, int64(int32(u64)), 0} // sign-extend. 
+ } else { + return addend{regalloc.VRegInvalid, int64(u64), 0} + } + case ssa.OpcodeUExtend, ssa.OpcodeSExtend: + input := instr.Arg() + inputDef := m.c.ValueDefinition(input) + if input.Type().Bits() != 32 { + panic("BUG: invalid input type " + input.Type().String()) + } + constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant() + switch { + case constInst && op == ssa.OpcodeSExtend: + return addend{regalloc.VRegInvalid, int64(uint32(inputDef.Instr.ConstantVal())), 0} + case constInst && op == ssa.OpcodeUExtend: + return addend{regalloc.VRegInvalid, int64(int32(inputDef.Instr.ConstantVal())), 0} // sign-extend! + default: + r := m.getOperand_Reg(inputDef) + return addend{r.reg(), 0, 0} + } + case ssa.OpcodeIshl: + // If the addend is a shift, we can only handle it if the shift amount is a constant. + x, amount := instr.Arg2() + amountDef := m.c.ValueDefinition(amount) + if amountDef.IsFromInstr() && amountDef.Instr.Constant() && amountDef.Instr.ConstantVal() <= 3 { + r := m.getOperand_Reg(m.c.ValueDefinition(x)) + return addend{r.reg(), 0, uint8(amountDef.Instr.ConstantVal())} + } + r := m.getOperand_Reg(m.c.ValueDefinition(x)) + return addend{r.reg(), 0, 0} + } + panic("BUG: invalid opcode") +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go new file mode 100644 index 000000000..310ad2203 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go @@ -0,0 +1,3611 @@ +package amd64 + +import ( + "context" + "encoding/binary" + "fmt" + "math" + "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" + 
"github.com/tetratelabs/wazero/internal/platform" +) + +// NewBackend returns a new backend for arm64. +func NewBackend() backend.Machine { + ectx := backend.NewExecutableContextT[instruction]( + resetInstruction, + setNext, + setPrev, + asNop, + ) + return &machine{ + ectx: ectx, + cpuFeatures: platform.CpuFeatures, + regAlloc: regalloc.NewAllocator(regInfo), + spillSlots: map[regalloc.VRegID]int64{}, + amodePool: wazevoapi.NewPool[amode](nil), + constSwizzleMaskConstIndex: -1, + constSqmulRoundSatIndex: -1, + constI8x16SHLMaskTableIndex: -1, + constI8x16LogicalSHRMaskTableIndex: -1, + constF64x2CvtFromIMaskIndex: -1, + constTwop52Index: -1, + constI32sMaxOnF64x2Index: -1, + constI32uMaxOnF64x2Index: -1, + constAllOnesI8x16Index: -1, + constAllOnesI16x8Index: -1, + constExtAddPairwiseI16x8uMask1Index: -1, + constExtAddPairwiseI16x8uMask2Index: -1, + } +} + +type ( + // machine implements backend.Machine for amd64. + machine struct { + c backend.Compiler + ectx *backend.ExecutableContextT[instruction] + stackBoundsCheckDisabled bool + + amodePool wazevoapi.Pool[amode] + + cpuFeatures platform.CpuFeatureFlags + + regAlloc regalloc.Allocator + regAllocFn *backend.RegAllocFunction[*instruction, *machine] + regAllocStarted bool + + spillSlotSize int64 + spillSlots map[regalloc.VRegID]int64 + currentABI *backend.FunctionABI + clobberedRegs []regalloc.VReg + + maxRequiredStackSizeForCalls int64 + + labelResolutionPends []labelResolutionPend + + jmpTableTargets [][]uint32 + consts []_const + + constSwizzleMaskConstIndex, constSqmulRoundSatIndex, + constI8x16SHLMaskTableIndex, constI8x16LogicalSHRMaskTableIndex, + constF64x2CvtFromIMaskIndex, constTwop52Index, + constI32sMaxOnF64x2Index, constI32uMaxOnF64x2Index, + constAllOnesI8x16Index, constAllOnesI16x8Index, + constExtAddPairwiseI16x8uMask1Index, constExtAddPairwiseI16x8uMask2Index int + } + + _const struct { + lo, hi uint64 + _var []byte + label *labelPosition + } + + labelResolutionPend struct { + instr *instruction 
+ instrOffset int64 + // imm32Offset is the offset of the last 4 bytes of the instruction. + imm32Offset int64 + } + + labelPosition = backend.LabelPosition[instruction] +) + +func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) backend.Label { + index := *i + if index == -1 { + label := m.allocateLabel() + index = len(m.consts) + m.consts = append(m.consts, _const{ + _var: _var, + label: label, + }) + *i = index + } + return m.consts[index].label.L +} + +// Reset implements backend.Machine. +func (m *machine) Reset() { + m.consts = m.consts[:0] + m.clobberedRegs = m.clobberedRegs[:0] + for key := range m.spillSlots { + m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key)) + } + for _, key := range m.clobberedRegs { + delete(m.spillSlots, regalloc.VRegID(key)) + } + + m.stackBoundsCheckDisabled = false + m.ectx.Reset() + + m.regAllocFn.Reset() + m.regAlloc.Reset() + m.regAllocStarted = false + m.clobberedRegs = m.clobberedRegs[:0] + + m.spillSlotSize = 0 + m.maxRequiredStackSizeForCalls = 0 + + m.amodePool.Reset() + m.jmpTableTargets = m.jmpTableTargets[:0] + m.constSwizzleMaskConstIndex = -1 + m.constSqmulRoundSatIndex = -1 + m.constI8x16SHLMaskTableIndex = -1 + m.constI8x16LogicalSHRMaskTableIndex = -1 + m.constF64x2CvtFromIMaskIndex = -1 + m.constTwop52Index = -1 + m.constI32sMaxOnF64x2Index = -1 + m.constI32uMaxOnF64x2Index = -1 + m.constAllOnesI8x16Index = -1 + m.constAllOnesI16x8Index = -1 + m.constExtAddPairwiseI16x8uMask1Index = -1 + m.constExtAddPairwiseI16x8uMask2Index = -1 +} + +// ExecutableContext implements backend.Machine. +func (m *machine) ExecutableContext() backend.ExecutableContext { return m.ectx } + +// DisableStackCheck implements backend.Machine. +func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true } + +// SetCompiler implements backend.Machine. 
+func (m *machine) SetCompiler(c backend.Compiler) { + m.c = c + m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, c.SSABuilder(), c) +} + +// SetCurrentABI implements backend.Machine. +func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { + m.currentABI = abi +} + +// RegAlloc implements backend.Machine. +func (m *machine) RegAlloc() { + rf := m.regAllocFn + for _, pos := range m.ectx.OrderedBlockLabels { + rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) + } + + m.regAllocStarted = true + m.regAlloc.DoAllocation(rf) + // Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes. + m.spillSlotSize = (m.spillSlotSize + 15) &^ 15 +} + +// InsertReturn implements backend.Machine. +func (m *machine) InsertReturn() { + i := m.allocateInstr().asRet() + m.insert(i) +} + +// LowerSingleBranch implements backend.Machine. +func (m *machine) LowerSingleBranch(b *ssa.Instruction) { + ectx := m.ectx + switch b.Opcode() { + case ssa.OpcodeJump: + _, _, targetBlk := b.BranchData() + if b.IsFallthroughJump() { + return + } + jmp := m.allocateInstr() + target := ectx.GetOrAllocateSSABlockLabel(targetBlk) + if target == backend.LabelReturn { + jmp.asRet() + } else { + jmp.asJmp(newOperandLabel(target)) + } + m.insert(jmp) + case ssa.OpcodeBrTable: + index, target := b.BrTableData() + m.lowerBrTable(index, target) + default: + panic("BUG: unexpected branch opcode" + b.Opcode().String()) + } +} + +func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { + // TODO: reuse the slice! 
+ labels := make([]uint32, len(targets)) + for j, target := range targets { + labels[j] = uint32(m.ectx.GetOrAllocateSSABlockLabel(target)) + } + index = len(m.jmpTableTargets) + m.jmpTableTargets = append(m.jmpTableTargets, labels) + return +} + +var condBranchMatches = [...]ssa.Opcode{ssa.OpcodeIcmp, ssa.OpcodeFcmp} + +func (m *machine) lowerBrTable(index ssa.Value, targets []ssa.BasicBlock) { + _v := m.getOperand_Reg(m.c.ValueDefinition(index)) + v := m.copyToTmp(_v.reg()) + + // First, we need to do the bounds check. + maxIndex := m.c.AllocateVReg(ssa.TypeI32) + m.lowerIconst(maxIndex, uint64(len(targets)-1), false) + cmp := m.allocateInstr().asCmpRmiR(true, newOperandReg(maxIndex), v, false) + m.insert(cmp) + + // Then do the conditional move maxIndex to v if v > maxIndex. + cmov := m.allocateInstr().asCmove(condNB, newOperandReg(maxIndex), v, false) + m.insert(cmov) + + // Now that v has the correct index. Load the address of the jump table into the addr. + addr := m.c.AllocateVReg(ssa.TypeI64) + leaJmpTableAddr := m.allocateInstr() + m.insert(leaJmpTableAddr) + + // Then add the target's offset into jmpTableAddr. + loadTargetOffsetFromJmpTable := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, + // Shift by 3 because each entry is 8 bytes. + newOperandMem(m.newAmodeRegRegShift(0, addr, v, 3)), addr, true) + m.insert(loadTargetOffsetFromJmpTable) + + // Now ready to jump. + jmp := m.allocateInstr().asJmp(newOperandReg(addr)) + m.insert(jmp) + + jmpTableBegin, jmpTableBeginLabel := m.allocateBrTarget() + m.insert(jmpTableBegin) + leaJmpTableAddr.asLEA(newOperandLabel(jmpTableBeginLabel), addr) + + jmpTable := m.allocateInstr() + targetSliceIndex := m.addJmpTableTarget(targets) + jmpTable.asJmpTableSequence(targetSliceIndex, len(targets)) + m.insert(jmpTable) +} + +// LowerConditionalBranch implements backend.Machine. 
+func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { + exctx := m.ectx + cval, args, targetBlk := b.BranchData() + if len(args) > 0 { + panic(fmt.Sprintf( + "conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", + exctx.CurrentSSABlk, + targetBlk, + )) + } + + target := exctx.GetOrAllocateSSABlockLabel(targetBlk) + cvalDef := m.c.ValueDefinition(cval) + + switch m.c.MatchInstrOneOf(cvalDef, condBranchMatches[:]) { + case ssa.OpcodeIcmp: + cvalInstr := cvalDef.Instr + x, y, c := cvalInstr.IcmpData() + + cc := condFromSSAIntCmpCond(c) + if b.Opcode() == ssa.OpcodeBrz { + cc = cc.invert() + } + + // First, perform the comparison and set the flag. + xd, yd := m.c.ValueDefinition(x), m.c.ValueDefinition(y) + if !m.tryLowerBandToFlag(xd, yd) { + m.lowerIcmpToFlag(xd, yd, x.Type() == ssa.TypeI64) + } + + // Then perform the conditional branch. + m.insert(m.allocateInstr().asJmpIf(cc, newOperandLabel(target))) + cvalDef.Instr.MarkLowered() + case ssa.OpcodeFcmp: + cvalInstr := cvalDef.Instr + + f1, f2, and := m.lowerFcmpToFlags(cvalInstr) + isBrz := b.Opcode() == ssa.OpcodeBrz + if isBrz { + f1 = f1.invert() + } + if f2 == condInvalid { + m.insert(m.allocateInstr().asJmpIf(f1, newOperandLabel(target))) + } else { + if isBrz { + f2 = f2.invert() + and = !and + } + jmp1, jmp2 := m.allocateInstr(), m.allocateInstr() + m.insert(jmp1) + m.insert(jmp2) + notTaken, notTakenLabel := m.allocateBrTarget() + m.insert(notTaken) + if and { + jmp1.asJmpIf(f1.invert(), newOperandLabel(notTakenLabel)) + jmp2.asJmpIf(f2, newOperandLabel(target)) + } else { + jmp1.asJmpIf(f1, newOperandLabel(target)) + jmp2.asJmpIf(f2, newOperandLabel(target)) + } + } + + cvalDef.Instr.MarkLowered() + default: + v := m.getOperand_Reg(cvalDef) + + var cc cond + if b.Opcode() == ssa.OpcodeBrz { + cc = condZ + } else { + cc = condNZ + } + + // Perform test %v, %v to set the flag. 
+ cmp := m.allocateInstr().asCmpRmiR(false, v, v.reg(), false) + m.insert(cmp) + m.insert(m.allocateInstr().asJmpIf(cc, newOperandLabel(target))) + } +} + +// LowerInstr implements backend.Machine. +func (m *machine) LowerInstr(instr *ssa.Instruction) { + if l := instr.SourceOffset(); l.Valid() { + info := m.allocateInstr().asEmitSourceOffsetInfo(l) + m.insert(info) + } + + switch op := instr.Opcode(); op { + case ssa.OpcodeBrz, ssa.OpcodeBrnz, ssa.OpcodeJump, ssa.OpcodeBrTable: + panic("BUG: branching instructions are handled by LowerBranches") + case ssa.OpcodeReturn: + panic("BUG: return must be handled by backend.Compiler") + case ssa.OpcodeIconst, ssa.OpcodeF32const, ssa.OpcodeF64const: // Constant instructions are inlined. + case ssa.OpcodeCall, ssa.OpcodeCallIndirect: + m.lowerCall(instr) + case ssa.OpcodeStore, ssa.OpcodeIstore8, ssa.OpcodeIstore16, ssa.OpcodeIstore32: + m.lowerStore(instr) + case ssa.OpcodeIadd: + m.lowerAluRmiROp(instr, aluRmiROpcodeAdd) + case ssa.OpcodeIsub: + m.lowerAluRmiROp(instr, aluRmiROpcodeSub) + case ssa.OpcodeImul: + m.lowerAluRmiROp(instr, aluRmiROpcodeMul) + case ssa.OpcodeSdiv, ssa.OpcodeUdiv, ssa.OpcodeSrem, ssa.OpcodeUrem: + isDiv := op == ssa.OpcodeSdiv || op == ssa.OpcodeUdiv + isSigned := op == ssa.OpcodeSdiv || op == ssa.OpcodeSrem + m.lowerIDivRem(instr, isDiv, isSigned) + case ssa.OpcodeBand: + m.lowerAluRmiROp(instr, aluRmiROpcodeAnd) + case ssa.OpcodeBor: + m.lowerAluRmiROp(instr, aluRmiROpcodeOr) + case ssa.OpcodeBxor: + m.lowerAluRmiROp(instr, aluRmiROpcodeXor) + case ssa.OpcodeIshl: + m.lowerShiftR(instr, shiftROpShiftLeft) + case ssa.OpcodeSshr: + m.lowerShiftR(instr, shiftROpShiftRightArithmetic) + case ssa.OpcodeUshr: + m.lowerShiftR(instr, shiftROpShiftRightLogical) + case ssa.OpcodeRotl: + m.lowerShiftR(instr, shiftROpRotateLeft) + case ssa.OpcodeRotr: + m.lowerShiftR(instr, shiftROpRotateRight) + case ssa.OpcodeClz: + m.lowerClz(instr) + case ssa.OpcodeCtz: + m.lowerCtz(instr) + case ssa.OpcodePopcnt: + 
m.lowerUnaryRmR(instr, unaryRmROpcodePopcnt) + case ssa.OpcodeFadd, ssa.OpcodeFsub, ssa.OpcodeFmul, ssa.OpcodeFdiv: + m.lowerXmmRmR(instr) + case ssa.OpcodeFabs: + m.lowerFabsFneg(instr) + case ssa.OpcodeFneg: + m.lowerFabsFneg(instr) + case ssa.OpcodeCeil: + m.lowerRound(instr, roundingModeUp) + case ssa.OpcodeFloor: + m.lowerRound(instr, roundingModeDown) + case ssa.OpcodeTrunc: + m.lowerRound(instr, roundingModeZero) + case ssa.OpcodeNearest: + m.lowerRound(instr, roundingModeNearest) + case ssa.OpcodeFmin, ssa.OpcodeFmax: + m.lowerFminFmax(instr) + case ssa.OpcodeFcopysign: + m.lowerFcopysign(instr) + case ssa.OpcodeBitcast: + m.lowerBitcast(instr) + case ssa.OpcodeSqrt: + m.lowerSqrt(instr) + case ssa.OpcodeFpromote: + v := instr.Arg() + rn := m.getOperand_Reg(m.c.ValueDefinition(v)) + rd := m.c.VRegOf(instr.Return()) + cnt := m.allocateInstr() + cnt.asXmmUnaryRmR(sseOpcodeCvtss2sd, rn, rd) + m.insert(cnt) + case ssa.OpcodeFdemote: + v := instr.Arg() + rn := m.getOperand_Reg(m.c.ValueDefinition(v)) + rd := m.c.VRegOf(instr.Return()) + cnt := m.allocateInstr() + cnt.asXmmUnaryRmR(sseOpcodeCvtsd2ss, rn, rd) + m.insert(cnt) + case ssa.OpcodeFcvtToSint, ssa.OpcodeFcvtToSintSat: + x, ctx := instr.Arg2() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + ctxVReg := m.c.VRegOf(ctx) + m.lowerFcvtToSint(ctxVReg, rn.reg(), rd, x.Type() == ssa.TypeF64, + instr.Return().Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat) + case ssa.OpcodeFcvtToUint, ssa.OpcodeFcvtToUintSat: + x, ctx := instr.Arg2() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + ctxVReg := m.c.VRegOf(ctx) + m.lowerFcvtToUint(ctxVReg, rn.reg(), rd, x.Type() == ssa.TypeF64, + instr.Return().Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat) + case ssa.OpcodeFcvtFromSint: + x := instr.Arg() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := newOperandReg(m.c.VRegOf(instr.Return())) + m.lowerFcvtFromSint(rn, rd, + x.Type() == 
ssa.TypeI64, instr.Return().Type().Bits() == 64) + case ssa.OpcodeFcvtFromUint: + x := instr.Arg() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := newOperandReg(m.c.VRegOf(instr.Return())) + m.lowerFcvtFromUint(rn, rd, x.Type() == ssa.TypeI64, + instr.Return().Type().Bits() == 64) + case ssa.OpcodeVanyTrue: + m.lowerVanyTrue(instr) + case ssa.OpcodeVallTrue: + m.lowerVallTrue(instr) + case ssa.OpcodeVhighBits: + m.lowerVhighBits(instr) + case ssa.OpcodeVbnot: + m.lowerVbnot(instr) + case ssa.OpcodeVband: + x, y := instr.Arg2() + m.lowerVbBinOp(sseOpcodePand, x, y, instr.Return()) + case ssa.OpcodeVbor: + x, y := instr.Arg2() + m.lowerVbBinOp(sseOpcodePor, x, y, instr.Return()) + case ssa.OpcodeVbxor: + x, y := instr.Arg2() + m.lowerVbBinOp(sseOpcodePxor, x, y, instr.Return()) + case ssa.OpcodeVbandnot: + m.lowerVbandnot(instr, sseOpcodePandn) + case ssa.OpcodeVbitselect: + m.lowerVbitselect(instr) + case ssa.OpcodeVIadd: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePaddb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePaddw + case ssa.VecLaneI32x4: + vecOp = sseOpcodePaddd + case ssa.VecLaneI64x2: + vecOp = sseOpcodePaddq + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVSaddSat: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePaddsb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePaddsw + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVUaddSat: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePaddusb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePaddusw + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVIsub: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePsubb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePsubw + case 
ssa.VecLaneI32x4: + vecOp = sseOpcodePsubd + case ssa.VecLaneI64x2: + vecOp = sseOpcodePsubq + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVSsubSat: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePsubsb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePsubsw + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVUsubSat: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePsubusb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePsubusw + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVImul: + m.lowerVImul(instr) + case ssa.OpcodeVIneg: + x, lane := instr.ArgWithLane() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePsubb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePsubw + case ssa.VecLaneI32x4: + vecOp = sseOpcodePsubd + case ssa.VecLaneI64x2: + vecOp = sseOpcodePsubq + default: + panic("BUG") + } + + tmp := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asZeros(tmp)) + + i := m.allocateInstr() + i.asXmmRmR(vecOp, rn, tmp) + m.insert(i) + + m.copyTo(tmp, rd) + case ssa.OpcodeVFadd: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneF32x4: + vecOp = sseOpcodeAddps + case ssa.VecLaneF64x2: + vecOp = sseOpcodeAddpd + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVFsub: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneF32x4: + vecOp = sseOpcodeSubps + case ssa.VecLaneF64x2: + vecOp = sseOpcodeSubpd + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVFdiv: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneF32x4: + vecOp = sseOpcodeDivps + case ssa.VecLaneF64x2: + vecOp = sseOpcodeDivpd + } + 
m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVFmul: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneF32x4: + vecOp = sseOpcodeMulps + case ssa.VecLaneF64x2: + vecOp = sseOpcodeMulpd + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVFneg: + x, lane := instr.ArgWithLane() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + + tmp := m.c.AllocateVReg(ssa.TypeV128) + + var shiftOp, xorOp sseOpcode + var shiftAmt uint32 + switch lane { + case ssa.VecLaneF32x4: + shiftOp, shiftAmt, xorOp = sseOpcodePslld, 31, sseOpcodeXorps + case ssa.VecLaneF64x2: + shiftOp, shiftAmt, xorOp = sseOpcodePsllq, 63, sseOpcodeXorpd + } + + zero := m.allocateInstr() + zero.asZeros(tmp) + m.insert(zero) + + // Set all bits on tmp by CMPPD with arg=0 (== pseudo CMPEQPD instruction). + // See https://www.felixcloutier.com/x86/cmpps + // + // Note: if we do not clear all the bits ^ with XORPS, this might end up not setting ones on some lane + // if the lane is NaN. + cmp := m.allocateInstr() + cmp.asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_UQ), newOperandReg(tmp), tmp) + m.insert(cmp) + + // Do the left shift on each lane to set only the most significant bit in each. + i := m.allocateInstr() + i.asXmmRmiReg(shiftOp, newOperandImm32(shiftAmt), tmp) + m.insert(i) + + // Get the negated result by XOR on each lane with tmp. 
+ i = m.allocateInstr() + i.asXmmRmR(xorOp, rn, tmp) + m.insert(i) + + m.copyTo(tmp, rd) + + case ssa.OpcodeVSqrt: + x, lane := instr.ArgWithLane() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + + var vecOp sseOpcode + switch lane { + case ssa.VecLaneF32x4: + vecOp = sseOpcodeSqrtps + case ssa.VecLaneF64x2: + vecOp = sseOpcodeSqrtpd + } + i := m.allocateInstr() + i.asXmmUnaryRmR(vecOp, rn, rd) + m.insert(i) + + case ssa.OpcodeVImin: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePminsb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePminsw + case ssa.VecLaneI32x4: + vecOp = sseOpcodePminsd + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVUmin: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePminub + case ssa.VecLaneI16x8: + vecOp = sseOpcodePminuw + case ssa.VecLaneI32x4: + vecOp = sseOpcodePminud + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVImax: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePmaxsb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePmaxsw + case ssa.VecLaneI32x4: + vecOp = sseOpcodePmaxsd + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVUmax: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePmaxub + case ssa.VecLaneI16x8: + vecOp = sseOpcodePmaxuw + case ssa.VecLaneI32x4: + vecOp = sseOpcodePmaxud + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVAvgRound: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePavgb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePavgw + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + + case ssa.OpcodeVIcmp: + x, y, c, lane := instr.VIcmpData() + 
m.lowerVIcmp(x, y, c, instr.Return(), lane) + + case ssa.OpcodeVFcmp: + x, y, c, lane := instr.VFcmpData() + m.lowerVFcmp(x, y, c, instr.Return(), lane) + + case ssa.OpcodeExtractlane: + x, index, signed, lane := instr.ExtractlaneData() + m.lowerExtractLane(x, index, signed, instr.Return(), lane) + + case ssa.OpcodeInsertlane: + x, y, index, lane := instr.InsertlaneData() + m.lowerInsertLane(x, y, index, instr.Return(), lane) + + case ssa.OpcodeSwizzle: + x, y, _ := instr.Arg2WithLane() + m.lowerSwizzle(x, y, instr.Return()) + + case ssa.OpcodeShuffle: + x, y, lo, hi := instr.ShuffleData() + m.lowerShuffle(x, y, lo, hi, instr.Return()) + + case ssa.OpcodeSplat: + x, lane := instr.ArgWithLane() + m.lowerSplat(x, instr.Return(), lane) + + case ssa.OpcodeSqmulRoundSat: + x, y := instr.Arg2() + m.lowerSqmulRoundSat(x, y, instr.Return()) + + case ssa.OpcodeVZeroExtLoad: + ptr, offset, typ := instr.VZeroExtLoadData() + var sseOp sseOpcode + // Both movss and movsd clears the higher bits of the destination register upt 128 bits. 
+ // https://www.felixcloutier.com/x86/movss + // https://www.felixcloutier.com/x86/movsd + if typ == ssa.TypeF32 { + sseOp = sseOpcodeMovss + } else { + sseOp = sseOpcodeMovsd + } + mem := m.lowerToAddressMode(ptr, offset) + dst := m.c.VRegOf(instr.Return()) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, newOperandMem(mem), dst)) + + case ssa.OpcodeVMinPseudo: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneF32x4: + vecOp = sseOpcodeMinps + case ssa.VecLaneF64x2: + vecOp = sseOpcodeMinpd + default: + panic("BUG: unexpected lane type") + } + m.lowerVbBinOpUnaligned(vecOp, y, x, instr.Return()) + + case ssa.OpcodeVMaxPseudo: + x, y, lane := instr.Arg2WithLane() + var vecOp sseOpcode + switch lane { + case ssa.VecLaneF32x4: + vecOp = sseOpcodeMaxps + case ssa.VecLaneF64x2: + vecOp = sseOpcodeMaxpd + default: + panic("BUG: unexpected lane type") + } + m.lowerVbBinOpUnaligned(vecOp, y, x, instr.Return()) + + case ssa.OpcodeVIshl: + x, y, lane := instr.Arg2WithLane() + m.lowerVIshl(x, y, instr.Return(), lane) + + case ssa.OpcodeVSshr: + x, y, lane := instr.Arg2WithLane() + m.lowerVSshr(x, y, instr.Return(), lane) + + case ssa.OpcodeVUshr: + x, y, lane := instr.Arg2WithLane() + m.lowerVUshr(x, y, instr.Return(), lane) + + case ssa.OpcodeVCeil: + x, lane := instr.ArgWithLane() + m.lowerVRound(x, instr.Return(), 0x2, lane == ssa.VecLaneF64x2) + + case ssa.OpcodeVFloor: + x, lane := instr.ArgWithLane() + m.lowerVRound(x, instr.Return(), 0x1, lane == ssa.VecLaneF64x2) + + case ssa.OpcodeVTrunc: + x, lane := instr.ArgWithLane() + m.lowerVRound(x, instr.Return(), 0x3, lane == ssa.VecLaneF64x2) + + case ssa.OpcodeVNearest: + x, lane := instr.ArgWithLane() + m.lowerVRound(x, instr.Return(), 0x0, lane == ssa.VecLaneF64x2) + + case ssa.OpcodeExtIaddPairwise: + x, lane, signed := instr.ExtIaddPairwiseData() + m.lowerExtIaddPairwise(x, instr.Return(), lane, signed) + + case ssa.OpcodeUwidenLow, ssa.OpcodeSwidenLow: + x, lane := 
instr.ArgWithLane() + m.lowerWidenLow(x, instr.Return(), lane, op == ssa.OpcodeSwidenLow) + + case ssa.OpcodeUwidenHigh, ssa.OpcodeSwidenHigh: + x, lane := instr.ArgWithLane() + m.lowerWidenHigh(x, instr.Return(), lane, op == ssa.OpcodeSwidenHigh) + + case ssa.OpcodeLoadSplat: + ptr, offset, lane := instr.LoadSplatData() + m.lowerLoadSplat(ptr, offset, instr.Return(), lane) + + case ssa.OpcodeVFcvtFromUint, ssa.OpcodeVFcvtFromSint: + x, lane := instr.ArgWithLane() + m.lowerVFcvtFromInt(x, instr.Return(), lane, op == ssa.OpcodeVFcvtFromSint) + + case ssa.OpcodeVFcvtToSintSat, ssa.OpcodeVFcvtToUintSat: + x, lane := instr.ArgWithLane() + m.lowerVFcvtToIntSat(x, instr.Return(), lane, op == ssa.OpcodeVFcvtToSintSat) + + case ssa.OpcodeSnarrow, ssa.OpcodeUnarrow: + x, y, lane := instr.Arg2WithLane() + m.lowerNarrow(x, y, instr.Return(), lane, op == ssa.OpcodeSnarrow) + + case ssa.OpcodeFvpromoteLow: + x := instr.Arg() + src := m.getOperand_Reg(m.c.ValueDefinition(x)) + dst := m.c.VRegOf(instr.Return()) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtps2pd, src, dst)) + + case ssa.OpcodeFvdemote: + x := instr.Arg() + src := m.getOperand_Reg(m.c.ValueDefinition(x)) + dst := m.c.VRegOf(instr.Return()) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtpd2ps, src, dst)) + + case ssa.OpcodeWideningPairwiseDotProductS: + x, y := instr.Arg2() + m.lowerWideningPairwiseDotProductS(x, y, instr.Return()) + + case ssa.OpcodeVIabs: + m.lowerVIabs(instr) + case ssa.OpcodeVIpopcnt: + m.lowerVIpopcnt(instr) + case ssa.OpcodeVFmin: + m.lowerVFmin(instr) + case ssa.OpcodeVFmax: + m.lowerVFmax(instr) + case ssa.OpcodeVFabs: + m.lowerVFabs(instr) + case ssa.OpcodeUndefined: + m.insert(m.allocateInstr().asUD2()) + case ssa.OpcodeExitWithCode: + execCtx, code := instr.ExitWithCodeData() + m.lowerExitWithCode(m.c.VRegOf(execCtx), code) + case ssa.OpcodeExitIfTrueWithCode: + execCtx, c, code := instr.ExitIfTrueWithCodeData() + m.lowerExitIfTrueWithCode(m.c.VRegOf(execCtx), c, code) + 
case ssa.OpcodeLoad: + ptr, offset, typ := instr.LoadData() + dst := m.c.VRegOf(instr.Return()) + m.lowerLoad(ptr, offset, typ, dst) + case ssa.OpcodeUload8, ssa.OpcodeUload16, ssa.OpcodeUload32, ssa.OpcodeSload8, ssa.OpcodeSload16, ssa.OpcodeSload32: + ptr, offset, _ := instr.LoadData() + ret := m.c.VRegOf(instr.Return()) + m.lowerExtLoad(op, ptr, offset, ret) + case ssa.OpcodeVconst: + result := m.c.VRegOf(instr.Return()) + lo, hi := instr.VconstData() + m.lowerVconst(result, lo, hi) + case ssa.OpcodeSExtend, ssa.OpcodeUExtend: + from, to, signed := instr.ExtendData() + m.lowerExtend(instr.Arg(), instr.Return(), from, to, signed) + case ssa.OpcodeIcmp: + m.lowerIcmp(instr) + case ssa.OpcodeFcmp: + m.lowerFcmp(instr) + case ssa.OpcodeSelect: + cval, x, y := instr.SelectData() + m.lowerSelect(x, y, cval, instr.Return()) + case ssa.OpcodeIreduce: + rn := m.getOperand_Mem_Reg(m.c.ValueDefinition(instr.Arg())) + retVal := instr.Return() + rd := m.c.VRegOf(retVal) + + if retVal.Type() != ssa.TypeI32 { + panic("TODO?: Ireduce to non-i32") + } + m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, rn, rd)) + + case ssa.OpcodeAtomicLoad: + ptr := instr.Arg() + size := instr.AtomicTargetSize() + dst := m.c.VRegOf(instr.Return()) + + // At this point, the ptr is ensured to be aligned, so using a normal load is atomic. 
+ // https://github.com/golang/go/blob/adead1a93f472affa97c494ef19f2f492ee6f34a/src/runtime/internal/atomic/atomic_amd64.go#L30 + mem := newOperandMem(m.lowerToAddressMode(ptr, 0)) + load := m.allocateInstr() + switch size { + case 8: + load.asMov64MR(mem, dst) + case 4: + load.asMovzxRmR(extModeLQ, mem, dst) + case 2: + load.asMovzxRmR(extModeWQ, mem, dst) + case 1: + load.asMovzxRmR(extModeBQ, mem, dst) + default: + panic("BUG") + } + m.insert(load) + + case ssa.OpcodeFence: + m.insert(m.allocateInstr().asMFence()) + + case ssa.OpcodeAtomicStore: + ptr, _val := instr.Arg2() + size := instr.AtomicTargetSize() + + val := m.getOperand_Reg(m.c.ValueDefinition(_val)) + // The content on the val register will be overwritten by xchg, so we need to copy it to a temporary register. + copied := m.copyToTmp(val.reg()) + + mem := newOperandMem(m.lowerToAddressMode(ptr, 0)) + store := m.allocateInstr().asXCHG(copied, mem, byte(size)) + m.insert(store) + + case ssa.OpcodeAtomicCas: + addr, exp, repl := instr.Arg3() + size := instr.AtomicTargetSize() + m.lowerAtomicCas(addr, exp, repl, size, instr.Return()) + + case ssa.OpcodeAtomicRmw: + addr, val := instr.Arg2() + atomicOp, size := instr.AtomicRmwData() + m.lowerAtomicRmw(atomicOp, addr, val, size, instr.Return()) + + default: + panic("TODO: lowering " + op.String()) + } +} + +func (m *machine) lowerAtomicRmw(op ssa.AtomicRmwOp, addr, val ssa.Value, size uint64, ret ssa.Value) { + mem := m.lowerToAddressMode(addr, 0) + _val := m.getOperand_Reg(m.c.ValueDefinition(val)) + + switch op { + case ssa.AtomicRmwOpAdd, ssa.AtomicRmwOpSub: + valCopied := m.copyToTmp(_val.reg()) + if op == ssa.AtomicRmwOpSub { + // Negate the value. 
+ m.insert(m.allocateInstr().asNeg(newOperandReg(valCopied), true)) + } + m.insert(m.allocateInstr().asLockXAdd(valCopied, mem, byte(size))) + m.clearHigherBitsForAtomic(valCopied, size, ret.Type()) + m.copyTo(valCopied, m.c.VRegOf(ret)) + + case ssa.AtomicRmwOpAnd, ssa.AtomicRmwOpOr, ssa.AtomicRmwOpXor: + accumulator := raxVReg + // Reserve rax for the accumulator to make regalloc happy. + // Note: do this initialization before defining valCopied, because it might be the same register and + // if that happens, the unnecessary load/store will be performed inside the loop. + // This can be mitigated in any way once the register allocator is clever enough. + m.insert(m.allocateInstr().asDefineUninitializedReg(accumulator)) + + // Copy the value to a temporary register. + valCopied := m.copyToTmp(_val.reg()) + m.clearHigherBitsForAtomic(valCopied, size, ret.Type()) + + memOp := newOperandMem(mem) + tmp := m.c.AllocateVReg(ssa.TypeI64) + beginLoop, beginLoopLabel := m.allocateBrTarget() + { + m.insert(beginLoop) + // Reset the value on tmp by the original value. + m.copyTo(valCopied, tmp) + // Load the current value at the memory location into accumulator. + switch size { + case 1: + m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, memOp, accumulator)) + case 2: + m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, memOp, accumulator)) + case 4: + m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, memOp, accumulator)) + case 8: + m.insert(m.allocateInstr().asMov64MR(memOp, accumulator)) + default: + panic("BUG") + } + // Then perform the logical operation on the accumulator and the value on tmp. 
+ switch op { + case ssa.AtomicRmwOpAnd: + m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, newOperandReg(accumulator), tmp, true)) + case ssa.AtomicRmwOpOr: + m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeOr, newOperandReg(accumulator), tmp, true)) + case ssa.AtomicRmwOpXor: + m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(accumulator), tmp, true)) + default: + panic("BUG") + } + // Finally, try compare-exchange the value at the memory location with the tmp. + m.insert(m.allocateInstr().asLockCmpXCHG(tmp, memOp.addressMode(), byte(size))) + // If it succeeds, ZF will be set, and we can break the loop. + m.insert(m.allocateInstr().asJmpIf(condNZ, newOperandLabel(beginLoopLabel))) + } + + // valCopied must be alive at the end of the loop. + m.insert(m.allocateInstr().asNopUseReg(valCopied)) + + // At this point, accumulator contains the result. + m.clearHigherBitsForAtomic(accumulator, size, ret.Type()) + m.copyTo(accumulator, m.c.VRegOf(ret)) + + case ssa.AtomicRmwOpXchg: + valCopied := m.copyToTmp(_val.reg()) + + m.insert(m.allocateInstr().asXCHG(valCopied, newOperandMem(mem), byte(size))) + m.clearHigherBitsForAtomic(valCopied, size, ret.Type()) + m.copyTo(valCopied, m.c.VRegOf(ret)) + + default: + panic("BUG") + } +} + +func (m *machine) lowerAtomicCas(addr, exp, repl ssa.Value, size uint64, ret ssa.Value) { + mem := m.lowerToAddressMode(addr, 0) + expOp := m.getOperand_Reg(m.c.ValueDefinition(exp)) + replOp := m.getOperand_Reg(m.c.ValueDefinition(repl)) + + accumulator := raxVReg + m.copyTo(expOp.reg(), accumulator) + m.insert(m.allocateInstr().asLockCmpXCHG(replOp.reg(), mem, byte(size))) + m.clearHigherBitsForAtomic(accumulator, size, ret.Type()) + m.copyTo(accumulator, m.c.VRegOf(ret)) +} + +func (m *machine) clearHigherBitsForAtomic(r regalloc.VReg, valSize uint64, resultType ssa.Type) { + switch resultType { + case ssa.TypeI32: + switch valSize { + case 1: + m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(r), 
r)) + case 2: + m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(r), r)) + } + case ssa.TypeI64: + switch valSize { + case 1: + m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, newOperandReg(r), r)) + case 2: + m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, newOperandReg(r), r)) + case 4: + m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, newOperandReg(r), r)) + } + } +} + +func (m *machine) lowerFcmp(instr *ssa.Instruction) { + f1, f2, and := m.lowerFcmpToFlags(instr) + rd := m.c.VRegOf(instr.Return()) + if f2 == condInvalid { + tmp := m.c.AllocateVReg(ssa.TypeI32) + m.insert(m.allocateInstr().asSetcc(f1, tmp)) + // On amd64, setcc only sets the first byte of the register, so we need to zero extend it to match + // the semantics of Icmp that sets either 0 or 1. + m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, newOperandReg(tmp), rd)) + } else { + tmp1, tmp2 := m.c.AllocateVReg(ssa.TypeI32), m.c.AllocateVReg(ssa.TypeI32) + m.insert(m.allocateInstr().asSetcc(f1, tmp1)) + m.insert(m.allocateInstr().asSetcc(f2, tmp2)) + var op aluRmiROpcode + if and { + op = aluRmiROpcodeAnd + } else { + op = aluRmiROpcodeOr + } + m.insert(m.allocateInstr().asAluRmiR(op, newOperandReg(tmp1), tmp2, false)) + m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, newOperandReg(tmp2), rd)) + } +} + +func (m *machine) lowerIcmp(instr *ssa.Instruction) { + x, y, c := instr.IcmpData() + m.lowerIcmpToFlag(m.c.ValueDefinition(x), m.c.ValueDefinition(y), x.Type() == ssa.TypeI64) + rd := m.c.VRegOf(instr.Return()) + tmp := m.c.AllocateVReg(ssa.TypeI32) + m.insert(m.allocateInstr().asSetcc(condFromSSAIntCmpCond(c), tmp)) + // On amd64, setcc only sets the first byte of the register, so we need to zero extend it to match + // the semantics of Icmp that sets either 0 or 1. 
+ m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, newOperandReg(tmp), rd)) +} + +func (m *machine) lowerSelect(x, y, cval, ret ssa.Value) { + xo, yo := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), m.getOperand_Reg(m.c.ValueDefinition(y)) + rd := m.c.VRegOf(ret) + + var cond cond + cvalDef := m.c.ValueDefinition(cval) + switch m.c.MatchInstrOneOf(cvalDef, condBranchMatches[:]) { + case ssa.OpcodeIcmp: + icmp := cvalDef.Instr + xc, yc, cc := icmp.IcmpData() + m.lowerIcmpToFlag(m.c.ValueDefinition(xc), m.c.ValueDefinition(yc), xc.Type() == ssa.TypeI64) + cond = condFromSSAIntCmpCond(cc) + icmp.Lowered() + default: // TODO: match ssa.OpcodeFcmp for optimization, but seems a bit complex. + cv := m.getOperand_Reg(cvalDef) + test := m.allocateInstr().asCmpRmiR(false, cv, cv.reg(), false) + m.insert(test) + cond = condNZ + } + + if typ := x.Type(); typ.IsInt() { + _64 := typ.Bits() == 64 + mov := m.allocateInstr() + tmp := m.c.AllocateVReg(typ) + switch yo.kind { + case operandKindReg: + mov.asMovRR(yo.reg(), tmp, _64) + case operandKindMem: + if _64 { + mov.asMov64MR(yo, tmp) + } else { + mov.asMovzxRmR(extModeLQ, yo, tmp) + } + default: + panic("BUG") + } + m.insert(mov) + cmov := m.allocateInstr().asCmove(cond, xo, tmp, _64) + m.insert(cmov) + m.insert(m.allocateInstr().asMovRR(tmp, rd, _64)) + } else { + mov := m.allocateInstr() + tmp := m.c.AllocateVReg(typ) + switch typ { + case ssa.TypeF32: + mov.asXmmUnaryRmR(sseOpcodeMovss, yo, tmp) + case ssa.TypeF64: + mov.asXmmUnaryRmR(sseOpcodeMovsd, yo, tmp) + case ssa.TypeV128: + mov.asXmmUnaryRmR(sseOpcodeMovdqu, yo, tmp) + default: + panic("BUG") + } + m.insert(mov) + + cmov := m.allocateInstr().asXmmCMov(cond, xo, tmp, typ.Size()) + m.insert(cmov) + + m.copyTo(tmp, rd) + } +} + +func (m *machine) lowerXmmCmovAfterRegAlloc(i *instruction) { + x := i.op1 + rd := i.op2.reg() + cond := cond(i.u1) + + jcc := m.allocateInstr() + m.insert(jcc) + + mov := m.allocateInstr() + switch i.u2 { + case 4: + 
mov.asXmmUnaryRmR(sseOpcodeMovss, x, rd) + case 8: + mov.asXmmUnaryRmR(sseOpcodeMovsd, x, rd) + case 16: + mov.asXmmUnaryRmR(sseOpcodeMovdqu, x, rd) + default: + panic("BUG") + } + m.insert(mov) + + nop, end := m.allocateBrTarget() + m.insert(nop) + jcc.asJmpIf(cond.invert(), newOperandLabel(end)) +} + +func (m *machine) lowerExtend(_arg, ret ssa.Value, from, to byte, signed bool) { + rd0 := m.c.VRegOf(ret) + arg := m.getOperand_Mem_Reg(m.c.ValueDefinition(_arg)) + + rd := m.c.AllocateVReg(ret.Type()) + + ext := m.allocateInstr() + switch { + case from == 8 && to == 16 && signed: + ext.asMovsxRmR(extModeBQ, arg, rd) + case from == 8 && to == 16 && !signed: + ext.asMovzxRmR(extModeBL, arg, rd) + case from == 8 && to == 32 && signed: + ext.asMovsxRmR(extModeBL, arg, rd) + case from == 8 && to == 32 && !signed: + ext.asMovzxRmR(extModeBQ, arg, rd) + case from == 8 && to == 64 && signed: + ext.asMovsxRmR(extModeBQ, arg, rd) + case from == 8 && to == 64 && !signed: + ext.asMovzxRmR(extModeBQ, arg, rd) + case from == 16 && to == 32 && signed: + ext.asMovsxRmR(extModeWL, arg, rd) + case from == 16 && to == 32 && !signed: + ext.asMovzxRmR(extModeWL, arg, rd) + case from == 16 && to == 64 && signed: + ext.asMovsxRmR(extModeWQ, arg, rd) + case from == 16 && to == 64 && !signed: + ext.asMovzxRmR(extModeWQ, arg, rd) + case from == 32 && to == 64 && signed: + ext.asMovsxRmR(extModeLQ, arg, rd) + case from == 32 && to == 64 && !signed: + ext.asMovzxRmR(extModeLQ, arg, rd) + default: + panic(fmt.Sprintf("BUG: unhandled extend: from=%d, to=%d, signed=%t", from, to, signed)) + } + m.insert(ext) + + m.copyTo(rd, rd0) +} + +func (m *machine) lowerVconst(dst regalloc.VReg, lo, hi uint64) { + if lo == 0 && hi == 0 { + m.insert(m.allocateInstr().asZeros(dst)) + return + } + + load := m.allocateInstr() + constLabel := m.allocateLabel() + m.consts = append(m.consts, _const{label: constLabel, lo: lo, hi: hi}) + load.asXmmUnaryRmR(sseOpcodeMovdqu, 
newOperandMem(m.newAmodeRipRel(constLabel.L)), dst) + m.insert(load) +} + +func (m *machine) lowerCtz(instr *ssa.Instruction) { + if m.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) { + m.lowerUnaryRmR(instr, unaryRmROpcodeTzcnt) + } else { + // On processors that do not support TZCNT, the BSF instruction is + // executed instead. The key difference between TZCNT and BSF + // instruction is that if source operand is zero, the content of + // destination operand is undefined. + // https://www.felixcloutier.com/x86/tzcnt.html + + x := instr.Arg() + if !x.Type().IsInt() { + panic("BUG?") + } + _64 := x.Type().Bits() == 64 + + xDef := m.c.ValueDefinition(x) + tmp := m.c.AllocateVReg(x.Type()) + rm := m.getOperand_Reg(xDef) + + // First, we have to check if the target is non-zero. + test := m.allocateInstr() + test.asCmpRmiR(false, rm, rm.reg(), _64) + m.insert(test) + + jmpNz := m.allocateInstr() + m.insert(jmpNz) + + // If the value is zero, we just push the const value. + m.lowerIconst(tmp, uint64(x.Type().Bits()), _64) + + // Now jump right after the non-zero case. + jmpAtEnd := m.allocateInstr() + m.insert(jmpAtEnd) + + // jmpNz target label is set here. + nop, nz := m.allocateBrTarget() + jmpNz.asJmpIf(condNZ, newOperandLabel(nz)) + m.insert(nop) + + // Emit the non-zero case. + bsr := m.allocateInstr() + bsr.asUnaryRmR(unaryRmROpcodeBsf, rm, tmp, _64) + m.insert(bsr) + + // jmpAtEnd target label is set here. + nopEnd, end := m.allocateBrTarget() + jmpAtEnd.asJmp(newOperandLabel(end)) + m.insert(nopEnd) + + m.copyTo(tmp, m.c.VRegOf(instr.Return())) + } +} + +func (m *machine) lowerClz(instr *ssa.Instruction) { + if m.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) { + m.lowerUnaryRmR(instr, unaryRmROpcodeLzcnt) + } else { + // On processors that do not support LZCNT, we combine BSR (calculating + // most significant set bit) with XOR. 
This logic is described in + // "Replace Raw Assembly Code with Builtin Intrinsics" section in: + // https://developer.apple.com/documentation/apple-silicon/addressing-architectural-differences-in-your-macos-code. + + x := instr.Arg() + if !x.Type().IsInt() { + panic("BUG?") + } + _64 := x.Type().Bits() == 64 + + xDef := m.c.ValueDefinition(x) + rm := m.getOperand_Reg(xDef) + tmp := m.c.AllocateVReg(x.Type()) + + // First, we have to check if the rm is non-zero as BSR is undefined + // on zero. See https://www.felixcloutier.com/x86/bsr. + test := m.allocateInstr() + test.asCmpRmiR(false, rm, rm.reg(), _64) + m.insert(test) + + jmpNz := m.allocateInstr() + m.insert(jmpNz) + + // If the value is zero, we just push the const value. + m.lowerIconst(tmp, uint64(x.Type().Bits()), _64) + + // Now jump right after the non-zero case. + jmpAtEnd := m.allocateInstr() + m.insert(jmpAtEnd) + + // jmpNz target label is set here. + nop, nz := m.allocateBrTarget() + jmpNz.asJmpIf(condNZ, newOperandLabel(nz)) + m.insert(nop) + + // Emit the non-zero case. + bsr := m.allocateInstr() + bsr.asUnaryRmR(unaryRmROpcodeBsr, rm, tmp, _64) + m.insert(bsr) + + // Now we XOR the value with the bit length minus one. + xor := m.allocateInstr() + xor.asAluRmiR(aluRmiROpcodeXor, newOperandImm32(uint32(x.Type().Bits()-1)), tmp, _64) + m.insert(xor) + + // jmpAtEnd target label is set here. 
+ nopEnd, end := m.allocateBrTarget() + jmpAtEnd.asJmp(newOperandLabel(end)) + m.insert(nopEnd) + + m.copyTo(tmp, m.c.VRegOf(instr.Return())) + } +} + +func (m *machine) lowerUnaryRmR(si *ssa.Instruction, op unaryRmROpcode) { + x := si.Arg() + if !x.Type().IsInt() { + panic("BUG?") + } + _64 := x.Type().Bits() == 64 + + xDef := m.c.ValueDefinition(x) + rm := m.getOperand_Mem_Reg(xDef) + rd := m.c.VRegOf(si.Return()) + + instr := m.allocateInstr() + instr.asUnaryRmR(op, rm, rd, _64) + m.insert(instr) +} + +func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, dst regalloc.VReg) { + mem := newOperandMem(m.lowerToAddressMode(ptr, offset)) + load := m.allocateInstr() + switch typ { + case ssa.TypeI32: + load.asMovzxRmR(extModeLQ, mem, dst) + case ssa.TypeI64: + load.asMov64MR(mem, dst) + case ssa.TypeF32: + load.asXmmUnaryRmR(sseOpcodeMovss, mem, dst) + case ssa.TypeF64: + load.asXmmUnaryRmR(sseOpcodeMovsd, mem, dst) + case ssa.TypeV128: + load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, dst) + default: + panic("BUG") + } + m.insert(load) +} + +func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, dst regalloc.VReg) { + mem := newOperandMem(m.lowerToAddressMode(ptr, offset)) + load := m.allocateInstr() + switch op { + case ssa.OpcodeUload8: + load.asMovzxRmR(extModeBQ, mem, dst) + case ssa.OpcodeUload16: + load.asMovzxRmR(extModeWQ, mem, dst) + case ssa.OpcodeUload32: + load.asMovzxRmR(extModeLQ, mem, dst) + case ssa.OpcodeSload8: + load.asMovsxRmR(extModeBQ, mem, dst) + case ssa.OpcodeSload16: + load.asMovsxRmR(extModeWQ, mem, dst) + case ssa.OpcodeSload32: + load.asMovsxRmR(extModeLQ, mem, dst) + default: + panic("BUG") + } + m.insert(load) +} + +func (m *machine) lowerExitIfTrueWithCode(execCtx regalloc.VReg, cond ssa.Value, code wazevoapi.ExitCode) { + condDef := m.c.ValueDefinition(cond) + if !m.c.MatchInstr(condDef, ssa.OpcodeIcmp) { + panic("TODO: ExitIfTrue must come after Icmp at the moment: " + condDef.Instr.Opcode().String()) 
+ } + cvalInstr := condDef.Instr + cvalInstr.MarkLowered() + + // We need to copy the execution context to a temp register, because if it's spilled, + // it might end up being reloaded inside the exiting branch. + execCtxTmp := m.copyToTmp(execCtx) + + x, y, c := cvalInstr.IcmpData() + xx, yy := m.c.ValueDefinition(x), m.c.ValueDefinition(y) + if !m.tryLowerBandToFlag(xx, yy) { + m.lowerIcmpToFlag(xx, yy, x.Type() == ssa.TypeI64) + } + + jmpIf := m.allocateInstr() + m.insert(jmpIf) + l := m.lowerExitWithCode(execCtxTmp, code) + jmpIf.asJmpIf(condFromSSAIntCmpCond(c).invert(), newOperandLabel(l)) +} + +func (m *machine) tryLowerBandToFlag(x, y *backend.SSAValueDefinition) (ok bool) { + var target *backend.SSAValueDefinition + if x.IsFromInstr() && x.Instr.Constant() && x.Instr.ConstantVal() == 0 { + if m.c.MatchInstr(y, ssa.OpcodeBand) { + target = y + } + } + + if y.IsFromInstr() && y.Instr.Constant() && y.Instr.ConstantVal() == 0 { + if m.c.MatchInstr(x, ssa.OpcodeBand) { + target = x + } + } + + if target == nil { + return false + } + + bandInstr := target.Instr + bandX, bandY := bandInstr.Arg2() + + xx := m.getOperand_Reg(m.c.ValueDefinition(bandX)) + yy := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(bandY)) + test := m.allocateInstr().asCmpRmiR(false, yy, xx.reg(), bandX.Type() == ssa.TypeI64) + m.insert(test) + bandInstr.MarkLowered() + return true +} + +func (m *machine) allocateExitInstructions(execCtx, exitCodeReg regalloc.VReg) (saveRsp, saveRbp, setExitCode *instruction) { + saveRsp = m.allocateInstr().asMovRM( + rspVReg, + newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.U32(), execCtx)), + 8, + ) + + saveRbp = m.allocateInstr().asMovRM( + rbpVReg, + newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetFramePointerBeforeGoCall.U32(), execCtx)), + 8, + ) + setExitCode = m.allocateInstr().asMovRM( + exitCodeReg, + newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetExitCodeOffset.U32(), 
execCtx)), + 4, + ) + return +} + +func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel backend.Label) { + exitCodeReg := rbpVReg + saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtx, exitCodeReg) + + // Set save RSP, RBP, and write exit code. + m.insert(saveRsp) + m.insert(saveRbp) + m.lowerIconst(exitCodeReg, uint64(code), false) + m.insert(setExitCode) + + ripReg := rbpVReg + + // Next is to save the current address for stack unwinding. + nop, currentAddrLabel := m.allocateBrTarget() + m.insert(nop) + readRip := m.allocateInstr().asLEA(newOperandLabel(currentAddrLabel), ripReg) + m.insert(readRip) + saveRip := m.allocateInstr().asMovRM( + ripReg, + newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)), + 8, + ) + m.insert(saveRip) + + // Finally exit. + exitSq := m.allocateExitSeq(execCtx) + m.insert(exitSq) + + // Return the label for continuation. + continuation, afterLabel := m.allocateBrTarget() + m.insert(continuation) + return afterLabel +} + +func (m *machine) lowerAluRmiROp(si *ssa.Instruction, op aluRmiROpcode) { + x, y := si.Arg2() + if !x.Type().IsInt() { + panic("BUG?") + } + + _64 := x.Type().Bits() == 64 + + xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) + + // TODO: commutative args can be swapped if one of them is an immediate. + rn := m.getOperand_Reg(xDef) + rm := m.getOperand_Mem_Imm32_Reg(yDef) + rd := m.c.VRegOf(si.Return()) + + // rn is being overwritten, so we first copy its value to a temp register, + // in case it is referenced again later. + tmp := m.copyToTmp(rn.reg()) + + alu := m.allocateInstr() + alu.asAluRmiR(op, rm, tmp, _64) + m.insert(alu) + + // tmp now contains the result, we copy it to the dest register. 
+ m.copyTo(tmp, rd) +} + +func (m *machine) lowerShiftR(si *ssa.Instruction, op shiftROp) { + x, amt := si.Arg2() + if !x.Type().IsInt() { + panic("BUG?") + } + _64 := x.Type().Bits() == 64 + + xDef, amtDef := m.c.ValueDefinition(x), m.c.ValueDefinition(amt) + + opAmt := m.getOperand_Imm32_Reg(amtDef) + rx := m.getOperand_Reg(xDef) + rd := m.c.VRegOf(si.Return()) + + // rx is being overwritten, so we first copy its value to a temp register, + // in case it is referenced again later. + tmpDst := m.copyToTmp(rx.reg()) + + if opAmt.kind == operandKindReg { + // If opAmt is a register we must copy its value to rcx, + // because shiftR encoding mandates that the shift amount is in rcx. + m.copyTo(opAmt.reg(), rcxVReg) + + alu := m.allocateInstr() + alu.asShiftR(op, newOperandReg(rcxVReg), tmpDst, _64) + m.insert(alu) + + } else { + alu := m.allocateInstr() + alu.asShiftR(op, opAmt, tmpDst, _64) + m.insert(alu) + } + + // tmp now contains the result, we copy it to the dest register. + m.copyTo(tmpDst, rd) +} + +func (m *machine) lowerXmmRmR(instr *ssa.Instruction) { + x, y := instr.Arg2() + if !x.Type().IsFloat() { + panic("BUG?") + } + _64 := x.Type().Bits() == 64 + + var op sseOpcode + if _64 { + switch instr.Opcode() { + case ssa.OpcodeFadd: + op = sseOpcodeAddsd + case ssa.OpcodeFsub: + op = sseOpcodeSubsd + case ssa.OpcodeFmul: + op = sseOpcodeMulsd + case ssa.OpcodeFdiv: + op = sseOpcodeDivsd + default: + panic("BUG") + } + } else { + switch instr.Opcode() { + case ssa.OpcodeFadd: + op = sseOpcodeAddss + case ssa.OpcodeFsub: + op = sseOpcodeSubss + case ssa.OpcodeFmul: + op = sseOpcodeMulss + case ssa.OpcodeFdiv: + op = sseOpcodeDivss + default: + panic("BUG") + } + } + + xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) + rn := m.getOperand_Reg(yDef) + rm := m.getOperand_Reg(xDef) + rd := m.c.VRegOf(instr.Return()) + + // rm is being overwritten, so we first copy its value to a temp register, + // in case it is referenced again later. 
+ tmp := m.copyToTmp(rm.reg()) + + xmm := m.allocateInstr().asXmmRmR(op, rn, tmp) + m.insert(xmm) + + m.copyTo(tmp, rd) +} + +func (m *machine) lowerSqrt(instr *ssa.Instruction) { + x := instr.Arg() + if !x.Type().IsFloat() { + panic("BUG") + } + _64 := x.Type().Bits() == 64 + var op sseOpcode + if _64 { + op = sseOpcodeSqrtsd + } else { + op = sseOpcodeSqrtss + } + + xDef := m.c.ValueDefinition(x) + rm := m.getOperand_Mem_Reg(xDef) + rd := m.c.VRegOf(instr.Return()) + + xmm := m.allocateInstr().asXmmUnaryRmR(op, rm, rd) + m.insert(xmm) +} + +func (m *machine) lowerFabsFneg(instr *ssa.Instruction) { + x := instr.Arg() + if !x.Type().IsFloat() { + panic("BUG") + } + _64 := x.Type().Bits() == 64 + var op sseOpcode + var mask uint64 + if _64 { + switch instr.Opcode() { + case ssa.OpcodeFabs: + mask, op = 0x7fffffffffffffff, sseOpcodeAndpd + case ssa.OpcodeFneg: + mask, op = 0x8000000000000000, sseOpcodeXorpd + } + } else { + switch instr.Opcode() { + case ssa.OpcodeFabs: + mask, op = 0x7fffffff, sseOpcodeAndps + case ssa.OpcodeFneg: + mask, op = 0x80000000, sseOpcodeXorps + } + } + + tmp := m.c.AllocateVReg(x.Type()) + + xDef := m.c.ValueDefinition(x) + rm := m.getOperand_Reg(xDef) + rd := m.c.VRegOf(instr.Return()) + + m.lowerFconst(tmp, mask, _64) + + xmm := m.allocateInstr().asXmmRmR(op, rm, tmp) + m.insert(xmm) + + m.copyTo(tmp, rd) +} + +func (m *machine) lowerStore(si *ssa.Instruction) { + value, ptr, offset, storeSizeInBits := si.StoreData() + rm := m.getOperand_Reg(m.c.ValueDefinition(value)) + mem := newOperandMem(m.lowerToAddressMode(ptr, offset)) + + store := m.allocateInstr() + switch value.Type() { + case ssa.TypeI32: + store.asMovRM(rm.reg(), mem, storeSizeInBits/8) + case ssa.TypeI64: + store.asMovRM(rm.reg(), mem, storeSizeInBits/8) + case ssa.TypeF32: + store.asXmmMovRM(sseOpcodeMovss, rm.reg(), mem) + case ssa.TypeF64: + store.asXmmMovRM(sseOpcodeMovsd, rm.reg(), mem) + case ssa.TypeV128: + store.asXmmMovRM(sseOpcodeMovdqu, rm.reg(), mem) + default: 
+ panic("BUG") + } + m.insert(store) +} + +func (m *machine) lowerCall(si *ssa.Instruction) { + isDirectCall := si.Opcode() == ssa.OpcodeCall + var indirectCalleePtr ssa.Value + var directCallee ssa.FuncRef + var sigID ssa.SignatureID + var args []ssa.Value + var isMemmove bool + if isDirectCall { + directCallee, sigID, args = si.CallData() + } else { + indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData() + } + calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID)) + + stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize()) + if m.maxRequiredStackSizeForCalls < stackSlotSize+16 { + m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP. + } + + // Note: See machine.SetupPrologue for the stack layout. + // The stack pointer decrease/increase will be inserted later in the compilation. + + for i, arg := range args { + reg := m.c.VRegOf(arg) + def := m.c.ValueDefinition(arg) + m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize) + } + + if isMemmove { + // Go's memmove *might* use all xmm0-xmm15, so we need to release them. 
+ // https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#architecture-specifics + // https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/runtime/memmove_amd64.s#L271-L286 + for i := regalloc.RealReg(0); i < 16; i++ { + m.insert(m.allocateInstr().asDefineUninitializedReg(regInfo.RealRegToVReg[xmm0+i])) + } + } + + if isDirectCall { + call := m.allocateInstr().asCall(directCallee, calleeABI) + m.insert(call) + } else { + ptrOp := m.getOperand_Mem_Reg(m.c.ValueDefinition(indirectCalleePtr)) + callInd := m.allocateInstr().asCallIndirect(ptrOp, calleeABI) + m.insert(callInd) + } + + if isMemmove { + for i := regalloc.RealReg(0); i < 16; i++ { + m.insert(m.allocateInstr().asNopUseReg(regInfo.RealRegToVReg[xmm0+i])) + } + } + + var index int + r1, rs := si.Returns() + if r1.Valid() { + m.callerGenFunctionReturnVReg(calleeABI, 0, m.c.VRegOf(r1), stackSlotSize) + index++ + } + + for _, r := range rs { + m.callerGenFunctionReturnVReg(calleeABI, index, m.c.VRegOf(r), stackSlotSize) + index++ + } +} + +// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the +// caller side of the function call. +func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, stackSlotSize int64) { + arg := &a.Args[argIndex] + if def != nil && def.IsFromInstr() { + // Constant instructions are inlined. + if inst := def.Instr; inst.Constant() { + m.insertLoadConstant(inst, reg) + } + } + if arg.Kind == backend.ABIArgKindReg { + m.InsertMove(arg.Reg, reg, arg.Type) + } else { + store := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmReg( + // -stackSlotSize because the stack pointer is not yet decreased. 
+ uint32(arg.Offset-stackSlotSize), rspVReg)) + switch arg.Type { + case ssa.TypeI32: + store.asMovRM(reg, mem, 4) + case ssa.TypeI64: + store.asMovRM(reg, mem, 8) + case ssa.TypeF32: + store.asXmmMovRM(sseOpcodeMovss, reg, mem) + case ssa.TypeF64: + store.asXmmMovRM(sseOpcodeMovsd, reg, mem) + case ssa.TypeV128: + store.asXmmMovRM(sseOpcodeMovdqu, reg, mem) + default: + panic("BUG") + } + m.insert(store) + } +} + +func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, stackSlotSize int64) { + r := &a.Rets[retIndex] + if r.Kind == backend.ABIArgKindReg { + m.InsertMove(reg, r.Reg, r.Type) + } else { + load := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmReg( + // -stackSlotSize because the stack pointer is not yet decreased. + uint32(a.ArgStackSize+r.Offset-stackSlotSize), rspVReg)) + switch r.Type { + case ssa.TypeI32: + load.asMovzxRmR(extModeLQ, mem, reg) + case ssa.TypeI64: + load.asMov64MR(mem, reg) + case ssa.TypeF32: + load.asXmmUnaryRmR(sseOpcodeMovss, mem, reg) + case ssa.TypeF64: + load.asXmmUnaryRmR(sseOpcodeMovsd, mem, reg) + case ssa.TypeV128: + load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, reg) + default: + panic("BUG") + } + m.insert(load) + } +} + +// InsertMove implements backend.Machine. +func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) { + switch typ { + case ssa.TypeI32, ssa.TypeI64: + i := m.allocateInstr().asMovRR(src, dst, typ.Bits() == 64) + m.insert(i) + case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: + var op sseOpcode + switch typ { + case ssa.TypeF32: + op = sseOpcodeMovss + case ssa.TypeF64: + op = sseOpcodeMovsd + case ssa.TypeV128: + op = sseOpcodeMovdqa + } + i := m.allocateInstr().asXmmUnaryRmR(op, newOperandReg(src), dst) + m.insert(i) + default: + panic("BUG") + } +} + +// Format implements backend.Machine. 
+func (m *machine) Format() string { + ectx := m.ectx + begins := map[*instruction]backend.Label{} + for l, pos := range ectx.LabelPositions { + begins[pos.Begin] = l + } + + irBlocks := map[backend.Label]ssa.BasicBlockID{} + for i, l := range ectx.SsaBlockIDToLabels { + irBlocks[l] = ssa.BasicBlockID(i) + } + + var lines []string + for cur := ectx.RootInstr; cur != nil; cur = cur.next { + if l, ok := begins[cur]; ok { + var labelStr string + if blkID, ok := irBlocks[l]; ok { + labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) + } else { + labelStr = fmt.Sprintf("%s:", l) + } + lines = append(lines, labelStr) + } + if cur.kind == nop0 { + continue + } + lines = append(lines, "\t"+cur.String()) + } + for _, vc := range m.consts { + if vc._var == nil { + lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label.L, vc.lo, vc.hi)) + } else { + lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label.L, vc._var)) + } + } + return "\n" + strings.Join(lines, "\n") + "\n" +} + +func (m *machine) encodeWithoutSSA(root *instruction) { + m.labelResolutionPends = m.labelResolutionPends[:0] + ectx := m.ectx + + bufPtr := m.c.BufPtr() + for cur := root; cur != nil; cur = cur.next { + offset := int64(len(*bufPtr)) + if cur.kind == nop0 { + l := cur.nop0Label() + if pos, ok := ectx.LabelPositions[l]; ok { + pos.BinaryOffset = offset + } + } + + needLabelResolution := cur.encode(m.c) + if needLabelResolution { + m.labelResolutionPends = append(m.labelResolutionPends, + labelResolutionPend{instr: cur, imm32Offset: int64(len(*bufPtr)) - 4}, + ) + } + } + + for i := range m.labelResolutionPends { + p := &m.labelResolutionPends[i] + switch p.instr.kind { + case jmp, jmpIf, lea: + target := p.instr.jmpLabel() + targetOffset := ectx.LabelPositions[target].BinaryOffset + imm32Offset := p.imm32Offset + jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction. 
+ binary.LittleEndian.PutUint32((*bufPtr)[imm32Offset:], uint32(jmpOffset)) + default: + panic("BUG") + } + } +} + +// Encode implements backend.Machine Encode. +func (m *machine) Encode(ctx context.Context) (err error) { + ectx := m.ectx + bufPtr := m.c.BufPtr() + + var fn string + var fnIndex int + var labelToSSABlockID map[backend.Label]ssa.BasicBlockID + if wazevoapi.PerfMapEnabled { + fn = wazevoapi.GetCurrentFunctionName(ctx) + labelToSSABlockID = make(map[backend.Label]ssa.BasicBlockID) + for i, l := range ectx.SsaBlockIDToLabels { + labelToSSABlockID[l] = ssa.BasicBlockID(i) + } + fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx) + } + + m.labelResolutionPends = m.labelResolutionPends[:0] + for _, pos := range ectx.OrderedBlockLabels { + offset := int64(len(*bufPtr)) + pos.BinaryOffset = offset + for cur := pos.Begin; cur != pos.End.next; cur = cur.next { + offset := int64(len(*bufPtr)) + + switch cur.kind { + case nop0: + l := cur.nop0Label() + if pos, ok := ectx.LabelPositions[l]; ok { + pos.BinaryOffset = offset + } + case sourceOffsetInfo: + m.c.AddSourceOffsetInfo(offset, cur.sourceOffsetInfo()) + } + + needLabelResolution := cur.encode(m.c) + if needLabelResolution { + m.labelResolutionPends = append(m.labelResolutionPends, + labelResolutionPend{instr: cur, instrOffset: offset, imm32Offset: int64(len(*bufPtr)) - 4}, + ) + } + } + + if wazevoapi.PerfMapEnabled { + l := pos.L + var labelStr string + if blkID, ok := labelToSSABlockID[l]; ok { + labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) + } else { + labelStr = l.String() + } + size := int64(len(*bufPtr)) - offset + wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) + } + } + + for i := range m.consts { + offset := int64(len(*bufPtr)) + vc := &m.consts[i] + vc.label.BinaryOffset = offset + if vc._var == nil { + lo, hi := vc.lo, vc.hi + m.c.Emit8Bytes(lo) + m.c.Emit8Bytes(hi) + } else { + for _, b := range vc._var { + m.c.EmitByte(b) + } + } + 
} + + buf := *bufPtr + for i := range m.labelResolutionPends { + p := &m.labelResolutionPends[i] + switch p.instr.kind { + case jmp, jmpIf, lea, xmmUnaryRmR: + target := p.instr.jmpLabel() + targetOffset := ectx.LabelPositions[target].BinaryOffset + imm32Offset := p.imm32Offset + jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction. + binary.LittleEndian.PutUint32(buf[imm32Offset:], uint32(jmpOffset)) + case jmpTableIsland: + tableBegin := p.instrOffset + // Each entry is the offset from the beginning of the jmpTableIsland instruction in 8 bytes. + targets := m.jmpTableTargets[p.instr.u1] + for i, l := range targets { + targetOffset := ectx.LabelPositions[backend.Label(l)].BinaryOffset + jmpOffset := targetOffset - tableBegin + binary.LittleEndian.PutUint64(buf[tableBegin+int64(i)*8:], uint64(jmpOffset)) + } + default: + panic("BUG") + } + } + return +} + +// ResolveRelocations implements backend.Machine. +func (m *machine) ResolveRelocations(refToBinaryOffset []int, binary []byte, relocations []backend.RelocationInfo, _ []int) { + for _, r := range relocations { + offset := r.Offset + calleeFnOffset := refToBinaryOffset[r.FuncRef] + // offset is the offset of the last 4 bytes of the call instruction. + callInstrOffsetBytes := binary[offset : offset+4] + diff := int64(calleeFnOffset) - (offset + 4) // +4 because we want the offset of the next instruction (In x64, RIP always points to the next instruction). + callInstrOffsetBytes[0] = byte(diff) + callInstrOffsetBytes[1] = byte(diff >> 8) + callInstrOffsetBytes[2] = byte(diff >> 16) + callInstrOffsetBytes[3] = byte(diff >> 24) + } +} + +// CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo. 
+func (m *machine) CallTrampolineIslandInfo(_ int) (_, _ int, _ error) { return } + +func (m *machine) lowerIcmpToFlag(xd, yd *backend.SSAValueDefinition, _64 bool) { + x := m.getOperand_Reg(xd) + y := m.getOperand_Mem_Imm32_Reg(yd) + cmp := m.allocateInstr().asCmpRmiR(true, y, x.reg(), _64) + m.insert(cmp) +} + +func (m *machine) lowerFcmpToFlags(instr *ssa.Instruction) (f1, f2 cond, and bool) { + x, y, c := instr.FcmpData() + switch c { + case ssa.FloatCmpCondEqual: + f1, f2 = condNP, condZ + and = true + case ssa.FloatCmpCondNotEqual: + f1, f2 = condP, condNZ + case ssa.FloatCmpCondLessThan: + f1 = condFromSSAFloatCmpCond(ssa.FloatCmpCondGreaterThan) + f2 = condInvalid + x, y = y, x + case ssa.FloatCmpCondLessThanOrEqual: + f1 = condFromSSAFloatCmpCond(ssa.FloatCmpCondGreaterThanOrEqual) + f2 = condInvalid + x, y = y, x + default: + f1 = condFromSSAFloatCmpCond(c) + f2 = condInvalid + } + + var opc sseOpcode + if x.Type() == ssa.TypeF32 { + opc = sseOpcodeUcomiss + } else { + opc = sseOpcodeUcomisd + } + + xr := m.getOperand_Reg(m.c.ValueDefinition(x)) + yr := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + m.insert(m.allocateInstr().asXmmCmpRmR(opc, yr, xr.reg())) + return +} + +// allocateInstr allocates an instruction. 
+func (m *machine) allocateInstr() *instruction { + instr := m.ectx.InstructionPool.Allocate() + if !m.regAllocStarted { + instr.addedBeforeRegAlloc = true + } + return instr +} + +func (m *machine) allocateNop() *instruction { + instr := m.allocateInstr() + instr.kind = nop0 + return instr +} + +func (m *machine) insert(i *instruction) { + ectx := m.ectx + ectx.PendingInstructions = append(ectx.PendingInstructions, i) +} + +func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nolint + pos := m.allocateLabel() + l = pos.L + nop = m.allocateInstr() + nop.asNop0WithLabel(l) + pos.Begin, pos.End = nop, nop + return +} + +func (m *machine) allocateLabel() *labelPosition { + ectx := m.ectx + l := ectx.AllocateLabel() + pos := ectx.AllocateLabelPosition(l) + ectx.LabelPositions[l] = pos + return pos +} + +func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 { + offset, ok := m.spillSlots[id] + if !ok { + offset = m.spillSlotSize + m.spillSlots[id] = offset + m.spillSlotSize += int64(size) + } + return offset +} + +func (m *machine) copyTo(src regalloc.VReg, dst regalloc.VReg) { + mov := m.allocateInstr() + if src.RegType() == regalloc.RegTypeInt { + mov.asMovRR(src, dst, true) + } else { + mov.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), dst) + } + m.insert(mov) +} + +func (m *machine) copyToTmp(v regalloc.VReg) regalloc.VReg { + typ := m.c.TypeOf(v) + tmp := m.c.AllocateVReg(typ) + m.copyTo(v, tmp) + return tmp +} + +func (m *machine) requiredStackSize() int64 { + return m.maxRequiredStackSizeForCalls + + m.frameSize() + + 16 + // Need for stack checking. + 16 // return address and the caller RBP. 
+} + +func (m *machine) frameSize() int64 { + s := m.clobberedRegSlotSize() + m.spillSlotSize + if s&0xf != 0 { + panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s)) + } + return s +} + +func (m *machine) clobberedRegSlotSize() int64 { + return int64(len(m.clobberedRegs) * 16) +} + +func (m *machine) lowerIDivRem(si *ssa.Instruction, isDiv bool, signed bool) { + x, y, execCtx := si.Arg3() + + dividend := m.getOperand_Reg(m.c.ValueDefinition(x)) + divisor := m.getOperand_Reg(m.c.ValueDefinition(y)) + ctxVReg := m.c.VRegOf(execCtx) + tmpGp := m.c.AllocateVReg(si.Return().Type()) + + m.copyTo(dividend.reg(), raxVReg) + m.insert(m.allocateInstr().asDefineUninitializedReg(rdxVReg)) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp)) + seq := m.allocateInstr().asIdivRemSequence(ctxVReg, divisor.reg(), tmpGp, isDiv, signed, x.Type().Bits() == 64) + m.insert(seq) + rd := m.c.VRegOf(si.Return()) + if isDiv { + m.copyTo(raxVReg, rd) + } else { + m.copyTo(rdxVReg, rd) + } +} + +func (m *machine) lowerIDivRemSequenceAfterRegAlloc(i *instruction) { + execCtx, divisor, tmpGp, isDiv, signed, _64 := i.idivRemSequenceData() + + dividend := raxVReg + + // Ensure yr is not zero. + test := m.allocateInstr() + test.asCmpRmiR(false, newOperandReg(divisor), divisor, _64) + m.insert(test) + + jnz := m.allocateInstr() + m.insert(jnz) + + nz := m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerDivisionByZero) + + // If not zero, we can proceed with the division. + jnz.asJmpIf(condNZ, newOperandLabel(nz)) + + var ifRemNeg1 *instruction + if signed { + var neg1 uint64 + if _64 { + neg1 = 0xffffffffffffffff + } else { + neg1 = 0xffffffff + } + m.lowerIconst(tmpGp, neg1, _64) + + if isDiv { + // For signed division, we have to have branches for "math.MinInt{32,64} / -1" + // case which results in the floating point exception via division error as + // the resulting value exceeds the maximum of signed int. + + // First, we check if the divisor is -1. 
+ cmp := m.allocateInstr() + cmp.asCmpRmiR(true, newOperandReg(tmpGp), divisor, _64) + m.insert(cmp) + + ifNotNeg1 := m.allocateInstr() + m.insert(ifNotNeg1) + + var minInt uint64 + if _64 { + minInt = 0x8000000000000000 + } else { + minInt = 0x80000000 + } + m.lowerIconst(tmpGp, minInt, _64) + + // Next we check if the quotient is the most negative value for the signed integer, i.e. + // if we are trying to do (math.MinInt32 / -1) or (math.MinInt64 / -1) respectively. + cmp2 := m.allocateInstr() + cmp2.asCmpRmiR(true, newOperandReg(tmpGp), dividend, _64) + m.insert(cmp2) + + ifNotMinInt := m.allocateInstr() + m.insert(ifNotMinInt) + + // Trap if we are trying to do (math.MinInt32 / -1) or (math.MinInt64 / -1), + // as that is the overflow in division as the result becomes 2^31 which is larger than + // the maximum of signed 32-bit int (2^31-1). + end := m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) + ifNotNeg1.asJmpIf(condNZ, newOperandLabel(end)) + ifNotMinInt.asJmpIf(condNZ, newOperandLabel(end)) + } else { + // If it is remainder, zeros DX register and compare the divisor to -1. + xor := m.allocateInstr().asZeros(rdxVReg) + m.insert(xor) + + // We check if the divisor is -1. + cmp := m.allocateInstr() + cmp.asCmpRmiR(true, newOperandReg(tmpGp), divisor, _64) + m.insert(cmp) + + ifRemNeg1 = m.allocateInstr() + m.insert(ifRemNeg1) + } + + // Sign-extend DX register to have 2*x.Type().Bits() dividend over DX and AX registers. + sed := m.allocateInstr() + sed.asSignExtendData(_64) + m.insert(sed) + } else { + // Zeros DX register to have 2*x.Type().Bits() dividend over DX and AX registers. + zeros := m.allocateInstr().asZeros(rdxVReg) + m.insert(zeros) + } + + div := m.allocateInstr() + div.asDiv(newOperandReg(divisor), signed, _64) + m.insert(div) + + nop, end := m.allocateBrTarget() + m.insert(nop) + // If we are compiling a Rem instruction, when the divisor is -1 we land at the end of the function. 
+ if ifRemNeg1 != nil { + ifRemNeg1.asJmpIf(condZ, newOperandLabel(end)) + } +} + +func (m *machine) lowerRound(instr *ssa.Instruction, imm roundingMode) { + x := instr.Arg() + if !x.Type().IsFloat() { + panic("BUG?") + } + var op sseOpcode + if x.Type().Bits() == 64 { + op = sseOpcodeRoundsd + } else { + op = sseOpcodeRoundss + } + + xDef := m.c.ValueDefinition(x) + rm := m.getOperand_Mem_Reg(xDef) + rd := m.c.VRegOf(instr.Return()) + + xmm := m.allocateInstr().asXmmUnaryRmRImm(op, uint8(imm), rm, rd) + m.insert(xmm) +} + +func (m *machine) lowerFminFmax(instr *ssa.Instruction) { + x, y := instr.Arg2() + if !x.Type().IsFloat() { + panic("BUG?") + } + + _64 := x.Type().Bits() == 64 + isMin := instr.Opcode() == ssa.OpcodeFmin + var minMaxOp sseOpcode + + switch { + case _64 && isMin: + minMaxOp = sseOpcodeMinpd + case _64 && !isMin: + minMaxOp = sseOpcodeMaxpd + case !_64 && isMin: + minMaxOp = sseOpcodeMinps + case !_64 && !isMin: + minMaxOp = sseOpcodeMaxps + } + + xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) + rm := m.getOperand_Reg(xDef) + // We cannot ensure that y is aligned to 16 bytes, so we have to use it on reg. + rn := m.getOperand_Reg(yDef) + rd := m.c.VRegOf(instr.Return()) + + tmp := m.copyToTmp(rm.reg()) + + // Check if this is (either x1 or x2 is NaN) or (x1 equals x2) case. + cmp := m.allocateInstr() + if _64 { + cmp.asXmmCmpRmR(sseOpcodeUcomisd, rn, tmp) + } else { + cmp.asXmmCmpRmR(sseOpcodeUcomiss, rn, tmp) + } + m.insert(cmp) + + // At this point, we have the three cases of conditional flags below + // (See https://www.felixcloutier.com/x86/ucomiss#operation for detail.) + // + // 1) Two values are NaN-free and different: All flags are cleared. + // 2) Two values are NaN-free and equal: Only ZF flags is set. + // 3) One of Two values is NaN: ZF, PF and CF flags are set. + + // Jump instruction to handle 1) case by checking the ZF flag + // as ZF is only set for 2) and 3) cases. 
+ nanFreeOrDiffJump := m.allocateInstr() + m.insert(nanFreeOrDiffJump) + + // Start handling 2) and 3). + + // Jump if one of two values is NaN by checking the parity flag (PF). + ifIsNan := m.allocateInstr() + m.insert(ifIsNan) + + // Start handling 2) NaN-free and equal. + + // Before we exit this case, we have to ensure that positive zero (or negative zero for min instruction) is + // returned if two values are positive and negative zeros. + var op sseOpcode + switch { + case !_64 && isMin: + op = sseOpcodeOrps + case _64 && isMin: + op = sseOpcodeOrpd + case !_64 && !isMin: + op = sseOpcodeAndps + case _64 && !isMin: + op = sseOpcodeAndpd + } + orAnd := m.allocateInstr() + orAnd.asXmmRmR(op, rn, tmp) + m.insert(orAnd) + + // Done, jump to end. + sameExitJump := m.allocateInstr() + m.insert(sameExitJump) + + // Start handling 3) either is NaN. + isNanTarget, isNan := m.allocateBrTarget() + m.insert(isNanTarget) + ifIsNan.asJmpIf(condP, newOperandLabel(isNan)) + + // We emit the ADD instruction to produce the NaN in tmp. + add := m.allocateInstr() + if _64 { + add.asXmmRmR(sseOpcodeAddsd, rn, tmp) + } else { + add.asXmmRmR(sseOpcodeAddss, rn, tmp) + } + m.insert(add) + + // Exit from the NaN case branch. + nanExitJmp := m.allocateInstr() + m.insert(nanExitJmp) + + // Start handling 1). + doMinMaxTarget, doMinMax := m.allocateBrTarget() + m.insert(doMinMaxTarget) + nanFreeOrDiffJump.asJmpIf(condNZ, newOperandLabel(doMinMax)) + + // Now handle the NaN-free and different values case. 
+ minMax := m.allocateInstr() + minMax.asXmmRmR(minMaxOp, rn, tmp) + m.insert(minMax) + + endNop, end := m.allocateBrTarget() + m.insert(endNop) + nanExitJmp.asJmp(newOperandLabel(end)) + sameExitJump.asJmp(newOperandLabel(end)) + + m.copyTo(tmp, rd) +} + +func (m *machine) lowerFcopysign(instr *ssa.Instruction) { + x, y := instr.Arg2() + if !x.Type().IsFloat() { + panic("BUG") + } + + _64 := x.Type().Bits() == 64 + + xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) + rm := m.getOperand_Reg(xDef) + rn := m.getOperand_Reg(yDef) + rd := m.c.VRegOf(instr.Return()) + + // Clear the non-sign bits of src via AND with the mask. + var opAnd, opOr sseOpcode + var signMask uint64 + if _64 { + signMask, opAnd, opOr = 0x8000000000000000, sseOpcodeAndpd, sseOpcodeOrpd + } else { + signMask, opAnd, opOr = 0x80000000, sseOpcodeAndps, sseOpcodeOrps + } + + signBitReg := m.c.AllocateVReg(x.Type()) + m.lowerFconst(signBitReg, signMask, _64) + nonSignBitReg := m.c.AllocateVReg(x.Type()) + m.lowerFconst(nonSignBitReg, ^signMask, _64) + + // Extract the sign bits of rn. + and := m.allocateInstr().asXmmRmR(opAnd, rn, signBitReg) + m.insert(and) + + // Clear the sign bit of dst via AND with the non-sign bit mask. + xor := m.allocateInstr().asXmmRmR(opAnd, rm, nonSignBitReg) + m.insert(xor) + + // Copy the sign bits of src to dst via OR. 
+ or := m.allocateInstr().asXmmRmR(opOr, newOperandReg(signBitReg), nonSignBitReg) + m.insert(or) + + m.copyTo(nonSignBitReg, rd) +} + +func (m *machine) lowerBitcast(instr *ssa.Instruction) { + x, dstTyp := instr.BitcastData() + srcTyp := x.Type() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + switch { + case srcTyp == ssa.TypeF32 && dstTyp == ssa.TypeI32: + cvt := m.allocateInstr().asXmmToGpr(sseOpcodeMovd, rn.reg(), rd, false) + m.insert(cvt) + case srcTyp == ssa.TypeI32 && dstTyp == ssa.TypeF32: + cvt := m.allocateInstr().asGprToXmm(sseOpcodeMovd, rn, rd, false) + m.insert(cvt) + case srcTyp == ssa.TypeF64 && dstTyp == ssa.TypeI64: + cvt := m.allocateInstr().asXmmToGpr(sseOpcodeMovq, rn.reg(), rd, true) + m.insert(cvt) + case srcTyp == ssa.TypeI64 && dstTyp == ssa.TypeF64: + cvt := m.allocateInstr().asGprToXmm(sseOpcodeMovq, rn, rd, true) + m.insert(cvt) + default: + panic(fmt.Sprintf("invalid bitcast from %s to %s", srcTyp, dstTyp)) + } +} + +func (m *machine) lowerFcvtToSint(ctxVReg, rn, rd regalloc.VReg, src64, dst64, sat bool) { + var tmpXmm regalloc.VReg + if dst64 { + tmpXmm = m.c.AllocateVReg(ssa.TypeF64) + } else { + tmpXmm = m.c.AllocateVReg(ssa.TypeF32) + } + + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpXmm)) + tmpGp, tmpGp2 := m.c.AllocateVReg(ssa.TypeI64), m.c.AllocateVReg(ssa.TypeI64) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp)) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp2)) + + m.insert(m.allocateFcvtToSintSequence(ctxVReg, rn, tmpGp, tmpGp2, tmpXmm, src64, dst64, sat)) + m.copyTo(tmpGp, rd) +} + +func (m *machine) lowerFcvtToSintSequenceAfterRegalloc(i *instruction) { + execCtx, src, tmpGp, tmpGp2, tmpXmm, src64, dst64, sat := i.fcvtToSintSequenceData() + var cmpOp, truncOp sseOpcode + if src64 { + cmpOp, truncOp = sseOpcodeUcomisd, sseOpcodeCvttsd2si + } else { + cmpOp, truncOp = sseOpcodeUcomiss, sseOpcodeCvttss2si + } + + trunc := m.allocateInstr() + 
trunc.asXmmToGpr(truncOp, src, tmpGp, dst64) + m.insert(trunc) + + // Check if the dst operand was INT_MIN, by checking it against 1. + cmp1 := m.allocateInstr() + cmp1.asCmpRmiR(true, newOperandImm32(1), tmpGp, dst64) + m.insert(cmp1) + + // If no overflow, then we are done. + doneTarget, done := m.allocateBrTarget() + ifNoOverflow := m.allocateInstr() + ifNoOverflow.asJmpIf(condNO, newOperandLabel(done)) + m.insert(ifNoOverflow) + + // Now, check for NaN. + cmpNan := m.allocateInstr() + cmpNan.asXmmCmpRmR(cmpOp, newOperandReg(src), src) + m.insert(cmpNan) + + // We allocate the "non-nan target" here, but we will insert it later. + notNanTarget, notNaN := m.allocateBrTarget() + ifNotNan := m.allocateInstr() + ifNotNan.asJmpIf(condNP, newOperandLabel(notNaN)) + m.insert(ifNotNan) + + if sat { + // If NaN and saturating, return 0. + zeroDst := m.allocateInstr().asZeros(tmpGp) + m.insert(zeroDst) + + jmpEnd := m.allocateInstr() + jmpEnd.asJmp(newOperandLabel(done)) + m.insert(jmpEnd) + + // Otherwise: + m.insert(notNanTarget) + + // Zero-out the tmp register. + zero := m.allocateInstr().asZeros(tmpXmm) + m.insert(zero) + + cmpXmm := m.allocateInstr().asXmmCmpRmR(cmpOp, newOperandReg(tmpXmm), src) + m.insert(cmpXmm) + + // if >= jump to end. + jmpEnd2 := m.allocateInstr() + jmpEnd2.asJmpIf(condB, newOperandLabel(done)) + m.insert(jmpEnd2) + + // Otherwise, saturate to INT_MAX. + if dst64 { + m.lowerIconst(tmpGp, math.MaxInt64, dst64) + } else { + m.lowerIconst(tmpGp, math.MaxInt32, dst64) + } + + } else { + + // If non-sat, NaN, trap. + m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeInvalidConversionToInteger) + + // Otherwise, we will jump here. + m.insert(notNanTarget) + + // jump over trap if src larger than threshold + condAboveThreshold := condNB + + // The magic constants are various combination of minInt for int[32|64] represented as float[32|64]. 
+ var minInt uint64 + switch { + case src64 && dst64: + minInt = 0xc3e0000000000000 + case src64 && !dst64: + condAboveThreshold = condNBE + minInt = 0xC1E0_0000_0020_0000 + case !src64 && dst64: + minInt = 0xDF00_0000 + case !src64 && !dst64: + minInt = 0xCF00_0000 + } + + loadToGP := m.allocateInstr().asImm(tmpGp2, minInt, src64) + m.insert(loadToGP) + + movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp2), tmpXmm, src64) + m.insert(movToXmm) + + cmpXmm := m.allocateInstr().asXmmCmpRmR(cmpOp, newOperandReg(tmpXmm), src) + m.insert(cmpXmm) + + jmpIfLarger := m.allocateInstr() + checkPositiveTarget, checkPositive := m.allocateBrTarget() + jmpIfLarger.asJmpIf(condAboveThreshold, newOperandLabel(checkPositive)) + m.insert(jmpIfLarger) + + m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) + + // If positive, it was a real overflow. + m.insert(checkPositiveTarget) + + // Zero out the temp register. + xorpd := m.allocateInstr() + xorpd.asXmmRmR(sseOpcodeXorpd, newOperandReg(tmpXmm), tmpXmm) + m.insert(xorpd) + + pos := m.allocateInstr() + pos.asXmmCmpRmR(cmpOp, newOperandReg(src), tmpXmm) + m.insert(pos) + + // If >= jump to end. 
+ jmp := m.allocateInstr().asJmpIf(condNB, newOperandLabel(done)) + m.insert(jmp) + m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) + } + + m.insert(doneTarget) +} + +func (m *machine) lowerFcvtToUint(ctxVReg, rn, rd regalloc.VReg, src64, dst64, sat bool) { + tmpXmm, tmpXmm2 := m.c.AllocateVReg(ssa.TypeF64), m.c.AllocateVReg(ssa.TypeF64) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpXmm)) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpXmm2)) + tmpGp, tmpGp2 := m.c.AllocateVReg(ssa.TypeI64), m.c.AllocateVReg(ssa.TypeI64) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp)) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp2)) + + m.insert(m.allocateFcvtToUintSequence( + ctxVReg, rn, tmpGp, tmpGp2, tmpXmm, tmpXmm2, src64, dst64, sat, + )) + m.copyTo(tmpGp, rd) +} + +func (m *machine) lowerFcvtToUintSequenceAfterRegalloc(i *instruction) { + execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2, src64, dst64, sat := i.fcvtToUintSequenceData() + + var subOp, cmpOp, truncOp sseOpcode + if src64 { + subOp, cmpOp, truncOp = sseOpcodeSubsd, sseOpcodeUcomisd, sseOpcodeCvttsd2si + } else { + subOp, cmpOp, truncOp = sseOpcodeSubss, sseOpcodeUcomiss, sseOpcodeCvttss2si + } + + doneTarget, done := m.allocateBrTarget() + + switch { + case src64 && dst64: + loadToGP := m.allocateInstr().asImm(tmpGp, 0x43e0000000000000, true) + m.insert(loadToGP) + movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp), tmpXmm, true) + m.insert(movToXmm) + case src64 && !dst64: + loadToGP := m.allocateInstr().asImm(tmpGp, 0x41e0000000000000, true) + m.insert(loadToGP) + movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp), tmpXmm, true) + m.insert(movToXmm) + case !src64 && dst64: + loadToGP := m.allocateInstr().asImm(tmpGp, 0x5f000000, false) + m.insert(loadToGP) + movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp), tmpXmm, false) + m.insert(movToXmm) + case !src64 && !dst64: + 
loadToGP := m.allocateInstr().asImm(tmpGp, 0x4f000000, false) + m.insert(loadToGP) + movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp), tmpXmm, false) + m.insert(movToXmm) + } + + cmp := m.allocateInstr() + cmp.asXmmCmpRmR(cmpOp, newOperandReg(tmpXmm), src) + m.insert(cmp) + + // If above `tmp` ("large threshold"), jump to `ifAboveThreshold` + ifAboveThresholdTarget, ifAboveThreshold := m.allocateBrTarget() + jmpIfAboveThreshold := m.allocateInstr() + jmpIfAboveThreshold.asJmpIf(condNB, newOperandLabel(ifAboveThreshold)) + m.insert(jmpIfAboveThreshold) + + ifNotNaNTarget, ifNotNaN := m.allocateBrTarget() + jmpIfNotNaN := m.allocateInstr() + jmpIfNotNaN.asJmpIf(condNP, newOperandLabel(ifNotNaN)) + m.insert(jmpIfNotNaN) + + // If NaN, handle the error condition. + if sat { + // On NaN, saturating, we just return 0. + zeros := m.allocateInstr().asZeros(tmpGp) + m.insert(zeros) + + jmpEnd := m.allocateInstr() + jmpEnd.asJmp(newOperandLabel(done)) + m.insert(jmpEnd) + } else { + // On NaN, non-saturating, we trap. + m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeInvalidConversionToInteger) + } + + // If not NaN, land here. + m.insert(ifNotNaNTarget) + + // Truncation happens here. + + trunc := m.allocateInstr() + trunc.asXmmToGpr(truncOp, src, tmpGp, dst64) + m.insert(trunc) + + // Check if the result is negative. + cmpNeg := m.allocateInstr() + cmpNeg.asCmpRmiR(true, newOperandImm32(0), tmpGp, dst64) + m.insert(cmpNeg) + + // If non-neg, jump to end. + jmpIfNonNeg := m.allocateInstr() + jmpIfNonNeg.asJmpIf(condNL, newOperandLabel(done)) + m.insert(jmpIfNonNeg) + + if sat { + // If the input was "small" (< 2**(width -1)), the only way to get an integer + // overflow is because the input was too small: saturate to the min value, i.e. 0. + zeros := m.allocateInstr().asZeros(tmpGp) + m.insert(zeros) + + jmpEnd := m.allocateInstr() + jmpEnd.asJmp(newOperandLabel(done)) + m.insert(jmpEnd) + } else { + // If not saturating, trap. 
+ m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) + } + + // If above the threshold, land here. + m.insert(ifAboveThresholdTarget) + + // tmpDiff := threshold - rn. + copySrc := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), tmpXmm2) + m.insert(copySrc) + + sub := m.allocateInstr() + sub.asXmmRmR(subOp, newOperandReg(tmpXmm), tmpXmm2) // must be -0x8000000000000000 + m.insert(sub) + + trunc2 := m.allocateInstr() + trunc2.asXmmToGpr(truncOp, tmpXmm2, tmpGp, dst64) + m.insert(trunc2) + + // Check if the result is negative. + cmpNeg2 := m.allocateInstr().asCmpRmiR(true, newOperandImm32(0), tmpGp, dst64) + m.insert(cmpNeg2) + + ifNextLargeTarget, ifNextLarge := m.allocateBrTarget() + jmpIfNextLarge := m.allocateInstr() + jmpIfNextLarge.asJmpIf(condNL, newOperandLabel(ifNextLarge)) + m.insert(jmpIfNextLarge) + + if sat { + // The input was "large" (>= maxInt), so the only way to get an integer + // overflow is because the input was too large: saturate to the max value. + var maxInt uint64 + if dst64 { + maxInt = math.MaxUint64 + } else { + maxInt = math.MaxUint32 + } + m.lowerIconst(tmpGp, maxInt, dst64) + + jmpToEnd := m.allocateInstr() + jmpToEnd.asJmp(newOperandLabel(done)) + m.insert(jmpToEnd) + } else { + // If not saturating, trap. 
+ m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) + } + + m.insert(ifNextLargeTarget) + + var op operand + if dst64 { + m.lowerIconst(tmpGp2, 0x8000000000000000, true) + op = newOperandReg(tmpGp2) + } else { + op = newOperandImm32(0x80000000) + } + + add := m.allocateInstr() + add.asAluRmiR(aluRmiROpcodeAdd, op, tmpGp, dst64) + m.insert(add) + + m.insert(doneTarget) +} + +func (m *machine) lowerFcvtFromSint(rn, rd operand, src64, dst64 bool) { + var op sseOpcode + if dst64 { + op = sseOpcodeCvtsi2sd + } else { + op = sseOpcodeCvtsi2ss + } + + trunc := m.allocateInstr() + trunc.asGprToXmm(op, rn, rd.reg(), src64) + m.insert(trunc) +} + +func (m *machine) lowerFcvtFromUint(rn, rd operand, src64, dst64 bool) { + var op sseOpcode + if dst64 { + op = sseOpcodeCvtsi2sd + } else { + op = sseOpcodeCvtsi2ss + } + + // Src is 32 bit, then we just perform the conversion with 64 bit width. + // + // See the following link for why we use 64bit conversion for unsigned 32bit integer sources: + // https://stackoverflow.com/questions/41495498/fpu-operations-generated-by-gcc-during-casting-integer-to-float. + // + // Here's the summary: + // >> CVTSI2SS is indeed designed for converting a signed integer to a scalar single-precision float, + // >> not an unsigned integer like you have here. So what gives? Well, a 64-bit processor has 64-bit wide + // >> registers available, so the unsigned 32-bit input values can be stored as signed 64-bit intermediate values, + // >> which allows CVTSI2SS to be used after all. + // + if !src64 { + // Before we convert, we have to clear the higher 32-bits of the 64-bit register + // to get the correct result. + tmp := m.c.AllocateVReg(ssa.TypeI32) + m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, rn, tmp)) + m.insert(m.allocateInstr().asGprToXmm(op, newOperandReg(tmp), rd.reg(), true)) + return + } + + // If uint64, we have to do a bit more work. 
+ endTarget, end := m.allocateBrTarget() + + var tmpXmm regalloc.VReg + if dst64 { + tmpXmm = m.c.AllocateVReg(ssa.TypeF64) + } else { + tmpXmm = m.c.AllocateVReg(ssa.TypeF32) + } + + // Check if the most significant bit (sign bit) is set. + test := m.allocateInstr() + test.asCmpRmiR(false, rn, rn.reg(), src64) + m.insert(test) + + // Jump if the sign bit is set. + ifSignTarget, ifSign := m.allocateBrTarget() + jmpIfNeg := m.allocateInstr() + jmpIfNeg.asJmpIf(condS, newOperandLabel(ifSign)) + m.insert(jmpIfNeg) + + // If the sign bit is not set, we could fit the unsigned int into float32/float64. + // So, we convert it to float and emit jump instruction to exit from this branch. + cvt := m.allocateInstr() + cvt.asGprToXmm(op, rn, tmpXmm, src64) + m.insert(cvt) + + // We are done, jump to end. + jmpEnd := m.allocateInstr() + jmpEnd.asJmp(newOperandLabel(end)) + m.insert(jmpEnd) + + // Now handling the case where sign-bit is set. + // We emit the following sequences: + // mov %rn, %tmp + // shr 1, %tmp + // mov %rn, %tmp2 + // and 1, %tmp2 + // or %tmp2, %tmp + // cvtsi2ss %tmp, %xmm0 + // addsd %xmm0, %xmm0 + m.insert(ifSignTarget) + + tmp := m.copyToTmp(rn.reg()) + shr := m.allocateInstr() + shr.asShiftR(shiftROpShiftRightLogical, newOperandImm32(1), tmp, src64) + m.insert(shr) + + tmp2 := m.copyToTmp(rn.reg()) + and := m.allocateInstr() + and.asAluRmiR(aluRmiROpcodeAnd, newOperandImm32(1), tmp2, src64) + m.insert(and) + + or := m.allocateInstr() + or.asAluRmiR(aluRmiROpcodeOr, newOperandReg(tmp2), tmp, src64) + m.insert(or) + + cvt2 := m.allocateInstr() + cvt2.asGprToXmm(op, newOperandReg(tmp), tmpXmm, src64) + m.insert(cvt2) + + addsd := m.allocateInstr() + if dst64 { + addsd.asXmmRmR(sseOpcodeAddsd, newOperandReg(tmpXmm), tmpXmm) + } else { + addsd.asXmmRmR(sseOpcodeAddss, newOperandReg(tmpXmm), tmpXmm) + } + m.insert(addsd) + + m.insert(endTarget) + m.copyTo(tmpXmm, rd.reg()) +} + +func (m *machine) lowerVanyTrue(instr *ssa.Instruction) { + x := instr.Arg() + 
rm := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + + tmp := m.c.AllocateVReg(ssa.TypeI32) + + cmp := m.allocateInstr() + cmp.asXmmCmpRmR(sseOpcodePtest, rm, rm.reg()) + m.insert(cmp) + + setcc := m.allocateInstr() + setcc.asSetcc(condNZ, tmp) + m.insert(setcc) + + // Clear the irrelevant bits. + and := m.allocateInstr() + and.asAluRmiR(aluRmiROpcodeAnd, newOperandImm32(1), tmp, false) + m.insert(and) + + m.copyTo(tmp, rd) +} + +func (m *machine) lowerVallTrue(instr *ssa.Instruction) { + x, lane := instr.ArgWithLane() + var op sseOpcode + switch lane { + case ssa.VecLaneI8x16: + op = sseOpcodePcmpeqb + case ssa.VecLaneI16x8: + op = sseOpcodePcmpeqw + case ssa.VecLaneI32x4: + op = sseOpcodePcmpeqd + case ssa.VecLaneI64x2: + op = sseOpcodePcmpeqq + } + rm := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + + tmp := m.c.AllocateVReg(ssa.TypeV128) + + zeros := m.allocateInstr() + zeros.asZeros(tmp) + m.insert(zeros) + + pcmp := m.allocateInstr() + pcmp.asXmmRmR(op, rm, tmp) + m.insert(pcmp) + + test := m.allocateInstr() + test.asXmmCmpRmR(sseOpcodePtest, newOperandReg(tmp), tmp) + m.insert(test) + + tmp2 := m.c.AllocateVReg(ssa.TypeI32) + + setcc := m.allocateInstr() + setcc.asSetcc(condZ, tmp2) + m.insert(setcc) + + // Clear the irrelevant bits. 
+ and := m.allocateInstr() + and.asAluRmiR(aluRmiROpcodeAnd, newOperandImm32(1), tmp2, false) + m.insert(and) + + m.copyTo(tmp2, rd) +} + +func (m *machine) lowerVhighBits(instr *ssa.Instruction) { + x, lane := instr.ArgWithLane() + rm := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + switch lane { + case ssa.VecLaneI8x16: + mov := m.allocateInstr() + mov.asXmmToGpr(sseOpcodePmovmskb, rm.reg(), rd, false) + m.insert(mov) + + case ssa.VecLaneI16x8: + // When we have: + // R1 = [R1(w1), R1(w2), R1(w3), R1(w4), R1(w5), R1(w6), R1(w7), R1(v8)] + // R2 = [R2(w1), R2(w2), R2(w3), R2(v4), R2(w5), R2(w6), R2(w7), R2(v8)] + // where RX(wn) is n-th signed word (16-bit) of RX register, + // + // "PACKSSWB R1, R2" produces + // R1 = [ + // byte_sat(R1(w1)), byte_sat(R1(w2)), byte_sat(R1(w3)), byte_sat(R1(w4)), + // byte_sat(R1(w5)), byte_sat(R1(w6)), byte_sat(R1(w7)), byte_sat(R1(w8)), + // byte_sat(R2(w1)), byte_sat(R2(w2)), byte_sat(R2(w3)), byte_sat(R2(w4)), + // byte_sat(R2(w5)), byte_sat(R2(w6)), byte_sat(R2(w7)), byte_sat(R2(w8)), + // ] + // where R1 is the destination register, and + // byte_sat(w) = int8(w) if w fits as signed 8-bit, + // 0x80 if w is less than 0x80 + // 0x7F if w is greater than 0x7f + // + // See https://www.felixcloutier.com/x86/packsswb:packssdw for detail. + // + // Therefore, v.register ends up having i-th and (i+8)-th bit set if i-th lane is negative (for i in 0..8). + tmp := m.copyToTmp(rm.reg()) + res := m.c.AllocateVReg(ssa.TypeI32) + + pak := m.allocateInstr() + pak.asXmmRmR(sseOpcodePacksswb, rm, tmp) + m.insert(pak) + + mov := m.allocateInstr() + mov.asXmmToGpr(sseOpcodePmovmskb, tmp, res, false) + m.insert(mov) + + // Clear the higher bits than 8. 
+ shr := m.allocateInstr() + shr.asShiftR(shiftROpShiftRightLogical, newOperandImm32(8), res, false) + m.insert(shr) + + m.copyTo(res, rd) + + case ssa.VecLaneI32x4: + mov := m.allocateInstr() + mov.asXmmToGpr(sseOpcodeMovmskps, rm.reg(), rd, true) + m.insert(mov) + + case ssa.VecLaneI64x2: + mov := m.allocateInstr() + mov.asXmmToGpr(sseOpcodeMovmskpd, rm.reg(), rd, true) + m.insert(mov) + } +} + +func (m *machine) lowerVbnot(instr *ssa.Instruction) { + x := instr.Arg() + xDef := m.c.ValueDefinition(x) + rm := m.getOperand_Reg(xDef) + rd := m.c.VRegOf(instr.Return()) + + tmp := m.copyToTmp(rm.reg()) + tmp2 := m.c.AllocateVReg(ssa.TypeV128) + + // Ensure tmp2 is considered defined by regalloc. + m.insert(m.allocateInstr().asDefineUninitializedReg(tmp2)) + + // Set all bits on tmp register. + pak := m.allocateInstr() + pak.asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp2), tmp2) + m.insert(pak) + + // Then XOR with tmp to reverse all bits on v.register. + xor := m.allocateInstr() + xor.asXmmRmR(sseOpcodePxor, newOperandReg(tmp2), tmp) + m.insert(xor) + + m.copyTo(tmp, rd) +} + +func (m *machine) lowerSplat(x, ret ssa.Value, lane ssa.VecLane) { + tmpDst := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst)) + + switch lane { + case ssa.VecLaneI8x16: + tmp := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmp)) + xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, xx, tmpDst)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpDst)) + case ssa.VecLaneI16x8: + xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, xx, tmpDst)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, xx, tmpDst)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, 
newOperandReg(tmpDst), tmpDst)) + case ssa.VecLaneI32x4: + xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, xx, tmpDst)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst)) + case ssa.VecLaneI64x2: + xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, xx, tmpDst)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, xx, tmpDst)) + case ssa.VecLaneF32x4: + xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, 0, xx, tmpDst)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst)) + case ssa.VecLaneF64x2: + xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, xx, tmpDst)) + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + m.copyTo(tmpDst, m.c.VRegOf(ret)) +} + +func (m *machine) lowerShuffle(x, y ssa.Value, lo, hi uint64, ret ssa.Value) { + var xMask, yMask [2]uint64 + for i := 0; i < 8; i++ { + loLane := byte(lo >> (i * 8)) + if loLane < 16 { + xMask[0] |= uint64(loLane) << (i * 8) + yMask[0] |= uint64(0x80) << (i * 8) + } else { + xMask[0] |= uint64(0x80) << (i * 8) + yMask[0] |= uint64(loLane-16) << (i * 8) + } + hiLane := byte(hi >> (i * 8)) + if hiLane < 16 { + xMask[1] |= uint64(hiLane) << (i * 8) + yMask[1] |= uint64(0x80) << (i * 8) + } else { + xMask[1] |= uint64(0x80) << (i * 8) + yMask[1] |= uint64(hiLane-16) << (i * 8) + } + } + + xmaskLabel := m.allocateLabel() + m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xmaskLabel}) + ymaskLabel := m.allocateLabel() + m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: ymaskLabel}) + + xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Reg(m.c.ValueDefinition(y)) + 
tmpX, tmpY := m.copyToTmp(xx.reg()), m.copyToTmp(yy.reg()) + + // Apply mask to X. + tmp := m.c.AllocateVReg(ssa.TypeV128) + loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xmaskLabel.L)), tmp) + m.insert(loadMaskLo) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpX)) + + // Apply mask to Y. + loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(ymaskLabel.L)), tmp) + m.insert(loadMaskHi) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpY)) + + // Combine the results. + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeOrps, newOperandReg(tmpX), tmpY)) + + m.copyTo(tmpY, m.c.VRegOf(ret)) +} + +func (m *machine) lowerVbBinOpUnaligned(op sseOpcode, x, y, ret ssa.Value) { + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rm := m.getOperand_Reg(m.c.ValueDefinition(y)) + rd := m.c.VRegOf(ret) + + tmp := m.copyToTmp(rn.reg()) + + binOp := m.allocateInstr() + binOp.asXmmRmR(op, rm, tmp) + m.insert(binOp) + + m.copyTo(tmp, rd) +} + +func (m *machine) lowerVbBinOp(op sseOpcode, x, y, ret ssa.Value) { + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rm := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + rd := m.c.VRegOf(ret) + + tmp := m.copyToTmp(rn.reg()) + + binOp := m.allocateInstr() + binOp.asXmmRmR(op, rm, tmp) + m.insert(binOp) + + m.copyTo(tmp, rd) +} + +func (m *machine) lowerVFcmp(x, y ssa.Value, c ssa.FloatCmpCond, ret ssa.Value, lane ssa.VecLane) { + var cmpOp sseOpcode + switch lane { + case ssa.VecLaneF32x4: + cmpOp = sseOpcodeCmpps + case ssa.VecLaneF64x2: + cmpOp = sseOpcodeCmppd + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + xx, yy := m.c.ValueDefinition(x), m.c.ValueDefinition(y) + var cmpImm cmpPred + switch c { + case ssa.FloatCmpCondGreaterThan: + yy, xx = xx, yy + cmpImm = cmpPredLT_OS + case ssa.FloatCmpCondGreaterThanOrEqual: + yy, xx = xx, yy + cmpImm = cmpPredLE_OS + case 
ssa.FloatCmpCondEqual: + cmpImm = cmpPredEQ_OQ + case ssa.FloatCmpCondNotEqual: + cmpImm = cmpPredNEQ_UQ + case ssa.FloatCmpCondLessThan: + cmpImm = cmpPredLT_OS + case ssa.FloatCmpCondLessThanOrEqual: + cmpImm = cmpPredLE_OS + default: + panic(fmt.Sprintf("invalid float comparison condition: %s", c)) + } + + tmp := m.c.AllocateVReg(ssa.TypeV128) + xxx := m.getOperand_Mem_Reg(xx) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, xxx, tmp)) + + rm := m.getOperand_Mem_Reg(yy) + m.insert(m.allocateInstr().asXmmRmRImm(cmpOp, byte(cmpImm), rm, tmp)) + + m.copyTo(tmp, m.c.VRegOf(ret)) +} + +func (m *machine) lowerVIcmp(x, y ssa.Value, c ssa.IntegerCmpCond, ret ssa.Value, lane ssa.VecLane) { + var eq, gt, maxu, minu, mins sseOpcode + switch lane { + case ssa.VecLaneI8x16: + eq, gt, maxu, minu, mins = sseOpcodePcmpeqb, sseOpcodePcmpgtb, sseOpcodePmaxub, sseOpcodePminub, sseOpcodePminsb + case ssa.VecLaneI16x8: + eq, gt, maxu, minu, mins = sseOpcodePcmpeqw, sseOpcodePcmpgtw, sseOpcodePmaxuw, sseOpcodePminuw, sseOpcodePminsw + case ssa.VecLaneI32x4: + eq, gt, maxu, minu, mins = sseOpcodePcmpeqd, sseOpcodePcmpgtd, sseOpcodePmaxud, sseOpcodePminud, sseOpcodePminsd + case ssa.VecLaneI64x2: + eq, gt = sseOpcodePcmpeqq, sseOpcodePcmpgtq + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + tmp := m.c.AllocateVReg(ssa.TypeV128) + var op operand + switch c { + case ssa.IntegerCmpCondSignedLessThanOrEqual: + if lane == ssa.VecLaneI64x2 { + x := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + // Copy x to tmp. + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, x, tmp)) + op = m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + } else { + y := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + // Copy y to tmp. 
+ m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, y, tmp)) + op = m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + } + case ssa.IntegerCmpCondSignedGreaterThanOrEqual: + if lane == ssa.VecLaneI64x2 { + y := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + // Copy y to tmp. + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, y, tmp)) + op = m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + } else { + x := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + // Copy x to tmp. + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, x, tmp)) + op = m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + } + case ssa.IntegerCmpCondSignedLessThan, ssa.IntegerCmpCondUnsignedLessThan, ssa.IntegerCmpCondUnsignedLessThanOrEqual: + y := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + // Copy y to tmp. + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, y, tmp)) + op = m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + default: + x := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + // Copy x to tmp. + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, x, tmp)) + op = m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + } + + switch c { + case ssa.IntegerCmpCondEqual: + m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) + case ssa.IntegerCmpCondNotEqual: + // First we compare for equality. + m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) + // Then flip the bits. To do so, we set all bits on tmp2. + tmp2 := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmp2)) + m.insert(m.allocateInstr().asXmmRmR(eq, newOperandReg(tmp2), tmp2)) + // And then xor with tmp. 
+ m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp2), tmp)) + case ssa.IntegerCmpCondSignedGreaterThan, ssa.IntegerCmpCondSignedLessThan: + m.insert(m.allocateInstr().asXmmRmR(gt, op, tmp)) + case ssa.IntegerCmpCondSignedGreaterThanOrEqual, ssa.IntegerCmpCondSignedLessThanOrEqual: + if lane == ssa.VecLaneI64x2 { + m.insert(m.allocateInstr().asXmmRmR(gt, op, tmp)) + // Then flip the bits. To do so, we set all bits on tmp2. + tmp2 := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmp2)) + m.insert(m.allocateInstr().asXmmRmR(eq, newOperandReg(tmp2), tmp2)) + // And then xor with tmp. + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp2), tmp)) + } else { + // First take min of x and y. + m.insert(m.allocateInstr().asXmmRmR(mins, op, tmp)) + // Then compare for equality. + m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) + } + case ssa.IntegerCmpCondUnsignedGreaterThan, ssa.IntegerCmpCondUnsignedLessThan: + // First maxu of x and y. + m.insert(m.allocateInstr().asXmmRmR(maxu, op, tmp)) + // Then compare for equality. + m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) + // Then flip the bits. To do so, we set all bits on tmp2. + tmp2 := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmp2)) + m.insert(m.allocateInstr().asXmmRmR(eq, newOperandReg(tmp2), tmp2)) + // And then xor with tmp. 
+ m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp2), tmp)) + case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual, ssa.IntegerCmpCondUnsignedLessThanOrEqual: + m.insert(m.allocateInstr().asXmmRmR(minu, op, tmp)) + m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) + default: + panic("BUG") + } + + m.copyTo(tmp, m.c.VRegOf(ret)) +} + +func (m *machine) lowerVbandnot(instr *ssa.Instruction, op sseOpcode) { + x, y := instr.Arg2() + xDef := m.c.ValueDefinition(x) + yDef := m.c.ValueDefinition(y) + rm, rn := m.getOperand_Reg(xDef), m.getOperand_Reg(yDef) + rd := m.c.VRegOf(instr.Return()) + + tmp := m.copyToTmp(rn.reg()) + + // pandn between rn, rm. + pand := m.allocateInstr() + pand.asXmmRmR(sseOpcodePandn, rm, tmp) + m.insert(pand) + + m.copyTo(tmp, rd) +} + +func (m *machine) lowerVbitselect(instr *ssa.Instruction) { + c, x, y := instr.SelectData() + xDef := m.c.ValueDefinition(x) + yDef := m.c.ValueDefinition(y) + rm, rn := m.getOperand_Reg(xDef), m.getOperand_Reg(yDef) + creg := m.getOperand_Reg(m.c.ValueDefinition(c)) + rd := m.c.VRegOf(instr.Return()) + + tmpC := m.copyToTmp(creg.reg()) + tmpX := m.copyToTmp(rm.reg()) + + // And between c, x (overwrites x). + pand := m.allocateInstr() + pand.asXmmRmR(sseOpcodePand, creg, tmpX) + m.insert(pand) + + // Andn between y, c (overwrites c). 
+ pandn := m.allocateInstr() + pandn.asXmmRmR(sseOpcodePandn, rn, tmpC) + m.insert(pandn) + + por := m.allocateInstr() + por.asXmmRmR(sseOpcodePor, newOperandReg(tmpC), tmpX) + m.insert(por) + + m.copyTo(tmpX, rd) +} + +func (m *machine) lowerVFmin(instr *ssa.Instruction) { + x, y, lane := instr.Arg2WithLane() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rm := m.getOperand_Reg(m.c.ValueDefinition(y)) + rd := m.c.VRegOf(instr.Return()) + + var min, cmp, andn, or, srl /* shift right logical */ sseOpcode + var shiftNumToInverseNaN uint32 + if lane == ssa.VecLaneF32x4 { + min, cmp, andn, or, srl, shiftNumToInverseNaN = sseOpcodeMinps, sseOpcodeCmpps, sseOpcodeAndnps, sseOpcodeOrps, sseOpcodePsrld, 0xa + } else { + min, cmp, andn, or, srl, shiftNumToInverseNaN = sseOpcodeMinpd, sseOpcodeCmppd, sseOpcodeAndnpd, sseOpcodeOrpd, sseOpcodePsrlq, 0xd + } + + tmp1 := m.copyToTmp(rn.reg()) + tmp2 := m.copyToTmp(rm.reg()) + + // tmp1=min(rn, rm) + minIns1 := m.allocateInstr() + minIns1.asXmmRmR(min, rn, tmp2) + m.insert(minIns1) + + // tmp2=min(rm, rn) + minIns2 := m.allocateInstr() + minIns2.asXmmRmR(min, rm, tmp1) + m.insert(minIns2) + + // tmp3:=tmp1=min(rn, rm) + tmp3 := m.copyToTmp(tmp1) + + // tmp1 = -0 if (rn == -0 || rm == -0) && rn != NaN && rm !=NaN + // NaN if rn == NaN || rm == NaN + // min(rm, rm) otherwise + orIns := m.allocateInstr() + orIns.asXmmRmR(or, newOperandReg(tmp2), tmp1) + m.insert(orIns) + + // tmp3 is originally min(rn,rm). 
+ // tmp3 = 0^ (set all bits) if rn == NaN || rm == NaN + // 0 otherwise + cmpIns := m.allocateInstr() + cmpIns.asXmmRmRImm(cmp, uint8(cmpPredUNORD_Q), newOperandReg(tmp2), tmp3) + m.insert(cmpIns) + + // tmp1 = -0 if (rn == -0 || rm == -0) && rn != NaN && rm !=NaN + // ^0 if rn == NaN || rm == NaN + // min(v1, v2) otherwise + orIns2 := m.allocateInstr() + orIns2.asXmmRmR(or, newOperandReg(tmp3), tmp1) + m.insert(orIns2) + + // tmp3 = set all bits on the mantissa bits + // 0 otherwise + shift := m.allocateInstr() + shift.asXmmRmiReg(srl, newOperandImm32(shiftNumToInverseNaN), tmp3) + m.insert(shift) + + // tmp3 = tmp1 and !tmp3 + // = -0 if (rn == -0 || rm == -0) && rn != NaN && rm !=NaN + // set all bits on exponential and sign bit (== NaN) if rn == NaN || rm == NaN + // min(rn, rm) otherwise + andnIns := m.allocateInstr() + andnIns.asXmmRmR(andn, newOperandReg(tmp1), tmp3) + m.insert(andnIns) + + m.copyTo(tmp3, rd) +} + +func (m *machine) lowerVFmax(instr *ssa.Instruction) { + x, y, lane := instr.Arg2WithLane() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rm := m.getOperand_Reg(m.c.ValueDefinition(y)) + rd := m.c.VRegOf(instr.Return()) + + var max, cmp, andn, or, xor, sub, srl /* shift right logical */ sseOpcode + var shiftNumToInverseNaN uint32 + if lane == ssa.VecLaneF32x4 { + max, cmp, andn, or, xor, sub, srl, shiftNumToInverseNaN = sseOpcodeMaxps, sseOpcodeCmpps, sseOpcodeAndnps, sseOpcodeOrps, sseOpcodeXorps, sseOpcodeSubps, sseOpcodePsrld, 0xa + } else { + max, cmp, andn, or, xor, sub, srl, shiftNumToInverseNaN = sseOpcodeMaxpd, sseOpcodeCmppd, sseOpcodeAndnpd, sseOpcodeOrpd, sseOpcodeXorpd, sseOpcodeSubpd, sseOpcodePsrlq, 0xd + } + + tmp0 := m.copyToTmp(rm.reg()) + tmp1 := m.copyToTmp(rn.reg()) + + // tmp0=max(rn, rm) + maxIns1 := m.allocateInstr() + maxIns1.asXmmRmR(max, rn, tmp0) + m.insert(maxIns1) + + // tmp1=max(rm, rn) + maxIns2 := m.allocateInstr() + maxIns2.asXmmRmR(max, rm, tmp1) + m.insert(maxIns2) + + // tmp2=max(rm, rn) + tmp2 := 
m.copyToTmp(tmp1) + + // tmp2 = -0 if (rn == -0 && rm == 0) || (rn == 0 && rm == -0) + // 0 if (rn == 0 && rm == 0) + // -0 if (rn == -0 && rm == -0) + // v1^v2 if rn == NaN || rm == NaN + // 0 otherwise + xorInstr := m.allocateInstr() + xorInstr.asXmmRmR(xor, newOperandReg(tmp0), tmp2) + m.insert(xorInstr) + // tmp1 = -0 if (rn == -0 && rm == 0) || (rn == 0 && rm == -0) + // 0 if (rn == 0 && rm == 0) + // -0 if (rn == -0 && rm == -0) + // NaN if rn == NaN || rm == NaN + // max(v1, v2) otherwise + orInstr := m.allocateInstr() + orInstr.asXmmRmR(or, newOperandReg(tmp2), tmp1) + m.insert(orInstr) + + tmp3 := m.copyToTmp(tmp1) + + // tmp3 = 0 if (rn == -0 && rm == 0) || (rn == 0 && rm == -0) || (rn == 0 && rm == 0) + // -0 if (rn == -0 && rm == -0) + // NaN if rn == NaN || rm == NaN + // max(v1, v2) otherwise + // + // Note: -0 - (-0) = 0 (!= -0) in floating point operation. + subIns := m.allocateInstr() + subIns.asXmmRmR(sub, newOperandReg(tmp2), tmp3) + m.insert(subIns) + + // tmp1 = 0^ if rn == NaN || rm == NaN + cmpIns := m.allocateInstr() + cmpIns.asXmmRmRImm(cmp, uint8(cmpPredUNORD_Q), newOperandReg(tmp1), tmp1) + m.insert(cmpIns) + + // tmp1 = set all bits on the mantissa bits + // 0 otherwise + shift := m.allocateInstr() + shift.asXmmRmiReg(srl, newOperandImm32(shiftNumToInverseNaN), tmp1) + m.insert(shift) + + andnIns := m.allocateInstr() + andnIns.asXmmRmR(andn, newOperandReg(tmp3), tmp1) + m.insert(andnIns) + + m.copyTo(tmp1, rd) +} + +func (m *machine) lowerVFabs(instr *ssa.Instruction) { + x, lane := instr.ArgWithLane() + rm := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + + tmp := m.c.AllocateVReg(ssa.TypeV128) + + def := m.allocateInstr() + def.asDefineUninitializedReg(tmp) + m.insert(def) + + // Set all bits on tmp. 
+ pcmp := m.allocateInstr() + pcmp.asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp) + m.insert(pcmp) + + switch lane { + case ssa.VecLaneF32x4: + // Shift right packed single floats by 1 to clear the sign bits. + shift := m.allocateInstr() + shift.asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp) + m.insert(shift) + // Clear the sign bit of rm. + andp := m.allocateInstr() + andp.asXmmRmR(sseOpcodeAndpd, rm, tmp) + m.insert(andp) + case ssa.VecLaneF64x2: + // Shift right packed single floats by 1 to clear the sign bits. + shift := m.allocateInstr() + shift.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(1), tmp) + m.insert(shift) + // Clear the sign bit of rm. + andp := m.allocateInstr() + andp.asXmmRmR(sseOpcodeAndps, rm, tmp) + m.insert(andp) + } + + m.copyTo(tmp, rd) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go new file mode 100644 index 000000000..8fa974c66 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go @@ -0,0 +1,304 @@ +package amd64 + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" +) + +// PostRegAlloc implements backend.Machine. +func (m *machine) PostRegAlloc() { + m.setupPrologue() + m.postRegAlloc() +} + +func (m *machine) setupPrologue() { + cur := m.ectx.RootInstr + prevInitInst := cur.next + + // At this point, we have the stack layout as follows: + // + // (high address) + // +-----------------+ <----- RBP (somewhere in the middle of the stack) + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... 
| + // | arg 1 | + // | arg 0 | + // | Return Addr | + // RSP ----> +-----------------+ + // (low address) + + // First, we push the RBP, and update the RBP to the current RSP. + // + // (high address) (high address) + // RBP ----> +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | ====> | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | Return Addr | | Return Addr | + // RSP ----> +-----------------+ | Caller_RBP | + // (low address) +-----------------+ <----- RSP, RBP + // + cur = m.setupRBPRSP(cur) + + if !m.stackBoundsCheckDisabled { + cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur) + } + + // + // (high address) + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | xxxxx | | xxxxx | + // | Return Addr | | Return Addr | + // | Caller_RBP | ====> | Caller_RBP | + // RBP,RSP->+-----------------+ +-----------------+ <----- RBP + // (low address) | clobbered M | + // | clobbered 1 | + // | ........... | + // | clobbered 0 | + // +-----------------+ <----- RSP + // + if regs := m.clobberedRegs; len(regs) > 0 { + for i := range regs { + r := regs[len(regs)-1-i] // Reverse order. + if r.RegType() == regalloc.RegTypeInt { + cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r))) + } else { + // Push the XMM register is not supported by the PUSH instruction. + cur = m.addRSP(-16, cur) + push := m.allocateInstr().asXmmMovRM( + sseOpcodeMovdqu, r, newOperandMem(m.newAmodeImmReg(0, rspVReg)), + ) + cur = linkInstr(cur, push) + } + } + } + + if size := m.spillSlotSize; size > 0 { + // Simply decrease the RSP to allocate the spill slots. 
+ // sub $size, %rsp + cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(size)), rspVReg, true)) + + // At this point, we have the stack layout as follows: + // + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | + // | ReturnAddress | + // | Caller_RBP | + // +-----------------+ <--- RBP + // | clobbered M | + // | ............ | + // | clobbered 1 | + // | clobbered 0 | + // | spill slot N | + // | ............ | + // | spill slot 0 | + // +-----------------+ <--- RSP + // (low address) + } + + linkInstr(cur, prevInitInst) +} + +// postRegAlloc does multiple things while walking through the instructions: +// 1. Inserts the epilogue code. +// 2. Removes the redundant copy instruction. +// 3. Inserts the dec/inc RSP instruction right before/after the call instruction. +// 4. Lowering that is supposed to be done after regalloc. +func (m *machine) postRegAlloc() { + ectx := m.ectx + for cur := ectx.RootInstr; cur != nil; cur = cur.next { + switch k := cur.kind; k { + case ret: + m.setupEpilogueAfter(cur.prev) + continue + case fcvtToSintSequence, fcvtToUintSequence: + m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] + if k == fcvtToSintSequence { + m.lowerFcvtToSintSequenceAfterRegalloc(cur) + } else { + m.lowerFcvtToUintSequenceAfterRegalloc(cur) + } + prev := cur.prev + next := cur.next + cur := prev + for _, instr := range m.ectx.PendingInstructions { + cur = linkInstr(cur, instr) + } + linkInstr(cur, next) + continue + case xmmCMov: + m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] + m.lowerXmmCmovAfterRegAlloc(cur) + prev := cur.prev + next := cur.next + cur := prev + for _, instr := range m.ectx.PendingInstructions { + cur = linkInstr(cur, instr) + } + linkInstr(cur, next) + continue + case idivRemSequence: + m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] + 
m.lowerIDivRemSequenceAfterRegAlloc(cur) + prev := cur.prev + next := cur.next + cur := prev + for _, instr := range m.ectx.PendingInstructions { + cur = linkInstr(cur, instr) + } + linkInstr(cur, next) + continue + case call, callIndirect: + // At this point, reg alloc is done, therefore we can safely insert dec/inc RPS instruction + // right before/after the call instruction. If this is done before reg alloc, the stack slot + // can point to the wrong location and therefore results in a wrong value. + call := cur + next := call.next + _, _, _, _, size := backend.ABIInfoFromUint64(call.u2) + if size > 0 { + dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true) + linkInstr(call.prev, dec) + linkInstr(dec, call) + inc := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(size), rspVReg, true) + linkInstr(call, inc) + linkInstr(inc, next) + } + continue + } + + // Removes the redundant copy instruction. + if cur.IsCopy() && cur.op1.reg().RealReg() == cur.op2.reg().RealReg() { + prev, next := cur.prev, cur.next + // Remove the copy instruction. + prev.next = next + if next != nil { + next.prev = prev + } + } + } +} + +func (m *machine) setupEpilogueAfter(cur *instruction) { + prevNext := cur.next + + // At this point, we have the stack layout as follows: + // + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | + // | ReturnAddress | + // | Caller_RBP | + // +-----------------+ <--- RBP + // | clobbered M | + // | ............ | + // | clobbered 1 | + // | clobbered 0 | + // | spill slot N | + // | ............ | + // | spill slot 0 | + // +-----------------+ <--- RSP + // (low address) + + if size := m.spillSlotSize; size > 0 { + // Simply increase the RSP to free the spill slots. 
+ // add $size, %rsp + cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(uint32(size)), rspVReg, true)) + } + + // + // (high address) + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | ReturnAddress | | ReturnAddress | + // | Caller_RBP | | Caller_RBP | + // RBP ---> +-----------------+ ========> +-----------------+ <---- RSP, RBP + // | clobbered M | + // | ............ | + // | clobbered 1 | + // | clobbered 0 | + // RSP ---> +-----------------+ + // (low address) + // + if regs := m.clobberedRegs; len(regs) > 0 { + for _, r := range regs { + if r.RegType() == regalloc.RegTypeInt { + cur = linkInstr(cur, m.allocateInstr().asPop64(r)) + } else { + // Pop the XMM register is not supported by the POP instruction. + pop := m.allocateInstr().asXmmUnaryRmR( + sseOpcodeMovdqu, newOperandMem(m.newAmodeImmReg(0, rspVReg)), r, + ) + cur = linkInstr(cur, pop) + cur = m.addRSP(16, cur) + } + } + } + + // Now roll back the RSP to RBP, and pop the caller's RBP. 
+ cur = m.revertRBPRSP(cur) + + linkInstr(cur, prevNext) +} + +func (m *machine) addRSP(offset int32, cur *instruction) *instruction { + if offset == 0 { + return cur + } + opcode := aluRmiROpcodeAdd + if offset < 0 { + opcode = aluRmiROpcodeSub + offset = -offset + } + return linkInstr(cur, m.allocateInstr().asAluRmiR(opcode, newOperandImm32(uint32(offset)), rspVReg, true)) +} + +func (m *machine) setupRBPRSP(cur *instruction) *instruction { + cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(rbpVReg))) + cur = linkInstr(cur, m.allocateInstr().asMovRR(rspVReg, rbpVReg, true)) + return cur +} + +func (m *machine) revertRBPRSP(cur *instruction) *instruction { + cur = linkInstr(cur, m.allocateInstr().asMovRR(rbpVReg, rspVReg, true)) + cur = linkInstr(cur, m.allocateInstr().asPop64(rbpVReg)) + return cur +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go new file mode 100644 index 000000000..0bb28ee9e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go @@ -0,0 +1,153 @@ +package amd64 + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +// InsertMoveBefore implements backend.RegAllocFunctionMachine. 
+func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { + typ := src.RegType() + if typ != dst.RegType() { + panic("BUG: src and dst must have the same type") + } + + mov := m.allocateInstr() + if typ == regalloc.RegTypeInt { + mov.asMovRR(src, dst, true) + } else { + mov.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), dst) + } + + cur := instr.prev + prevNext := cur.next + cur = linkInstr(cur, mov) + linkInstr(cur, prevNext) +} + +// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. +func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { + if !v.IsRealReg() { + panic("BUG: VReg must be backed by real reg to be stored") + } + + typ := m.c.TypeOf(v) + + var prevNext, cur *instruction + if after { + cur, prevNext = instr, instr.next + } else { + cur, prevNext = instr.prev, instr + } + + offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) + store := m.allocateInstr() + mem := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg)) + switch typ { + case ssa.TypeI32: + store.asMovRM(v, mem, 4) + case ssa.TypeI64: + store.asMovRM(v, mem, 8) + case ssa.TypeF32: + store.asXmmMovRM(sseOpcodeMovss, v, mem) + case ssa.TypeF64: + store.asXmmMovRM(sseOpcodeMovsd, v, mem) + case ssa.TypeV128: + store.asXmmMovRM(sseOpcodeMovdqu, v, mem) + } + + cur = linkInstr(cur, store) + return linkInstr(cur, prevNext) +} + +// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. +func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { + if !v.IsRealReg() { + panic("BUG: VReg must be backed by real reg to be stored") + } + + typ := m.c.TypeOf(v) + var prevNext, cur *instruction + if after { + cur, prevNext = instr, instr.next + } else { + cur, prevNext = instr.prev, instr + } + + // Load the value to the temporary. 
+ load := m.allocateInstr() + offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) + a := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg)) + switch typ { + case ssa.TypeI32: + load.asMovzxRmR(extModeLQ, a, v) + case ssa.TypeI64: + load.asMov64MR(a, v) + case ssa.TypeF32: + load.asXmmUnaryRmR(sseOpcodeMovss, a, v) + case ssa.TypeF64: + load.asXmmUnaryRmR(sseOpcodeMovsd, a, v) + case ssa.TypeV128: + load.asXmmUnaryRmR(sseOpcodeMovdqu, a, v) + default: + panic("BUG") + } + + cur = linkInstr(cur, load) + return linkInstr(cur, prevNext) +} + +// ClobberedRegisters implements backend.RegAllocFunctionMachine. +func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { + m.clobberedRegs = append(m.clobberedRegs[:0], regs...) +} + +// Swap implements backend.RegAllocFunctionMachine. +func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { + if x1.RegType() == regalloc.RegTypeInt { + prevNext := cur.next + xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8) + cur = linkInstr(cur, xc) + linkInstr(cur, prevNext) + } else { + if tmp.Valid() { + prevNext := cur.next + m.InsertMoveBefore(tmp, x1, prevNext) + m.InsertMoveBefore(x1, x2, prevNext) + m.InsertMoveBefore(x2, tmp, prevNext) + } else { + prevNext := cur.next + r2 := x2.RealReg() + // Temporarily spill x1 to stack. + cur = m.InsertStoreRegisterAt(x1, cur, true).prev + // Then move x2 to x1. + cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1)) + linkInstr(cur, prevNext) + // Then reload the original value on x1 from stack to r2. + m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) + } + } +} + +// LastInstrForInsertion implements backend.RegAllocFunctionMachine. 
+func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { + cur := end + for cur.kind == nop0 { + cur = cur.prev + if cur == begin { + return end + } + } + switch cur.kind { + case jmp: + return cur + default: + return end + } +} + +// SSABlockLabel implements backend.RegAllocFunctionMachine. +func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { + return m.ectx.SsaBlockIDToLabels[id] +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go new file mode 100644 index 000000000..539a8b754 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go @@ -0,0 +1,992 @@ +package amd64 + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +var swizzleMask = [16]byte{ + 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, + 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, +} + +func (m *machine) lowerSwizzle(x, y ssa.Value, ret ssa.Value) { + masklabel := m.getOrAllocateConstLabel(&m.constSwizzleMaskConstIndex, swizzleMask[:]) + + // Load mask to maskReg. + maskReg := m.c.AllocateVReg(ssa.TypeV128) + loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(masklabel)), maskReg) + m.insert(loadMask) + + // Copy x and y to tmp registers. + xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + tmpDst := m.copyToTmp(xx.reg()) + yy := m.getOperand_Reg(m.c.ValueDefinition(y)) + tmpX := m.copyToTmp(yy.reg()) + + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddusb, newOperandReg(maskReg), tmpX)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpX), tmpDst)) + + // Copy the result to the destination register. 
+ m.copyTo(tmpDst, m.c.VRegOf(ret)) +} + +func (m *machine) lowerInsertLane(x, y ssa.Value, index byte, ret ssa.Value, lane ssa.VecLane) { + // Copy x to tmp. + tmpDst := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), tmpDst)) + + yy := m.getOperand_Reg(m.c.ValueDefinition(y)) + switch lane { + case ssa.VecLaneI8x16: + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, index, yy, tmpDst)) + case ssa.VecLaneI16x8: + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, index, yy, tmpDst)) + case ssa.VecLaneI32x4: + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, index, yy, tmpDst)) + case ssa.VecLaneI64x2: + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, index, yy, tmpDst)) + case ssa.VecLaneF32x4: + // In INSERTPS instruction, the destination index is encoded at 4 and 5 bits of the argument. + // See https://www.felixcloutier.com/x86/insertps + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, index<<4, yy, tmpDst)) + case ssa.VecLaneF64x2: + if index == 0 { + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, yy, tmpDst)) + } else { + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, yy, tmpDst)) + } + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + m.copyTo(tmpDst, m.c.VRegOf(ret)) +} + +func (m *machine) lowerExtractLane(x ssa.Value, index byte, signed bool, ret ssa.Value, lane ssa.VecLane) { + // Pextr variants are used to extract a lane from a vector register. 
+ xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + + tmpDst := m.c.AllocateVReg(ret.Type()) + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst)) + switch lane { + case ssa.VecLaneI8x16: + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrb, index, xx, tmpDst)) + if signed { + m.insert(m.allocateInstr().asMovsxRmR(extModeBL, newOperandReg(tmpDst), tmpDst)) + } else { + m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(tmpDst), tmpDst)) + } + case ssa.VecLaneI16x8: + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrw, index, xx, tmpDst)) + if signed { + m.insert(m.allocateInstr().asMovsxRmR(extModeWL, newOperandReg(tmpDst), tmpDst)) + } else { + m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(tmpDst), tmpDst)) + } + case ssa.VecLaneI32x4: + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrd, index, xx, tmpDst)) + case ssa.VecLaneI64x2: + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, index, xx, tmpDst)) + case ssa.VecLaneF32x4: + if index == 0 { + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovss, xx, tmpDst)) + } else { + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, index, xx, tmpDst)) + } + case ssa.VecLaneF64x2: + if index == 0 { + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst)) + } else { + m.copyTo(xx.reg(), tmpDst) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0b00_00_11_10, newOperandReg(tmpDst), tmpDst)) + } + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + m.copyTo(tmpDst, m.c.VRegOf(ret)) +} + +var sqmulRoundSat = [16]byte{ + 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, + 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, +} + +func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) { + // See https://github.com/WebAssembly/simd/pull/365 for the following logic. 
+ maskLabel := m.getOrAllocateConstLabel(&m.constSqmulRoundSatIndex, sqmulRoundSat[:]) + + tmp := m.c.AllocateVReg(ssa.TypeV128) + loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp) + m.insert(loadMask) + + xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + tmpX := m.copyToTmp(xx.reg()) + + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmpX), tmp)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX)) + + m.copyTo(tmpX, m.c.VRegOf(ret)) +} + +func (m *machine) lowerVUshr(x, y, ret ssa.Value, lane ssa.VecLane) { + switch lane { + case ssa.VecLaneI8x16: + m.lowerVUshri8x16(x, y, ret) + case ssa.VecLaneI16x8, ssa.VecLaneI32x4, ssa.VecLaneI64x2: + m.lowerShr(x, y, ret, lane, false) + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } +} + +// i8x16LogicalSHRMaskTable is necessary for emulating non-existent packed bytes logical right shifts on amd64. +// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits. +var i8x16LogicalSHRMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes. 
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // for 1 shift + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, // for 2 shift + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, // for 3 shift + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // for 4 shift + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // for 5 shift + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // for 6 shift + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // for 7 shift +} + +func (m *machine) lowerVUshri8x16(x, y, ret ssa.Value) { + tmpGpReg := m.c.AllocateVReg(ssa.TypeI32) + // Load the modulo 8 mask to tmpReg. + m.lowerIconst(tmpGpReg, 0x7, false) + // Take the modulo 8 of the shift amount. + shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)) + m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, tmpGpReg, false)) + + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xx := m.copyToTmp(_xx.reg()) + + vecTmp := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), vecTmp, false)) + m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlw, newOperandReg(vecTmp), xx)) + + maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16LogicalSHRMaskTableIndex, i8x16LogicalSHRMaskTable[:]) + base := m.c.AllocateVReg(ssa.TypeI64) + lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base) + m.insert(lea) + + // Shift tmpGpReg by 4 to multiply the shift amount by 16. 
+ m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false)) + + mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0) + loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), vecTmp) + m.insert(loadMask) + + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(vecTmp), xx)) + m.copyTo(xx, m.c.VRegOf(ret)) +} + +func (m *machine) lowerVSshr(x, y, ret ssa.Value, lane ssa.VecLane) { + switch lane { + case ssa.VecLaneI8x16: + m.lowerVSshri8x16(x, y, ret) + case ssa.VecLaneI16x8, ssa.VecLaneI32x4: + m.lowerShr(x, y, ret, lane, true) + case ssa.VecLaneI64x2: + m.lowerVSshri64x2(x, y, ret) + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } +} + +func (m *machine) lowerVSshri8x16(x, y, ret ssa.Value) { + shiftAmtReg := m.c.AllocateVReg(ssa.TypeI32) + // Load the modulo 8 mask to tmpReg. + m.lowerIconst(shiftAmtReg, 0x7, false) + // Take the modulo 8 of the shift amount. + shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)) + m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, shiftAmtReg, false)) + + // Copy the x value to two temporary registers. + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xx := m.copyToTmp(_xx.reg()) + vecTmp := m.c.AllocateVReg(ssa.TypeV128) + m.copyTo(xx, vecTmp) + + // Assuming that we have + // xx = [b1, ..., b16] + // vecTmp = [b1, ..., b16] + // at this point, then we use PUNPCKLBW and PUNPCKHBW to produce: + // xx = [b1, b1, b2, b2, ..., b8, b8] + // vecTmp = [b9, b9, b10, b10, ..., b16, b16] + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpcklbw, newOperandReg(xx), xx)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpckhbw, newOperandReg(vecTmp), vecTmp)) + + // Adding 8 to the shift amount, and then move the amount to vecTmp2. 
+ vecTmp2 := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(8), shiftAmtReg, false)) + m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(shiftAmtReg), vecTmp2, false)) + + // Perform the word packed arithmetic right shifts on vreg and vecTmp. + // This changes these two registers as: + // xx = [xxx, b1 >> s, xxx, b2 >> s, ..., xxx, b8 >> s] + // vecTmp = [xxx, b9 >> s, xxx, b10 >> s, ..., xxx, b16 >> s] + // where xxx is 1 or 0 depending on each byte's sign, and ">>" is the arithmetic shift on a byte. + m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), xx)) + m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), vecTmp)) + + // Finally, we can get the result by packing these two word vectors. + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePacksswb, newOperandReg(vecTmp), xx)) + + m.copyTo(xx, m.c.VRegOf(ret)) +} + +func (m *machine) lowerVSshri64x2(x, y, ret ssa.Value) { + // Load the shift amount to RCX. 
+ shiftAmt := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, shiftAmt, rcxVReg)) + + tmpGp := m.c.AllocateVReg(ssa.TypeI64) + + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xxReg := m.copyToTmp(_xx.reg()) + + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 0, newOperandReg(xxReg), tmpGp)) + m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), xxReg)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 1, newOperandReg(xxReg), tmpGp)) + m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), xxReg)) + + m.copyTo(xxReg, m.c.VRegOf(ret)) +} + +func (m *machine) lowerShr(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) { + var modulo uint64 + var shiftOp sseOpcode + switch lane { + case ssa.VecLaneI16x8: + modulo = 0xf + if signed { + shiftOp = sseOpcodePsraw + } else { + shiftOp = sseOpcodePsrlw + } + case ssa.VecLaneI32x4: + modulo = 0x1f + if signed { + shiftOp = sseOpcodePsrad + } else { + shiftOp = sseOpcodePsrld + } + case ssa.VecLaneI64x2: + modulo = 0x3f + if signed { + panic("BUG") + } + shiftOp = sseOpcodePsrlq + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xx := m.copyToTmp(_xx.reg()) + + tmpGpReg := m.c.AllocateVReg(ssa.TypeI32) + // Load the modulo 8 mask to tmpReg. + m.lowerIconst(tmpGpReg, modulo, false) + // Take the modulo 8 of the shift amount. + m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, + m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false)) + // And move it to a xmm register. 
+ tmpVec := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false)) + + // Then do the actual shift. + m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx)) + + m.copyTo(xx, m.c.VRegOf(ret)) +} + +func (m *machine) lowerVIshl(x, y, ret ssa.Value, lane ssa.VecLane) { + var modulo uint64 + var shiftOp sseOpcode + var isI8x16 bool + switch lane { + case ssa.VecLaneI8x16: + isI8x16 = true + modulo = 0x7 + shiftOp = sseOpcodePsllw + case ssa.VecLaneI16x8: + modulo = 0xf + shiftOp = sseOpcodePsllw + case ssa.VecLaneI32x4: + modulo = 0x1f + shiftOp = sseOpcodePslld + case ssa.VecLaneI64x2: + modulo = 0x3f + shiftOp = sseOpcodePsllq + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xx := m.copyToTmp(_xx.reg()) + + tmpGpReg := m.c.AllocateVReg(ssa.TypeI32) + // Load the modulo 8 mask to tmpReg. + m.lowerIconst(tmpGpReg, modulo, false) + // Take the modulo 8 of the shift amount. + m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, + m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false)) + // And move it to a xmm register. + tmpVec := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false)) + + // Then do the actual shift. + m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx)) + + if isI8x16 { + maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16SHLMaskTableIndex, i8x16SHLMaskTable[:]) + base := m.c.AllocateVReg(ssa.TypeI64) + lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base) + m.insert(lea) + + // Shift tmpGpReg by 4 to multiply the shift amount by 16. 
+ m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false)) + + mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0) + loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), tmpVec) + m.insert(loadMask) + + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(tmpVec), xx)) + } + + m.copyTo(xx, m.c.VRegOf(ret)) +} + +// i8x16SHLMaskTable is necessary for emulating non-existent packed bytes left shifts on amd64. +// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits. +var i8x16SHLMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes. + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, // for 1 shift + 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, // for 2 shift + 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, // for 3 shift + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // for 4 shift + 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, // for 5 shift + 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, // for 6 shift + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // for 7 shift +} + +func (m *machine) lowerVRound(x, ret ssa.Value, imm byte, _64 bool) { + xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + var round sseOpcode + if _64 { + round = sseOpcodeRoundpd + } else { + round = sseOpcodeRoundps + } + m.insert(m.allocateInstr().asXmmUnaryRmRImm(round, imm, xx, m.c.VRegOf(ret))) +} + +var ( + allOnesI8x16 = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 
0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1} + allOnesI16x8 = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0} + extAddPairwiseI16x8uMask1 = [16]byte{0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80} + extAddPairwiseI16x8uMask2 = [16]byte{0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00} +) + +func (m *machine) lowerExtIaddPairwise(x, ret ssa.Value, srcLane ssa.VecLane, signed bool) { + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xx := m.copyToTmp(_xx.reg()) + switch srcLane { + case ssa.VecLaneI8x16: + allOneReg := m.c.AllocateVReg(ssa.TypeV128) + mask := m.getOrAllocateConstLabel(&m.constAllOnesI8x16Index, allOnesI8x16[:]) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOneReg)) + + var resultReg regalloc.VReg + if signed { + resultReg = allOneReg + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(xx), resultReg)) + } else { + // Interpreter tmp (all ones) as signed byte meaning that all the multiply-add is unsigned. + resultReg = xx + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(allOneReg), resultReg)) + } + m.copyTo(resultReg, m.c.VRegOf(ret)) + + case ssa.VecLaneI16x8: + if signed { + allOnesReg := m.c.AllocateVReg(ssa.TypeV128) + mask := m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:]) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOnesReg)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(allOnesReg), xx)) + m.copyTo(xx, m.c.VRegOf(ret)) + } else { + maskReg := m.c.AllocateVReg(ssa.TypeV128) + mask := m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask1Index, extAddPairwiseI16x8uMask1[:]) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg)) + + // Flip the sign bits on xx. 
+ // + // Assuming that xx = [w1, ..., w8], now we have, + // xx[i] = int8(-w1) for i = 0...8 + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(maskReg), xx)) + + mask = m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:]) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg)) + + // For i = 0,..4 (as this results in i32x4 lanes), now we have + // xx[i] = int32(-wn + -w(n+1)) = int32(-(wn + w(n+1))) + // c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(maskReg), xx)) + + mask = m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask2Index, extAddPairwiseI16x8uMask2[:]) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg)) + + // vr[i] = int32(-(wn + w(n+1))) + int32(math.MaxInt16+1) = int32((wn + w(n+1))) = uint32(wn + w(n+1)). + // c.assembler.CompileRegisterToRegister(amd64.PADDD, tmp, vr) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(maskReg), xx)) + + m.copyTo(xx, m.c.VRegOf(ret)) + } + default: + panic(fmt.Sprintf("invalid lane type: %s", srcLane)) + } +} + +func (m *machine) lowerWidenLow(x, ret ssa.Value, lane ssa.VecLane, signed bool) { + var sseOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + if signed { + sseOp = sseOpcodePmovsxbw + } else { + sseOp = sseOpcodePmovzxbw + } + case ssa.VecLaneI16x8: + if signed { + sseOp = sseOpcodePmovsxwd + } else { + sseOp = sseOpcodePmovzxwd + } + case ssa.VecLaneI32x4: + if signed { + sseOp = sseOpcodePmovsxdq + } else { + sseOp = sseOpcodePmovzxdq + } + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, xx, m.c.VRegOf(ret))) +} + +func (m *machine) lowerWidenHigh(x, ret ssa.Value, lane ssa.VecLane, signed bool) { + tmp := 
m.c.AllocateVReg(ssa.TypeV128) + xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + m.copyTo(xx.reg(), tmp) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePalignr, 8, newOperandReg(tmp), tmp)) + + var sseOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + if signed { + sseOp = sseOpcodePmovsxbw + } else { + sseOp = sseOpcodePmovzxbw + } + case ssa.VecLaneI16x8: + if signed { + sseOp = sseOpcodePmovsxwd + } else { + sseOp = sseOpcodePmovzxwd + } + case ssa.VecLaneI32x4: + if signed { + sseOp = sseOpcodePmovsxdq + } else { + sseOp = sseOpcodePmovzxdq + } + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, newOperandReg(tmp), m.c.VRegOf(ret))) +} + +func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, ret ssa.Value, lane ssa.VecLane) { + tmpDst, tmpGp := m.c.AllocateVReg(ssa.TypeV128), m.c.AllocateVReg(ssa.TypeI64) + am := newOperandMem(m.lowerToAddressMode(ptr, offset)) + + m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst)) + switch lane { + case ssa.VecLaneI8x16: + m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, am, tmpGp)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, newOperandReg(tmpGp), tmpDst)) + tmpZeroVec := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asZeros(tmpZeroVec)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpZeroVec), tmpDst)) + case ssa.VecLaneI16x8: + m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, am, tmpGp)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, newOperandReg(tmpGp), tmpDst)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, newOperandReg(tmpGp), tmpDst)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst)) + case ssa.VecLaneI32x4: + m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, am, tmpGp)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, newOperandReg(tmpGp), tmpDst)) + 
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst)) + case ssa.VecLaneI64x2: + m.insert(m.allocateInstr().asMov64MR(am, tmpGp)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), tmpDst)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), tmpDst)) + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + m.copyTo(tmpDst, m.c.VRegOf(ret)) +} + +var f64x2CvtFromIMask = [16]byte{ + 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +} + +func (m *machine) lowerVFcvtFromInt(x, ret ssa.Value, lane ssa.VecLane, signed bool) { + switch lane { + case ssa.VecLaneF32x4: + if signed { + xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, xx, m.c.VRegOf(ret))) + } else { + xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + // Copy the value to two temporary registers. + tmp := m.copyToTmp(xx.reg()) + tmp2 := m.copyToTmp(xx.reg()) + + // Clear the higher 16 bits of each 32-bit element. + m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePslld, newOperandImm32(0xa), tmp)) + m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0xa), tmp)) + + // Subtract the higher 16-bits from tmp2: clear the lower 16-bits of tmp2. + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubd, newOperandReg(tmp), tmp2)) + + // Convert the lower 16-bits in tmp. + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp)) + + // Left shift by one and convert tmp2, meaning that halved conversion result of higher 16-bits in tmp2. + m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp2)) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp2), tmp2)) + + // Double the converted halved higher 16bits. 
+ m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp2), tmp2)) + + // Get the conversion result by add tmp (holding lower 16-bit conversion) into tmp2. + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp), tmp2)) + + m.copyTo(tmp2, m.c.VRegOf(ret)) + } + case ssa.VecLaneF64x2: + if signed { + xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2pd, xx, m.c.VRegOf(ret))) + } else { + maskReg := m.c.AllocateVReg(ssa.TypeV128) + maskLabel := m.getOrAllocateConstLabel(&m.constF64x2CvtFromIMaskIndex, f64x2CvtFromIMask[:]) + // maskReg = [0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg)) + + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xx := m.copyToTmp(_xx.reg()) + + // Given that we have xx = [d1, d2, d3, d4], this results in + // xx = [d1, [0x00, 0x00, 0x30, 0x43], d2, [0x00, 0x00, 0x30, 0x43]] + // = [float64(uint32(d1)) + 0x1.0p52, float64(uint32(d2)) + 0x1.0p52] + // ^See https://stackoverflow.com/questions/13269523/can-all-32-bit-ints-be-exactly-represented-as-a-double + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeUnpcklps, newOperandReg(maskReg), xx)) + + // maskReg = [float64(0x1.0p52), float64(0x1.0p52)] + maskLabel = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:]) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg)) + + // Now, we get the result as + // xx = [float64(uint32(d1)), float64(uint32(d2))] + // because the following equality always satisfies: + // float64(0x1.0p52 + float64(uint32(x))) - float64(0x1.0p52 + float64(uint32(y))) = float64(uint32(x)) - float64(uint32(y)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubpd, newOperandReg(maskReg), xx)) + + m.copyTo(xx, m.c.VRegOf(ret)) + } + default: + 
panic(fmt.Sprintf("invalid lane type: %s", lane)) + } +} + +var ( + // i32sMaxOnF64x2 holds math.MaxInt32(=2147483647.0) on two f64 lanes. + i32sMaxOnF64x2 = [16]byte{ + 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0) + 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0) + } + + // i32sMaxOnF64x2 holds math.MaxUint32(=4294967295.0) on two f64 lanes. + i32uMaxOnF64x2 = [16]byte{ + 0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0) + 0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0) + } + + // twop52 holds two float64(0x1.0p52) on two f64 lanes. 0x1.0p52 is special in the sense that + // with this exponent, the mantissa represents a corresponding uint32 number, and arithmetics, + // like addition or subtraction, the resulted floating point holds exactly the same + // bit representations in 32-bit integer on its mantissa. + // + // Note: the name twop52 is common across various compiler ecosystem. + // E.g. https://github.com/llvm/llvm-project/blob/92ab024f81e5b64e258b7c3baaf213c7c26fcf40/compiler-rt/lib/builtins/floatdidf.c#L28 + // E.g. https://opensource.apple.com/source/clang/clang-425.0.24/src/projects/compiler-rt/lib/floatdidf.c.auto.html + twop52 = [16]byte{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52) + } +) + +func (m *machine) lowerVFcvtToIntSat(x, ret ssa.Value, lane ssa.VecLane, signed bool) { + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xx := m.copyToTmp(_xx.reg()) + + switch lane { + case ssa.VecLaneF32x4: + if signed { + tmp := m.copyToTmp(xx) + + // Assuming we have xx = [v1, v2, v3, v4]. + // + // Set all bits if lane is not NaN on tmp. 
+ // tmp[i] = 0xffffffff if vi != NaN + // = 0 if vi == NaN + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp)) + + // Clear NaN lanes on xx, meaning that + // xx[i] = vi if vi != NaN + // 0 if vi == NaN + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp), xx)) + + // tmp[i] = ^vi if vi != NaN + // = 0xffffffff if vi == NaN + // which means that tmp[i] & 0x80000000 != 0 if and only if vi is negative. + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeXorps, newOperandReg(xx), tmp)) + + // xx[i] = int32(vi) if vi != NaN and xx is not overflowing. + // = 0x80000000 if vi != NaN and xx is overflowing (See https://www.felixcloutier.com/x86/cvttps2dq) + // = 0 if vi == NaN + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx)) + + // Below, we have to convert 0x80000000 into 0x7FFFFFFF for positive overflowing lane. + // + // tmp[i] = 0x80000000 if vi is positive + // = any satisfying any&0x80000000 = 0 if vi is negative or zero. + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(xx), tmp)) + + // Arithmetic right shifting tmp by 31, meaning that we have + // tmp[i] = 0xffffffff if vi is positive, 0 otherwise. + m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrad, newOperandImm32(0x1f), tmp)) + + // Flipping 0x80000000 if vi is positive, otherwise keep intact. 
+ m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), xx)) + } else { + tmp := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asZeros(tmp)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxps, newOperandReg(tmp), xx)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp)) + m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0x1), tmp)) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp)) + tmp2 := m.copyToTmp(xx) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubps, newOperandReg(tmp), tmp2)) + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredLE_OS), newOperandReg(tmp2), tmp)) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(tmp2), tmp2)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp2)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaxsd, newOperandReg(tmp), tmp2)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(tmp2), xx)) + } + + case ssa.VecLaneF64x2: + tmp2 := m.c.AllocateVReg(ssa.TypeV128) + if signed { + tmp := m.copyToTmp(xx) + + // Set all bits for non-NaN lanes, zeros otherwise. + // I.e. tmp[i] = 0xffffffff_ffffffff if vi != NaN, 0 otherwise. + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp)) + + maskLabel := m.getOrAllocateConstLabel(&m.constI32sMaxOnF64x2Index, i32sMaxOnF64x2[:]) + // Load the 2147483647 into tmp2's each lane. + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp2)) + + // tmp[i] = 2147483647 if vi != NaN, 0 otherwise. 
+ m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp2), tmp)) + + // MINPD returns the source register's value as-is, so we have + // xx[i] = vi if vi != NaN + // = 0 if vi == NaN + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp), xx)) + + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttpd2dq, newOperandReg(xx), xx)) + } else { + tmp := m.c.AllocateVReg(ssa.TypeV128) + m.insert(m.allocateInstr().asZeros(tmp)) + + // xx[i] = vi if vi != NaN && vi > 0 + // = 0 if vi == NaN || vi <= 0 + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxpd, newOperandReg(tmp), xx)) + + // tmp2[i] = float64(math.MaxUint32) = math.MaxUint32 + maskIndex := m.getOrAllocateConstLabel(&m.constI32uMaxOnF64x2Index, i32uMaxOnF64x2[:]) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2)) + + // xx[i] = vi if vi != NaN && vi > 0 && vi <= math.MaxUint32 + // = 0 otherwise + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp2), xx)) + + // Round the floating points into integer. + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeRoundpd, 0x3, newOperandReg(xx), xx)) + + // tmp2[i] = float64(0x1.0p52) + maskIndex = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:]) + m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2)) + + // xx[i] = float64(0x1.0p52) + float64(uint32(vi)) if vi != NaN && vi > 0 && vi <= math.MaxUint32 + // = 0 otherwise + // + // This means that xx[i] holds exactly the same bit of uint32(vi) in its lower 32-bits. + m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddpd, newOperandReg(tmp2), xx)) + + // At this point, we have + // xx = [uint32(v0), float64(0x1.0p52), uint32(v1), float64(0x1.0p52)] + // tmp = [0, 0, 0, 0] + // as 32x4 lanes. 
Therefore, SHUFPS with 0b00_00_10_00 results in + // xx = [xx[00], xx[10], tmp[00], tmp[00]] = [xx[00], xx[10], 0, 0] + // meaning that for i = 0 and 1, we have + // xx[i] = uint32(vi) if vi != NaN && vi > 0 && vi <= math.MaxUint32 + // = 0 otherwise. + m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeShufps, 0b00_00_10_00, newOperandReg(tmp), xx)) + } + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + + m.copyTo(xx, m.c.VRegOf(ret)) +} + +func (m *machine) lowerNarrow(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) { + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xx := m.copyToTmp(_xx.reg()) + yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + + var sseOp sseOpcode + switch lane { + case ssa.VecLaneI16x8: + if signed { + sseOp = sseOpcodePacksswb + } else { + sseOp = sseOpcodePackuswb + } + case ssa.VecLaneI32x4: + if signed { + sseOp = sseOpcodePackssdw + } else { + sseOp = sseOpcodePackusdw + } + default: + panic(fmt.Sprintf("invalid lane type: %s", lane)) + } + m.insert(m.allocateInstr().asXmmRmR(sseOp, yy, xx)) + m.copyTo(xx, m.c.VRegOf(ret)) +} + +func (m *machine) lowerWideningPairwiseDotProductS(x, y, ret ssa.Value) { + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + xx := m.copyToTmp(_xx.reg()) + yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, yy, xx)) + m.copyTo(xx, m.c.VRegOf(ret)) +} + +func (m *machine) lowerVIabs(instr *ssa.Instruction) { + x, lane := instr.ArgWithLane() + rd := m.c.VRegOf(instr.Return()) + + if lane == ssa.VecLaneI64x2 { + _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) + + blendReg := xmm0VReg + m.insert(m.allocateInstr().asDefineUninitializedReg(blendReg)) + + tmp := m.copyToTmp(_xx.reg()) + xx := m.copyToTmp(_xx.reg()) + + // Clear all bits on blendReg. + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(blendReg), blendReg)) + // Subtract xx from blendMaskReg. 
+ m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubq, newOperandReg(xx), blendReg)) + // Copy the subtracted value ^^ back into tmp. + m.copyTo(blendReg, xx) + + m.insert(m.allocateInstr().asBlendvpd(newOperandReg(tmp), xx)) + + m.copyTo(xx, rd) + } else { + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI8x16: + vecOp = sseOpcodePabsb + case ssa.VecLaneI16x8: + vecOp = sseOpcodePabsw + case ssa.VecLaneI32x4: + vecOp = sseOpcodePabsd + } + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + + i := m.allocateInstr() + i.asXmmUnaryRmR(vecOp, rn, rd) + m.insert(i) + } +} + +func (m *machine) lowerVIpopcnt(instr *ssa.Instruction) { + x := instr.Arg() + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rd := m.c.VRegOf(instr.Return()) + + tmp1 := m.c.AllocateVReg(ssa.TypeV128) + m.lowerVconst(tmp1, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f) + + // Copy input into tmp2. + tmp2 := m.copyToTmp(rn.reg()) + + // Given that we have: + // rm = [b1, ..., b16] where bn = hn:ln and hn and ln are higher and lower 4-bits of bn. + // + // Take PAND on tmp1 and tmp2, so that we mask out all the higher bits. + // tmp2 = [l1, ..., l16]. + pand := m.allocateInstr() + pand.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp2) + m.insert(pand) + + // Do logical (packed word) right shift by 4 on rm and PAND against the mask (tmp1); meaning that we have + // tmp3 = [h1, ...., h16]. + tmp3 := m.copyToTmp(rn.reg()) + psrlw := m.allocateInstr() + psrlw.asXmmRmiReg(sseOpcodePsrlw, newOperandImm32(4), tmp3) + m.insert(psrlw) + + pand2 := m.allocateInstr() + pand2.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp3) + m.insert(pand2) + + // Read the popcntTable into tmp4, and we have + // tmp4 = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04] + tmp4 := m.c.AllocateVReg(ssa.TypeV128) + m.lowerVconst(tmp4, 0x03_02_02_01_02_01_01_00, 0x04_03_03_02_03_02_02_01) + + // Make a copy for later. 
+ tmp5 := m.copyToTmp(tmp4) + + // tmp4 = [popcnt(l1), ..., popcnt(l16)]. + pshufb := m.allocateInstr() + pshufb.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp2), tmp4) + m.insert(pshufb) + + pshufb2 := m.allocateInstr() + pshufb2.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp3), tmp5) + m.insert(pshufb2) + + // tmp4 + tmp5 is the result. + paddb := m.allocateInstr() + paddb.asXmmRmR(sseOpcodePaddb, newOperandReg(tmp4), tmp5) + m.insert(paddb) + + m.copyTo(tmp5, rd) +} + +func (m *machine) lowerVImul(instr *ssa.Instruction) { + x, y, lane := instr.Arg2WithLane() + rd := m.c.VRegOf(instr.Return()) + if lane == ssa.VecLaneI64x2 { + rn := m.getOperand_Reg(m.c.ValueDefinition(x)) + rm := m.getOperand_Reg(m.c.ValueDefinition(y)) + // Assuming that we have + // rm = [p1, p2] = [p1_lo, p1_hi, p2_lo, p2_high] + // rn = [q1, q2] = [q1_lo, q1_hi, q2_lo, q2_high] + // where pN and qN are 64-bit (quad word) lane, and pN_lo, pN_hi, qN_lo and qN_hi are 32-bit (double word) lane. + + // Copy rn into tmp1. + tmp1 := m.copyToTmp(rn.reg()) + + // And do the logical right shift by 32-bit on tmp1, which makes tmp1 = [0, p1_high, 0, p2_high] + shift := m.allocateInstr() + shift.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp1) + m.insert(shift) + + // Execute "pmuludq rm,tmp1", which makes tmp1 = [p1_high*q1_lo, p2_high*q2_lo] where each lane is 64-bit. + mul := m.allocateInstr() + mul.asXmmRmR(sseOpcodePmuludq, rm, tmp1) + m.insert(mul) + + // Copy rm value into tmp2. + tmp2 := m.copyToTmp(rm.reg()) + + // And do the logical right shift by 32-bit on tmp2, which makes tmp2 = [0, q1_high, 0, q2_high] + shift2 := m.allocateInstr() + shift2.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp2) + m.insert(shift2) + + // Execute "pmuludq rm,tmp2", which makes tmp2 = [p1_lo*q1_high, p2_lo*q2_high] where each lane is 64-bit. 
+ mul2 := m.allocateInstr() + mul2.asXmmRmR(sseOpcodePmuludq, rn, tmp2) + m.insert(mul2) + + // Adds tmp1 and tmp2 and do the logical left shift by 32-bit, + // which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32, (p2_lo*q2_high+p2_high*q2_lo)<<32] + add := m.allocateInstr() + add.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp2), tmp1) + m.insert(add) + + shift3 := m.allocateInstr() + shift3.asXmmRmiReg(sseOpcodePsllq, newOperandImm32(32), tmp1) + m.insert(shift3) + + // Copy rm value into tmp3. + tmp3 := m.copyToTmp(rm.reg()) + + // "pmuludq rm,tmp3" makes tmp3 = [p1_lo*q1_lo, p2_lo*q2_lo] where each lane is 64-bit. + mul3 := m.allocateInstr() + mul3.asXmmRmR(sseOpcodePmuludq, rn, tmp3) + m.insert(mul3) + + // Finally, we get the result by computing tmp1 + tmp3, + // which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32+p1_lo*q1_lo, (p2_lo*q2_high+p2_high*q2_lo)<<32+p2_lo*q2_lo] + add2 := m.allocateInstr() + add2.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp3), tmp1) + m.insert(add2) + + m.copyTo(tmp1, rd) + + } else { + var vecOp sseOpcode + switch lane { + case ssa.VecLaneI16x8: + vecOp = sseOpcodePmullw + case ssa.VecLaneI32x4: + vecOp = sseOpcodePmulld + default: + panic("unsupported: " + lane.String()) + } + m.lowerVbBinOp(vecOp, x, y, instr.Return()) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go new file mode 100644 index 000000000..c6fcb8673 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go @@ -0,0 +1,346 @@ +package amd64 + +import ( + "fmt" + "unsafe" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +type operand struct { + 
kind operandKind + data uint64 +} + +type operandKind byte + +const ( + // operandKindReg is an operand which is an integer Register. + operandKindReg operandKind = iota + 1 + + // operandKindMem is a value in Memory. + // 32, 64, or 128 bit value. + operandKindMem + + // operandKindImm32 is a signed-32-bit integer immediate value. + operandKindImm32 + + // operandKindLabel is a label. + operandKindLabel +) + +// String implements fmt.Stringer. +func (o operandKind) String() string { + switch o { + case operandKindReg: + return "reg" + case operandKindMem: + return "mem" + case operandKindImm32: + return "imm32" + case operandKindLabel: + return "label" + default: + panic("BUG: invalid operand kind") + } +} + +// format returns the string representation of the operand. +// _64 is only for the case where the operand is a register, and it's integer. +func (o *operand) format(_64 bool) string { + switch o.kind { + case operandKindReg: + return formatVRegSized(o.reg(), _64) + case operandKindMem: + return o.addressMode().String() + case operandKindImm32: + return fmt.Sprintf("$%d", int32(o.imm32())) + case operandKindLabel: + return backend.Label(o.imm32()).String() + default: + panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind)) + } +} + +//go:inline +func (o *operand) reg() regalloc.VReg { + return regalloc.VReg(o.data) +} + +//go:inline +func (o *operand) setReg(r regalloc.VReg) { + o.data = uint64(r) +} + +//go:inline +func (o *operand) addressMode() *amode { + return wazevoapi.PtrFromUintptr[amode](uintptr(o.data)) +} + +//go:inline +func (o *operand) imm32() uint32 { + return uint32(o.data) +} + +func (o *operand) label() backend.Label { + switch o.kind { + case operandKindLabel: + return backend.Label(o.data) + case operandKindMem: + mem := o.addressMode() + if mem.kind() != amodeRipRel { + panic("BUG: invalid label") + } + return backend.Label(mem.imm32) + default: + panic("BUG: invalid operand kind") + } +} + +func newOperandLabel(label backend.Label) 
operand {
+	return operand{kind: operandKindLabel, data: uint64(label)}
+}
+
+func newOperandReg(r regalloc.VReg) operand {
+	return operand{kind: operandKindReg, data: uint64(r)}
+}
+
+func newOperandImm32(imm32 uint32) operand {
+	return operand{kind: operandKindImm32, data: uint64(imm32)}
+}
+
+func newOperandMem(amode *amode) operand {
+	return operand{kind: operandKindMem, data: uint64(uintptr(unsafe.Pointer(amode)))}
+}
+
+// amode is a memory operand (addressing mode).
+type amode struct {
+	kindWithShift uint32
+	imm32         uint32
+	base          regalloc.VReg
+
+	// For amodeRegRegShift:
+	index regalloc.VReg
+}
+
+type amodeKind byte
+
+const (
+	// amodeImmReg calculates sign-extend-32-to-64(Immediate) + base
+	amodeImmReg amodeKind = iota + 1
+
+	// amodeImmRBP is the same as amodeImmReg, but the base register is fixed to RBP.
+	// The only difference is that it doesn't tell the register allocator to use RBP which is distracting for the
+	// register allocator.
+	amodeImmRBP
+
+	// amodeRegRegShift calculates sign-extend-32-to-64(Immediate) + base + (Register2 << Shift)
+	amodeRegRegShift
+
+	// amodeRipRel is a RIP-relative addressing mode specified by the label.
+	amodeRipRel
+
+	// TODO: there are other addressing modes such as the one without base register.
+) + +func (a *amode) kind() amodeKind { + return amodeKind(a.kindWithShift & 0xff) +} + +func (a *amode) shift() byte { + return byte(a.kindWithShift >> 8) +} + +func (a *amode) uses(rs *[]regalloc.VReg) { + switch a.kind() { + case amodeImmReg: + *rs = append(*rs, a.base) + case amodeRegRegShift: + *rs = append(*rs, a.base, a.index) + case amodeImmRBP, amodeRipRel: + default: + panic("BUG: invalid amode kind") + } +} + +func (a *amode) nregs() int { + switch a.kind() { + case amodeImmReg: + return 1 + case amodeRegRegShift: + return 2 + case amodeImmRBP, amodeRipRel: + return 0 + default: + panic("BUG: invalid amode kind") + } +} + +func (a *amode) assignUses(i int, reg regalloc.VReg) { + switch a.kind() { + case amodeImmReg: + if i == 0 { + a.base = reg + } else { + panic("BUG: invalid amode assignment") + } + case amodeRegRegShift: + if i == 0 { + a.base = reg + } else if i == 1 { + a.index = reg + } else { + panic("BUG: invalid amode assignment") + } + default: + panic("BUG: invalid amode assignment") + } +} + +func (m *machine) newAmodeImmReg(imm32 uint32, base regalloc.VReg) *amode { + ret := m.amodePool.Allocate() + *ret = amode{kindWithShift: uint32(amodeImmReg), imm32: imm32, base: base} + return ret +} + +func (m *machine) newAmodeImmRBPReg(imm32 uint32) *amode { + ret := m.amodePool.Allocate() + *ret = amode{kindWithShift: uint32(amodeImmRBP), imm32: imm32, base: rbpVReg} + return ret +} + +func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, shift byte) *amode { + if shift > 3 { + panic(fmt.Sprintf("BUG: invalid shift (must be 3>=): %d", shift)) + } + ret := m.amodePool.Allocate() + *ret = amode{kindWithShift: uint32(amodeRegRegShift) | uint32(shift)<<8, imm32: imm32, base: base, index: index} + return ret +} + +func (m *machine) newAmodeRipRel(label backend.Label) *amode { + ret := m.amodePool.Allocate() + *ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)} + return ret +} + +// String implements 
fmt.Stringer. +func (a *amode) String() string { + switch a.kind() { + case amodeImmReg, amodeImmRBP: + if a.imm32 == 0 { + return fmt.Sprintf("(%s)", formatVRegSized(a.base, true)) + } + return fmt.Sprintf("%d(%s)", int32(a.imm32), formatVRegSized(a.base, true)) + case amodeRegRegShift: + shift := 1 << a.shift() + if a.imm32 == 0 { + return fmt.Sprintf( + "(%s,%s,%d)", + formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift) + } + return fmt.Sprintf( + "%d(%s,%s,%d)", + int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift) + case amodeRipRel: + return fmt.Sprintf("%s(%%rip)", backend.Label(a.imm32)) + default: + panic("BUG: invalid amode kind") + } +} + +func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operand) { + if def.IsFromBlockParam() { + return newOperandReg(def.BlkParamVReg) + } + + if def.SSAValue().Type() == ssa.TypeV128 { + // SIMD instructions require strict memory alignment, so we don't support the memory operand for V128 at the moment. + return m.getOperand_Reg(def) + } + + if m.c.MatchInstr(def, ssa.OpcodeLoad) { + instr := def.Instr + ptr, offset, _ := instr.LoadData() + op = newOperandMem(m.lowerToAddressMode(ptr, offset)) + instr.MarkLowered() + return op + } + return m.getOperand_Reg(def) +} + +func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { + if def.IsFromBlockParam() { + return newOperandReg(def.BlkParamVReg) + } + + if m.c.MatchInstr(def, ssa.OpcodeLoad) { + instr := def.Instr + ptr, offset, _ := instr.LoadData() + op = newOperandMem(m.lowerToAddressMode(ptr, offset)) + instr.MarkLowered() + return op + } + return m.getOperand_Imm32_Reg(def) +} + +func (m *machine) getOperand_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { + if def.IsFromBlockParam() { + return newOperandReg(def.BlkParamVReg) + } + + instr := def.Instr + if instr.Constant() { + // If the operation is 64-bit, x64 sign-extends the 32-bit immediate value. 
+ // Therefore, we need to check if the immediate value is within the 32-bit range and if the sign bit is set, + // we should not use the immediate value. + if op, ok := asImm32Operand(instr.ConstantVal(), instr.Return().Type() == ssa.TypeI32); ok { + instr.MarkLowered() + return op + } + } + return m.getOperand_Reg(def) +} + +func asImm32Operand(val uint64, allowSignExt bool) (operand, bool) { + if imm32, ok := asImm32(val, allowSignExt); ok { + return newOperandImm32(imm32), true + } + return operand{}, false +} + +func asImm32(val uint64, allowSignExt bool) (uint32, bool) { + u32val := uint32(val) + if uint64(u32val) != val { + return 0, false + } + if !allowSignExt && u32val&0x80000000 != 0 { + return 0, false + } + return u32val, true +} + +func (m *machine) getOperand_Reg(def *backend.SSAValueDefinition) (op operand) { + var v regalloc.VReg + if def.IsFromBlockParam() { + v = def.BlkParamVReg + } else { + instr := def.Instr + if instr.Constant() { + // We inline all the constant instructions so that we could reduce the register usage. + v = m.lowerConstant(instr) + instr.MarkLowered() + } else { + if n := def.N; n == 0 { + v = m.c.VRegOf(instr.Return()) + } else { + _, rs := instr.Returns() + v = m.c.VRegOf(rs[n-1]) + } + } + } + return newOperandReg(v) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go new file mode 100644 index 000000000..5219837e3 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go @@ -0,0 +1,11 @@ +//go:build !tinygo + +package amd64 + +import "reflect" + +// setSliceLimits sets both Cap and Len for the given reflected slice. 
+func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
+	s.Len = int(limit)
+	s.Cap = int(limit)
+}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go
new file mode 100644
index 000000000..df4cf46ec
--- /dev/null
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go
@@ -0,0 +1,11 @@
+//go:build tinygo

+package amd64

+import "reflect"

+// setSliceLimits sets both Cap and Len for the given reflected slice.
+func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
+	s.Len = limit
+	s.Cap = limit
+}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reg.go
new file mode 100644
index 000000000..4aec856fa
--- /dev/null
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reg.go
@@ -0,0 +1,181 @@
+package amd64

+import (
+	"fmt"

+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+)

+// Amd64-specific registers.
+const (
+	// rax is a gp register.
+	rax = regalloc.RealRegInvalid + 1 + iota
+	// rcx is a gp register.
+	rcx
+	// rdx is a gp register.
+	rdx
+	// rbx is a gp register.
+	rbx
+	// rsp is a gp register.
+	rsp
+	// rbp is a gp register.
+	rbp
+	// rsi is a gp register.
+	rsi
+	// rdi is a gp register.
+	rdi
+	// r8 is a gp register.
+	r8
+	// r9 is a gp register.
+	r9
+	// r10 is a gp register.
+	r10
+	// r11 is a gp register.
+	r11
+	// r12 is a gp register.
+	r12
+	// r13 is a gp register.
+	r13
+	// r14 is a gp register.
+	r14
+	// r15 is a gp register.
+	r15

+	// xmm0 is a vector register.
+	xmm0
+	// xmm1 is a vector register.
+	xmm1
+	// xmm2 is a vector register.
+	xmm2
+	// xmm3 is a vector register.
+	xmm3
+	// xmm4 is a vector register.
+ xmm4 + // xmm5 is a vector register. + xmm5 + // xmm6 is a vector register. + xmm6 + // xmm7 is a vector register. + xmm7 + // xmm8 is a vector register. + xmm8 + // xmm9 is a vector register. + xmm9 + // xmm10 is a vector register. + xmm10 + // xmm11 is a vector register. + xmm11 + // xmm12 is a vector register. + xmm12 + // xmm13 is a vector register. + xmm13 + // xmm14 is a vector register. + xmm14 + // xmm15 is a vector register. + xmm15 +) + +var ( + raxVReg = regalloc.FromRealReg(rax, regalloc.RegTypeInt) + rcxVReg = regalloc.FromRealReg(rcx, regalloc.RegTypeInt) + rdxVReg = regalloc.FromRealReg(rdx, regalloc.RegTypeInt) + rbxVReg = regalloc.FromRealReg(rbx, regalloc.RegTypeInt) + rspVReg = regalloc.FromRealReg(rsp, regalloc.RegTypeInt) + rbpVReg = regalloc.FromRealReg(rbp, regalloc.RegTypeInt) + rsiVReg = regalloc.FromRealReg(rsi, regalloc.RegTypeInt) + rdiVReg = regalloc.FromRealReg(rdi, regalloc.RegTypeInt) + r8VReg = regalloc.FromRealReg(r8, regalloc.RegTypeInt) + r9VReg = regalloc.FromRealReg(r9, regalloc.RegTypeInt) + r10VReg = regalloc.FromRealReg(r10, regalloc.RegTypeInt) + r11VReg = regalloc.FromRealReg(r11, regalloc.RegTypeInt) + r12VReg = regalloc.FromRealReg(r12, regalloc.RegTypeInt) + r13VReg = regalloc.FromRealReg(r13, regalloc.RegTypeInt) + r14VReg = regalloc.FromRealReg(r14, regalloc.RegTypeInt) + r15VReg = regalloc.FromRealReg(r15, regalloc.RegTypeInt) + + xmm0VReg = regalloc.FromRealReg(xmm0, regalloc.RegTypeFloat) + xmm1VReg = regalloc.FromRealReg(xmm1, regalloc.RegTypeFloat) + xmm2VReg = regalloc.FromRealReg(xmm2, regalloc.RegTypeFloat) + xmm3VReg = regalloc.FromRealReg(xmm3, regalloc.RegTypeFloat) + xmm4VReg = regalloc.FromRealReg(xmm4, regalloc.RegTypeFloat) + xmm5VReg = regalloc.FromRealReg(xmm5, regalloc.RegTypeFloat) + xmm6VReg = regalloc.FromRealReg(xmm6, regalloc.RegTypeFloat) + xmm7VReg = regalloc.FromRealReg(xmm7, regalloc.RegTypeFloat) + xmm8VReg = regalloc.FromRealReg(xmm8, regalloc.RegTypeFloat) + xmm9VReg = 
regalloc.FromRealReg(xmm9, regalloc.RegTypeFloat) + xmm10VReg = regalloc.FromRealReg(xmm10, regalloc.RegTypeFloat) + xmm11VReg = regalloc.FromRealReg(xmm11, regalloc.RegTypeFloat) + xmm12VReg = regalloc.FromRealReg(xmm12, regalloc.RegTypeFloat) + xmm13VReg = regalloc.FromRealReg(xmm13, regalloc.RegTypeFloat) + xmm14VReg = regalloc.FromRealReg(xmm14, regalloc.RegTypeFloat) + xmm15VReg = regalloc.FromRealReg(xmm15, regalloc.RegTypeFloat) +) + +var regNames = [...]string{ + rax: "rax", + rcx: "rcx", + rdx: "rdx", + rbx: "rbx", + rsp: "rsp", + rbp: "rbp", + rsi: "rsi", + rdi: "rdi", + r8: "r8", + r9: "r9", + r10: "r10", + r11: "r11", + r12: "r12", + r13: "r13", + r14: "r14", + r15: "r15", + xmm0: "xmm0", + xmm1: "xmm1", + xmm2: "xmm2", + xmm3: "xmm3", + xmm4: "xmm4", + xmm5: "xmm5", + xmm6: "xmm6", + xmm7: "xmm7", + xmm8: "xmm8", + xmm9: "xmm9", + xmm10: "xmm10", + xmm11: "xmm11", + xmm12: "xmm12", + xmm13: "xmm13", + xmm14: "xmm14", + xmm15: "xmm15", +} + +func formatVRegSized(r regalloc.VReg, _64 bool) string { + if r.IsRealReg() { + if r.RegType() == regalloc.RegTypeInt { + rr := r.RealReg() + orig := regNames[rr] + if rr <= rdi { + if _64 { + return "%" + orig + } else { + return "%e" + orig[1:] + } + } else { + if _64 { + return "%" + orig + } else { + return "%" + orig + "d" + } + } + } else { + return "%" + regNames[r.RealReg()] + } + } else { + if r.RegType() == regalloc.RegTypeInt { + if _64 { + return fmt.Sprintf("%%r%d?", r.ID()) + } else { + return fmt.Sprintf("%%r%dd?", r.ID()) + } + } else { + return fmt.Sprintf("%%xmm%d?", r.ID()) + } + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go new file mode 100644 index 000000000..05ba5f027 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go @@ -0,0 +1,128 @@ +package amd64 + +import ( + "encoding/binary" + "reflect" 
+ "unsafe" + + "github.com/tetratelabs/wazero/internal/wasmdebug" +) + +func stackView(rbp, top uintptr) []byte { + var stackBuf []byte + { + // TODO: use unsafe.Slice after floor version is set to Go 1.20. + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf)) + hdr.Data = rbp + setSliceLimits(hdr, top-rbp) + } + return stackBuf +} + +// UnwindStack implements wazevo.unwindStack. +func UnwindStack(_, rbp, top uintptr, returnAddresses []uintptr) []uintptr { + stackBuf := stackView(rbp, top) + + for i := uint64(0); i < uint64(len(stackBuf)); { + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | + // | ReturnAddress | + // | Caller_RBP | + // +-----------------+ <---- Caller_RBP + // | ........... | + // | clobbered M | + // | ............ | + // | clobbered 0 | + // | spill slot N | + // | ............ | + // | spill slot 0 | + // | ReturnAddress | + // | Caller_RBP | + // +-----------------+ <---- RBP + // (low address) + + callerRBP := binary.LittleEndian.Uint64(stackBuf[i:]) + retAddr := binary.LittleEndian.Uint64(stackBuf[i+8:]) + returnAddresses = append(returnAddresses, uintptr(retAddr)) + i = callerRBP - uint64(rbp) + if len(returnAddresses) == wasmdebug.MaxFrames { + break + } + } + return returnAddresses +} + +// GoCallStackView implements wazevo.goCallStackView. +func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { + // (high address) + // +-----------------+ <----+ + // | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned. + // ^ | arg[N]/ret[M] | | + // sliceSize | | ............ 
| | SizeInBytes/8 + // | | arg[1]/ret[1] | | + // v | arg[0]/ret[0] | <----+ + // | SizeInBytes | + // +-----------------+ <---- stackPointerBeforeGoCall + // (low address) + data := unsafe.Pointer(uintptr(unsafe.Pointer(stackPointerBeforeGoCall)) + 8) + size := *stackPointerBeforeGoCall / 8 + return unsafe.Slice((*uint64)(data), int(size)) +} + +func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) { + diff := uint64(rsp - oldRsp) + + newBuf := stackView(rbp, top) + for i := uint64(0); i < uint64(len(newBuf)); { + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | + // | ReturnAddress | + // | Caller_RBP | + // +-----------------+ <---- Caller_RBP + // | ........... | + // | clobbered M | + // | ............ | + // | clobbered 0 | + // | spill slot N | + // | ............ | + // | spill slot 0 | + // | ReturnAddress | + // | Caller_RBP | + // +-----------------+ <---- RBP + // (low address) + + callerRBP := binary.LittleEndian.Uint64(newBuf[i:]) + if callerRBP == 0 { + // End of stack. 
+ break + } + if i64 := int64(callerRBP); i64 < int64(oldRsp) || i64 >= int64(oldTop) { + panic("BUG: callerRBP is out of range") + } + if int(callerRBP) < 0 { + panic("BUG: callerRBP is negative") + } + adjustedCallerRBP := callerRBP + diff + if int(adjustedCallerRBP) < 0 { + panic("BUG: adjustedCallerRBP is negative") + } + binary.LittleEndian.PutUint64(newBuf[i:], adjustedCallerRBP) + i = adjustedCallerRBP - uint64(rbp) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go new file mode 100644 index 000000000..6615471c6 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -0,0 +1,332 @@ +package arm64 + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +// References: +// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture +// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard + +var ( + intParamResultRegs = []regalloc.RealReg{x0, x1, x2, x3, x4, x5, x6, x7} + floatParamResultRegs = []regalloc.RealReg{v0, v1, v2, v3, v4, v5, v6, v7} +) + +var regInfo = ®alloc.RegisterInfo{ + AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{ + // We don't allocate: + // - x18: Reserved by the macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers + // - x28: Reserved by Go runtime. + // - x27(=tmpReg): because of the reason described on tmpReg. + regalloc.RegTypeInt: { + x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x19, x20, x21, x22, x23, x24, x25, + x26, x29, x30, + // These are the argument/return registers. 
Less preferred in the allocation. + x7, x6, x5, x4, x3, x2, x1, x0, + }, + regalloc.RegTypeFloat: { + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, + // These are the argument/return registers. Less preferred in the allocation. + v7, v6, v5, v4, v3, v2, v1, v0, + }, + }, + CalleeSavedRegisters: regalloc.NewRegSet( + x19, x20, x21, x22, x23, x24, x25, x26, x28, + v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ), + CallerSavedRegisters: regalloc.NewRegSet( + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x29, x30, + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, + ), + RealRegToVReg: []regalloc.VReg{ + x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg, + v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg, + }, + RealRegName: func(r regalloc.RealReg) string { return regNames[r] }, + RealRegType: func(r regalloc.RealReg) regalloc.RegType { + if r < v0 { + return regalloc.RegTypeInt + } + return regalloc.RegTypeFloat + }, +} + +// ArgsResultsRegs implements backend.Machine. 
+func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) { + return intParamResultRegs, floatParamResultRegs +} + +// LowerParams implements backend.FunctionABI. +func (m *machine) LowerParams(args []ssa.Value) { + a := m.currentABI + + for i, ssaArg := range args { + if !ssaArg.Valid() { + continue + } + reg := m.compiler.VRegOf(ssaArg) + arg := &a.Args[i] + if arg.Kind == backend.ABIArgKindReg { + m.InsertMove(reg, arg.Reg, arg.Type) + } else { + // TODO: we could use pair load if there's consecutive loads for the same type. + // + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | <-| + // | ReturnAddress | | + // +-----------------+ | + // | ........... | | + // | clobbered M | | argStackOffset: is unknown at this point of compilation. + // | ............ | | + // | clobbered 0 | | + // | spill slot N | | + // | ........... | | + // | spill slot 0 | | + // SP---> +-----------------+ <-+ + // (low address) + + bits := arg.Type.Bits() + // At this point of compilation, we don't yet know how much space exist below the return address. + // So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation. + amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} + load := m.allocateInstr() + switch arg.Type { + case ssa.TypeI32, ssa.TypeI64: + load.asULoad(operandNR(reg), amode, bits) + case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: + load.asFpuLoad(operandNR(reg), amode, bits) + default: + panic("BUG") + } + m.insert(load) + m.unresolvedAddressModes = append(m.unresolvedAddressModes, load) + } + } +} + +// LowerReturns lowers the given returns. 
+func (m *machine) LowerReturns(rets []ssa.Value) { + a := m.currentABI + + l := len(rets) - 1 + for i := range rets { + // Reverse order in order to avoid overwriting the stack returns existing in the return registers. + ret := rets[l-i] + r := &a.Rets[l-i] + reg := m.compiler.VRegOf(ret) + if def := m.compiler.ValueDefinition(ret); def.IsFromInstr() { + // Constant instructions are inlined. + if inst := def.Instr; inst.Constant() { + val := inst.Return() + valType := val.Type() + v := inst.ConstantVal() + m.insertLoadConstant(v, valType, reg) + } + } + if r.Kind == backend.ABIArgKindReg { + m.InsertMove(r.Reg, reg, ret.Type()) + } else { + // TODO: we could use pair store if there's consecutive stores for the same type. + // + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | <-+ + // | arg X | | + // | ....... | | + // | arg 1 | | + // | arg 0 | | + // | ReturnAddress | | + // +-----------------+ | + // | ........... | | + // | spill slot M | | retStackOffset: is unknown at this point of compilation. + // | ............ | | + // | spill slot 2 | | + // | spill slot 1 | | + // | clobbered 0 | | + // | clobbered 1 | | + // | ........... | | + // | clobbered N | | + // SP---> +-----------------+ <-+ + // (low address) + + bits := r.Type.Bits() + + // At this point of compilation, we don't yet know how much space exist below the return address. + // So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation. + amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} + store := m.allocateInstr() + store.asStore(operandNR(reg), amode, bits) + m.insert(store) + m.unresolvedAddressModes = append(m.unresolvedAddressModes, store) + } + } +} + +// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the +// caller side of the function call. 
+func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) { + arg := &a.Args[argIndex] + if def != nil && def.IsFromInstr() { + // Constant instructions are inlined. + if inst := def.Instr; inst.Constant() { + val := inst.Return() + valType := val.Type() + v := inst.ConstantVal() + m.insertLoadConstant(v, valType, reg) + } + } + if arg.Kind == backend.ABIArgKindReg { + m.InsertMove(arg.Reg, reg, arg.Type) + } else { + // TODO: we could use pair store if there's consecutive stores for the same type. + // + // Note that at this point, stack pointer is already adjusted. + bits := arg.Type.Bits() + amode := m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false) + store := m.allocateInstr() + store.asStore(operandNR(reg), amode, bits) + m.insert(store) + } +} + +func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, slotBegin int64) { + r := &a.Rets[retIndex] + if r.Kind == backend.ABIArgKindReg { + m.InsertMove(reg, r.Reg, r.Type) + } else { + // TODO: we could use pair load if there's consecutive loads for the same type. 
+ amode := m.resolveAddressModeForOffset(a.ArgStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false) + ldr := m.allocateInstr() + switch r.Type { + case ssa.TypeI32, ssa.TypeI64: + ldr.asULoad(operandNR(reg), amode, r.Type.Bits()) + case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: + ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits()) + default: + panic("BUG") + } + m.insert(ldr) + } +} + +func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) { + exct := m.executableContext + exct.PendingInstructions = exct.PendingInstructions[:0] + mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) + for _, instr := range exct.PendingInstructions { + cur = linkInstr(cur, instr) + } + return cur, mode +} + +func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode { + if rn.RegType() != regalloc.RegTypeInt { + panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64)) + } + var amode addressMode + if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) { + amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} + } else if offsetFitsInAddressModeKindRegSignedImm9(offset) { + amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} + } else { + var indexReg regalloc.VReg + if allowTmpRegUse { + m.lowerConstantI64(tmpRegVReg, offset) + indexReg = tmpRegVReg + } else { + indexReg = m.compiler.AllocateVReg(ssa.TypeI64) + m.lowerConstantI64(indexReg, offset) + } + amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} + } + return amode +} + +func (m *machine) lowerCall(si *ssa.Instruction) { + isDirectCall := si.Opcode() == ssa.OpcodeCall + var indirectCalleePtr ssa.Value + var directCallee ssa.FuncRef + var sigID ssa.SignatureID + var args []ssa.Value + if 
isDirectCall { + directCallee, sigID, args = si.CallData() + } else { + indirectCalleePtr, sigID, args, _ /* on arm64, the calling convention is compatible with the Go runtime */ = si.CallIndirectData() + } + calleeABI := m.compiler.GetFunctionABI(m.compiler.SSABuilder().ResolveSignature(sigID)) + + stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize()) + if m.maxRequiredStackSizeForCalls < stackSlotSize+16 { + m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame. + } + + for i, arg := range args { + reg := m.compiler.VRegOf(arg) + def := m.compiler.ValueDefinition(arg) + m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize) + } + + if isDirectCall { + call := m.allocateInstr() + call.asCall(directCallee, calleeABI) + m.insert(call) + } else { + ptr := m.compiler.VRegOf(indirectCalleePtr) + callInd := m.allocateInstr() + callInd.asCallIndirect(ptr, calleeABI) + m.insert(callInd) + } + + var index int + r1, rs := si.Returns() + if r1.Valid() { + m.callerGenFunctionReturnVReg(calleeABI, 0, m.compiler.VRegOf(r1), stackSlotSize) + index++ + } + + for _, r := range rs { + m.callerGenFunctionReturnVReg(calleeABI, index, m.compiler.VRegOf(r), stackSlotSize) + index++ + } +} + +func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) { + if imm12Operand, ok := asImm12Operand(uint64(diff)); ok { + alu := m.allocateInstr() + var ao aluOp + if add { + ao = aluOpAdd + } else { + ao = aluOpSub + } + alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true) + m.insert(alu) + } else { + m.lowerConstantI64(tmpRegVReg, diff) + alu := m.allocateInstr() + var ao aluOp + if add { + ao = aluOpAdd + } else { + ao = aluOpSub + } + alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true) + m.insert(alu) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.go 
b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.go new file mode 100644 index 000000000..5f0c613df --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.go @@ -0,0 +1,9 @@ +package arm64 + +// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below. +// This implements wazevo.entrypoint, and see the comments there for detail. +func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr) + +// afterGoFunctionCallEntrypoint enters the machine code after growing the stack. +// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail. +func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.s b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.s new file mode 100644 index 000000000..0b579f852 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.s @@ -0,0 +1,29 @@ +//go:build arm64 + +#include "funcdata.h" +#include "textflag.h" + +// See the comments on EmitGoEntryPreamble for what this function is supposed to do. 
+TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48 + MOVD preambleExecutable+0(FP), R27 + MOVD functionExectuable+8(FP), R24 + MOVD executionContextPtr+16(FP), R0 + MOVD moduleContextPtr+24(FP), R1 + MOVD paramResultSlicePtr+32(FP), R19 + MOVD goAllocatedStackSlicePtr+40(FP), R26 + JMP (R27) + +TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32 + MOVD goCallReturnAddress+0(FP), R20 + MOVD executionContextPtr+8(FP), R0 + MOVD stackPointer+16(FP), R19 + + // Save the current FP(R29), SP and LR(R30) into the wazevo.executionContext (stored in R0). + MOVD R29, 16(R0) // Store FP(R29) into [RO, #ExecutionContextOffsets.OriginalFramePointer] + MOVD RSP, R27 // Move SP to R27 (temporary register) since SP cannot be stored directly in str instructions. + MOVD R27, 24(R0) // Store R27 into [RO, #ExecutionContextOffsets.OriginalFramePointer] + MOVD R30, 32(R0) // Store R30 into [R0, #ExecutionContextOffsets.GoReturnAddress] + + // Load the new stack pointer (which sits somewhere in Go-allocated stack) into SP. + MOVD R19, RSP + JMP (R20) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go new file mode 100644 index 000000000..7a9cceb33 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go @@ -0,0 +1,230 @@ +package arm64 + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// CompileEntryPreamble implements backend.Machine. This assumes `entrypoint` function (in abi_go_entry_arm64.s) passes: +// +// 1. First (execution context ptr) and Second arguments are already passed in x0, and x1. +// 2. 
param/result slice ptr in x19; the pointer to []uint64{} which is used to pass arguments and accept return values. +// 3. Go-allocated stack slice ptr in x26. +// 4. Function executable in x24. +// +// also SP and FP are correct Go-runtime-based values, and LR is the return address to the Go-side caller. +func (m *machine) CompileEntryPreamble(signature *ssa.Signature) []byte { + root := m.constructEntryPreamble(signature) + m.encode(root) + return m.compiler.Buf() +} + +var ( + executionContextPtrReg = x0VReg + // callee-saved regs so that they can be used in the prologue and epilogue. + paramResultSlicePtr = x19VReg + savedExecutionContextPtr = x20VReg + // goAllocatedStackPtr is not used in the epilogue. + goAllocatedStackPtr = x26VReg + // paramResultSliceCopied is not used in the epilogue. + paramResultSliceCopied = x25VReg + // tmpRegVReg is not used in the epilogue. + functionExecutable = x24VReg +) + +func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, arg *backend.ABIArg, argStartOffsetFromSP int64) *instruction { + typ := arg.Type + bits := typ.Bits() + isStackArg := arg.Kind == backend.ABIArgKindStack + + var loadTargetReg operand + if !isStackArg { + loadTargetReg = operandNR(arg.Reg) + } else { + switch typ { + case ssa.TypeI32, ssa.TypeI64: + loadTargetReg = operandNR(x15VReg) + case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: + loadTargetReg = operandNR(v15VReg) + default: + panic("TODO?") + } + } + + var postIndexImm int64 + if typ == ssa.TypeV128 { + postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice. 
+ } else { + postIndexImm = 8 + } + loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} + + instr := m.allocateInstr() + switch typ { + case ssa.TypeI32: + instr.asULoad(loadTargetReg, loadMode, 32) + case ssa.TypeI64: + instr.asULoad(loadTargetReg, loadMode, 64) + case ssa.TypeF32: + instr.asFpuLoad(loadTargetReg, loadMode, 32) + case ssa.TypeF64: + instr.asFpuLoad(loadTargetReg, loadMode, 64) + case ssa.TypeV128: + instr.asFpuLoad(loadTargetReg, loadMode, 128) + } + cur = linkInstr(cur, instr) + + if isStackArg { + var storeMode addressMode + cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true) + toStack := m.allocateInstr() + toStack.asStore(loadTargetReg, storeMode, bits) + cur = linkInstr(cur, toStack) + } + return cur +} + +func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, result *backend.ABIArg, resultStartOffsetFromSP int64) *instruction { + isStackArg := result.Kind == backend.ABIArgKindStack + typ := result.Type + bits := typ.Bits() + + var storeTargetReg operand + if !isStackArg { + storeTargetReg = operandNR(result.Reg) + } else { + switch typ { + case ssa.TypeI32, ssa.TypeI64: + storeTargetReg = operandNR(x15VReg) + case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: + storeTargetReg = operandNR(v15VReg) + default: + panic("TODO?") + } + } + + var postIndexImm int64 + if typ == ssa.TypeV128 { + postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice. 
+ } else { + postIndexImm = 8 + } + + if isStackArg { + var loadMode addressMode + cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true) + toReg := m.allocateInstr() + switch typ { + case ssa.TypeI32, ssa.TypeI64: + toReg.asULoad(storeTargetReg, loadMode, bits) + case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: + toReg.asFpuLoad(storeTargetReg, loadMode, bits) + default: + panic("TODO?") + } + cur = linkInstr(cur, toReg) + } + + mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} + instr := m.allocateInstr() + instr.asStore(storeTargetReg, mode, bits) + cur = linkInstr(cur, instr) + return cur +} + +func (m *machine) constructEntryPreamble(sig *ssa.Signature) (root *instruction) { + abi := backend.FunctionABI{} + abi.Init(sig, intParamResultRegs, floatParamResultRegs) + + root = m.allocateNop() + + //// ----------------------------------- prologue ----------------------------------- //// + + // First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well. + // mov savedExecutionContextPtr, x0 + cur := m.move64(savedExecutionContextPtr, executionContextPtrReg, root) + + // Next, save the current FP, SP and LR into the wazevo.executionContext: + // str fp, [savedExecutionContextPtr, #OriginalFramePointer] + // mov tmp, sp ;; sp cannot be str'ed directly. 
+ // str sp, [savedExecutionContextPtr, #OriginalStackPointer] + // str lr, [savedExecutionContextPtr, #GoReturnAddress] + cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, true, cur) + cur = m.move64(tmpRegVReg, spVReg, cur) + cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, true, cur) + cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, true, cur) + + // Then, move the Go-allocated stack pointer to SP: + // mov sp, goAllocatedStackPtr + cur = m.move64(spVReg, goAllocatedStackPtr, cur) + + prReg := paramResultSlicePtr + if len(abi.Args) > 2 && len(abi.Rets) > 0 { + // paramResultSlicePtr is modified during the execution of goEntryPreamblePassArg, + // so copy it to another reg. + cur = m.move64(paramResultSliceCopied, paramResultSlicePtr, cur) + prReg = paramResultSliceCopied + } + + stackSlotSize := int64(abi.AlignedArgResultStackSlotSize()) + for i := range abi.Args { + if i < 2 { + // module context ptr and execution context ptr are passed in x0 and x1 by the Go assembly function. + continue + } + arg := &abi.Args[i] + cur = m.goEntryPreamblePassArg(cur, prReg, arg, -stackSlotSize) + } + + // Call the real function. + bl := m.allocateInstr() + bl.asCallIndirect(functionExecutable, &abi) + cur = linkInstr(cur, bl) + + ///// ----------------------------------- epilogue ----------------------------------- ///// + + // Store the register results into paramResultSlicePtr. + for i := range abi.Rets { + cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, &abi.Rets[i], abi.ArgStackSize-stackSlotSize) + } + + // Finally, restore the FP, SP and LR, and return to the Go code. + // ldr fp, [savedExecutionContextPtr, #OriginalFramePointer] + // ldr tmp, [savedExecutionContextPtr, #OriginalStackPointer] + // mov sp, tmp ;; sp cannot be str'ed directly. 
+ // ldr lr, [savedExecutionContextPtr, #GoReturnAddress] + // ret ;; --> return to the Go code + cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, false, cur) + cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, false, cur) + cur = m.move64(spVReg, tmpRegVReg, cur) + cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, false, cur) + retInst := m.allocateInstr() + retInst.asRet() + linkInstr(cur, retInst) + return +} + +func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction { + instr := m.allocateInstr() + instr.asMove64(dst, src) + return linkInstr(prev, instr) +} + +func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction { + instr := m.allocateInstr() + mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} + if store { + instr.asStore(operandNR(d), mode, 64) + } else { + instr.asULoad(operandNR(d), mode, 64) + } + return linkInstr(prev, instr) +} + +func linkInstr(prev, next *instruction) *instruction { + prev.next = next + next.prev = prev + return next +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go new file mode 100644 index 000000000..466b1f960 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go @@ -0,0 +1,428 @@ +package arm64 + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +var calleeSavedRegistersSorted = 
[]regalloc.VReg{ + x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg, + v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg, +} + +// CompileGoFunctionTrampoline implements backend.Machine. +func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { + exct := m.executableContext + argBegin := 1 // Skips exec context by default. + if needModuleContextPtr { + argBegin++ + } + + abi := &backend.FunctionABI{} + abi.Init(sig, intParamResultRegs, floatParamResultRegs) + m.currentABI = abi + + cur := m.allocateInstr() + cur.asNop0() + exct.RootInstr = cur + + // Execution context is always the first argument. + execCtrPtr := x0VReg + + // In the following, we create the following stack layout: + // + // (high address) + // SP ------> +-----------------+ <----+ + // | ....... | | + // | ret Y | | + // | ....... | | + // | ret 0 | | + // | arg X | | size_of_arg_ret + // | ....... | | + // | arg 1 | | + // | arg 0 | <----+ <-------- originalArg0Reg + // | size_of_arg_ret | + // | ReturnAddress | + // +-----------------+ <----+ + // | xxxx | | ;; might be padded to make it 16-byte aligned. + // +--->| arg[N]/ret[M] | | + // sliceSize| | ............ | | goCallStackSize + // | | arg[1]/ret[1] | | + // +--->| arg[0]/ret[0] | <----+ <-------- arg0ret0AddrReg + // | sliceSize | + // | frame_size | + // +-----------------+ + // (low address) + // + // where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions, + // therefore will be accessed as the usual []uint64. So that's where we need to pass/receive + // the arguments/return values. + + // First of all, to update the SP, and create "ReturnAddress + size_of_arg_ret". + cur = m.createReturnAddrAndSizeOfArgRetSlot(cur) + + const frameInfoSize = 16 // == frame_size + sliceSize. 
+ + // Next, we should allocate the stack for the Go function call if necessary. + goCallStackSize, sliceSizeInBytes := backend.GoFunctionCallRequiredStackSize(sig, argBegin) + cur = m.insertStackBoundsCheck(goCallStackSize+frameInfoSize, cur) + + originalArg0Reg := x17VReg // Caller save, so we can use it for whatever we want. + if m.currentABI.AlignedArgResultStackSlotSize() > 0 { + // At this point, SP points to `ReturnAddress`, so add 16 to get the original arg 0 slot. + cur = m.addsAddOrSubStackPointer(cur, originalArg0Reg, frameInfoSize, true) + } + + // Save the callee saved registers. + cur = m.saveRegistersInExecutionContext(cur, calleeSavedRegistersSorted) + + if needModuleContextPtr { + offset := wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.I64() + if !offsetFitsInAddressModeKindRegUnsignedImm12(64, offset) { + panic("BUG: too large or un-aligned offset for goFunctionCallCalleeModuleContextOpaque in execution context") + } + + // Module context is always the second argument. + moduleCtrPtr := x1VReg + store := m.allocateInstr() + amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} + store.asStore(operandNR(moduleCtrPtr), amode, 64) + cur = linkInstr(cur, store) + } + + // Advances the stack pointer. + cur = m.addsAddOrSubStackPointer(cur, spVReg, goCallStackSize, false) + + // Copy the pointer to x15VReg. + arg0ret0AddrReg := x15VReg // Caller save, so we can use it for whatever we want. + copySp := m.allocateInstr() + copySp.asMove64(arg0ret0AddrReg, spVReg) + cur = linkInstr(cur, copySp) + + // Next, we need to store all the arguments to the stack in the typical Wasm stack style. + for i := range abi.Args[argBegin:] { + arg := &abi.Args[argBegin+i] + store := m.allocateInstr() + var v regalloc.VReg + if arg.Kind == backend.ABIArgKindReg { + v = arg.Reg + } else { + cur, v = m.goFunctionCallLoadStackArg(cur, originalArg0Reg, arg, + // Caller save, so we can use it for whatever we want. 
+ x11VReg, v11VReg) + } + + var sizeInBits byte + if arg.Type == ssa.TypeV128 { + sizeInBits = 128 + } else { + sizeInBits = 64 + } + store.asStore(operandNR(v), + addressMode{ + kind: addressModeKindPostIndex, + rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8), + }, sizeInBits) + cur = linkInstr(cur, store) + } + + // Finally, now that we've advanced SP to arg[0]/ret[0], we allocate `frame_size + sliceSize`. + var frameSizeReg, sliceSizeReg regalloc.VReg + if goCallStackSize > 0 { + cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, goCallStackSize) + frameSizeReg = tmpRegVReg + cur = m.lowerConstantI64AndInsert(cur, x16VReg, sliceSizeInBytes/8) + sliceSizeReg = x16VReg + } else { + frameSizeReg = xzrVReg + sliceSizeReg = xzrVReg + } + _amode := addressModePreOrPostIndex(spVReg, -16, true) + storeP := m.allocateInstr() + storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode) + cur = linkInstr(cur, storeP) + + // Set the exit status on the execution context. + cur = m.setExitCode(cur, x0VReg, exitCode) + + // Save the current stack pointer. + cur = m.saveCurrentStackPointer(cur, x0VReg) + + // Exit the execution. + cur = m.storeReturnAddressAndExit(cur) + + // After the call, we need to restore the callee saved registers. + cur = m.restoreRegistersInExecutionContext(cur, calleeSavedRegistersSorted) + + // Get the pointer to the arg[0]/ret[0]: We need to skip `frame_size + sliceSize`. + if len(abi.Rets) > 0 { + cur = m.addsAddOrSubStackPointer(cur, arg0ret0AddrReg, frameInfoSize, true) + } + + // Advances the SP so that it points to `ReturnAddress`. + cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true) + ldr := m.allocateInstr() + // And load the return address. + ldr.asULoad(operandNR(lrVReg), + addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) + cur = linkInstr(cur, ldr) + + originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want. 
+ if m.currentABI.RetStackSize > 0 { + cur = m.addsAddOrSubStackPointer(cur, originalRet0Reg, m.currentABI.ArgStackSize, true) + } + + // Make the SP point to the original address (above the result slot). + if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 { + cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true) + } + + for i := range abi.Rets { + r := &abi.Rets[i] + if r.Kind == backend.ABIArgKindReg { + loadIntoReg := m.allocateInstr() + mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + switch r.Type { + case ssa.TypeI32: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + loadIntoReg.asULoad(operandNR(r.Reg), mode, 32) + case ssa.TypeI64: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + loadIntoReg.asULoad(operandNR(r.Reg), mode, 64) + case ssa.TypeF32: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32) + case ssa.TypeF64: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64) + case ssa.TypeV128: + mode.imm = 16 + loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128) + default: + panic("TODO") + } + cur = linkInstr(cur, loadIntoReg) + } else { + // First we need to load the value to a temporary just like ^^. + intTmp, floatTmp := x11VReg, v11VReg + loadIntoTmpReg := m.allocateInstr() + mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + var resultReg regalloc.VReg + switch r.Type { + case ssa.TypeI32: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32) + resultReg = intTmp + case ssa.TypeI64: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64) + resultReg = intTmp + case ssa.TypeF32: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. 
+ loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32) + resultReg = floatTmp + case ssa.TypeF64: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64) + resultReg = floatTmp + case ssa.TypeV128: + mode.imm = 16 + loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128) + resultReg = floatTmp + default: + panic("TODO") + } + cur = linkInstr(cur, loadIntoTmpReg) + cur = m.goFunctionCallStoreStackResult(cur, originalRet0Reg, r, resultReg) + } + } + + ret := m.allocateInstr() + ret.asRet() + linkInstr(cur, ret) + + m.encode(m.executableContext.RootInstr) + return m.compiler.Buf() +} + +func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction { + offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() + for _, v := range regs { + store := m.allocateInstr() + var sizeInBits byte + switch v.RegType() { + case regalloc.RegTypeInt: + sizeInBits = 64 + case regalloc.RegTypeFloat: + sizeInBits = 128 + } + store.asStore(operandNR(v), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: offset, + }, sizeInBits) + store.prev = cur + cur.next = store + cur = store + offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally store regs at the offset of multiple of 16. 
+ } + return cur +} + +func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction { + offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() + for _, v := range regs { + load := m.allocateInstr() + var as func(dst operand, amode addressMode, sizeInBits byte) + var sizeInBits byte + switch v.RegType() { + case regalloc.RegTypeInt: + as = load.asULoad + sizeInBits = 64 + case regalloc.RegTypeFloat: + as = load.asFpuLoad + sizeInBits = 128 + } + as(operandNR(v), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: offset, + }, sizeInBits) + cur = linkInstr(cur, load) + offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16. + } + return cur +} + +func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction { + exct := m.executableContext + exct.PendingInstructions = exct.PendingInstructions[:0] + m.lowerConstantI64(dst, v) + for _, instr := range exct.PendingInstructions { + cur = linkInstr(cur, instr) + } + return cur +} + +func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction { + exct := m.executableContext + exct.PendingInstructions = exct.PendingInstructions[:0] + m.lowerConstantI32(dst, v) + for _, instr := range exct.PendingInstructions { + cur = linkInstr(cur, instr) + } + return cur +} + +func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode wazevoapi.ExitCode) *instruction { + constReg := x17VReg // caller-saved, so we can use it. + cur = m.lowerConstantI32AndInsert(cur, constReg, int32(exitCode)) + + // Set the exit status on the execution context. 
+ setExistStatus := m.allocateInstr() + setExistStatus.asStore(operandNR(constReg), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), + }, 32) + cur = linkInstr(cur, setExistStatus) + return cur +} + +func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction { + // Read the return address into tmp, and store it in the execution context. + adr := m.allocateInstr() + adr.asAdr(tmpRegVReg, exitSequenceSize+8) + cur = linkInstr(cur, adr) + + storeReturnAddr := m.allocateInstr() + storeReturnAddr.asStore(operandNR(tmpRegVReg), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), + }, 64) + cur = linkInstr(cur, storeReturnAddr) + + // Exit the execution. + trapSeq := m.allocateInstr() + trapSeq.asExitSequence(x0VReg) + cur = linkInstr(cur, trapSeq) + return cur +} + +func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VReg) *instruction { + // Save the current stack pointer: + // mov tmp, sp, + // str tmp, [exec_ctx, #stackPointerBeforeGoCall] + movSp := m.allocateInstr() + movSp.asMove64(tmpRegVReg, spVReg) + cur = linkInstr(cur, movSp) + + strSp := m.allocateInstr() + strSp.asStore(operandNR(tmpRegVReg), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), + }, 64) + cur = linkInstr(cur, strSp) + return cur +} + +func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) { + load := m.allocateInstr() + var result regalloc.VReg + mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} + switch arg.Type { + case ssa.TypeI32: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. 
+ load.asULoad(operandNR(intVReg), mode, 32) + result = intVReg + case ssa.TypeI64: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + load.asULoad(operandNR(intVReg), mode, 64) + result = intVReg + case ssa.TypeF32: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + load.asFpuLoad(operandNR(floatVReg), mode, 32) + result = floatVReg + case ssa.TypeF64: + mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. + load.asFpuLoad(operandNR(floatVReg), mode, 64) + result = floatVReg + case ssa.TypeV128: + mode.imm = 16 + load.asFpuLoad(operandNR(floatVReg), mode, 128) + result = floatVReg + default: + panic("TODO") + } + + cur = linkInstr(cur, load) + return cur, result +} + +func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction { + store := m.allocateInstr() + mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} + var sizeInBits byte + switch result.Type { + case ssa.TypeI32, ssa.TypeF32: + mode.imm = 8 + sizeInBits = 32 + case ssa.TypeI64, ssa.TypeF64: + mode.imm = 8 + sizeInBits = 64 + case ssa.TypeV128: + mode.imm = 16 + sizeInBits = 128 + default: + panic("TODO") + } + store.asStore(operandNR(resultVReg), mode, sizeInBits) + return linkInstr(cur, store) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/cond.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/cond.go new file mode 100644 index 000000000..6f6cdd1b2 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/cond.go @@ -0,0 +1,215 @@ +package arm64 + +import ( + "strconv" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +type ( + cond uint64 + condKind byte +) + +const ( + // condKindRegisterZero represents a 
condition which checks if the register is zero. + // This indicates that the instruction must be encoded as CBZ: + // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero- + condKindRegisterZero condKind = iota + // condKindRegisterNotZero indicates that the instruction must be encoded as CBNZ: + // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero- + condKindRegisterNotZero + // condKindCondFlagSet indicates that the instruction must be encoded as B.cond: + // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally- + condKindCondFlagSet +) + +// kind returns the kind of condition which is stored in the first two bits. +func (c cond) kind() condKind { + return condKind(c & 0b11) +} + +func (c cond) asUint64() uint64 { + return uint64(c) +} + +// register returns the register for register conditions. +// This panics if the condition is not a register condition (condKindRegisterZero or condKindRegisterNotZero). +func (c cond) register() regalloc.VReg { + if c.kind() != condKindRegisterZero && c.kind() != condKindRegisterNotZero { + panic("condition is not a register") + } + return regalloc.VReg(c >> 2) +} + +func registerAsRegZeroCond(r regalloc.VReg) cond { + return cond(r)<<2 | cond(condKindRegisterZero) +} + +func registerAsRegNotZeroCond(r regalloc.VReg) cond { + return cond(r)<<2 | cond(condKindRegisterNotZero) +} + +func (c cond) flag() condFlag { + if c.kind() != condKindCondFlagSet { + panic("condition is not a flag") + } + return condFlag(c >> 2) +} + +func (c condFlag) asCond() cond { + return cond(c)<<2 | cond(condKindCondFlagSet) +} + +// condFlag represents a condition flag for conditional branches. +// The value matches the encoding of condition flags in the ARM64 instruction set. 
+// https://developer.arm.com/documentation/den0024/a/The-A64-instruction-set/Data-processing-instructions/Conditional-instructions +type condFlag uint8 + +const ( + eq condFlag = iota // eq represents "equal" + ne // ne represents "not equal" + hs // hs represents "higher or same" + lo // lo represents "lower" + mi // mi represents "minus or negative result" + pl // pl represents "plus or positive result" + vs // vs represents "overflow set" + vc // vc represents "overflow clear" + hi // hi represents "higher" + ls // ls represents "lower or same" + ge // ge represents "greater or equal" + lt // lt represents "less than" + gt // gt represents "greater than" + le // le represents "less than or equal" + al // al represents "always" + nv // nv represents "never" +) + +// invert returns the inverted condition. +func (c condFlag) invert() condFlag { + switch c { + case eq: + return ne + case ne: + return eq + case hs: + return lo + case lo: + return hs + case mi: + return pl + case pl: + return mi + case vs: + return vc + case vc: + return vs + case hi: + return ls + case ls: + return hi + case ge: + return lt + case lt: + return ge + case gt: + return le + case le: + return gt + case al: + return nv + case nv: + return al + default: + panic(c) + } +} + +// String implements fmt.Stringer. +func (c condFlag) String() string { + switch c { + case eq: + return "eq" + case ne: + return "ne" + case hs: + return "hs" + case lo: + return "lo" + case mi: + return "mi" + case pl: + return "pl" + case vs: + return "vs" + case vc: + return "vc" + case hi: + return "hi" + case ls: + return "ls" + case ge: + return "ge" + case lt: + return "lt" + case gt: + return "gt" + case le: + return "le" + case al: + return "al" + case nv: + return "nv" + default: + panic(strconv.Itoa(int(c))) + } +} + +// condFlagFromSSAIntegerCmpCond returns the condition flag for the given ssa.IntegerCmpCond. 
+func condFlagFromSSAIntegerCmpCond(c ssa.IntegerCmpCond) condFlag { + switch c { + case ssa.IntegerCmpCondEqual: + return eq + case ssa.IntegerCmpCondNotEqual: + return ne + case ssa.IntegerCmpCondSignedLessThan: + return lt + case ssa.IntegerCmpCondSignedGreaterThanOrEqual: + return ge + case ssa.IntegerCmpCondSignedGreaterThan: + return gt + case ssa.IntegerCmpCondSignedLessThanOrEqual: + return le + case ssa.IntegerCmpCondUnsignedLessThan: + return lo + case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual: + return hs + case ssa.IntegerCmpCondUnsignedGreaterThan: + return hi + case ssa.IntegerCmpCondUnsignedLessThanOrEqual: + return ls + default: + panic(c) + } +} + +// condFlagFromSSAFloatCmpCond returns the condition flag for the given ssa.FloatCmpCond. +func condFlagFromSSAFloatCmpCond(c ssa.FloatCmpCond) condFlag { + switch c { + case ssa.FloatCmpCondEqual: + return eq + case ssa.FloatCmpCondNotEqual: + return ne + case ssa.FloatCmpCondLessThan: + return mi + case ssa.FloatCmpCondLessThanOrEqual: + return ls + case ssa.FloatCmpCondGreaterThan: + return gt + case ssa.FloatCmpCondGreaterThanOrEqual: + return ge + default: + panic(c) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go new file mode 100644 index 000000000..8aabc5997 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -0,0 +1,2545 @@ +package arm64 + +import ( + "fmt" + "math" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +type ( + // instruction represents either a real instruction in arm64, or the meta instructions + // that are convenient for code generation. For example, inline constants are also treated + // as instructions. 
+ // + // Basically, each instruction knows how to get encoded in binaries. Hence, the final output of compilation + // can be considered equivalent to the sequence of such instructions. + // + // Each field is interpreted depending on the kind. + // + // TODO: optimize the layout later once the impl settles. + instruction struct { + prev, next *instruction + u1, u2, u3 uint64 + rd, rm, rn, ra operand + amode addressMode + kind instructionKind + addedBeforeRegAlloc bool + } + + // instructionKind represents the kind of instruction. + // This controls how the instruction struct is interpreted. + instructionKind byte +) + +func asNop0(i *instruction) { + i.kind = nop0 +} + +func setNext(i, next *instruction) { + i.next = next +} + +func setPrev(i, prev *instruction) { + i.prev = prev +} + +// IsCall implements regalloc.Instr IsCall. +func (i *instruction) IsCall() bool { + return i.kind == call +} + +// IsIndirectCall implements regalloc.Instr IsIndirectCall. +func (i *instruction) IsIndirectCall() bool { + return i.kind == callInd +} + +// IsReturn implements regalloc.Instr IsReturn. +func (i *instruction) IsReturn() bool { + return i.kind == ret +} + +// Next implements regalloc.Instr Next. +func (i *instruction) Next() regalloc.Instr { + return i.next +} + +// Prev implements regalloc.Instr Prev. +func (i *instruction) Prev() regalloc.Instr { + return i.prev +} + +// AddedBeforeRegAlloc implements regalloc.Instr AddedBeforeRegAlloc. 
+func (i *instruction) AddedBeforeRegAlloc() bool { + return i.addedBeforeRegAlloc +} + +type defKind byte + +const ( + defKindNone defKind = iota + 1 + defKindRD + defKindCall +) + +var defKinds = [numInstructionKinds]defKind{ + adr: defKindRD, + aluRRR: defKindRD, + aluRRRR: defKindRD, + aluRRImm12: defKindRD, + aluRRBitmaskImm: defKindRD, + aluRRRShift: defKindRD, + aluRRImmShift: defKindRD, + aluRRRExtend: defKindRD, + bitRR: defKindRD, + movZ: defKindRD, + movK: defKindRD, + movN: defKindRD, + mov32: defKindRD, + mov64: defKindRD, + fpuMov64: defKindRD, + fpuMov128: defKindRD, + fpuRR: defKindRD, + fpuRRR: defKindRD, + nop0: defKindNone, + call: defKindCall, + callInd: defKindCall, + ret: defKindNone, + store8: defKindNone, + store16: defKindNone, + store32: defKindNone, + store64: defKindNone, + exitSequence: defKindNone, + condBr: defKindNone, + br: defKindNone, + brTableSequence: defKindNone, + cSet: defKindRD, + extend: defKindRD, + fpuCmp: defKindNone, + uLoad8: defKindRD, + uLoad16: defKindRD, + uLoad32: defKindRD, + sLoad8: defKindRD, + sLoad16: defKindRD, + sLoad32: defKindRD, + uLoad64: defKindRD, + fpuLoad32: defKindRD, + fpuLoad64: defKindRD, + fpuLoad128: defKindRD, + vecLoad1R: defKindRD, + loadFpuConst32: defKindRD, + loadFpuConst64: defKindRD, + loadFpuConst128: defKindRD, + fpuStore32: defKindNone, + fpuStore64: defKindNone, + fpuStore128: defKindNone, + udf: defKindNone, + cSel: defKindRD, + fpuCSel: defKindRD, + movToVec: defKindRD, + movFromVec: defKindRD, + movFromVecSigned: defKindRD, + vecDup: defKindRD, + vecDupElement: defKindRD, + vecExtract: defKindRD, + vecMisc: defKindRD, + vecMovElement: defKindRD, + vecLanes: defKindRD, + vecShiftImm: defKindRD, + vecTbl: defKindRD, + vecTbl2: defKindRD, + vecPermute: defKindRD, + vecRRR: defKindRD, + vecRRRRewrite: defKindNone, + fpuToInt: defKindRD, + intToFpu: defKindRD, + cCmpImm: defKindNone, + movToFPSR: defKindNone, + movFromFPSR: defKindRD, + emitSourceOffsetInfo: defKindNone, + atomicRmw: 
defKindRD, + atomicCas: defKindNone, + atomicLoad: defKindRD, + atomicStore: defKindNone, + dmb: defKindNone, + loadConstBlockArg: defKindRD, +} + +// Defs returns the list of regalloc.VReg that are defined by the instruction. +// In order to reduce the number of allocations, the caller can pass the slice to be used. +func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { + *regs = (*regs)[:0] + switch defKinds[i.kind] { + case defKindNone: + case defKindRD: + *regs = append(*regs, i.rd.nr()) + case defKindCall: + _, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2) + for i := byte(0); i < retIntRealRegs; i++ { + *regs = append(*regs, regInfo.RealRegToVReg[intParamResultRegs[i]]) + } + for i := byte(0); i < retFloatRealRegs; i++ { + *regs = append(*regs, regInfo.RealRegToVReg[floatParamResultRegs[i]]) + } + default: + panic(fmt.Sprintf("defKind for %v not defined", i)) + } + return *regs +} + +// AssignDef implements regalloc.Instr AssignDef. +func (i *instruction) AssignDef(reg regalloc.VReg) { + switch defKinds[i.kind] { + case defKindNone: + case defKindRD: + i.rd = i.rd.assignReg(reg) + case defKindCall: + panic("BUG: call instructions shouldn't be assigned") + default: + panic(fmt.Sprintf("defKind for %v not defined", i)) + } +} + +type useKind byte + +const ( + useKindNone useKind = iota + 1 + useKindRN + useKindRNRM + useKindRNRMRA + useKindRNRN1RM + useKindCall + useKindCallInd + useKindAMode + useKindRNAMode + useKindCond + // useKindRDRewrite indicates an instruction where RD is used both as a source and destination. + // A temporary register for RD must be allocated explicitly with the source copied to this + // register before the instruction and the value copied from this register to the instruction + // return register. 
+ useKindRDRewrite +) + +var useKinds = [numInstructionKinds]useKind{ + udf: useKindNone, + aluRRR: useKindRNRM, + aluRRRR: useKindRNRMRA, + aluRRImm12: useKindRN, + aluRRBitmaskImm: useKindRN, + aluRRRShift: useKindRNRM, + aluRRImmShift: useKindRN, + aluRRRExtend: useKindRNRM, + bitRR: useKindRN, + movZ: useKindNone, + movK: useKindNone, + movN: useKindNone, + mov32: useKindRN, + mov64: useKindRN, + fpuMov64: useKindRN, + fpuMov128: useKindRN, + fpuRR: useKindRN, + fpuRRR: useKindRNRM, + nop0: useKindNone, + call: useKindCall, + callInd: useKindCallInd, + ret: useKindNone, + store8: useKindRNAMode, + store16: useKindRNAMode, + store32: useKindRNAMode, + store64: useKindRNAMode, + exitSequence: useKindRN, + condBr: useKindCond, + br: useKindNone, + brTableSequence: useKindRN, + cSet: useKindNone, + extend: useKindRN, + fpuCmp: useKindRNRM, + uLoad8: useKindAMode, + uLoad16: useKindAMode, + uLoad32: useKindAMode, + sLoad8: useKindAMode, + sLoad16: useKindAMode, + sLoad32: useKindAMode, + uLoad64: useKindAMode, + fpuLoad32: useKindAMode, + fpuLoad64: useKindAMode, + fpuLoad128: useKindAMode, + fpuStore32: useKindRNAMode, + fpuStore64: useKindRNAMode, + fpuStore128: useKindRNAMode, + loadFpuConst32: useKindNone, + loadFpuConst64: useKindNone, + loadFpuConst128: useKindNone, + vecLoad1R: useKindRN, + cSel: useKindRNRM, + fpuCSel: useKindRNRM, + movToVec: useKindRN, + movFromVec: useKindRN, + movFromVecSigned: useKindRN, + vecDup: useKindRN, + vecDupElement: useKindRN, + vecExtract: useKindRNRM, + cCmpImm: useKindRN, + vecMisc: useKindRN, + vecMovElement: useKindRN, + vecLanes: useKindRN, + vecShiftImm: useKindRN, + vecTbl: useKindRNRM, + vecTbl2: useKindRNRN1RM, + vecRRR: useKindRNRM, + vecRRRRewrite: useKindRDRewrite, + vecPermute: useKindRNRM, + fpuToInt: useKindRN, + intToFpu: useKindRN, + movToFPSR: useKindRN, + movFromFPSR: useKindNone, + adr: useKindNone, + emitSourceOffsetInfo: useKindNone, + atomicRmw: useKindRNRM, + atomicCas: useKindRDRewrite, + atomicLoad: 
useKindRN, + atomicStore: useKindRNRM, + loadConstBlockArg: useKindNone, + dmb: useKindNone, +} + +// Uses returns the list of regalloc.VReg that are used by the instruction. +// In order to reduce the number of allocations, the caller can pass the slice to be used. +func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { + *regs = (*regs)[:0] + switch useKinds[i.kind] { + case useKindNone: + case useKindRN: + if rn := i.rn.reg(); rn.Valid() { + *regs = append(*regs, rn) + } + case useKindRNRM: + if rn := i.rn.reg(); rn.Valid() { + *regs = append(*regs, rn) + } + if rm := i.rm.reg(); rm.Valid() { + *regs = append(*regs, rm) + } + case useKindRNRMRA: + if rn := i.rn.reg(); rn.Valid() { + *regs = append(*regs, rn) + } + if rm := i.rm.reg(); rm.Valid() { + *regs = append(*regs, rm) + } + if ra := i.ra.reg(); ra.Valid() { + *regs = append(*regs, ra) + } + case useKindRNRN1RM: + if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() { + rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) + *regs = append(*regs, rn, rn1) + } + if rm := i.rm.reg(); rm.Valid() { + *regs = append(*regs, rm) + } + case useKindAMode: + if amodeRN := i.amode.rn; amodeRN.Valid() { + *regs = append(*regs, amodeRN) + } + if amodeRM := i.amode.rm; amodeRM.Valid() { + *regs = append(*regs, amodeRM) + } + case useKindRNAMode: + *regs = append(*regs, i.rn.reg()) + if amodeRN := i.amode.rn; amodeRN.Valid() { + *regs = append(*regs, amodeRN) + } + if amodeRM := i.amode.rm; amodeRM.Valid() { + *regs = append(*regs, amodeRM) + } + case useKindCond: + cnd := cond(i.u1) + if cnd.kind() != condKindCondFlagSet { + *regs = append(*regs, cnd.register()) + } + case useKindCallInd: + *regs = append(*regs, i.rn.nr()) + fallthrough + case useKindCall: + argIntRealRegs, argFloatRealRegs, _, _, _ := backend.ABIInfoFromUint64(i.u2) + for i := byte(0); i < argIntRealRegs; i++ { + *regs = append(*regs, regInfo.RealRegToVReg[intParamResultRegs[i]]) + } + for i := byte(0); i < argFloatRealRegs; i++ { + *regs 
= append(*regs, regInfo.RealRegToVReg[floatParamResultRegs[i]]) + } + case useKindRDRewrite: + *regs = append(*regs, i.rn.reg()) + *regs = append(*regs, i.rm.reg()) + *regs = append(*regs, i.rd.reg()) + default: + panic(fmt.Sprintf("useKind for %v not defined", i)) + } + return *regs +} + +func (i *instruction) AssignUse(index int, reg regalloc.VReg) { + switch useKinds[i.kind] { + case useKindNone: + case useKindRN: + if rn := i.rn.reg(); rn.Valid() { + i.rn = i.rn.assignReg(reg) + } + case useKindRNRM: + if index == 0 { + if rn := i.rn.reg(); rn.Valid() { + i.rn = i.rn.assignReg(reg) + } + } else { + if rm := i.rm.reg(); rm.Valid() { + i.rm = i.rm.assignReg(reg) + } + } + case useKindRDRewrite: + if index == 0 { + if rn := i.rn.reg(); rn.Valid() { + i.rn = i.rn.assignReg(reg) + } + } else if index == 1 { + if rm := i.rm.reg(); rm.Valid() { + i.rm = i.rm.assignReg(reg) + } + } else { + if rd := i.rd.reg(); rd.Valid() { + i.rd = i.rd.assignReg(reg) + } + } + case useKindRNRN1RM: + if index == 0 { + if rn := i.rn.reg(); rn.Valid() { + i.rn = i.rn.assignReg(reg) + } + if rn1 := i.rn.reg() + 1; rn1.Valid() { + i.rm = i.rm.assignReg(reg + 1) + } + } else { + if rm := i.rm.reg(); rm.Valid() { + i.rm = i.rm.assignReg(reg) + } + } + case useKindRNRMRA: + if index == 0 { + if rn := i.rn.reg(); rn.Valid() { + i.rn = i.rn.assignReg(reg) + } + } else if index == 1 { + if rm := i.rm.reg(); rm.Valid() { + i.rm = i.rm.assignReg(reg) + } + } else { + if ra := i.ra.reg(); ra.Valid() { + i.ra = i.ra.assignReg(reg) + } + } + case useKindAMode: + if index == 0 { + if amodeRN := i.amode.rn; amodeRN.Valid() { + i.amode.rn = reg + } + } else { + if amodeRM := i.amode.rm; amodeRM.Valid() { + i.amode.rm = reg + } + } + case useKindRNAMode: + if index == 0 { + i.rn = i.rn.assignReg(reg) + } else if index == 1 { + if amodeRN := i.amode.rn; amodeRN.Valid() { + i.amode.rn = reg + } else { + panic("BUG") + } + } else { + if amodeRM := i.amode.rm; amodeRM.Valid() { + i.amode.rm = reg + } else { 
+ panic("BUG") + } + } + case useKindCond: + c := cond(i.u1) + switch c.kind() { + case condKindRegisterZero: + i.u1 = uint64(registerAsRegZeroCond(reg)) + case condKindRegisterNotZero: + i.u1 = uint64(registerAsRegNotZeroCond(reg)) + } + case useKindCall: + panic("BUG: call instructions shouldn't be assigned") + case useKindCallInd: + i.rn = i.rn.assignReg(reg) + default: + panic(fmt.Sprintf("useKind for %v not defined", i)) + } +} + +func (i *instruction) asCall(ref ssa.FuncRef, abi *backend.FunctionABI) { + i.kind = call + i.u1 = uint64(ref) + if abi != nil { + i.u2 = abi.ABIInfoAsUint64() + } +} + +func (i *instruction) asCallIndirect(ptr regalloc.VReg, abi *backend.FunctionABI) { + i.kind = callInd + i.rn = operandNR(ptr) + if abi != nil { + i.u2 = abi.ABIInfoAsUint64() + } +} + +func (i *instruction) callFuncRef() ssa.FuncRef { + return ssa.FuncRef(i.u1) +} + +// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) +func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { + i.kind = movZ + i.rd = operandNR(dst) + i.u1 = imm + i.u2 = shift + if dst64bit { + i.u3 = 1 + } +} + +// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) +func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { + i.kind = movK + i.rd = operandNR(dst) + i.u1 = imm + i.u2 = shift + if dst64bit { + i.u3 = 1 + } +} + +// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) +func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { + i.kind = movN + i.rd = operandNR(dst) + i.u1 = imm + i.u2 = shift + if dst64bit { + i.u3 = 1 + } +} + +func (i *instruction) asNop0() *instruction { + i.kind = nop0 + return i +} + +func (i *instruction) asNop0WithLabel(l label) { + i.kind = nop0 + i.u1 = uint64(l) +} + +func (i *instruction) nop0Label() 
label { + return label(i.u1) +} + +func (i *instruction) asRet() { + i.kind = ret +} + +func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) { + i.kind = storeP64 + i.rn = operandNR(src1) + i.rm = operandNR(src2) + i.amode = amode +} + +func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { + i.kind = loadP64 + i.rn = operandNR(src1) + i.rm = operandNR(src2) + i.amode = amode +} + +func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { + switch sizeInBits { + case 8: + i.kind = store8 + case 16: + i.kind = store16 + case 32: + if src.reg().RegType() == regalloc.RegTypeInt { + i.kind = store32 + } else { + i.kind = fpuStore32 + } + case 64: + if src.reg().RegType() == regalloc.RegTypeInt { + i.kind = store64 + } else { + i.kind = fpuStore64 + } + case 128: + i.kind = fpuStore128 + } + i.rn = src + i.amode = amode +} + +func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { + switch sizeInBits { + case 8: + i.kind = sLoad8 + case 16: + i.kind = sLoad16 + case 32: + i.kind = sLoad32 + default: + panic("BUG") + } + i.rd = dst + i.amode = amode +} + +func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { + switch sizeInBits { + case 8: + i.kind = uLoad8 + case 16: + i.kind = uLoad16 + case 32: + i.kind = uLoad32 + case 64: + i.kind = uLoad64 + } + i.rd = dst + i.amode = amode +} + +func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { + switch sizeInBits { + case 32: + i.kind = fpuLoad32 + case 64: + i.kind = fpuLoad64 + case 128: + i.kind = fpuLoad128 + } + i.rd = dst + i.amode = amode +} + +func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { + // NOTE: currently only has support for no-offset loads, though it is suspicious that + // we would need to support offset load (that is only available for post-index). 
+ i.kind = vecLoad1R + i.rd = rd + i.rn = rn + i.u1 = uint64(arr) +} + +func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) { + i.kind = cSet + i.rd = operandNR(rd) + i.u1 = uint64(c) + if mask { + i.u2 = 1 + } +} + +func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { + i.kind = cSel + i.rd = rd + i.rn = rn + i.rm = rm + i.u1 = uint64(c) + if _64bit { + i.u3 = 1 + } +} + +func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { + i.kind = fpuCSel + i.rd = rd + i.rn = rn + i.rm = rm + i.u1 = uint64(c) + if _64bit { + i.u3 = 1 + } +} + +func (i *instruction) asBr(target label) { + if target == labelReturn { + panic("BUG: call site should special case for returnLabel") + } + i.kind = br + i.u1 = uint64(target) +} + +func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, targetCounts int) { + i.kind = brTableSequence + i.rn = operandNR(indexReg) + i.u1 = uint64(targetIndex) + i.u2 = uint64(targetCounts) +} + +func (i *instruction) brTableSequenceOffsetsResolved() { + i.u3 = 1 // indicate that the offsets are resolved, for debugging. +} + +func (i *instruction) brLabel() label { + return label(i.u1) +} + +// brOffsetResolved is called when the target label is resolved. +func (i *instruction) brOffsetResolve(offset int64) { + i.u2 = uint64(offset) + i.u3 = 1 // indicate that the offset is resolved, for debugging. +} + +func (i *instruction) brOffset() int64 { + return int64(i.u2) +} + +// asCondBr encodes a conditional branch instruction. is64bit is only needed when cond is not flag. +func (i *instruction) asCondBr(c cond, target label, is64bit bool) { + i.kind = condBr + i.u1 = c.asUint64() + i.u2 = uint64(target) + if is64bit { + i.u3 = 1 + } +} + +func (i *instruction) setCondBrTargets(target label) { + i.u2 = uint64(target) +} + +func (i *instruction) condBrLabel() label { + return label(i.u2) +} + +// condBrOffsetResolve is called when the target label is resolved. 
+func (i *instruction) condBrOffsetResolve(offset int64) { + i.rd.data = uint64(offset) + i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. +} + +// condBrOffsetResolved returns true if condBrOffsetResolve is already called. +func (i *instruction) condBrOffsetResolved() bool { + return i.rd.data2 == 1 +} + +func (i *instruction) condBrOffset() int64 { + return int64(i.rd.data) +} + +func (i *instruction) condBrCond() cond { + return cond(i.u1) +} + +func (i *instruction) condBr64bit() bool { + return i.u3 == 1 +} + +func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { + i.kind = loadFpuConst32 + i.u1 = raw + i.rd = operandNR(rd) +} + +func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { + i.kind = loadFpuConst64 + i.u1 = raw + i.rd = operandNR(rd) +} + +func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { + i.kind = loadFpuConst128 + i.u1 = lo + i.u2 = hi + i.rd = operandNR(rd) +} + +func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { + i.kind = fpuCmp + i.rn, i.rm = rn, rm + if is64bit { + i.u3 = 1 + } +} + +func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, is64bit bool) { + i.kind = cCmpImm + i.rn = rn + i.rm.data = imm + i.u1 = uint64(c) + i.u2 = uint64(flag) + if is64bit { + i.u3 = 1 + } +} + +// asALU setups a basic ALU instruction. +func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { + switch rm.kind { + case operandKindNR: + i.kind = aluRRR + case operandKindSR: + i.kind = aluRRRShift + case operandKindER: + i.kind = aluRRRExtend + case operandKindImm12: + i.kind = aluRRImm12 + default: + panic("BUG") + } + i.u1 = uint64(aluOp) + i.rd, i.rn, i.rm = rd, rn, rm + if dst64bit { + i.u3 = 1 + } +} + +// asALU setups a basic ALU instruction. 
+func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { + i.kind = aluRRRR + i.u1 = uint64(aluOp) + i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra + if dst64bit { + i.u3 = 1 + } +} + +// asALUShift setups a shift based ALU instruction. +func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { + switch rm.kind { + case operandKindNR: + i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands. + case operandKindShiftImm: + i.kind = aluRRImmShift + default: + panic("BUG") + } + i.u1 = uint64(aluOp) + i.rd, i.rn, i.rm = rd, rn, rm + if dst64bit { + i.u3 = 1 + } +} + +func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { + i.kind = aluRRBitmaskImm + i.u1 = uint64(aluOp) + i.rn, i.rd = operandNR(rn), operandNR(rd) + i.u2 = imm + if dst64bit { + i.u3 = 1 + } +} + +func (i *instruction) asMovToFPSR(rn regalloc.VReg) { + i.kind = movToFPSR + i.rn = operandNR(rn) +} + +func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { + i.kind = movFromFPSR + i.rd = operandNR(rd) +} + +func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { + i.kind = bitRR + i.rn, i.rd = operandNR(rn), operandNR(rd) + i.u1 = uint64(bitOp) + if is64bit { + i.u2 = 1 + } +} + +func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { + i.kind = fpuRRR + i.u1 = uint64(op) + i.rd, i.rn, i.rm = rd, rn, rm + if dst64bit { + i.u3 = 1 + } +} + +func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { + i.kind = fpuRR + i.u1 = uint64(op) + i.rd, i.rn = rd, rn + if dst64bit { + i.u3 = 1 + } +} + +func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { + i.kind = extend + i.rn, i.rd = operandNR(rn), operandNR(rd) + i.u1 = uint64(fromBits) + i.u2 = uint64(toBits) + if signed { + i.u3 = 1 + } +} + +func (i *instruction) asMove32(rd, 
rn regalloc.VReg) { + i.kind = mov32 + i.rn, i.rd = operandNR(rn), operandNR(rd) +} + +func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction { + i.kind = mov64 + i.rn, i.rd = operandNR(rn), operandNR(rd) + return i +} + +func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { + i.kind = fpuMov64 + i.rn, i.rd = operandNR(rn), operandNR(rd) +} + +func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction { + i.kind = fpuMov128 + i.rn, i.rd = operandNR(rn), operandNR(rd) + return i +} + +func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { + i.kind = movToVec + i.rd = rd + i.rn = rn + i.u1, i.u2 = uint64(arr), uint64(index) +} + +func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { + if signed { + i.kind = movFromVecSigned + } else { + i.kind = movFromVec + } + i.rd = rd + i.rn = rn + i.u1, i.u2 = uint64(arr), uint64(index) +} + +func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { + i.kind = vecDup + i.u1 = uint64(arr) + i.rn, i.rd = rn, rd +} + +func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { + i.kind = vecDupElement + i.u1 = uint64(arr) + i.rn, i.rd = rn, rd + i.u2 = uint64(index) +} + +func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) { + i.kind = vecExtract + i.u1 = uint64(arr) + i.rn, i.rm, i.rd = rn, rm, rd + i.u2 = uint64(index) +} + +func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { + i.kind = vecMovElement + i.u1 = uint64(arr) + i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex) + i.rn, i.rd = rn, rd +} + +func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) { + i.kind = vecMisc + i.u1 = uint64(op) + i.rn, i.rd = rn, rd + i.u2 = uint64(arr) +} + +func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) { + i.kind = vecLanes + i.u1 = uint64(op) + i.rn, i.rd = rn, 
rd + i.u2 = uint64(arr) +} + +func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { + i.kind = vecShiftImm + i.u1 = uint64(op) + i.rn, i.rm, i.rd = rn, rm, rd + i.u2 = uint64(arr) + return i +} + +func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) { + switch nregs { + case 0, 1: + i.kind = vecTbl + case 2: + i.kind = vecTbl2 + if !rn.reg().IsRealReg() { + panic("rn is not a RealReg") + } + if rn.realReg() == v31 { + panic("rn cannot be v31") + } + default: + panic(fmt.Sprintf("unsupported number of registers %d", nregs)) + } + i.rn, i.rm, i.rd = rn, rm, rd + i.u2 = uint64(arr) +} + +func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) { + i.kind = vecPermute + i.u1 = uint64(op) + i.rn, i.rm, i.rd = rn, rm, rd + i.u2 = uint64(arr) +} + +func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { + i.kind = vecRRR + i.u1 = uint64(op) + i.rn, i.rd, i.rm = rn, rd, rm + i.u2 = uint64(arr) + return i +} + +// asVecRRRRewrite encodes a vector instruction that rewrites the destination register. +// IMPORTANT: the destination register must be already defined before this instruction. +func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) { + i.kind = vecRRRRewrite + i.u1 = uint64(op) + i.rn, i.rd, i.rm = rn, rd, rm + i.u2 = uint64(arr) +} + +func (i *instruction) IsCopy() bool { + op := i.kind + // We do not include mov32 as it is not a copy instruction in the sense that it does not preserve the upper 32 bits, + // and it is only used in the translation of IReduce, not the actual copy indeed. + return op == mov64 || op == fpuMov64 || op == fpuMov128 +} + +// String implements fmt.Stringer. 
+func (i *instruction) String() (str string) { + is64SizeBitToSize := func(u3 uint64) byte { + if u3 == 0 { + return 32 + } + return 64 + } + + switch i.kind { + case nop0: + if i.u1 != 0 { + l := label(i.u1) + str = fmt.Sprintf("%s:", l) + } else { + str = "nop0" + } + case aluRRR: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), + formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), + i.rm.format(size)) + case aluRRRR: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), + formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size)) + case aluRRImm12: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), + formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) + case aluRRBitmaskImm: + size := is64SizeBitToSize(i.u3) + rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size) + if size == 32 { + str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2)) + } else { + str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2) + } + case aluRRImmShift: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("%s %s, %s, %#x", + aluOp(i.u1).String(), + formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rn.nr(), size), + i.rm.shiftImm(), + ) + case aluRRRShift: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("%s %s, %s, %s", + aluOp(i.u1).String(), + formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rn.nr(), size), + i.rm.format(size), + ) + case aluRRRExtend: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), + formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rn.nr(), size), + // Regardless of the source size, the register is formatted in 32-bit. 
+ i.rm.format(32), + ) + case bitRR: + size := is64SizeBitToSize(i.u2) + str = fmt.Sprintf("%s %s, %s", + bitOp(i.u1), + formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rn.nr(), size), + ) + case uLoad8: + str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + case sLoad8: + str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + case uLoad16: + str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + case sLoad16: + str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + case uLoad32: + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + case sLoad32: + str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + case uLoad64: + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) + case store8: + str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8)) + case store16: + str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16)) + case store32: + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32)) + case store64: + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) + case storeP64: + str = fmt.Sprintf("stp %s, %s, %s", + formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) + case loadP64: + str = fmt.Sprintf("ldp %s, %s, %s", + formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) + case mov64: + str = fmt.Sprintf("mov %s, %s", + formatVRegSized(i.rd.nr(), 64), + formatVRegSized(i.rn.nr(), 64)) + case mov32: + str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32)) + case movZ: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + case movN: + size 
:= is64SizeBitToSize(i.u3) + str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + case movK: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + case extend: + fromBits, toBits := byte(i.u1), byte(i.u2) + + var signedStr string + if i.u3 == 1 { + signedStr = "s" + } else { + signedStr = "u" + } + var fromStr string + switch fromBits { + case 8: + fromStr = "b" + case 16: + fromStr = "h" + case 32: + fromStr = "w" + } + str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32)) + case cSel: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("csel %s, %s, %s, %s", + formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rn.nr(), size), + formatVRegSized(i.rm.nr(), size), + condFlag(i.u1), + ) + case cSet: + if i.u2 != 0 { + str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) + } else { + str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) + } + case cCmpImm: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s", + formatVRegSized(i.rn.nr(), size), i.rm.data, + i.u2&0b1111, + condFlag(i.u1)) + case fpuMov64: + str = fmt.Sprintf("mov %s, %s", + formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone), + formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone)) + case fpuMov128: + str = fmt.Sprintf("mov %s, %s", + formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone), + formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone)) + case fpuMovFromVec: + panic("TODO") + case fpuRR: + dstSz := is64SizeBitToSize(i.u3) + srcSz := dstSz + op := fpuUniOp(i.u1) + switch op { + case fpuUniOpCvt32To64: + srcSz = 32 + case fpuUniOpCvt64To32: + srcSz = 64 + } + str = fmt.Sprintf("%s %s, %s", op.String(), + formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz)) + case fpuRRR: + size 
:= is64SizeBitToSize(i.u3) + str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), + formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) + case fpuRRI: + panic("TODO") + case fpuRRRR: + panic("TODO") + case fpuCmp: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("fcmp %s, %s", + formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) + case fpuLoad32: + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + case fpuStore32: + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64)) + case fpuLoad64: + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) + case fpuStore64: + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) + case fpuLoad128: + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) + case fpuStore128: + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) + case loadFpuConst32: + str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) + case loadFpuConst64: + str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) + case loadFpuConst128: + str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", + formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) + case fpuToInt: + var op, src, dst string + if signed := i.u1 == 1; signed { + op = "fcvtzs" + } else { + op = "fcvtzu" + } + if src64 := i.u2 == 1; src64 { + src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) + } else { + src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) + } + if dst64 := i.u3 == 1; dst64 { + dst = formatVRegSized(i.rd.nr(), 64) + } else { + dst = formatVRegSized(i.rd.nr(), 32) + } + str = fmt.Sprintf("%s %s, %s", op, dst, src) + + case intToFpu: + var op, src, dst string + if signed := i.u1 == 1; signed { + op = 
"scvtf" + } else { + op = "ucvtf" + } + if src64 := i.u2 == 1; src64 { + src = formatVRegSized(i.rn.nr(), 64) + } else { + src = formatVRegSized(i.rn.nr(), 32) + } + if dst64 := i.u3 == 1; dst64 { + dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) + } else { + dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) + } + str = fmt.Sprintf("%s %s, %s", op, dst, src) + case fpuCSel: + size := is64SizeBitToSize(i.u3) + str = fmt.Sprintf("fcsel %s, %s, %s, %s", + formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rn.nr(), size), + formatVRegSized(i.rm.nr(), size), + condFlag(i.u1), + ) + case movToVec: + var size byte + arr := vecArrangement(i.u1) + switch arr { + case vecArrangementB, vecArrangementH, vecArrangementS: + size = 32 + case vecArrangementD: + size = 64 + default: + panic("unsupported arrangement " + arr.String()) + } + str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) + case movFromVec, movFromVecSigned: + var size byte + var opcode string + arr := vecArrangement(i.u1) + signed := i.kind == movFromVecSigned + switch arr { + case vecArrangementB, vecArrangementH, vecArrangementS: + size = 32 + if signed { + opcode = "smov" + } else { + opcode = "umov" + } + case vecArrangementD: + size = 64 + if signed { + opcode = "smov" + } else { + opcode = "mov" + } + default: + panic("unsupported arrangement " + arr.String()) + } + str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) + case vecDup: + str = fmt.Sprintf("dup %s, %s", + formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), + formatVRegSized(i.rn.nr(), 64), + ) + case vecDupElement: + arr := vecArrangement(i.u1) + str = fmt.Sprintf("dup %s, %s", + formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)), + ) + case vecDupFromFpu: + panic("TODO") + case vecExtract: + str = fmt.Sprintf("ext %s, %s, %s, #%d", + 
formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), + formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone), + formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone), + uint32(i.u2), + ) + case vecExtend: + panic("TODO") + case vecMovElement: + str = fmt.Sprintf("mov %s, %s", + formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)), + formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)), + ) + case vecMiscNarrow: + panic("TODO") + case vecRRR, vecRRRRewrite: + str = fmt.Sprintf("%s %s, %s, %s", + vecOp(i.u1), + formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone), + ) + case vecMisc: + vop := vecOp(i.u1) + if vop == vecOpCmeq0 { + str = fmt.Sprintf("cmeq %s, %s, #0", + formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) + } else { + str = fmt.Sprintf("%s %s, %s", + vop, + formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) + } + case vecLanes: + arr := vecArrangement(i.u2) + var destArr vecArrangement + switch arr { + case vecArrangement8B, vecArrangement16B: + destArr = vecArrangementH + case vecArrangement4H, vecArrangement8H: + destArr = vecArrangementS + case vecArrangement4S: + destArr = vecArrangementD + default: + panic("invalid arrangement " + arr.String()) + } + str = fmt.Sprintf("%s %s, %s", + vecOp(i.u1), + formatVRegWidthVec(i.rd.nr(), destArr), + formatVRegVec(i.rn.nr(), arr, vecIndexNone)) + case vecShiftImm: + arr := vecArrangement(i.u2) + str = fmt.Sprintf("%s %s, %s, #%d", + vecOp(i.u1), + formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rn.nr(), arr, vecIndexNone), + i.rm.shiftImm()) + case vecTbl: + arr := vecArrangement(i.u2) + str = fmt.Sprintf("tbl %s, { %s }, %s", + formatVRegVec(i.rd.nr(), arr, vecIndexNone), 
+ formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone), + formatVRegVec(i.rm.nr(), arr, vecIndexNone)) + case vecTbl2: + arr := vecArrangement(i.u2) + rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr() + rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) + str = fmt.Sprintf("tbl %s, { %s, %s }, %s", + formatVRegVec(rd, arr, vecIndexNone), + formatVRegVec(rn, vecArrangement16B, vecIndexNone), + formatVRegVec(rn1, vecArrangement16B, vecIndexNone), + formatVRegVec(rm, arr, vecIndexNone)) + case vecPermute: + arr := vecArrangement(i.u2) + str = fmt.Sprintf("%s %s, %s, %s", + vecOp(i.u1), + formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rn.nr(), arr, vecIndexNone), + formatVRegVec(i.rm.nr(), arr, vecIndexNone)) + case movToFPSR: + str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64)) + case movFromFPSR: + str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64)) + case call: + str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1)) + case callInd: + str = fmt.Sprintf("bl %s", formatVRegSized(i.rn.nr(), 64)) + case ret: + str = "ret" + case br: + target := label(i.u1) + if i.u3 != 0 { + str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String()) + } else { + str = fmt.Sprintf("b %s", target.String()) + } + case condBr: + size := is64SizeBitToSize(i.u3) + c := cond(i.u1) + target := label(i.u2) + switch c.kind() { + case condKindRegisterZero: + if !i.condBrOffsetResolved() { + str = fmt.Sprintf("cbz %s, (%s)", formatVRegSized(c.register(), size), target.String()) + } else { + str = fmt.Sprintf("cbz %s, #%#x %s", formatVRegSized(c.register(), size), i.condBrOffset(), target.String()) + } + case condKindRegisterNotZero: + if offset := i.condBrOffset(); offset != 0 { + str = fmt.Sprintf("cbnz %s, #%#x (%s)", formatVRegSized(c.register(), size), offset, target.String()) + } else { + str = fmt.Sprintf("cbnz %s, %s", formatVRegSized(c.register(), size), target.String()) + } + case condKindCondFlagSet: + if offset := i.condBrOffset(); offset 
!= 0 { + if target == labelInvalid { + str = fmt.Sprintf("b.%s #%#x", c.flag(), offset) + } else { + str = fmt.Sprintf("b.%s #%#x, (%s)", c.flag(), offset, target.String()) + } + } else { + str = fmt.Sprintf("b.%s %s", c.flag(), target.String()) + } + } + case adr: + str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1)) + case brTableSequence: + targetIndex := i.u1 + str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex) + case exitSequence: + str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 64)) + case atomicRmw: + m := atomicRmwOp(i.u1).String() + size := byte(32) + switch i.u2 { + case 8: + size = 64 + case 2: + m = m + "h" + case 1: + m = m + "b" + } + str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) + case atomicCas: + m := "casal" + size := byte(32) + switch i.u2 { + case 8: + size = 64 + case 2: + m = m + "h" + case 1: + m = m + "b" + } + str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) + case atomicLoad: + m := "ldar" + size := byte(32) + switch i.u2 { + case 8: + size = 64 + case 2: + m = m + "h" + case 1: + m = m + "b" + } + str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) + case atomicStore: + m := "stlr" + size := byte(32) + switch i.u2 { + case 8: + size = 64 + case 2: + m = m + "h" + case 1: + m = m + "b" + } + str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) + case dmb: + str = "dmb" + case udf: + str = "udf" + case emitSourceOffsetInfo: + str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1)) + case vecLoad1R: + str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) + case loadConstBlockArg: + str = 
fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd.nr(), 64), i.u1) + default: + panic(i.kind) + } + return +} + +func (i *instruction) asAdr(rd regalloc.VReg, offset int64) { + i.kind = adr + i.rd = operandNR(rd) + i.u1 = uint64(offset) +} + +func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint64) { + i.kind = atomicRmw + i.rd, i.rn, i.rm = rt, rn, rs + i.u1 = uint64(op) + i.u2 = size +} + +func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) { + i.kind = atomicCas + i.rm, i.rn, i.rd = rt, rn, rs + i.u2 = size +} + +func (i *instruction) asAtomicLoad(rn, rt operand, size uint64) { + i.kind = atomicLoad + i.rn, i.rd = rn, rt + i.u2 = size +} + +func (i *instruction) asAtomicStore(rn, rt operand, size uint64) { + i.kind = atomicStore + i.rn, i.rm = rn, rt + i.u2 = size +} + +func (i *instruction) asDMB() { + i.kind = dmb +} + +// TODO: delete unnecessary things. +const ( + // nop0 represents a no-op of zero size. + nop0 instructionKind = iota + 1 + // aluRRR represents an ALU operation with two register sources and a register destination. + aluRRR + // aluRRRR represents an ALU operation with three register sources and a register destination. + aluRRRR + // aluRRImm12 represents an ALU operation with a register source and an immediate-12 source, with a register destination. + aluRRImm12 + // aluRRBitmaskImm represents an ALU operation with a register source and a bitmask immediate, with a register destination. + aluRRBitmaskImm + // aluRRImmShift represents an ALU operation with a register source and an immediate-shifted source, with a register destination. + aluRRImmShift + // aluRRRShift represents an ALU operation with two register sources, one of which can be shifted, with a register destination. + aluRRRShift + // aluRRRExtend represents an ALU operation with two register sources, one of which can be extended, with a register destination. 
+ aluRRRExtend + // bitRR represents a bit op instruction with a single register source. + bitRR + // uLoad8 represents an unsigned 8-bit load. + uLoad8 + // sLoad8 represents a signed 8-bit load into 64-bit register. + sLoad8 + // uLoad16 represents an unsigned 16-bit load into 64-bit register. + uLoad16 + // sLoad16 represents a signed 16-bit load into 64-bit register. + sLoad16 + // uLoad32 represents an unsigned 32-bit load into 64-bit register. + uLoad32 + // sLoad32 represents a signed 32-bit load into 64-bit register. + sLoad32 + // uLoad64 represents a 64-bit load. + uLoad64 + // store8 represents an 8-bit store. + store8 + // store16 represents a 16-bit store. + store16 + // store32 represents a 32-bit store. + store32 + // store64 represents a 64-bit store. + store64 + // storeP64 represents a store of a pair of registers. + storeP64 + // loadP64 represents a load of a pair of registers. + loadP64 + // mov64 represents a MOV instruction. These are encoded as ORR's but we keep them separate for better handling. + mov64 + // mov32 represents a 32-bit MOV. This zeroes the top 32 bits of the destination. + mov32 + // movZ represents a MOVZ with a 16-bit immediate. + movZ + // movN represents a MOVN with a 16-bit immediate. + movN + // movK represents a MOVK with a 16-bit immediate. + movK + // extend represents a sign- or zero-extend operation. + extend + // cSel represents a conditional-select operation. + cSel + // cSet represents a conditional-set operation. + cSet + // cCmpImm represents a conditional comparison with an immediate. + cCmpImm + // fpuMov64 represents a FPU move. Distinct from a vector-register move; moving just 64 bits appears to be significantly faster. + fpuMov64 + // fpuMov128 represents a vector register move. + fpuMov128 + // fpuMovFromVec represents a move to scalar from a vector element. + fpuMovFromVec + // fpuRR represents a 1-op FPU instruction. + fpuRR + // fpuRRR represents a 2-op FPU instruction. 
+ fpuRRR + // fpuRRI represents a 2-op FPU instruction with immediate value. + fpuRRI + // fpuRRRR represents a 3-op FPU instruction. + fpuRRRR + // fpuCmp represents a FPU comparison, either 32 or 64 bit. + fpuCmp + // fpuLoad32 represents a floating-point load, single-precision (32 bit). + fpuLoad32 + // fpuStore32 represents a floating-point store, single-precision (32 bit). + fpuStore32 + // fpuLoad64 represents a floating-point load, double-precision (64 bit). + fpuLoad64 + // fpuStore64 represents a floating-point store, double-precision (64 bit). + fpuStore64 + // fpuLoad128 represents a floating-point/vector load, 128 bit. + fpuLoad128 + // fpuStore128 represents a floating-point/vector store, 128 bit. + fpuStore128 + // loadFpuConst32 represents a load of a 32-bit floating-point constant. + loadFpuConst32 + // loadFpuConst64 represents a load of a 64-bit floating-point constant. + loadFpuConst64 + // loadFpuConst128 represents a load of a 128-bit floating-point constant. + loadFpuConst128 + // vecLoad1R represents a load of a one single-element structure that replicates to all lanes of a vector. + vecLoad1R + // fpuToInt represents a conversion from FP to integer. + fpuToInt + // intToFpu represents a conversion from integer to FP. + intToFpu + // fpuCSel represents a 32/64-bit FP conditional select. + fpuCSel + // movToVec represents a move to a vector element from a GPR. + movToVec + // movFromVec represents an unsigned move from a vector element to a GPR. + movFromVec + // movFromVecSigned represents a signed move from a vector element to a GPR. + movFromVecSigned + // vecDup represents a duplication of general-purpose register to vector. + vecDup + // vecDupElement represents a duplication of a vector element to vector or scalar. + vecDupElement + // vecDupFromFpu represents a duplication of scalar to vector. + vecDupFromFpu + // vecExtract represents a vector extraction operation. + vecExtract + // vecExtend represents a vector extension operation. 
+ vecExtend + // vecMovElement represents a move vector element to another vector element operation. + vecMovElement + // vecMiscNarrow represents a vector narrowing operation. + vecMiscNarrow + // vecRRR represents a vector ALU operation. + vecRRR + // vecRRRRewrite is exactly the same as vecRRR except that this rewrites the destination register. + // For example, BSL instruction rewrites the destination register, and the existing value influences the result. + // Therefore, the "destination" register in vecRRRRewrite will be treated as "use" which makes the register outlive + // the instruction while this instruction doesn't have "def" in the context of register allocation. + vecRRRRewrite + // vecMisc represents a vector two register miscellaneous instruction. + vecMisc + // vecLanes represents a vector instruction across lanes. + vecLanes + // vecShiftImm represents a SIMD scalar shift by immediate instruction. + vecShiftImm + // vecTbl represents a table vector lookup - single register table. + vecTbl + // vecTbl2 represents a table vector lookup - two register table. + vecTbl2 + // vecPermute represents a vector permute instruction. + vecPermute + // movToNZCV represents a move to the FPSR. + movToFPSR + // movFromNZCV represents a move from the FPSR. + movFromFPSR + // call represents a machine call instruction. + call + // callInd represents a machine indirect-call instruction. + callInd + // ret represents a machine return instruction. + ret + // br represents an unconditional branch. + br + // condBr represents a conditional branch. + condBr + // adr represents a compute the address (using a PC-relative offset) of a memory location. + adr + // brTableSequence represents a jump-table sequence. + brTableSequence + // exitSequence consists of multiple instructions, and exits the execution immediately. + // See encodeExitSequence. 
+ exitSequence + // atomicRmw represents an atomic read-modify-write operation with two register sources and a register destination. + atomicRmw + // atomicCas represents an atomic compare-and-swap operation with three register sources. The value is loaded to + // the source register containing the comparison value. + atomicCas + // atomicLoad represents an atomic load with one source register and a register destination. + atomicLoad + // atomicStore represents an atomic store with two source registers and no destination. + atomicStore + // dmb represents the data memory barrier instruction in inner-shareable (ish) mode. + dmb + // UDF is the undefined instruction. For debugging only. + udf + // loadConstBlockArg represents a load of a constant block argument. + loadConstBlockArg + + // emitSourceOffsetInfo is a dummy instruction to emit source offset info. + // The existence of this instruction does not affect the execution. + emitSourceOffsetInfo + + // ------------------- do not define below this line ------------------- + numInstructionKinds +) + +func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.VReg) *instruction { + i.kind = loadConstBlockArg + i.u1 = v + i.u2 = uint64(typ) + i.rd = operandNR(dst) + return i +} + +func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) { + return i.u1, ssa.Type(i.u2), i.rd.nr() +} + +func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { + i.kind = emitSourceOffsetInfo + i.u1 = uint64(l) + return i +} + +func (i *instruction) sourceOffsetInfo() ssa.SourceOffset { + return ssa.SourceOffset(i.u1) +} + +func (i *instruction) asUDF() *instruction { + i.kind = udf + return i +} + +func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) { + i.kind = fpuToInt + i.rn = rn + i.rd = rd + if rdSigned { + i.u1 = 1 + } + if src64bit { + i.u2 = 1 + } + if dst64bit { + i.u3 = 1 + } +} + +func (i *instruction) asIntToFpu(rd, 
rn operand, rnSigned, src64bit, dst64bit bool) { + i.kind = intToFpu + i.rn = rn + i.rd = rd + if rnSigned { + i.u1 = 1 + } + if src64bit { + i.u2 = 1 + } + if dst64bit { + i.u3 = 1 + } +} + +func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction { + i.kind = exitSequence + i.rn = operandNR(ctx) + return i +} + +// aluOp determines the type of ALU operation. Instructions whose kind is one of +// aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend +// would use this type. +type aluOp int + +func (a aluOp) String() string { + switch a { + case aluOpAdd: + return "add" + case aluOpSub: + return "sub" + case aluOpOrr: + return "orr" + case aluOpOrn: + return "orn" + case aluOpAnd: + return "and" + case aluOpAnds: + return "ands" + case aluOpBic: + return "bic" + case aluOpEor: + return "eor" + case aluOpAddS: + return "adds" + case aluOpSubS: + return "subs" + case aluOpSMulH: + return "sMulH" + case aluOpUMulH: + return "uMulH" + case aluOpSDiv: + return "sdiv" + case aluOpUDiv: + return "udiv" + case aluOpRotR: + return "ror" + case aluOpLsr: + return "lsr" + case aluOpAsr: + return "asr" + case aluOpLsl: + return "lsl" + case aluOpMAdd: + return "madd" + case aluOpMSub: + return "msub" + } + panic(int(a)) +} + +const ( + // 32/64-bit Add. + aluOpAdd aluOp = iota + // 32/64-bit Subtract. + aluOpSub + // 32/64-bit Bitwise OR. + aluOpOrr + // 32/64-bit Bitwise OR NOT. + aluOpOrn + // 32/64-bit Bitwise AND. + aluOpAnd + // 32/64-bit Bitwise ANDS. + aluOpAnds + // 32/64-bit Bitwise AND NOT. + aluOpBic + // 32/64-bit Bitwise XOR (Exclusive OR). + aluOpEor + // 32/64-bit Add setting flags. + aluOpAddS + // 32/64-bit Subtract setting flags. + aluOpSubS + // Signed multiply, high-word result. + aluOpSMulH + // Unsigned multiply, high-word result. + aluOpUMulH + // 64-bit Signed divide. + aluOpSDiv + // 64-bit Unsigned divide. + aluOpUDiv + // 32/64-bit Rotate right. + aluOpRotR + // 32/64-bit Logical shift right. 
+ aluOpLsr + // 32/64-bit Arithmetic shift right. + aluOpAsr + // 32/64-bit Logical shift left. + aluOpLsl /// Multiply-add + + // MAdd and MSub are only applicable for aluRRRR. + aluOpMAdd + aluOpMSub +) + +// vecOp determines the type of vector operation. Instructions whose kind is one of +// vecOpCnt would use this type. +type vecOp int + +// String implements fmt.Stringer. +func (b vecOp) String() string { + switch b { + case vecOpCnt: + return "cnt" + case vecOpCmeq: + return "cmeq" + case vecOpCmgt: + return "cmgt" + case vecOpCmhi: + return "cmhi" + case vecOpCmge: + return "cmge" + case vecOpCmhs: + return "cmhs" + case vecOpFcmeq: + return "fcmeq" + case vecOpFcmgt: + return "fcmgt" + case vecOpFcmge: + return "fcmge" + case vecOpCmeq0: + return "cmeq0" + case vecOpUaddlv: + return "uaddlv" + case vecOpBit: + return "bit" + case vecOpBic: + return "bic" + case vecOpBsl: + return "bsl" + case vecOpNot: + return "not" + case vecOpAnd: + return "and" + case vecOpOrr: + return "orr" + case vecOpEOR: + return "eor" + case vecOpFadd: + return "fadd" + case vecOpAdd: + return "add" + case vecOpAddp: + return "addp" + case vecOpAddv: + return "addv" + case vecOpSub: + return "sub" + case vecOpFsub: + return "fsub" + case vecOpSmin: + return "smin" + case vecOpUmin: + return "umin" + case vecOpUminv: + return "uminv" + case vecOpSmax: + return "smax" + case vecOpUmax: + return "umax" + case vecOpUmaxp: + return "umaxp" + case vecOpUrhadd: + return "urhadd" + case vecOpFmul: + return "fmul" + case vecOpSqrdmulh: + return "sqrdmulh" + case vecOpMul: + return "mul" + case vecOpUmlal: + return "umlal" + case vecOpFdiv: + return "fdiv" + case vecOpFsqrt: + return "fsqrt" + case vecOpAbs: + return "abs" + case vecOpFabs: + return "fabs" + case vecOpNeg: + return "neg" + case vecOpFneg: + return "fneg" + case vecOpFrintp: + return "frintp" + case vecOpFrintm: + return "frintm" + case vecOpFrintn: + return "frintn" + case vecOpFrintz: + return "frintz" + case vecOpFcvtl: + 
return "fcvtl" + case vecOpFcvtn: + return "fcvtn" + case vecOpFcvtzu: + return "fcvtzu" + case vecOpFcvtzs: + return "fcvtzs" + case vecOpScvtf: + return "scvtf" + case vecOpUcvtf: + return "ucvtf" + case vecOpSqxtn: + return "sqxtn" + case vecOpUqxtn: + return "uqxtn" + case vecOpSqxtun: + return "sqxtun" + case vecOpRev64: + return "rev64" + case vecOpXtn: + return "xtn" + case vecOpShll: + return "shll" + case vecOpSshl: + return "sshl" + case vecOpSshll: + return "sshll" + case vecOpUshl: + return "ushl" + case vecOpUshll: + return "ushll" + case vecOpSshr: + return "sshr" + case vecOpZip1: + return "zip1" + case vecOpFmin: + return "fmin" + case vecOpFmax: + return "fmax" + case vecOpSmull: + return "smull" + case vecOpSmull2: + return "smull2" + } + panic(int(b)) +} + +const ( + vecOpCnt vecOp = iota + vecOpCmeq0 + vecOpCmeq + vecOpCmgt + vecOpCmhi + vecOpCmge + vecOpCmhs + vecOpFcmeq + vecOpFcmgt + vecOpFcmge + vecOpUaddlv + vecOpBit + vecOpBic + vecOpBsl + vecOpNot + vecOpAnd + vecOpOrr + vecOpEOR + vecOpAdd + vecOpFadd + vecOpAddv + vecOpSqadd + vecOpUqadd + vecOpAddp + vecOpSub + vecOpFsub + vecOpSqsub + vecOpUqsub + vecOpSmin + vecOpUmin + vecOpUminv + vecOpFmin + vecOpSmax + vecOpUmax + vecOpUmaxp + vecOpFmax + vecOpUrhadd + vecOpMul + vecOpFmul + vecOpSqrdmulh + vecOpUmlal + vecOpFdiv + vecOpFsqrt + vecOpAbs + vecOpFabs + vecOpNeg + vecOpFneg + vecOpFrintm + vecOpFrintn + vecOpFrintp + vecOpFrintz + vecOpFcvtl + vecOpFcvtn + vecOpFcvtzs + vecOpFcvtzu + vecOpScvtf + vecOpUcvtf + vecOpSqxtn + vecOpSqxtun + vecOpUqxtn + vecOpRev64 + vecOpXtn + vecOpShll + vecOpSshl + vecOpSshll + vecOpUshl + vecOpUshll + vecOpSshr + vecOpZip1 + vecOpSmull + vecOpSmull2 +) + +// bitOp determines the type of bitwise operation. Instructions whose kind is one of +// bitOpRbit and bitOpClz would use this type. +type bitOp int + +// String implements fmt.Stringer. 
+func (b bitOp) String() string { + switch b { + case bitOpRbit: + return "rbit" + case bitOpClz: + return "clz" + } + panic(int(b)) +} + +const ( + // 32/64-bit Rbit. + bitOpRbit bitOp = iota + // 32/64-bit Clz. + bitOpClz +) + +// fpuUniOp represents a unary floating-point unit (FPU) operation. +type fpuUniOp byte + +const ( + fpuUniOpNeg fpuUniOp = iota + fpuUniOpCvt32To64 + fpuUniOpCvt64To32 + fpuUniOpSqrt + fpuUniOpRoundPlus + fpuUniOpRoundMinus + fpuUniOpRoundZero + fpuUniOpRoundNearest + fpuUniOpAbs +) + +// String implements the fmt.Stringer. +func (f fpuUniOp) String() string { + switch f { + case fpuUniOpNeg: + return "fneg" + case fpuUniOpCvt32To64: + return "fcvt" + case fpuUniOpCvt64To32: + return "fcvt" + case fpuUniOpSqrt: + return "fsqrt" + case fpuUniOpRoundPlus: + return "frintp" + case fpuUniOpRoundMinus: + return "frintm" + case fpuUniOpRoundZero: + return "frintz" + case fpuUniOpRoundNearest: + return "frintn" + case fpuUniOpAbs: + return "fabs" + } + panic(int(f)) +} + +// fpuBinOp represents a binary floating-point unit (FPU) operation. +type fpuBinOp byte + +const ( + fpuBinOpAdd = iota + fpuBinOpSub + fpuBinOpMul + fpuBinOpDiv + fpuBinOpMax + fpuBinOpMin +) + +// String implements the fmt.Stringer. +func (f fpuBinOp) String() string { + switch f { + case fpuBinOpAdd: + return "fadd" + case fpuBinOpSub: + return "fsub" + case fpuBinOpMul: + return "fmul" + case fpuBinOpDiv: + return "fdiv" + case fpuBinOpMax: + return "fmax" + case fpuBinOpMin: + return "fmin" + } + panic(int(f)) +} + +// extMode represents the mode of a register operand extension. +// For example, aluRRRExtend instructions need this info to determine the extensions. +type extMode byte + +const ( + extModeNone extMode = iota + // extModeZeroExtend64 suggests a zero-extension to 32 bits if the original bit size is less than 32. + extModeZeroExtend32 + // extModeSignExtend64 stands for a sign-extension to 32 bits if the original bit size is less than 32. 
+ extModeSignExtend32 + // extModeZeroExtend64 suggests a zero-extension to 64 bits if the original bit size is less than 64. + extModeZeroExtend64 + // extModeSignExtend64 stands for a sign-extension to 64 bits if the original bit size is less than 64. + extModeSignExtend64 +) + +func (e extMode) bits() byte { + switch e { + case extModeZeroExtend32, extModeSignExtend32: + return 32 + case extModeZeroExtend64, extModeSignExtend64: + return 64 + default: + return 0 + } +} + +func (e extMode) signed() bool { + switch e { + case extModeSignExtend32, extModeSignExtend64: + return true + default: + return false + } +} + +func extModeOf(t ssa.Type, signed bool) extMode { + switch t.Bits() { + case 32: + if signed { + return extModeSignExtend32 + } + return extModeZeroExtend32 + case 64: + if signed { + return extModeSignExtend64 + } + return extModeZeroExtend64 + default: + panic("TODO? do we need narrower than 32 bits?") + } +} + +type extendOp byte + +const ( + extendOpUXTB extendOp = 0b000 + extendOpUXTH extendOp = 0b001 + extendOpUXTW extendOp = 0b010 + // extendOpUXTX does nothing, but convenient symbol that officially exists. See: + // https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct + extendOpUXTX extendOp = 0b011 + extendOpSXTB extendOp = 0b100 + extendOpSXTH extendOp = 0b101 + extendOpSXTW extendOp = 0b110 + // extendOpSXTX does nothing, but convenient symbol that officially exists. 
See: + // https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct + extendOpSXTX extendOp = 0b111 + extendOpNone extendOp = 0xff +) + +func (e extendOp) srcBits() byte { + switch e { + case extendOpUXTB, extendOpSXTB: + return 8 + case extendOpUXTH, extendOpSXTH: + return 16 + case extendOpUXTW, extendOpSXTW: + return 32 + case extendOpUXTX, extendOpSXTX: + return 64 + } + panic(int(e)) +} + +func (e extendOp) String() string { + switch e { + case extendOpUXTB: + return "UXTB" + case extendOpUXTH: + return "UXTH" + case extendOpUXTW: + return "UXTW" + case extendOpUXTX: + return "UXTX" + case extendOpSXTB: + return "SXTB" + case extendOpSXTH: + return "SXTH" + case extendOpSXTW: + return "SXTW" + case extendOpSXTX: + return "SXTX" + } + panic(int(e)) +} + +func extendOpFrom(signed bool, from byte) extendOp { + switch from { + case 8: + if signed { + return extendOpSXTB + } + return extendOpUXTB + case 16: + if signed { + return extendOpSXTH + } + return extendOpUXTH + case 32: + if signed { + return extendOpSXTW + } + return extendOpUXTW + case 64: + if signed { + return extendOpSXTX + } + return extendOpUXTX + } + panic("invalid extendOpFrom") +} + +type shiftOp byte + +const ( + shiftOpLSL shiftOp = 0b00 + shiftOpLSR shiftOp = 0b01 + shiftOpASR shiftOp = 0b10 + shiftOpROR shiftOp = 0b11 +) + +func (s shiftOp) String() string { + switch s { + case shiftOpLSL: + return "lsl" + case shiftOpLSR: + return "lsr" + case shiftOpASR: + return "asr" + case shiftOpROR: + return "ror" + } + panic(int(s)) +} + +const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence. + +// size returns the size of the instruction in encoded bytes. +func (i *instruction) size() int64 { + switch i.kind { + case exitSequence: + return exitSequenceSize // 5 instructions as in encodeExitSequence. 
+ case nop0, loadConstBlockArg: + return 0 + case emitSourceOffsetInfo: + return 0 + case loadFpuConst32: + if i.u1 == 0 { + return 4 // zero loading can be encoded as a single instruction. + } + return 4 + 4 + 4 + case loadFpuConst64: + if i.u1 == 0 { + return 4 // zero loading can be encoded as a single instruction. + } + return 4 + 4 + 8 + case loadFpuConst128: + if i.u1 == 0 && i.u2 == 0 { + return 4 // zero loading can be encoded as a single instruction. + } + return 4 + 4 + 16 + case brTableSequence: + return 4*4 + int64(i.u2)*4 + default: + return 4 + } +} + +// vecArrangement is the arrangement of data within a vector register. +type vecArrangement byte + +const ( + // vecArrangementNone is an arrangement indicating no data is stored. + vecArrangementNone vecArrangement = iota + // vecArrangement8B is an arrangement of 8 bytes (64-bit vector) + vecArrangement8B + // vecArrangement16B is an arrangement of 16 bytes (128-bit vector) + vecArrangement16B + // vecArrangement4H is an arrangement of 4 half precisions (64-bit vector) + vecArrangement4H + // vecArrangement8H is an arrangement of 8 half precisions (128-bit vector) + vecArrangement8H + // vecArrangement2S is an arrangement of 2 single precisions (64-bit vector) + vecArrangement2S + // vecArrangement4S is an arrangement of 4 single precisions (128-bit vector) + vecArrangement4S + // vecArrangement1D is an arrangement of 1 double precision (64-bit vector) + vecArrangement1D + // vecArrangement2D is an arrangement of 2 double precisions (128-bit vector) + vecArrangement2D + + // Assign each vector size specifier to a vector arrangement ID. + // Instructions can only have an arrangement or a size specifier, but not both, so it + // simplifies the internal representation of vector instructions by being able to + // store either into the same field. 
+ + // vecArrangementB is a size specifier of byte + vecArrangementB + // vecArrangementH is a size specifier of word (16-bit) + vecArrangementH + // vecArrangementS is a size specifier of double word (32-bit) + vecArrangementS + // vecArrangementD is a size specifier of quad word (64-bit) + vecArrangementD + // vecArrangementQ is a size specifier of the entire vector (128-bit) + vecArrangementQ +) + +// String implements fmt.Stringer +func (v vecArrangement) String() (ret string) { + switch v { + case vecArrangement8B: + ret = "8B" + case vecArrangement16B: + ret = "16B" + case vecArrangement4H: + ret = "4H" + case vecArrangement8H: + ret = "8H" + case vecArrangement2S: + ret = "2S" + case vecArrangement4S: + ret = "4S" + case vecArrangement1D: + ret = "1D" + case vecArrangement2D: + ret = "2D" + case vecArrangementB: + ret = "B" + case vecArrangementH: + ret = "H" + case vecArrangementS: + ret = "S" + case vecArrangementD: + ret = "D" + case vecArrangementQ: + ret = "Q" + case vecArrangementNone: + ret = "none" + default: + panic(v) + } + return +} + +// vecIndex is the index of an element of a vector register +type vecIndex byte + +// vecIndexNone indicates no vector index specified. +const vecIndexNone = ^vecIndex(0) + +func ssaLaneToArrangement(lane ssa.VecLane) vecArrangement { + switch lane { + case ssa.VecLaneI8x16: + return vecArrangement16B + case ssa.VecLaneI16x8: + return vecArrangement8H + case ssa.VecLaneI32x4: + return vecArrangement4S + case ssa.VecLaneI64x2: + return vecArrangement2D + case ssa.VecLaneF32x4: + return vecArrangement4S + case ssa.VecLaneF64x2: + return vecArrangement2D + default: + panic(lane) + } +} + +// atomicRmwOp is the type of atomic read-modify-write operation. +type atomicRmwOp byte + +const ( + // atomicRmwOpAdd is an atomic add operation. + atomicRmwOpAdd atomicRmwOp = iota + // atomicRmwOpClr is an atomic clear operation, i.e. AND NOT. + atomicRmwOpClr + // atomicRmwOpSet is an atomic set operation, i.e. OR. 
+ atomicRmwOpSet + // atomicRmwOpEor is an atomic exclusive OR operation. + atomicRmwOpEor + // atomicRmwOpSwp is an atomic swap operation. + atomicRmwOpSwp +) + +// String implements fmt.Stringer +func (a atomicRmwOp) String() string { + switch a { + case atomicRmwOpAdd: + return "ldaddal" + case atomicRmwOpClr: + return "ldclral" + case atomicRmwOpSet: + return "ldsetal" + case atomicRmwOpEor: + return "ldeoral" + case atomicRmwOpSwp: + return "swpal" + } + panic(fmt.Sprintf("unknown atomicRmwOp: %d", a)) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go new file mode 100644 index 000000000..227a96474 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -0,0 +1,2351 @@ +package arm64 + +import ( + "context" + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// Encode implements backend.Machine Encode. 
+func (m *machine) Encode(ctx context.Context) error { + m.resolveRelativeAddresses(ctx) + m.encode(m.executableContext.RootInstr) + if l := len(m.compiler.Buf()); l > maxFunctionExecutableSize { + return fmt.Errorf("function size exceeds the limit: %d > %d", l, maxFunctionExecutableSize) + } + return nil +} + +func (m *machine) encode(root *instruction) { + for cur := root; cur != nil; cur = cur.next { + cur.encode(m) + } +} + +func (i *instruction) encode(m *machine) { + c := m.compiler + switch kind := i.kind; kind { + case nop0, emitSourceOffsetInfo, loadConstBlockArg: + case exitSequence: + encodeExitSequence(c, i.rn.reg()) + case ret: + // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine-?lang=en + c.Emit4Bytes(encodeRet()) + case br: + imm := i.brOffset() + c.Emit4Bytes(encodeUnconditionalBranch(false, imm)) + case call: + // We still don't know the exact address of the function to call, so we emit a placeholder. + c.AddRelocationInfo(i.callFuncRef()) + c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder + case callInd: + c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true)) + case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128: + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode)) + case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128: + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode)) + case vecLoad1R: + c.Emit4Bytes(encodeVecLoad1R( + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + vecArrangement(i.u1))) + case condBr: + imm19 := i.condBrOffset() + if imm19%4 != 0 { + panic("imm26 for branch must be a multiple of 4") + } + + imm19U32 := uint32(imm19/4) & 0b111_11111111_11111111 + brCond := i.condBrCond() + switch brCond.kind() { + case condKindRegisterZero: + rt := 
regNumberInEncoding[brCond.register().RealReg()] + c.Emit4Bytes(encodeCBZCBNZ(rt, false, imm19U32, i.condBr64bit())) + case condKindRegisterNotZero: + rt := regNumberInEncoding[brCond.register().RealReg()] + c.Emit4Bytes(encodeCBZCBNZ(rt, true, imm19U32, i.condBr64bit())) + case condKindCondFlagSet: + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B-cond--Branch-conditionally- + fl := brCond.flag() + c.Emit4Bytes(0b01010100<<24 | (imm19U32 << 5) | uint32(fl)) + default: + panic("BUG") + } + case movN: + c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + case movZ: + c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + case movK: + c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + case mov32: + to, from := i.rd.realReg(), i.rn.realReg() + c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to])) + case mov64: + to, from := i.rd.realReg(), i.rn.realReg() + toIsSp := to == sp + fromIsSp := from == sp + c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp)) + case loadP64, storeP64: + rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] + amode := i.amode + rn := regNumberInEncoding[amode.rn.RealReg()] + var pre bool + switch amode.kind { + case addressModeKindPostIndex: + case addressModeKindPreIndex: + pre = true + default: + panic("BUG") + } + c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm)) + case loadFpuConst32: + rd := regNumberInEncoding[i.rd.realReg()] + if i.u1 == 0 { + c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) + } else { + encodeLoadFpuConst32(c, rd, i.u1) + } + case loadFpuConst64: + rd := regNumberInEncoding[i.rd.realReg()] + if i.u1 == 0 { + c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) + } else { 
+ encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1) + } + case loadFpuConst128: + rd := regNumberInEncoding[i.rd.realReg()] + lo, hi := i.u1, i.u2 + if lo == 0 && hi == 0 { + c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B)) + } else { + encodeLoadFpuConst128(c, rd, lo, hi) + } + case aluRRRR: + c.Emit4Bytes(encodeAluRRRR( + aluOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + regNumberInEncoding[i.ra.realReg()], + uint32(i.u3), + )) + case aluRRImmShift: + c.Emit4Bytes(encodeAluRRImm( + aluOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + uint32(i.rm.shiftImm()), + uint32(i.u3), + )) + case aluRRR: + rn := i.rn.realReg() + c.Emit4Bytes(encodeAluRRR( + aluOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[rn], + regNumberInEncoding[i.rm.realReg()], + i.u3 == 1, + rn == sp, + )) + case aluRRRExtend: + rm, exo, to := i.rm.er() + c.Emit4Bytes(encodeAluRRRExtend( + aluOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[rm.RealReg()], + exo, + to, + )) + case aluRRRShift: + r, amt, sop := i.rm.sr() + c.Emit4Bytes(encodeAluRRRShift( + aluOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[r.RealReg()], + uint32(amt), + sop, + i.u3 == 1, + )) + case aluRRBitmaskImm: + c.Emit4Bytes(encodeAluBitmaskImmediate( + aluOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + i.u2, + i.u3 == 1, + )) + case bitRR: + c.Emit4Bytes(encodeBitRR( + bitOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + uint32(i.u2)), + ) + case aluRRImm12: + imm12, shift := i.rm.imm12() + c.Emit4Bytes(encodeAluRRImm12( + aluOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + imm12, shift, + i.u3 == 1, + )) + 
case fpuRRR: + c.Emit4Bytes(encodeFpuRRR( + fpuBinOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + i.u3 == 1, + )) + case fpuMov64, fpuMov128: + // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register-- + rd := regNumberInEncoding[i.rd.realReg()] + rn := regNumberInEncoding[i.rn.realReg()] + var q uint32 + if kind == fpuMov128 { + q = 0b1 + } + c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd) + case cSet: + rd := regNumberInEncoding[i.rd.realReg()] + cf := condFlag(i.u1) + if i.u2 == 1 { + // https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV- + // Note that we set 64bit version here. + c.Emit4Bytes(0b1101101010011111<<16 | uint32(cf.invert())<<12 | 0b011111<<5 | rd) + } else { + // https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC- + // Note that we set 64bit version here. + c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd) + } + case extend: + c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()])) + case fpuCmp: + // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en + rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] + var ftype uint32 + if i.u3 == 1 { + ftype = 0b01 // double precision. 
+ } + c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5) + case udf: + // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined-?lang=en + if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { + c.Emit4Bytes(dummyInstruction) + } else { + c.Emit4Bytes(0) + } + case adr: + c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1))) + case cSel: + c.Emit4Bytes(encodeConditionalSelect( + kind, + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + condFlag(i.u1), + i.u3 == 1, + )) + case fpuCSel: + c.Emit4Bytes(encodeFpuCSel( + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + condFlag(i.u1), + i.u3 == 1, + )) + case movToVec: + c.Emit4Bytes(encodeMoveToVec( + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + vecArrangement(byte(i.u1)), + vecIndex(i.u2), + )) + case movFromVec, movFromVecSigned: + c.Emit4Bytes(encodeMoveFromVec( + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + vecArrangement(byte(i.u1)), + vecIndex(i.u2), + i.kind == movFromVecSigned, + )) + case vecDup: + c.Emit4Bytes(encodeVecDup( + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + vecArrangement(byte(i.u1)))) + case vecDupElement: + c.Emit4Bytes(encodeVecDupElement( + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + vecArrangement(byte(i.u1)), + vecIndex(i.u2))) + case vecExtract: + c.Emit4Bytes(encodeVecExtract( + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + vecArrangement(byte(i.u1)), + uint32(i.u2))) + case vecPermute: + c.Emit4Bytes(encodeVecPermute( + vecOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + 
vecArrangement(byte(i.u2)))) + case vecMovElement: + c.Emit4Bytes(encodeVecMovElement( + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + vecArrangement(i.u1), + uint32(i.u2), uint32(i.u3), + )) + case vecMisc: + c.Emit4Bytes(encodeAdvancedSIMDTwoMisc( + vecOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + vecArrangement(i.u2), + )) + case vecLanes: + c.Emit4Bytes(encodeVecLanes( + vecOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + vecArrangement(i.u2), + )) + case vecShiftImm: + c.Emit4Bytes(encodeVecShiftImm( + vecOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + uint32(i.rm.shiftImm()), + vecArrangement(i.u2), + )) + case vecTbl: + c.Emit4Bytes(encodeVecTbl( + 1, + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + vecArrangement(i.u2)), + ) + case vecTbl2: + c.Emit4Bytes(encodeVecTbl( + 2, + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + vecArrangement(i.u2)), + ) + case brTableSequence: + targets := m.jmpTableTargets[i.u1] + encodeBrTableSequence(c, i.rn.reg(), targets) + case fpuToInt, intToFpu: + c.Emit4Bytes(encodeCnvBetweenFloatInt(i)) + case fpuRR: + c.Emit4Bytes(encodeFloatDataOneSource( + fpuUniOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + i.u3 == 1, + )) + case vecRRR: + if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal { + panic(fmt.Sprintf("vecOp %s must use vecRRRRewrite instead of vecRRR", op.String())) + } + fallthrough + case vecRRRRewrite: + c.Emit4Bytes(encodeVecRRR( + vecOp(i.u1), + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + vecArrangement(i.u2), + )) + case cCmpImm: + // Conditional compare (immediate) in 
https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en + sf := uint32(i.u3 & 0b1) + nzcv := uint32(i.u2 & 0b1111) + cond := uint32(condFlag(i.u1)) + imm := uint32(i.rm.data & 0b11111) + rn := regNumberInEncoding[i.rn.realReg()] + c.Emit4Bytes( + sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv, + ) + case movFromFPSR: + rt := regNumberInEncoding[i.rd.realReg()] + c.Emit4Bytes(encodeSystemRegisterMove(rt, true)) + case movToFPSR: + rt := regNumberInEncoding[i.rn.realReg()] + c.Emit4Bytes(encodeSystemRegisterMove(rt, false)) + case atomicRmw: + c.Emit4Bytes(encodeAtomicRmw( + atomicRmwOp(i.u1), + regNumberInEncoding[i.rm.realReg()], + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rn.realReg()], + uint32(i.u2), + )) + case atomicCas: + c.Emit4Bytes(encodeAtomicCas( + regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rm.realReg()], + regNumberInEncoding[i.rn.realReg()], + uint32(i.u2), + )) + case atomicLoad: + c.Emit4Bytes(encodeAtomicLoadStore( + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rd.realReg()], + uint32(i.u2), + 1, + )) + case atomicStore: + c.Emit4Bytes(encodeAtomicLoadStore( + regNumberInEncoding[i.rn.realReg()], + regNumberInEncoding[i.rm.realReg()], + uint32(i.u2), + 0, + )) + case dmb: + c.Emit4Bytes(encodeDMB()) + default: + panic(i.String()) + } +} + +func encodeMov64(rd, rn uint32, toIsSp, fromIsSp bool) uint32 { + if toIsSp || fromIsSp { + // This is an alias of ADD (immediate): + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate-- + return encodeAddSubtractImmediate(0b100, 0, 0, rn, rd) + } else { + // This is an alias of ORR (shifted register): + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register-- + return 
encodeLogicalShiftedRegister(0b101, 0, rn, 0, regNumberInEncoding[xzr], rd) + } +} + +// encodeSystemRegisterMove encodes as "System register move" in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +// +// Note that currently we only supports read/write of FPSR. +func encodeSystemRegisterMove(rt uint32, fromSystem bool) uint32 { + ret := 0b11010101<<24 | 0b11011<<16 | 0b01000100<<8 | 0b001<<5 | rt + if fromSystem { + ret |= 0b1 << 21 + } + return ret +} + +// encodeVecRRR encodes as either "Advanced SIMD three *" in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en +func encodeVecRRR(op vecOp, rd, rn, rm uint32, arr vecArrangement) uint32 { + switch op { + case vecOpBit: + _, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b10 /* always has size 0b10 */, 0b1, q) + case vecOpBic: + if arr > vecArrangement16B { + panic("unsupported arrangement: " + arr.String()) + } + _, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b01 /* always has size 0b01 */, 0b0, q) + case vecOpBsl: + if arr > vecArrangement16B { + panic("unsupported arrangement: " + arr.String()) + } + _, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b01 /* always has size 0b01 */, 0b1, q) + case vecOpAnd: + if arr > vecArrangement16B { + panic("unsupported arrangement: " + arr.String()) + } + _, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b00 /* always has size 0b00 */, 0b0, q) + case vecOpOrr: + _, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b10 /* always has size 0b10 */, 0b0, q) + case vecOpEOR: + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, size, 0b1, q) + case 
vecOpCmeq: + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10001, size, 0b1, q) + case vecOpCmgt: + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00110, size, 0b0, q) + case vecOpCmhi: + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00110, size, 0b1, q) + case vecOpCmge: + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00111, size, 0b0, q) + case vecOpCmhs: + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00111, size, 0b1, q) + case vecOpFcmeq: + var size, q uint32 + switch arr { + case vecArrangement4S: + size, q = 0b00, 0b1 + case vecArrangement2S: + size, q = 0b00, 0b0 + case vecArrangement2D: + size, q = 0b01, 0b1 + default: + panic("unsupported arrangement: " + arr.String()) + } + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b0, q) + case vecOpFcmgt: + if arr < vecArrangement2S || arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b1, q) + case vecOpFcmge: + var size, q uint32 + switch arr { + case vecArrangement4S: + size, q = 0b00, 0b1 + case vecArrangement2S: + size, q = 0b00, 0b0 + case vecArrangement2D: + size, q = 0b01, 0b1 + default: + panic("unsupported arrangement: " + arr.String()) + } + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b1, q) + case vecOpAdd: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10000, size, 0b0, q) + case vecOpSqadd: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00001, size, 0b0, q) + case vecOpUqadd: + if arr == 
vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00001, size, 0b1, q) + case vecOpAddp: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10111, size, 0b0, q) + case vecOpSqsub: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00101, size, 0b0, q) + case vecOpUqsub: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00101, size, 0b1, q) + case vecOpSub: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10000, size, 0b1, q) + case vecOpFmin: + if arr < vecArrangement2S || arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11110, size, 0b0, q) + case vecOpSmin: + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01101, size, 0b0, q) + case vecOpUmin: + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01101, size, 0b1, q) + case vecOpFmax: + var size, q uint32 + switch arr { + case vecArrangement4S: + size, q = 0b00, 0b1 + case vecArrangement2S: + size, q = 0b00, 0b0 + case vecArrangement2D: + size, q = 0b01, 0b1 + default: + panic("unsupported arrangement: " + arr.String()) + } + return 
encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11110, size, 0b0, q) + case vecOpFadd: + var size, q uint32 + switch arr { + case vecArrangement4S: + size, q = 0b00, 0b1 + case vecArrangement2S: + size, q = 0b00, 0b0 + case vecArrangement2D: + size, q = 0b01, 0b1 + default: + panic("unsupported arrangement: " + arr.String()) + } + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11010, size, 0b0, q) + case vecOpFsub: + if arr < vecArrangement2S || arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11010, size, 0b0, q) + case vecOpFmul: + var size, q uint32 + switch arr { + case vecArrangement4S: + size, q = 0b00, 0b1 + case vecArrangement2S: + size, q = 0b00, 0b0 + case vecArrangement2D: + size, q = 0b01, 0b1 + default: + panic("unsupported arrangement: " + arr.String()) + } + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11011, size, 0b1, q) + case vecOpSqrdmulh: + if arr < vecArrangement4H || arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10110, size, 0b1, q) + case vecOpFdiv: + var size, q uint32 + switch arr { + case vecArrangement4S: + size, q = 0b00, 0b1 + case vecArrangement2S: + size, q = 0b00, 0b0 + case vecArrangement2D: + size, q = 0b01, 0b1 + default: + panic("unsupported arrangement: " + arr.String()) + } + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11111, size, 0b1, q) + case vecOpSmax: + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01100, size, 0b0, q) + case vecOpUmax: + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01100, size, 0b1, q) + case vecOpUmaxp: + if arr > 
vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10100, size, 0b1, q) + case vecOpUrhadd: + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00010, size, 0b1, q) + case vecOpMul: + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10011, size, 0b0, q) + case vecOpUmlal: + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1000, size, 0b1, q) + case vecOpSshl: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01000, size, 0b0, q) + case vecOpUshl: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01000, size, 0b1, q) + + case vecOpSmull: + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, _ := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1100, size, 0b0, 0b0) + + case vecOpSmull2: + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, _ := arrToSizeQEncoded(arr) + return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1100, size, 0b0, 0b1) + + default: + panic("TODO: " + op.String()) + } +} + +func arrToSizeQEncoded(arr vecArrangement) (size, q uint32) { + switch arr { + case vecArrangement16B: + q = 0b1 + fallthrough + case vecArrangement8B: + size = 0b00 + case vecArrangement8H: + q = 0b1 + fallthrough + case vecArrangement4H: + size = 
0b01 + case vecArrangement4S: + q = 0b1 + fallthrough + case vecArrangement2S: + size = 0b10 + case vecArrangement2D: + q = 0b1 + fallthrough + case vecArrangement1D: + size = 0b11 + default: + panic("BUG") + } + return +} + +// encodeAdvancedSIMDThreeSame encodes as "Advanced SIMD three same" in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en +func encodeAdvancedSIMDThreeSame(rd, rn, rm, opcode, size, U, Q uint32) uint32 { + return Q<<30 | U<<29 | 0b111<<25 | size<<22 | 0b1<<21 | rm<<16 | opcode<<11 | 0b1<<10 | rn<<5 | rd +} + +// encodeAdvancedSIMDThreeDifferent encodes as "Advanced SIMD three different" in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en +func encodeAdvancedSIMDThreeDifferent(rd, rn, rm, opcode, size, U, Q uint32) uint32 { + return Q<<30 | U<<29 | 0b111<<25 | size<<22 | 0b1<<21 | rm<<16 | opcode<<12 | rn<<5 | rd +} + +// encodeFloatDataOneSource encodes as "Floating-point data-processing (1 source)" in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp +func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32 { + var opcode, ptype uint32 + switch op { + case fpuUniOpCvt32To64: + opcode = 0b000101 + case fpuUniOpCvt64To32: + opcode = 0b000100 + ptype = 0b01 + case fpuUniOpNeg: + opcode = 0b000010 + if dst64bit { + ptype = 0b01 + } + case fpuUniOpSqrt: + opcode = 0b000011 + if dst64bit { + ptype = 0b01 + } + case fpuUniOpRoundPlus: + opcode = 0b001001 + if dst64bit { + ptype = 0b01 + } + case fpuUniOpRoundMinus: + opcode = 0b001010 + if dst64bit { + ptype = 0b01 + } + case fpuUniOpRoundZero: + opcode = 0b001011 + if dst64bit { + ptype = 0b01 + } + case fpuUniOpRoundNearest: + opcode = 0b001000 + if dst64bit { + ptype = 0b01 + } + case 
fpuUniOpAbs: + opcode = 0b000001 + if dst64bit { + ptype = 0b01 + } + default: + panic("BUG") + } + return 0b1111<<25 | ptype<<22 | 0b1<<21 | opcode<<15 | 0b1<<14 | rn<<5 | rd +} + +// encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en +func encodeCnvBetweenFloatInt(i *instruction) uint32 { + rd := regNumberInEncoding[i.rd.realReg()] + rn := regNumberInEncoding[i.rn.realReg()] + + var opcode uint32 + var rmode uint32 + var ptype uint32 + var sf uint32 + switch i.kind { + case intToFpu: // Either UCVTF or SCVTF. + rmode = 0b00 + + signed := i.u1 == 1 + src64bit := i.u2 == 1 + dst64bit := i.u3 == 1 + if signed { + opcode = 0b010 + } else { + opcode = 0b011 + } + if src64bit { + sf = 0b1 + } + if dst64bit { + ptype = 0b01 + } else { + ptype = 0b00 + } + case fpuToInt: // Either FCVTZU or FCVTZS. + rmode = 0b11 + + signed := i.u1 == 1 + src64bit := i.u2 == 1 + dst64bit := i.u3 == 1 + + if signed { + opcode = 0b000 + } else { + opcode = 0b001 + } + if dst64bit { + sf = 0b1 + } + if src64bit { + ptype = 0b01 + } else { + ptype = 0b00 + } + } + return sf<<31 | 0b1111<<25 | ptype<<22 | 0b1<<21 | rmode<<19 | opcode<<16 | rn<<5 | rd +} + +// encodeAdr encodes a PC-relative ADR instruction. 
+// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/ADR--Form-PC-relative-address- +func encodeAdr(rd uint32, offset uint32) uint32 { + if offset >= 1<<20 { + panic("BUG: too large adr instruction") + } + return offset&0b11<<29 | 0b1<<28 | offset&0b1111111111_1111111100<<3 | rd +} + +// encodeFpuCSel encodes as "Floating-point conditional select" in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en +func encodeFpuCSel(rd, rn, rm uint32, c condFlag, _64bit bool) uint32 { + var ftype uint32 + if _64bit { + ftype = 0b01 // double precision. + } + return 0b1111<<25 | ftype<<22 | 0b1<<21 | rm<<16 | uint32(c)<<12 | 0b11<<10 | rn<<5 | rd +} + +// encodeMoveToVec encodes as "Move general-purpose register to a vector element" (represented as `ins`) in +// https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/MOV--vector--from-general- +// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--from-general---Move-general-purpose-register-to-a-vector-element--an-alias-of-INS--general--?lang=en +func encodeMoveToVec(rd, rn uint32, arr vecArrangement, index vecIndex) uint32 { + var imm5 uint32 + switch arr { + case vecArrangementB: + imm5 |= 0b1 + imm5 |= uint32(index) << 1 + if index > 0b1111 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", index)) + } + case vecArrangementH: + imm5 |= 0b10 + imm5 |= uint32(index) << 2 + if index > 0b111 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", index)) + } + case vecArrangementS: + imm5 |= 0b100 + imm5 |= uint32(index) << 3 + if index > 0b11 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", index)) + } + case vecArrangementD: + imm5 |= 0b1000 + imm5 |= uint32(index) << 4 + if index > 0b1 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", index)) + } 
+ default: + panic("Unsupported arrangement " + arr.String()) + } + + return 0b01001110000<<21 | imm5<<16 | 0b000111<<10 | rn<<5 | rd +} + +// encodeMoveToVec encodes as "Move vector element to another vector element, mov (element)" (represented as `ins`) in +// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--element---Move-vector-element-to-another-vector-element--an-alias-of-INS--element--?lang=en +// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/INS--element---Insert-vector-element-from-another-vector-element-?lang=en +func encodeVecMovElement(rd, rn uint32, arr vecArrangement, srcIndex, dstIndex uint32) uint32 { + var imm4, imm5 uint32 + switch arr { + case vecArrangementB: + imm5 |= 0b1 + imm5 |= srcIndex << 1 + imm4 = dstIndex + if srcIndex > 0b1111 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", srcIndex)) + } + case vecArrangementH: + imm5 |= 0b10 + imm5 |= srcIndex << 2 + imm4 = dstIndex << 1 + if srcIndex > 0b111 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", srcIndex)) + } + case vecArrangementS: + imm5 |= 0b100 + imm5 |= srcIndex << 3 + imm4 = dstIndex << 2 + if srcIndex > 0b11 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", srcIndex)) + } + case vecArrangementD: + imm5 |= 0b1000 + imm5 |= srcIndex << 4 + imm4 = dstIndex << 3 + if srcIndex > 0b1 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", srcIndex)) + } + default: + panic("Unsupported arrangement " + arr.String()) + } + + return 0b01101110000<<21 | imm5<<16 | imm4<<11 | 0b1<<10 | rn<<5 | rd +} + +// encodeUnconditionalBranchReg encodes as "Unconditional branch (register)" in: +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +func encodeUnconditionalBranchReg(rn uint32, link bool) uint32 { + var opc uint32 + if link { + 
opc = 0b0001 + } + return 0b1101011<<25 | opc<<21 | 0b11111<<16 | rn<<5 +} + +// encodeMoveFromVec encodes as "Move vector element to a general-purpose register" +// (represented as `umov` when dest is 32-bit, `umov` otherwise) in +// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en +// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--to-general---Move-vector-element-to-general-purpose-register--an-alias-of-UMOV-?lang=en +func encodeMoveFromVec(rd, rn uint32, arr vecArrangement, index vecIndex, signed bool) uint32 { + var op, imm4, q, imm5 uint32 + switch { + case arr == vecArrangementB: + imm5 |= 0b1 + imm5 |= uint32(index) << 1 + if index > 0b1111 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", index)) + } + case arr == vecArrangementH: + imm5 |= 0b10 + imm5 |= uint32(index) << 2 + if index > 0b111 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", index)) + } + case arr == vecArrangementS && signed: + q = 0b1 + fallthrough + case arr == vecArrangementS: + imm5 |= 0b100 + imm5 |= uint32(index) << 3 + if index > 0b11 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", index)) + } + case arr == vecArrangementD && !signed: + imm5 |= 0b1000 + imm5 |= uint32(index) << 4 + q = 0b1 + if index > 0b1 { + panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", index)) + } + default: + panic("Unsupported arrangement " + arr.String()) + } + if signed { + op, imm4 = 0, 0b0101 + } else { + op, imm4 = 0, 0b0111 + } + return op<<29 | 0b01110000<<21 | q<<30 | imm5<<16 | imm4<<11 | 1<<10 | rn<<5 | rd +} + +// encodeVecDup encodes as "Duplicate general-purpose register to vector" DUP (general) +// (represented as `dup`) +// 
https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/DUP--general---Duplicate-general-purpose-register-to-vector-?lang=en +func encodeVecDup(rd, rn uint32, arr vecArrangement) uint32 { + var q, imm5 uint32 + switch arr { + case vecArrangement8B: + q, imm5 = 0b0, 0b1 + case vecArrangement16B: + q, imm5 = 0b1, 0b1 + case vecArrangement4H: + q, imm5 = 0b0, 0b10 + case vecArrangement8H: + q, imm5 = 0b1, 0b10 + case vecArrangement2S: + q, imm5 = 0b0, 0b100 + case vecArrangement4S: + q, imm5 = 0b1, 0b100 + case vecArrangement2D: + q, imm5 = 0b1, 0b1000 + default: + panic("Unsupported arrangement " + arr.String()) + } + return q<<30 | 0b001110000<<21 | imm5<<16 | 0b000011<<10 | rn<<5 | rd +} + +// encodeVecDup encodes as "Duplicate vector element to vector or scalar" DUP (element). +// (represented as `dup`) +// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/DUP--element---Duplicate-vector-element-to-vector-or-scalar- +func encodeVecDupElement(rd, rn uint32, arr vecArrangement, srcIndex vecIndex) uint32 { + var q, imm5 uint32 + q = 0b1 + switch arr { + case vecArrangementB: + imm5 |= 0b1 + imm5 |= uint32(srcIndex) << 1 + case vecArrangementH: + imm5 |= 0b10 + imm5 |= uint32(srcIndex) << 2 + case vecArrangementS: + imm5 |= 0b100 + imm5 |= uint32(srcIndex) << 3 + case vecArrangementD: + imm5 |= 0b1000 + imm5 |= uint32(srcIndex) << 4 + default: + panic("unsupported arrangement" + arr.String()) + } + + return q<<30 | 0b001110000<<21 | imm5<<16 | 0b1<<10 | rn<<5 | rd +} + +// encodeVecExtract encodes as "Advanced SIMD extract." +// Currently only `ext` is defined. 
+// https://developer.arm.com/documentation/ddi0602/2023-06/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp +// https://developer.arm.com/documentation/ddi0602/2023-06/SIMD-FP-Instructions/EXT--Extract-vector-from-pair-of-vectors-?lang=en +func encodeVecExtract(rd, rn, rm uint32, arr vecArrangement, index uint32) uint32 { + var q, imm4 uint32 + switch arr { + case vecArrangement8B: + q, imm4 = 0, 0b0111&uint32(index) + case vecArrangement16B: + q, imm4 = 1, 0b1111&uint32(index) + default: + panic("Unsupported arrangement " + arr.String()) + } + return q<<30 | 0b101110000<<21 | rm<<16 | imm4<<11 | rn<<5 | rd +} + +// encodeVecPermute encodes as "Advanced SIMD permute." +// https://developer.arm.com/documentation/ddi0602/2023-06/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp +func encodeVecPermute(op vecOp, rd, rn, rm uint32, arr vecArrangement) uint32 { + var q, size, opcode uint32 + switch op { + case vecOpZip1: + opcode = 0b011 + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q = arrToSizeQEncoded(arr) + default: + panic("TODO: " + op.String()) + } + return q<<30 | 0b001110<<24 | size<<22 | rm<<16 | opcode<<12 | 0b10<<10 | rn<<5 | rd +} + +// encodeConditionalSelect encodes as "Conditional select" in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#condsel +func encodeConditionalSelect(kind instructionKind, rd, rn, rm uint32, c condFlag, _64bit bool) uint32 { + if kind != cSel { + panic("TODO: support other conditional select") + } + + ret := 0b110101<<23 | rm<<16 | uint32(c)<<12 | rn<<5 | rd + if _64bit { + ret |= 0b1 << 31 + } + return ret +} + +const dummyInstruction uint32 = 0x14000000 // "b 0" + +// encodeLoadFpuConst32 encodes the following three instructions: +// +// ldr s8, #8 ;; literal load of data.f32 +// b 8 ;; skip the data +// data.f32 xxxxxxx +func 
encodeLoadFpuConst32(c backend.Compiler, rd uint32, rawF32 uint64) { + c.Emit4Bytes( + // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en + 0b111<<26 | (0x8/4)<<5 | rd, + ) + c.Emit4Bytes(encodeUnconditionalBranch(false, 8)) // b 8 + if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { + // Inlined data.f32 cannot be disassembled, so we add a dummy instruction here. + c.Emit4Bytes(dummyInstruction) + } else { + c.Emit4Bytes(uint32(rawF32)) // data.f32 xxxxxxx + } +} + +// encodeLoadFpuConst64 encodes the following three instructions: +// +// ldr d8, #8 ;; literal load of data.f64 +// b 12 ;; skip the data +// data.f64 xxxxxxx +func encodeLoadFpuConst64(c backend.Compiler, rd uint32, rawF64 uint64) { + c.Emit4Bytes( + // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en + 0b1<<30 | 0b111<<26 | (0x8/4)<<5 | rd, + ) + c.Emit4Bytes(encodeUnconditionalBranch(false, 12)) // b 12 + if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { + // Inlined data.f64 cannot be disassembled, so we add dummy instructions here. 
+ c.Emit4Bytes(dummyInstruction) + c.Emit4Bytes(dummyInstruction) + } else { + // data.f64 xxxxxxx + c.Emit4Bytes(uint32(rawF64)) + c.Emit4Bytes(uint32(rawF64 >> 32)) + } +} + +// encodeLoadFpuConst128 encodes the following three instructions: +// +// ldr v8, #8 ;; literal load of data.f64 +// b 20 ;; skip the data +// data.v128 xxxxxxx +func encodeLoadFpuConst128(c backend.Compiler, rd uint32, lo, hi uint64) { + c.Emit4Bytes( + // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en + 0b1<<31 | 0b111<<26 | (0x8/4)<<5 | rd, + ) + c.Emit4Bytes(encodeUnconditionalBranch(false, 20)) // b 20 + if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { + // Inlined data.v128 cannot be disassembled, so we add dummy instructions here. + c.Emit4Bytes(dummyInstruction) + c.Emit4Bytes(dummyInstruction) + c.Emit4Bytes(dummyInstruction) + c.Emit4Bytes(dummyInstruction) + } else { + // data.v128 xxxxxxx + c.Emit4Bytes(uint32(lo)) + c.Emit4Bytes(uint32(lo >> 32)) + c.Emit4Bytes(uint32(hi)) + c.Emit4Bytes(uint32(hi >> 32)) + } +} + +// encodeAluRRRR encodes as Data-processing (3 source) in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en +func encodeAluRRRR(op aluOp, rd, rn, rm, ra, _64bit uint32) uint32 { + var oO, op31 uint32 + switch op { + case aluOpMAdd: + op31, oO = 0b000, 0b0 + case aluOpMSub: + op31, oO = 0b000, 0b1 + default: + panic("TODO/BUG") + } + return _64bit<<31 | 0b11011<<24 | op31<<21 | rm<<16 | oO<<15 | ra<<10 | rn<<5 | rd +} + +// encodeBitRR encodes as Data-processing (1 source) in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en +func encodeBitRR(op bitOp, rd, rn, _64bit uint32) uint32 { + var opcode2, opcode uint32 + switch op { + case bitOpRbit: + opcode2, opcode = 0b00000, 0b000000 + case bitOpClz: + opcode2, opcode = 0b00000, 
0b000100 + default: + panic("TODO/BUG") + } + return _64bit<<31 | 0b1_0_11010110<<21 | opcode2<<15 | opcode<<10 | rn<<5 | rd +} + +func encodeAsMov32(rn, rd uint32) uint32 { + // This is an alias of ORR (shifted register): + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register-- + return encodeLogicalShiftedRegister(0b001, 0, rn, 0, regNumberInEncoding[xzr], rd) +} + +// encodeExtend encodes extension instructions. +func encodeExtend(signed bool, from, to byte, rd, rn uint32) uint32 { + // UTXB: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM-?lang=en + // UTXH: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTH--Unsigned-Extend-Halfword--an-alias-of-UBFM-?lang=en + // STXB: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTB--Signed-Extend-Byte--an-alias-of-SBFM- + // STXH: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTH--Sign-Extend-Halfword--an-alias-of-SBFM- + // STXW: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM- + var _31to10 uint32 + switch { + case !signed && from == 8 && to == 32: + // 32-bit UXTB + _31to10 = 0b0101001100000000000111 + case !signed && from == 16 && to == 32: + // 32-bit UXTH + _31to10 = 0b0101001100000000001111 + case !signed && from == 8 && to == 64: + // 64-bit UXTB + _31to10 = 0b0101001100000000000111 + case !signed && from == 16 && to == 64: + // 64-bit UXTH + _31to10 = 0b0101001100000000001111 + case !signed && from == 32 && to == 64: + return encodeAsMov32(rn, rd) + case signed && from == 8 && to == 32: + // 32-bit SXTB + _31to10 = 0b0001001100000000000111 + case signed && from == 16 && to == 32: + // 32-bit SXTH + _31to10 = 0b0001001100000000001111 + case signed && from == 8 && to == 64: + // 64-bit SXTB + _31to10 = 
0b1001001101000000000111 + case signed && from == 16 && to == 64: + // 64-bit SXTH + _31to10 = 0b1001001101000000001111 + case signed && from == 32 && to == 64: + // SXTW + _31to10 = 0b1001001101000000011111 + default: + panic("BUG") + } + return _31to10<<10 | rn<<5 | rd +} + +func encodeLoadOrStore(kind instructionKind, rt uint32, amode addressMode) uint32 { + var _22to31 uint32 + var bits int64 + switch kind { + case uLoad8: + _22to31 = 0b0011100001 + bits = 8 + case sLoad8: + _22to31 = 0b0011100010 + bits = 8 + case uLoad16: + _22to31 = 0b0111100001 + bits = 16 + case sLoad16: + _22to31 = 0b0111100010 + bits = 16 + case uLoad32: + _22to31 = 0b1011100001 + bits = 32 + case sLoad32: + _22to31 = 0b1011100010 + bits = 32 + case uLoad64: + _22to31 = 0b1111100001 + bits = 64 + case fpuLoad32: + _22to31 = 0b1011110001 + bits = 32 + case fpuLoad64: + _22to31 = 0b1111110001 + bits = 64 + case fpuLoad128: + _22to31 = 0b0011110011 + bits = 128 + case store8: + _22to31 = 0b0011100000 + bits = 8 + case store16: + _22to31 = 0b0111100000 + bits = 16 + case store32: + _22to31 = 0b1011100000 + bits = 32 + case store64: + _22to31 = 0b1111100000 + bits = 64 + case fpuStore32: + _22to31 = 0b1011110000 + bits = 32 + case fpuStore64: + _22to31 = 0b1111110000 + bits = 64 + case fpuStore128: + _22to31 = 0b0011110010 + bits = 128 + default: + panic("BUG") + } + + switch amode.kind { + case addressModeKindRegScaledExtended: + return encodeLoadOrStoreExtended(_22to31, + regNumberInEncoding[amode.rn.RealReg()], + regNumberInEncoding[amode.rm.RealReg()], + rt, true, amode.extOp) + case addressModeKindRegScaled: + return encodeLoadOrStoreExtended(_22to31, + regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()], + rt, true, extendOpNone) + case addressModeKindRegExtended: + return encodeLoadOrStoreExtended(_22to31, + regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()], + rt, false, amode.extOp) + case addressModeKindRegReg: + return 
encodeLoadOrStoreExtended(_22to31, + regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()], + rt, false, extendOpNone) + case addressModeKindRegSignedImm9: + // e.g. https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled-- + return encodeLoadOrStoreSIMM9(_22to31, 0b00 /* unscaled */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm) + case addressModeKindPostIndex: + return encodeLoadOrStoreSIMM9(_22to31, 0b01 /* post index */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm) + case addressModeKindPreIndex: + return encodeLoadOrStoreSIMM9(_22to31, 0b11 /* pre index */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm) + case addressModeKindRegUnsignedImm12: + // "unsigned immediate" in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en + rn := regNumberInEncoding[amode.rn.RealReg()] + imm := amode.imm + div := bits / 8 + if imm != 0 && !offsetFitsInAddressModeKindRegUnsignedImm12(byte(bits), imm) { + panic("BUG") + } + imm /= div + return _22to31<<22 | 0b1<<24 | uint32(imm&0b111111111111)<<10 | rn<<5 | rt + default: + panic("BUG") + } +} + +// encodeVecLoad1R encodes as Load one single-element structure and Replicate to all lanes (of one register) in +// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LD1R--Load-one-single-element-structure-and-Replicate-to-all-lanes--of-one-register--?lang=en#sa_imm +func encodeVecLoad1R(rt, rn uint32, arr vecArrangement) uint32 { + size, q := arrToSizeQEncoded(arr) + return q<<30 | 0b001101010000001100<<12 | size<<10 | rn<<5 | rt +} + +// encodeAluBitmaskImmediate encodes as Logical (immediate) in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en +func encodeAluBitmaskImmediate(op aluOp, rd, rn uint32, imm uint64, _64bit bool) uint32 { + var _31to23 uint32 + switch op { + case aluOpAnd: + _31to23 
= 0b00_100100
	case aluOpOrr:
		_31to23 = 0b01_100100
	case aluOpEor:
		_31to23 = 0b10_100100
	case aluOpAnds:
		_31to23 = 0b11_100100
	default:
		panic("BUG")
	}
	if _64bit {
		_31to23 |= 0b1 << 8
	}
	immr, imms, N := bitmaskImmediate(imm, _64bit)
	return _31to23<<23 | uint32(N)<<22 | uint32(immr)<<16 | uint32(imms)<<10 | rn<<5 | rd
}

// bitmaskImmediate decomposes c into the (immr, imms, N) fields of the
// "Logical (immediate)" bitmask-immediate encoding. c is presumed to already be
// a valid bitmask immediate (callers must guarantee that) — TODO confirm.
func bitmaskImmediate(c uint64, is64bit bool) (immr, imms, N byte) {
	// Find the smallest repeating element size within c, narrowing c to one element.
	var size uint32
	switch {
	case c != c>>32|c<<32:
		size = 64
	case c != c>>16|c<<48:
		size = 32
		c = uint64(int32(c))
	case c != c>>8|c<<56:
		size = 16
		c = uint64(int16(c))
	case c != c>>4|c<<60:
		size = 8
		c = uint64(int8(c))
	case c != c>>2|c<<62:
		size = 4
		c = uint64(int64(c<<60) >> 60)
	default:
		size = 2
		c = uint64(int64(c<<62) >> 62)
	}

	// Work on the complemented pattern when the top bit is set, remembering the inversion.
	neg := false
	if int64(c) < 0 {
		c = ^c
		neg = true
	}

	onesSize, nonZeroPos := getOnesSequenceSize(c)
	if neg {
		nonZeroPos = onesSize + nonZeroPos
		onesSize = size - onesSize
	}

	var mode byte = 32
	if is64bit && size == 64 {
		N, mode = 0b1, 64
	}

	immr = byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
	imms = byte((onesSize - 1) | 63&^(size<<1-1))
	return
}

// getOnesSequenceSize returns the length of the run of ones in x and the bit
// position at which it starts.
func getOnesSequenceSize(x uint64) (size, nonZeroPos uint32) {
	// Take 0b00111000 for example:
	y := getLowestBit(x)           // = 0b0000100
	nonZeroPos = setBitPos(y)      // = 2
	size = setBitPos(x+y) - nonZeroPos // = setBitPos(0b0100000) - 2 = 5 - 2 = 3
	return
}

// setBitPos returns the position of the single set bit in x (x must be a power of two).
func setBitPos(x uint64) (ret uint32) {
	for ; ; ret++ {
		if x == 0b1 {
			break
		}
		x = x >> 1
	}
	return
}

// encodeLoadOrStoreExtended encodes store/load instruction as "extended register offset" in Load/store register (register offset):
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
func encodeLoadOrStoreExtended(_22to32 uint32, rn, rm, rt uint32, scaled bool, extOp extendOp) uint32 {
	var option uint32
	switch extOp {
	case extendOpUXTW:
		option = 0b010
	case extendOpSXTW:
		option = 0b110
	case extendOpNone:
		option = 0b111
	default:
		panic("BUG")
	}
	var s uint32
	if scaled {
		s = 0b1
	}
	return _22to32<<22 | 0b1<<21 | rm<<16 | option<<13 | s<<12 | 0b10<<10 | rn<<5 | rt
}

// encodeLoadOrStoreSIMM9 encodes store/load instruction as one of post-index, pre-index or unscaled immediate as in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
func encodeLoadOrStoreSIMM9(_22to32, _1011 uint32, rn, rt uint32, imm9 int64) uint32 {
	return _22to32<<22 | (uint32(imm9)&0b111111111)<<12 | _1011<<10 | rn<<5 | rt
}

// encodeFpuRRR encodes as single or double precision (depending on `_64bit`) of Floating-point data-processing (2 source) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeFpuRRR(op fpuBinOp, rd, rn, rm uint32, _64bit bool) (ret uint32) {
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADD--vector--Add-vectors--scalar--floating-point-and-integer-
	var opcode uint32
	switch op {
	case fpuBinOpAdd:
		opcode = 0b0010
	case fpuBinOpSub:
		opcode = 0b0011
	case fpuBinOpMul:
		opcode = 0b0000
	case fpuBinOpDiv:
		opcode = 0b0001
	case fpuBinOpMax:
		opcode = 0b0100
	case fpuBinOpMin:
		opcode = 0b0101
	default:
		panic("BUG")
	}
	var ptype uint32
	if _64bit {
		ptype = 0b01
	}
	return 0b1111<<25 | ptype<<22 | 0b1<<21 | rm<<16 | opcode<<12 | 0b1<<11 | rn<<5 | rd
}

// encodeAluRRImm12 encodes as Add/subtract (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
func encodeAluRRImm12(op aluOp, rd, rn uint32, imm12 uint16, shiftBit byte, _64bit bool) uint32 {
	var _31to24 uint32
	switch op {
	case aluOpAdd:
		_31to24 = 0b00_10001
	case aluOpAddS:
		_31to24 = 0b01_10001
	case aluOpSub:
		_31to24 = 0b10_10001
	case aluOpSubS:
		_31to24 = 0b11_10001
	default:
		panic("BUG")
	}
	if _64bit {
		_31to24 |= 0b1 << 7
	}
	return _31to24<<24 | uint32(shiftBit)<<22 | uint32(imm12&0b111111111111)<<10 | rn<<5 | rd
}

// encodeAluRRRShift encodes as Data Processing (shifted register), depending on aluOp.
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
func encodeAluRRRShift(op aluOp, rd, rn, rm, amount uint32, shiftOp shiftOp, _64bit bool) uint32 {
	var _31to24 uint32
	var opc, n uint32
	switch op {
	case aluOpAdd:
		_31to24 = 0b00001011
	case aluOpAddS:
		_31to24 = 0b00101011
	case aluOpSub:
		_31to24 = 0b01001011
	case aluOpSubS:
		_31to24 = 0b01101011
	case aluOpAnd, aluOpOrr, aluOpEor, aluOpAnds:
		// "Logical (shifted register)".
		switch op {
		case aluOpAnd:
			// all zeros
		case aluOpOrr:
			opc = 0b01
		case aluOpEor:
			opc = 0b10
		case aluOpAnds:
			opc = 0b11
		}
		_31to24 = 0b000_01010
	default:
		panic(op.String())
	}

	if _64bit {
		_31to24 |= 0b1 << 7
	}

	var shift uint32
	switch shiftOp {
	case shiftOpLSL:
		shift = 0b00
	case shiftOpLSR:
		shift = 0b01
	case shiftOpASR:
		shift = 0b10
	default:
		panic(shiftOp.String())
	}
	return opc<<29 | n<<21 | _31to24<<24 | shift<<22 | rm<<16 | (amount << 10) | (rn << 5) | rd
}

// encodeAluRRRExtend encodes as "Add/subtract (extended register)" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_ext
func encodeAluRRRExtend(ao aluOp, rd, rn, rm uint32, extOp extendOp, to byte) uint32 {
	var s, op uint32
	switch ao {
	case aluOpAdd:
		op = 0b0
	case aluOpAddS:
		op, s = 0b0, 0b1
	case aluOpSub:
		op = 0b1
	case aluOpSubS:
		op, s = 0b1, 0b1
	default:
		panic("BUG: extended register operand can be used only for add/sub")
	}

	var sf uint32
	if to == 64 {
		sf = 0b1
	}

	var option uint32
	switch extOp {
	case extendOpUXTB:
		option = 0b000
	case extendOpUXTH:
		option = 
0b001 + case extendOpUXTW: + option = 0b010 + case extendOpSXTB: + option = 0b100 + case extendOpSXTH: + option = 0b101 + case extendOpSXTW: + option = 0b110 + case extendOpSXTX, extendOpUXTX: + panic(fmt.Sprintf("%s is essentially noop, and should be handled much earlier than encoding", extOp.String())) + } + return sf<<31 | op<<30 | s<<29 | 0b1011001<<21 | rm<<16 | option<<13 | rn<<5 | rd +} + +// encodeAluRRR encodes as Data Processing (register), depending on aluOp. +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en +func encodeAluRRR(op aluOp, rd, rn, rm uint32, _64bit, isRnSp bool) uint32 { + var _31to21, _15to10 uint32 + switch op { + case aluOpAdd: + if isRnSp { + // "Extended register" with UXTW. + _31to21 = 0b00001011_001 + _15to10 = 0b011000 + } else { + // "Shifted register" with shift = 0 + _31to21 = 0b00001011_000 + } + case aluOpAddS: + if isRnSp { + panic("TODO") + } + // "Shifted register" with shift = 0 + _31to21 = 0b00101011_000 + case aluOpSub: + if isRnSp { + // "Extended register" with UXTW. + _31to21 = 0b01001011_001 + _15to10 = 0b011000 + } else { + // "Shifted register" with shift = 0 + _31to21 = 0b01001011_000 + } + case aluOpSubS: + if isRnSp { + panic("TODO") + } + // "Shifted register" with shift = 0 + _31to21 = 0b01101011_000 + case aluOpAnd, aluOpOrr, aluOpOrn, aluOpEor, aluOpAnds: + // "Logical (shifted register)". + var opc, n uint32 + switch op { + case aluOpAnd: + // all zeros + case aluOpOrr: + opc = 0b01 + case aluOpOrn: + opc = 0b01 + n = 1 + case aluOpEor: + opc = 0b10 + case aluOpAnds: + opc = 0b11 + } + _31to21 = 0b000_01010_000 | opc<<8 | n + case aluOpLsl, aluOpAsr, aluOpLsr, aluOpRotR: + // "Data-processing (2 source)". 
+ _31to21 = 0b00011010_110 + switch op { + case aluOpLsl: + _15to10 = 0b001000 + case aluOpLsr: + _15to10 = 0b001001 + case aluOpAsr: + _15to10 = 0b001010 + case aluOpRotR: + _15to10 = 0b001011 + } + case aluOpSDiv: + // "Data-processing (2 source)". + _31to21 = 0b11010110 + _15to10 = 0b000011 + case aluOpUDiv: + // "Data-processing (2 source)". + _31to21 = 0b11010110 + _15to10 = 0b000010 + default: + panic(op.String()) + } + if _64bit { + _31to21 |= 0b1 << 10 + } + return _31to21<<21 | rm<<16 | (_15to10 << 10) | (rn << 5) | rd +} + +// encodeLogicalShiftedRegister encodes as Logical (shifted register) in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en +func encodeLogicalShiftedRegister(sf_opc uint32, shift_N uint32, rm uint32, imm6 uint32, rn, rd uint32) (ret uint32) { + ret = sf_opc << 29 + ret |= 0b01010 << 24 + ret |= shift_N << 21 + ret |= rm << 16 + ret |= imm6 << 10 + ret |= rn << 5 + ret |= rd + return +} + +// encodeAddSubtractImmediate encodes as Add/subtract (immediate) in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en +func encodeAddSubtractImmediate(sf_op_s uint32, sh uint32, imm12 uint32, rn, rd uint32) (ret uint32) { + ret = sf_op_s << 29 + ret |= 0b100010 << 23 + ret |= sh << 22 + ret |= imm12 << 10 + ret |= rn << 5 + ret |= rd + return +} + +// encodePreOrPostIndexLoadStorePair64 encodes as Load/store pair (pre/post-indexed) in +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers- +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers- +func encodePreOrPostIndexLoadStorePair64(pre bool, load bool, rn, rt, rt2 uint32, imm7 int64) (ret uint32) { + if imm7%8 != 0 { + panic("imm7 for pair load/store must be a multiple of 8") + } + imm7 /= 8 + ret = rt + ret |= rn << 5 + ret |= rt2 << 10 + ret |= (uint32(imm7) & 
0b1111111) << 15 + if load { + ret |= 0b1 << 22 + } + ret |= 0b101010001 << 23 + if pre { + ret |= 0b1 << 24 + } + return +} + +// encodeUnconditionalBranch encodes as B or BL instructions: +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch- +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link- +func encodeUnconditionalBranch(link bool, imm26 int64) (ret uint32) { + if imm26%4 != 0 { + panic("imm26 for branch must be a multiple of 4") + } + imm26 /= 4 + ret = uint32(imm26 & 0b11_11111111_11111111_11111111) + ret |= 0b101 << 26 + if link { + ret |= 0b1 << 31 + } + return +} + +// encodeCBZCBNZ encodes as either CBZ or CBNZ: +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero- +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero- +func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) { + ret = rt + ret |= imm19 << 5 + if nz { + ret |= 1 << 24 + } + ret |= 0b11010 << 25 + if _64bit { + ret |= 1 << 31 + } + return +} + +// encodeMoveWideImmediate encodes as either MOVZ, MOVN or MOVK, as Move wide (immediate) in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en +// +// "shift" must have been divided by 16 at this point. 
+func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) { + ret = rd + ret |= uint32(imm&0xffff) << 5 + ret |= (uint32(shift)) << 21 + ret |= 0b100101 << 23 + ret |= opc << 29 + ret |= uint32(_64bit) << 31 + return +} + +// encodeAluRRImm encodes as "Bitfield" in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm +func encodeAluRRImm(op aluOp, rd, rn, amount, _64bit uint32) uint32 { + var opc uint32 + var immr, imms uint32 + switch op { + case aluOpLsl: + // LSL (immediate) is an alias for UBFM. + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/UBFM--Unsigned-Bitfield-Move-?lang=en + opc = 0b10 + if amount == 0 { + // This can be encoded as NOP, but we don't do it for consistency: lsr xn, xm, #0 + immr = 0 + if _64bit == 1 { + imms = 0b111111 + } else { + imms = 0b11111 + } + } else { + if _64bit == 1 { + immr = 64 - amount + } else { + immr = (32 - amount) & 0b11111 + } + imms = immr - 1 + } + case aluOpLsr: + // LSR (immediate) is an alias for UBFM. + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en + opc = 0b10 + imms, immr = 0b011111|_64bit<<5, amount + case aluOpAsr: + // ASR (immediate) is an alias for SBFM. 
+ // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SBFM--Signed-Bitfield-Move-?lang=en + opc = 0b00 + imms, immr = 0b011111|_64bit<<5, amount + default: + panic(op.String()) + } + return _64bit<<31 | opc<<29 | 0b100110<<23 | _64bit<<22 | immr<<16 | imms<<10 | rn<<5 | rd +} + +// encodeVecLanes encodes as Data Processing (Advanced SIMD across lanes) depending on vecOp in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en +func encodeVecLanes(op vecOp, rd uint32, rn uint32, arr vecArrangement) uint32 { + var u, q, size, opcode uint32 + switch arr { + case vecArrangement8B: + q, size = 0b0, 0b00 + case vecArrangement16B: + q, size = 0b1, 0b00 + case vecArrangement4H: + q, size = 0, 0b01 + case vecArrangement8H: + q, size = 1, 0b01 + case vecArrangement4S: + q, size = 1, 0b10 + default: + panic("unsupported arrangement: " + arr.String()) + } + switch op { + case vecOpUaddlv: + u, opcode = 1, 0b00011 + case vecOpUminv: + u, opcode = 1, 0b11010 + case vecOpAddv: + u, opcode = 0, 0b11011 + default: + panic("unsupported or illegal vecOp: " + op.String()) + } + return q<<30 | u<<29 | 0b1110<<24 | size<<22 | 0b11000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd +} + +// encodeVecLanes encodes as Data Processing (Advanced SIMD scalar shift by immediate) depending on vecOp in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en +func encodeVecShiftImm(op vecOp, rd uint32, rn, amount uint32, arr vecArrangement) uint32 { + var u, q, immh, immb, opcode uint32 + switch op { + case vecOpSshll: + u, opcode = 0b0, 0b10100 + case vecOpUshll: + u, opcode = 0b1, 0b10100 + case vecOpSshr: + u, opcode = 0, 0b00000 + default: + panic("unsupported or illegal vecOp: " + op.String()) + } + switch arr { + case vecArrangement16B: + q = 0b1 + fallthrough + case vecArrangement8B: + immh = 
0b0001 + immb = 8 - uint32(amount&0b111) + case vecArrangement8H: + q = 0b1 + fallthrough + case vecArrangement4H: + v := 16 - uint32(amount&0b1111) + immb = v & 0b111 + immh = 0b0010 | (v >> 3) + case vecArrangement4S: + q = 0b1 + fallthrough + case vecArrangement2S: + v := 32 - uint32(amount&0b11111) + immb = v & 0b111 + immh = 0b0100 | (v >> 3) + case vecArrangement2D: + q = 0b1 + v := 64 - uint32(amount&0b111111) + immb = v & 0b111 + immh = 0b1000 | (v >> 3) + default: + panic("unsupported arrangement: " + arr.String()) + } + return q<<30 | u<<29 | 0b011110<<23 | immh<<19 | immb<<16 | 0b000001<<10 | opcode<<11 | 0b1<<10 | rn<<5 | rd +} + +// encodeVecTbl encodes as Data Processing (Advanced SIMD table lookup) in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp +// +// Note: tblOp may encode tbl1, tbl2... in the future. Currently, it is ignored. +func encodeVecTbl(nregs, rd, rn, rm uint32, arr vecArrangement) uint32 { + var q, op2, len, op uint32 + + switch nregs { + case 1: + // tbl: single-register + len = 0b00 + case 2: + // tbl2: 2-register table + len = 0b01 + default: + panic(fmt.Sprintf("unsupported number or registers %d", nregs)) + } + switch arr { + case vecArrangement8B: + q = 0b0 + case vecArrangement16B: + q = 0b1 + default: + panic("unsupported arrangement: " + arr.String()) + } + + return q<<30 | 0b001110<<24 | op2<<22 | rm<<16 | len<<13 | op<<12 | rn<<5 | rd +} + +// encodeVecMisc encodes as Data Processing (Advanced SIMD two-register miscellaneous) depending on vecOp in +// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp +func encodeAdvancedSIMDTwoMisc(op vecOp, rd, rn uint32, arr vecArrangement) uint32 { + var q, u, size, opcode uint32 + switch op { + case vecOpCnt: + opcode = 0b00101 + switch arr { + case vecArrangement8B: + q, size = 0b0, 
0b00 + case vecArrangement16B: + q, size = 0b1, 0b00 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpCmeq0: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + opcode = 0b01001 + size, q = arrToSizeQEncoded(arr) + case vecOpNot: + u = 1 + opcode = 0b00101 + switch arr { + case vecArrangement8B: + q, size = 0b0, 0b00 + case vecArrangement16B: + q, size = 0b1, 0b00 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpAbs: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + opcode = 0b01011 + u = 0b0 + size, q = arrToSizeQEncoded(arr) + case vecOpNeg: + if arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + opcode = 0b01011 + u = 0b1 + size, q = arrToSizeQEncoded(arr) + case vecOpFabs: + if arr < vecArrangement2S || arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + opcode = 0b01111 + u = 0b0 + size, q = arrToSizeQEncoded(arr) + case vecOpFneg: + if arr < vecArrangement2S || arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + opcode = 0b01111 + u = 0b1 + size, q = arrToSizeQEncoded(arr) + case vecOpFrintm: + u = 0b0 + opcode = 0b11001 + switch arr { + case vecArrangement2S: + q, size = 0b0, 0b00 + case vecArrangement4S: + q, size = 0b1, 0b00 + case vecArrangement2D: + q, size = 0b1, 0b01 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpFrintn: + u = 0b0 + opcode = 0b11000 + switch arr { + case vecArrangement2S: + q, size = 0b0, 0b00 + case vecArrangement4S: + q, size = 0b1, 0b00 + case vecArrangement2D: + q, size = 0b1, 0b01 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpFrintp: + u = 0b0 + opcode = 0b11000 + if arr < vecArrangement2S || arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q = arrToSizeQEncoded(arr) + case vecOpFrintz: + u = 0b0 + 
opcode = 0b11001 + if arr < vecArrangement2S || arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + size, q = arrToSizeQEncoded(arr) + case vecOpFsqrt: + if arr < vecArrangement2S || arr == vecArrangement1D { + panic("unsupported arrangement: " + arr.String()) + } + opcode = 0b11111 + u = 0b1 + size, q = arrToSizeQEncoded(arr) + case vecOpFcvtl: + opcode = 0b10111 + u = 0b0 + switch arr { + case vecArrangement2S: + size, q = 0b01, 0b0 + case vecArrangement4H: + size, q = 0b00, 0b0 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpFcvtn: + opcode = 0b10110 + u = 0b0 + switch arr { + case vecArrangement2S: + size, q = 0b01, 0b0 + case vecArrangement4H: + size, q = 0b00, 0b0 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpFcvtzs: + opcode = 0b11011 + u = 0b0 + switch arr { + case vecArrangement2S: + q, size = 0b0, 0b10 + case vecArrangement4S: + q, size = 0b1, 0b10 + case vecArrangement2D: + q, size = 0b1, 0b11 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpFcvtzu: + opcode = 0b11011 + u = 0b1 + switch arr { + case vecArrangement2S: + q, size = 0b0, 0b10 + case vecArrangement4S: + q, size = 0b1, 0b10 + case vecArrangement2D: + q, size = 0b1, 0b11 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpScvtf: + opcode = 0b11101 + u = 0b0 + switch arr { + case vecArrangement4S: + q, size = 0b1, 0b00 + case vecArrangement2S: + q, size = 0b0, 0b00 + case vecArrangement2D: + q, size = 0b1, 0b01 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpUcvtf: + opcode = 0b11101 + u = 0b1 + switch arr { + case vecArrangement4S: + q, size = 0b1, 0b00 + case vecArrangement2S: + q, size = 0b0, 0b00 + case vecArrangement2D: + q, size = 0b1, 0b01 + default: + panic("unsupported arrangement: " + arr.String()) + } + case vecOpSqxtn: + // When q == 1 it encodes sqxtn2 (operates on upper 64 bits). 
+ opcode = 0b10100 + u = 0b0 + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q = arrToSizeQEncoded(arr) + case vecOpUqxtn: + // When q == 1 it encodes uqxtn2 (operates on upper 64 bits). + opcode = 0b10100 + u = 0b1 + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q = arrToSizeQEncoded(arr) + case vecOpSqxtun: + // When q == 1 it encodes sqxtun2 (operates on upper 64 bits). + opcode = 0b10010 // 0b10100 + u = 0b1 + if arr > vecArrangement4S { + panic("unsupported arrangement: " + arr.String()) + } + size, q = arrToSizeQEncoded(arr) + case vecOpRev64: + opcode = 0b00000 + size, q = arrToSizeQEncoded(arr) + case vecOpXtn: + u = 0b0 + opcode = 0b10010 + size, q = arrToSizeQEncoded(arr) + case vecOpShll: + u = 0b1 + opcode = 0b10011 + switch arr { + case vecArrangement8B: + q, size = 0b0, 0b00 + case vecArrangement4H: + q, size = 0b0, 0b01 + case vecArrangement2S: + q, size = 0b0, 0b10 + default: + panic("unsupported arrangement: " + arr.String()) + } + default: + panic("unsupported or illegal vecOp: " + op.String()) + } + return q<<30 | u<<29 | 0b01110<<24 | size<<22 | 0b10000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd +} + +// brTableSequenceOffsetTableBegin is the offset inside the brTableSequence where the table begins after 4 instructions +const brTableSequenceOffsetTableBegin = 16 + +func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []uint32) { + tmpRegNumber := regNumberInEncoding[tmp] + indexNumber := regNumberInEncoding[index.RealReg()] + + // adr tmpReg, PC+16 (PC+16 is the address of the first label offset) + // ldrsw index, [tmpReg, index, UXTW 2] ;; index = int64(*(tmpReg + index*8)) + // add tmpReg, tmpReg, index + // br tmpReg + // [offset_to_l1, offset_to_l2, ..., offset_to_lN] + c.Emit4Bytes(encodeAdr(tmpRegNumber, 16)) + c.Emit4Bytes(encodeLoadOrStore(sLoad32, indexNumber, + addressMode{kind: addressModeKindRegScaledExtended, rn: 
tmpRegVReg, rm: index, extOp: extendOpUXTW}, + )) + c.Emit4Bytes(encodeAluRRR(aluOpAdd, tmpRegNumber, tmpRegNumber, indexNumber, true, false)) + c.Emit4Bytes(encodeUnconditionalBranchReg(tmpRegNumber, false)) + + // Offsets are resolved in ResolveRelativeAddress phase. + for _, offset := range targets { + if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { + // Inlined offset tables cannot be disassembled properly, so pad dummy instructions to make the debugging easier. + c.Emit4Bytes(dummyInstruction) + } else { + c.Emit4Bytes(offset) + } + } +} + +// encodeExitSequence matches the implementation detail of functionABI.emitGoEntryPreamble. +func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) { + // Restore the FP, SP and LR, and return to the Go code: + // ldr lr, [ctxReg, #GoReturnAddress] + // ldr fp, [ctxReg, #OriginalFramePointer] + // ldr tmp, [ctxReg, #OriginalStackPointer] + // mov sp, tmp ;; sp cannot be str'ed directly. + // ret ;; --> return to the Go code + + var ctxEvicted bool + if ctx := ctxReg.RealReg(); ctx == fp || ctx == lr { + // In order to avoid overwriting the context register, we move ctxReg to tmp. 
+ c.Emit4Bytes(encodeMov64(regNumberInEncoding[tmp], regNumberInEncoding[ctx], false, false)) + ctxReg = tmpRegVReg + ctxEvicted = true + } + + restoreLr := encodeLoadOrStore( + uLoad64, + regNumberInEncoding[lr], + addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: ctxReg, + imm: wazevoapi.ExecutionContextOffsetGoReturnAddress.I64(), + }, + ) + + restoreFp := encodeLoadOrStore( + uLoad64, + regNumberInEncoding[fp], + addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: ctxReg, + imm: wazevoapi.ExecutionContextOffsetOriginalFramePointer.I64(), + }, + ) + + restoreSpToTmp := encodeLoadOrStore( + uLoad64, + regNumberInEncoding[tmp], + addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: ctxReg, + imm: wazevoapi.ExecutionContextOffsetOriginalStackPointer.I64(), + }, + ) + + movTmpToSp := encodeAddSubtractImmediate(0b100, 0, 0, + regNumberInEncoding[tmp], regNumberInEncoding[sp]) + + c.Emit4Bytes(restoreFp) + c.Emit4Bytes(restoreLr) + c.Emit4Bytes(restoreSpToTmp) + c.Emit4Bytes(movTmpToSp) + c.Emit4Bytes(encodeRet()) + if !ctxEvicted { + // In order to have the fixed-length exit sequence, we need to padd the binary. + // Since this will never be reached, we insert a dummy instruction. 
+ c.Emit4Bytes(dummyInstruction) + } +} + +func encodeRet() uint32 { + // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine-?lang=en + return 0b1101011001011111<<16 | regNumberInEncoding[lr]<<5 +} + +func encodeAtomicRmw(op atomicRmwOp, rs, rt, rn uint32, size uint32) uint32 { + var _31to21, _15to10, sz uint32 + + switch size { + case 8: + sz = 0b11 + case 4: + sz = 0b10 + case 2: + sz = 0b01 + case 1: + sz = 0b00 + } + + _31to21 = 0b00111000_111 | sz<<9 + + switch op { + case atomicRmwOpAdd: + _15to10 = 0b000000 + case atomicRmwOpClr: + _15to10 = 0b000100 + case atomicRmwOpSet: + _15to10 = 0b001100 + case atomicRmwOpEor: + _15to10 = 0b001000 + case atomicRmwOpSwp: + _15to10 = 0b100000 + } + + return _31to21<<21 | rs<<16 | _15to10<<10 | rn<<5 | rt +} + +func encodeAtomicCas(rs, rt, rn uint32, size uint32) uint32 { + var _31to21, _15to10, sz uint32 + + switch size { + case 8: + sz = 0b11 + case 4: + sz = 0b10 + case 2: + sz = 0b01 + case 1: + sz = 0b00 + } + + _31to21 = 0b00001000_111 | sz<<9 + _15to10 = 0b111111 + + return _31to21<<21 | rs<<16 | _15to10<<10 | rn<<5 | rt +} + +func encodeAtomicLoadStore(rn, rt, size, l uint32) uint32 { + var _31to21, _20to16, _15to10, sz uint32 + + switch size { + case 8: + sz = 0b11 + case 4: + sz = 0b10 + case 2: + sz = 0b01 + case 1: + sz = 0b00 + } + + _31to21 = 0b00001000_100 | sz<<9 | l<<1 + _20to16 = 0b11111 + _15to10 = 0b111111 + + return _31to21<<21 | _20to16<<16 | _15to10<<10 | rn<<5 | rt +} + +func encodeDMB() uint32 { + return 0b11010101000000110011101110111111 +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go new file mode 100644 index 000000000..698b382d4 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go @@ -0,0 +1,301 @@ +package arm64 + +import 
( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +// lowerConstant allocates a new VReg and inserts the instruction to load the constant value. +func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) { + val := instr.Return() + valType := val.Type() + + vr = m.compiler.AllocateVReg(valType) + v := instr.ConstantVal() + m.insertLoadConstant(v, valType, vr) + return +} + +// InsertLoadConstantBlockArg implements backend.Machine. +func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) { + val := instr.Return() + valType := val.Type() + v := instr.ConstantVal() + load := m.allocateInstr() + load.asLoadConstBlockArg(v, valType, vr) + m.insert(load) +} + +func (m *machine) lowerLoadConstantBlockArgAfterRegAlloc(i *instruction) { + v, typ, dst := i.loadConstBlockArgData() + m.insertLoadConstant(v, typ, dst) +} + +func (m *machine) insertLoadConstant(v uint64, valType ssa.Type, vr regalloc.VReg) { + if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc. + v = v & ((1 << valType.Bits()) - 1) + } + + switch valType { + case ssa.TypeF32: + loadF := m.allocateInstr() + loadF.asLoadFpuConst32(vr, v) + m.insert(loadF) + case ssa.TypeF64: + loadF := m.allocateInstr() + loadF.asLoadFpuConst64(vr, v) + m.insert(loadF) + case ssa.TypeI32: + if v == 0 { + m.InsertMove(vr, xzrVReg, ssa.TypeI32) + } else { + m.lowerConstantI32(vr, int32(v)) + } + case ssa.TypeI64: + if v == 0 { + m.InsertMove(vr, xzrVReg, ssa.TypeI64) + } else { + m.lowerConstantI64(vr, int64(v)) + } + default: + panic("TODO") + } +} + +// The following logics are based on the old asm/arm64 package. 
+// https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go + +func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) { + // Following the logic here: + // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637 + ic := int64(uint32(c)) + if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) { + if isBitMaskImmediate(uint64(c), false) { + m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false) + return + } + } + + if t := const16bitAligned(int64(uint32(c))); t >= 0 { + // If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000 + // We could load it into temporary with movk. + m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false) + } else if t := const16bitAligned(int64(^c)); t >= 0 { + // Also, if the inverse of the const can fit within 16-bit range, do the same ^^. + m.insertMOVN(dst, uint64(^c>>(16*t)), t, false) + } else if isBitMaskImmediate(uint64(uint32(c)), false) { + m.lowerConstViaBitMaskImmediate(uint64(c), dst, false) + } else { + // Otherwise, we use MOVZ and MOVK to load it. + c16 := uint16(c) + m.insertMOVZ(dst, uint64(c16), 0, false) + c16 = uint16(uint32(c) >> 16) + m.insertMOVK(dst, uint64(c16), 1, false) + } +} + +func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) { + // Following the logic here: + // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852 + if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) { + if isBitMaskImmediate(uint64(c), true) { + m.lowerConstViaBitMaskImmediate(uint64(c), dst, true) + return + } + } + + if t := const16bitAligned(c); t >= 0 { + // If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000 + // We could load it into temporary with movk. 
+ m.insertMOVZ(dst, uint64(c)>>(16*t), t, true) + } else if t := const16bitAligned(^c); t >= 0 { + // Also, if the reverse of the const can fit within 16-bit range, do the same ^^. + m.insertMOVN(dst, uint64(^c)>>(16*t), t, true) + } else if isBitMaskImmediate(uint64(c), true) { + m.lowerConstViaBitMaskImmediate(uint64(c), dst, true) + } else { + m.load64bitConst(c, dst) + } +} + +func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) { + instr := m.allocateInstr() + instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64) + m.insert(instr) +} + +// isBitMaskImmediate determines if the value can be encoded as "bitmask immediate". +// +// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits. +// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits. +// +// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate- +func isBitMaskImmediate(x uint64, _64 bool) bool { + // All zeros and ones are not "bitmask immediate" by definition. + if x == 0 || (_64 && x == 0xffff_ffff_ffff_ffff) || (!_64 && x == 0xffff_ffff) { + return false + } + + switch { + case x != x>>32|x<<32: + // e = 64 + case x != x>>16|x<<48: + // e = 32 (x == x>>32|x<<32). + // e.g. 0x00ff_ff00_00ff_ff00 + x = uint64(int32(x)) + case x != x>>8|x<<56: + // e = 16 (x == x>>16|x<<48). + // e.g. 0x00ff_00ff_00ff_00ff + x = uint64(int16(x)) + case x != x>>4|x<<60: + // e = 8 (x == x>>8|x<<56). + // e.g. 0x0f0f_0f0f_0f0f_0f0f + x = uint64(int8(x)) + default: + // e = 4 or 2. + return true + } + return sequenceOfSetbits(x) || sequenceOfSetbits(^x) +} + +// sequenceOfSetbits returns true if the number's binary representation is the sequence set bit (1). 
+// For example: 0b1110 -> true, 0b1010 -> false +func sequenceOfSetbits(x uint64) bool { + y := getLowestBit(x) + // If x is a sequence of set bit, this should results in the number + // with only one set bit (i.e. power of two). + y += x + return (y-1)&y == 0 +} + +func getLowestBit(x uint64) uint64 { + return x & (^x + 1) +} + +// const16bitAligned check if the value is on the 16-bit alignment. +// If so, returns the shift num divided by 16, and otherwise -1. +func const16bitAligned(v int64) (ret int) { + ret = -1 + for s := 0; s < 64; s += 16 { + if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 { + ret = s / 16 + break + } + } + return +} + +// load64bitConst loads a 64-bit constant into the register, following the same logic to decide how to load large 64-bit +// consts as in the Go assembler. +// +// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759 +func (m *machine) load64bitConst(c int64, dst regalloc.VReg) { + var bits [4]uint64 + var zeros, negs int + for i := 0; i < 4; i++ { + bits[i] = uint64(c) >> uint(i*16) & 0xffff + if v := bits[i]; v == 0 { + zeros++ + } else if v == 0xffff { + negs++ + } + } + + if zeros == 3 { + // one MOVZ instruction. + for i, v := range bits { + if v != 0 { + m.insertMOVZ(dst, v, i, true) + } + } + } else if negs == 3 { + // one MOVN instruction. + for i, v := range bits { + if v != 0xffff { + v = ^v + m.insertMOVN(dst, v, i, true) + } + } + } else if zeros == 2 { + // one MOVZ then one OVK. + var movz bool + for i, v := range bits { + if !movz && v != 0 { // MOVZ. + m.insertMOVZ(dst, v, i, true) + movz = true + } else if v != 0 { + m.insertMOVK(dst, v, i, true) + } + } + + } else if negs == 2 { + // one MOVN then one or two MOVK. + var movn bool + for i, v := range bits { // Emit MOVN. 
+ if !movn && v != 0xffff { + v = ^v + // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN + m.insertMOVN(dst, v, i, true) + movn = true + } else if v != 0xffff { + m.insertMOVK(dst, v, i, true) + } + } + + } else if zeros == 1 { + // one MOVZ then two MOVK. + var movz bool + for i, v := range bits { + if !movz && v != 0 { // MOVZ. + m.insertMOVZ(dst, v, i, true) + movz = true + } else if v != 0 { + m.insertMOVK(dst, v, i, true) + } + } + + } else if negs == 1 { + // one MOVN then two MOVK. + var movn bool + for i, v := range bits { // Emit MOVN. + if !movn && v != 0xffff { + v = ^v + // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN + m.insertMOVN(dst, v, i, true) + movn = true + } else if v != 0xffff { + m.insertMOVK(dst, v, i, true) + } + } + + } else { + // one MOVZ then up to three MOVK. + var movz bool + for i, v := range bits { + if !movz && v != 0 { // MOVZ. + m.insertMOVZ(dst, v, i, true) + movz = true + } else if v != 0 { + m.insertMOVK(dst, v, i, true) + } + } + } +} + +func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) { + instr := m.allocateInstr() + instr.asMOVZ(dst, v, uint64(shift), dst64) + m.insert(instr) +} + +func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) { + instr := m.allocateInstr() + instr.asMOVK(dst, v, uint64(shift), dst64) + m.insert(instr) +} + +func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) { + instr := m.allocateInstr() + instr.asMOVN(dst, v, uint64(shift), dst64) + m.insert(instr) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go new file mode 100644 index 000000000..2bb234e8c --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -0,0 +1,2221 @@ +package arm64 + 
+// Files prefixed as lower_instr** do the instruction selection, meaning that lowering SSA level instructions +// into machine specific instructions. +// +// Importantly, what the lower** functions does includes tree-matching; find the pattern from the given instruction tree, +// and merge the multiple instructions if possible. It can be considered as "N:1" instruction selection. + +import ( + "fmt" + "math" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// LowerSingleBranch implements backend.Machine. +func (m *machine) LowerSingleBranch(br *ssa.Instruction) { + ectx := m.executableContext + switch br.Opcode() { + case ssa.OpcodeJump: + _, _, targetBlk := br.BranchData() + if br.IsFallthroughJump() { + return + } + b := m.allocateInstr() + target := ectx.GetOrAllocateSSABlockLabel(targetBlk) + if target == labelReturn { + b.asRet() + } else { + b.asBr(target) + } + m.insert(b) + case ssa.OpcodeBrTable: + m.lowerBrTable(br) + default: + panic("BUG: unexpected branch opcode" + br.Opcode().String()) + } +} + +func (m *machine) lowerBrTable(i *ssa.Instruction) { + index, targets := i.BrTableData() + indexOperand := m.getOperand_NR(m.compiler.ValueDefinition(index), extModeNone) + + // Firstly, we have to do the bounds check of the index, and + // set it to the default target (sitting at the end of the list) if it's out of bounds. + + // mov maxIndexReg #maximum_index + // subs wzr, index, maxIndexReg + // csel adjustedIndex, maxIndexReg, index, hs ;; if index is higher or equal than maxIndexReg. 
+ maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32) + m.lowerConstantI32(maxIndexReg, int32(len(targets)-1)) + subs := m.allocateInstr() + subs.asALU(aluOpSubS, operandNR(xzrVReg), indexOperand, operandNR(maxIndexReg), false) + m.insert(subs) + csel := m.allocateInstr() + adjustedIndex := m.compiler.AllocateVReg(ssa.TypeI32) + csel.asCSel(operandNR(adjustedIndex), operandNR(maxIndexReg), indexOperand, hs, false) + m.insert(csel) + + brSequence := m.allocateInstr() + + tableIndex := m.addJmpTableTarget(targets) + brSequence.asBrTableSequence(adjustedIndex, tableIndex, len(targets)) + m.insert(brSequence) +} + +// LowerConditionalBranch implements backend.Machine. +func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { + exctx := m.executableContext + cval, args, targetBlk := b.BranchData() + if len(args) > 0 { + panic(fmt.Sprintf( + "conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", + exctx.CurrentSSABlk, + targetBlk, + )) + } + + target := exctx.GetOrAllocateSSABlockLabel(targetBlk) + cvalDef := m.compiler.ValueDefinition(cval) + + switch { + case m.compiler.MatchInstr(cvalDef, ssa.OpcodeIcmp): // This case, we can use the ALU flag set by SUBS instruction. + cvalInstr := cvalDef.Instr + x, y, c := cvalInstr.IcmpData() + cc, signed := condFlagFromSSAIntegerCmpCond(c), c.Signed() + if b.Opcode() == ssa.OpcodeBrz { + cc = cc.invert() + } + + if !m.tryLowerBandToFlag(x, y) { + m.lowerIcmpToFlag(x, y, signed) + } + cbr := m.allocateInstr() + cbr.asCondBr(cc.asCond(), target, false /* ignored */) + m.insert(cbr) + cvalDef.Instr.MarkLowered() + case m.compiler.MatchInstr(cvalDef, ssa.OpcodeFcmp): // This case we can use the Fpu flag directly. 
+ cvalInstr := cvalDef.Instr + x, y, c := cvalInstr.FcmpData() + cc := condFlagFromSSAFloatCmpCond(c) + if b.Opcode() == ssa.OpcodeBrz { + cc = cc.invert() + } + m.lowerFcmpToFlag(x, y) + cbr := m.allocateInstr() + cbr.asCondBr(cc.asCond(), target, false /* ignored */) + m.insert(cbr) + cvalDef.Instr.MarkLowered() + default: + rn := m.getOperand_NR(cvalDef, extModeNone) + var c cond + if b.Opcode() == ssa.OpcodeBrz { + c = registerAsRegZeroCond(rn.nr()) + } else { + c = registerAsRegNotZeroCond(rn.nr()) + } + cbr := m.allocateInstr() + cbr.asCondBr(c, target, false) + m.insert(cbr) + } +} + +func (m *machine) tryLowerBandToFlag(x, y ssa.Value) (ok bool) { + xx := m.compiler.ValueDefinition(x) + yy := m.compiler.ValueDefinition(y) + if xx.IsFromInstr() && xx.Instr.Constant() && xx.Instr.ConstantVal() == 0 { + if m.compiler.MatchInstr(yy, ssa.OpcodeBand) { + bandInstr := yy.Instr + m.lowerBitwiseAluOp(bandInstr, aluOpAnds, true) + ok = true + bandInstr.MarkLowered() + return + } + } + + if yy.IsFromInstr() && yy.Instr.Constant() && yy.Instr.ConstantVal() == 0 { + if m.compiler.MatchInstr(xx, ssa.OpcodeBand) { + bandInstr := xx.Instr + m.lowerBitwiseAluOp(bandInstr, aluOpAnds, true) + ok = true + bandInstr.MarkLowered() + return + } + } + return +} + +// LowerInstr implements backend.Machine. 
+func (m *machine) LowerInstr(instr *ssa.Instruction) { + if l := instr.SourceOffset(); l.Valid() { + info := m.allocateInstr().asEmitSourceOffsetInfo(l) + m.insert(info) + } + + switch op := instr.Opcode(); op { + case ssa.OpcodeBrz, ssa.OpcodeBrnz, ssa.OpcodeJump, ssa.OpcodeBrTable: + panic("BUG: branching instructions are handled by LowerBranches") + case ssa.OpcodeReturn: + panic("BUG: return must be handled by backend.Compiler") + case ssa.OpcodeIadd, ssa.OpcodeIsub: + m.lowerSubOrAdd(instr, op == ssa.OpcodeIadd) + case ssa.OpcodeFadd, ssa.OpcodeFsub, ssa.OpcodeFmul, ssa.OpcodeFdiv, ssa.OpcodeFmax, ssa.OpcodeFmin: + m.lowerFpuBinOp(instr) + case ssa.OpcodeIconst, ssa.OpcodeF32const, ssa.OpcodeF64const: // Constant instructions are inlined. + case ssa.OpcodeExitWithCode: + execCtx, code := instr.ExitWithCodeData() + m.lowerExitWithCode(m.compiler.VRegOf(execCtx), code) + case ssa.OpcodeExitIfTrueWithCode: + execCtx, c, code := instr.ExitIfTrueWithCodeData() + m.lowerExitIfTrueWithCode(m.compiler.VRegOf(execCtx), c, code) + case ssa.OpcodeStore, ssa.OpcodeIstore8, ssa.OpcodeIstore16, ssa.OpcodeIstore32: + m.lowerStore(instr) + case ssa.OpcodeLoad: + dst := instr.Return() + ptr, offset, typ := instr.LoadData() + m.lowerLoad(ptr, offset, typ, dst) + case ssa.OpcodeVZeroExtLoad: + dst := instr.Return() + ptr, offset, typ := instr.VZeroExtLoadData() + m.lowerLoad(ptr, offset, typ, dst) + case ssa.OpcodeUload8, ssa.OpcodeUload16, ssa.OpcodeUload32, ssa.OpcodeSload8, ssa.OpcodeSload16, ssa.OpcodeSload32: + ptr, offset, _ := instr.LoadData() + ret := m.compiler.VRegOf(instr.Return()) + m.lowerExtLoad(op, ptr, offset, ret) + case ssa.OpcodeCall, ssa.OpcodeCallIndirect: + m.lowerCall(instr) + case ssa.OpcodeIcmp: + m.lowerIcmp(instr) + case ssa.OpcodeVIcmp: + m.lowerVIcmp(instr) + case ssa.OpcodeVFcmp: + m.lowerVFcmp(instr) + case ssa.OpcodeVCeil: + m.lowerVecMisc(vecOpFrintp, instr) + case ssa.OpcodeVFloor: + m.lowerVecMisc(vecOpFrintm, instr) + case ssa.OpcodeVTrunc: + 
m.lowerVecMisc(vecOpFrintz, instr) + case ssa.OpcodeVNearest: + m.lowerVecMisc(vecOpFrintn, instr) + case ssa.OpcodeVMaxPseudo: + m.lowerVMinMaxPseudo(instr, true) + case ssa.OpcodeVMinPseudo: + m.lowerVMinMaxPseudo(instr, false) + case ssa.OpcodeBand: + m.lowerBitwiseAluOp(instr, aluOpAnd, false) + case ssa.OpcodeBor: + m.lowerBitwiseAluOp(instr, aluOpOrr, false) + case ssa.OpcodeBxor: + m.lowerBitwiseAluOp(instr, aluOpEor, false) + case ssa.OpcodeIshl: + m.lowerShifts(instr, extModeNone, aluOpLsl) + case ssa.OpcodeSshr: + if instr.Return().Type().Bits() == 64 { + m.lowerShifts(instr, extModeSignExtend64, aluOpAsr) + } else { + m.lowerShifts(instr, extModeSignExtend32, aluOpAsr) + } + case ssa.OpcodeUshr: + if instr.Return().Type().Bits() == 64 { + m.lowerShifts(instr, extModeZeroExtend64, aluOpLsr) + } else { + m.lowerShifts(instr, extModeZeroExtend32, aluOpLsr) + } + case ssa.OpcodeRotl: + m.lowerRotl(instr) + case ssa.OpcodeRotr: + m.lowerRotr(instr) + case ssa.OpcodeSExtend, ssa.OpcodeUExtend: + from, to, signed := instr.ExtendData() + m.lowerExtend(instr.Arg(), instr.Return(), from, to, signed) + case ssa.OpcodeFcmp: + x, y, c := instr.FcmpData() + m.lowerFcmp(x, y, instr.Return(), c) + case ssa.OpcodeImul: + x, y := instr.Arg2() + result := instr.Return() + m.lowerImul(x, y, result) + case ssa.OpcodeUndefined: + undef := m.allocateInstr() + undef.asUDF() + m.insert(undef) + case ssa.OpcodeSelect: + c, x, y := instr.SelectData() + if x.Type() == ssa.TypeV128 { + rc := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + m.lowerSelectVec(rc, rn, rm, rd) + } else { + m.lowerSelect(c, x, y, instr.Return()) + } + case ssa.OpcodeClz: + x := instr.Arg() + result := instr.Return() + m.lowerClz(x, result) + case ssa.OpcodeCtz: + x := instr.Arg() + result := instr.Return() 
+ m.lowerCtz(x, result) + case ssa.OpcodePopcnt: + x := instr.Arg() + result := instr.Return() + m.lowerPopcnt(x, result) + case ssa.OpcodeFcvtToSint, ssa.OpcodeFcvtToSintSat: + x, ctx := instr.Arg2() + result := instr.Return() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(result)) + ctxVReg := m.compiler.VRegOf(ctx) + m.lowerFpuToInt(rd, rn, ctxVReg, true, x.Type() == ssa.TypeF64, + result.Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat) + case ssa.OpcodeFcvtToUint, ssa.OpcodeFcvtToUintSat: + x, ctx := instr.Arg2() + result := instr.Return() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(result)) + ctxVReg := m.compiler.VRegOf(ctx) + m.lowerFpuToInt(rd, rn, ctxVReg, false, x.Type() == ssa.TypeF64, + result.Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat) + case ssa.OpcodeFcvtFromSint: + x := instr.Arg() + result := instr.Return() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(result)) + m.lowerIntToFpu(rd, rn, true, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) + case ssa.OpcodeFcvtFromUint: + x := instr.Arg() + result := instr.Return() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(result)) + m.lowerIntToFpu(rd, rn, false, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) + case ssa.OpcodeFdemote: + v := instr.Arg() + rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + cnt := m.allocateInstr() + cnt.asFpuRR(fpuUniOpCvt64To32, rd, rn, false) + m.insert(cnt) + case ssa.OpcodeFpromote: + v := instr.Arg() + rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + cnt := m.allocateInstr() + cnt.asFpuRR(fpuUniOpCvt32To64, rd, rn, true) + m.insert(cnt) + case ssa.OpcodeIreduce: + rn := 
m.getOperand_NR(m.compiler.ValueDefinition(instr.Arg()), extModeNone) + retVal := instr.Return() + rd := m.compiler.VRegOf(retVal) + + if retVal.Type() != ssa.TypeI32 { + panic("TODO?: Ireduce to non-i32") + } + mov := m.allocateInstr() + mov.asMove32(rd, rn.reg()) + m.insert(mov) + case ssa.OpcodeFneg: + m.lowerFpuUniOp(fpuUniOpNeg, instr.Arg(), instr.Return()) + case ssa.OpcodeSqrt: + m.lowerFpuUniOp(fpuUniOpSqrt, instr.Arg(), instr.Return()) + case ssa.OpcodeCeil: + m.lowerFpuUniOp(fpuUniOpRoundPlus, instr.Arg(), instr.Return()) + case ssa.OpcodeFloor: + m.lowerFpuUniOp(fpuUniOpRoundMinus, instr.Arg(), instr.Return()) + case ssa.OpcodeTrunc: + m.lowerFpuUniOp(fpuUniOpRoundZero, instr.Arg(), instr.Return()) + case ssa.OpcodeNearest: + m.lowerFpuUniOp(fpuUniOpRoundNearest, instr.Arg(), instr.Return()) + case ssa.OpcodeFabs: + m.lowerFpuUniOp(fpuUniOpAbs, instr.Arg(), instr.Return()) + case ssa.OpcodeBitcast: + m.lowerBitcast(instr) + case ssa.OpcodeFcopysign: + x, y := instr.Arg2() + m.lowerFcopysign(x, y, instr.Return()) + case ssa.OpcodeSdiv, ssa.OpcodeUdiv: + x, y, ctx := instr.Arg3() + ctxVReg := m.compiler.VRegOf(ctx) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + m.lowerIDiv(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSdiv) + case ssa.OpcodeSrem, ssa.OpcodeUrem: + x, y, ctx := instr.Arg3() + ctxVReg := m.compiler.VRegOf(ctx) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + m.lowerIRem(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem) + case ssa.OpcodeVconst: + result := m.compiler.VRegOf(instr.Return()) + lo, hi := instr.VconstData() + v := m.allocateInstr() + v.asLoadFpuConst128(result, lo, hi) + m.insert(v) + case ssa.OpcodeVbnot: + x := 
instr.Arg() + ins := m.allocateInstr() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + ins.asVecMisc(vecOpNot, rd, rn, vecArrangement16B) + m.insert(ins) + case ssa.OpcodeVbxor: + x, y := instr.Arg2() + m.lowerVecRRR(vecOpEOR, x, y, instr.Return(), vecArrangement16B) + case ssa.OpcodeVbor: + x, y := instr.Arg2() + m.lowerVecRRR(vecOpOrr, x, y, instr.Return(), vecArrangement16B) + case ssa.OpcodeVband: + x, y := instr.Arg2() + m.lowerVecRRR(vecOpAnd, x, y, instr.Return(), vecArrangement16B) + case ssa.OpcodeVbandnot: + x, y := instr.Arg2() + m.lowerVecRRR(vecOpBic, x, y, instr.Return(), vecArrangement16B) + case ssa.OpcodeVbitselect: + c, x, y := instr.SelectData() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + creg := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) + tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + + // creg is overwritten by BSL, so we need to move it to the result register before the instruction + // in case when it is used somewhere else. 
+ mov := m.allocateInstr() + mov.asFpuMov128(tmp.nr(), creg.nr()) + m.insert(mov) + + ins := m.allocateInstr() + ins.asVecRRRRewrite(vecOpBsl, tmp, rn, rm, vecArrangement16B) + m.insert(ins) + + mov2 := m.allocateInstr() + rd := m.compiler.VRegOf(instr.Return()) + mov2.asFpuMov128(rd, tmp.nr()) + m.insert(mov2) + case ssa.OpcodeVanyTrue, ssa.OpcodeVallTrue: + x, lane := instr.ArgWithLane() + var arr vecArrangement + if op == ssa.OpcodeVallTrue { + arr = ssaLaneToArrangement(lane) + } + rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + m.lowerVcheckTrue(op, rm, rd, arr) + case ssa.OpcodeVhighBits: + x, lane := instr.ArgWithLane() + rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + arr := ssaLaneToArrangement(lane) + m.lowerVhighBits(rm, rd, arr) + case ssa.OpcodeVIadd: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpAdd, x, y, instr.Return(), arr) + case ssa.OpcodeExtIaddPairwise: + v, lane, signed := instr.ExtIaddPairwiseData() + vv := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) + + tmpLo, tmpHi := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + var widen vecOp + if signed { + widen = vecOpSshll + } else { + widen = vecOpUshll + } + + var loArr, hiArr, dstArr vecArrangement + switch lane { + case ssa.VecLaneI8x16: + loArr, hiArr, dstArr = vecArrangement8B, vecArrangement16B, vecArrangement8H + case ssa.VecLaneI16x8: + loArr, hiArr, dstArr = vecArrangement4H, vecArrangement8H, vecArrangement4S + case ssa.VecLaneI32x4: + loArr, hiArr, dstArr = vecArrangement2S, vecArrangement4S, vecArrangement2D + default: + panic("unsupported lane " + lane.String()) + } + + widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo, vv, operandShiftImm(0), loArr) + widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi, vv, 
operandShiftImm(0), hiArr) + addp := m.allocateInstr().asVecRRR(vecOpAddp, operandNR(m.compiler.VRegOf(instr.Return())), tmpLo, tmpHi, dstArr) + m.insert(widenLo) + m.insert(widenHi) + m.insert(addp) + + case ssa.OpcodeVSaddSat: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpSqadd, x, y, instr.Return(), arr) + case ssa.OpcodeVUaddSat: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpUqadd, x, y, instr.Return(), arr) + case ssa.OpcodeVIsub: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpSub, x, y, instr.Return(), arr) + case ssa.OpcodeVSsubSat: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpSqsub, x, y, instr.Return(), arr) + case ssa.OpcodeVUsubSat: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpUqsub, x, y, instr.Return(), arr) + case ssa.OpcodeVImin: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpSmin, x, y, instr.Return(), arr) + case ssa.OpcodeVUmin: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpUmin, x, y, instr.Return(), arr) + case ssa.OpcodeVImax: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpSmax, x, y, instr.Return(), arr) + case ssa.OpcodeVUmax: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpUmax, x, y, instr.Return(), arr) + case ssa.OpcodeVAvgRound: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpUrhadd, x, y, instr.Return(), arr) + case ssa.OpcodeVImul: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := 
operandNR(m.compiler.VRegOf(instr.Return())) + m.lowerVIMul(rd, rn, rm, arr) + case ssa.OpcodeVIabs: + m.lowerVecMisc(vecOpAbs, instr) + case ssa.OpcodeVIneg: + m.lowerVecMisc(vecOpNeg, instr) + case ssa.OpcodeVIpopcnt: + m.lowerVecMisc(vecOpCnt, instr) + case ssa.OpcodeVIshl, + ssa.OpcodeVSshr, ssa.OpcodeVUshr: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + m.lowerVShift(op, rd, rn, rm, arr) + case ssa.OpcodeVSqrt: + m.lowerVecMisc(vecOpFsqrt, instr) + case ssa.OpcodeVFabs: + m.lowerVecMisc(vecOpFabs, instr) + case ssa.OpcodeVFneg: + m.lowerVecMisc(vecOpFneg, instr) + case ssa.OpcodeVFmin: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpFmin, x, y, instr.Return(), arr) + case ssa.OpcodeVFmax: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpFmax, x, y, instr.Return(), arr) + case ssa.OpcodeVFadd: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpFadd, x, y, instr.Return(), arr) + case ssa.OpcodeVFsub: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpFsub, x, y, instr.Return(), arr) + case ssa.OpcodeVFmul: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpFmul, x, y, instr.Return(), arr) + case ssa.OpcodeSqmulRoundSat: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpSqrdmulh, x, y, instr.Return(), arr) + case ssa.OpcodeVFdiv: + x, y, lane := instr.Arg2WithLane() + arr := ssaLaneToArrangement(lane) + m.lowerVecRRR(vecOpFdiv, x, y, instr.Return(), arr) + case ssa.OpcodeVFcvtToSintSat, ssa.OpcodeVFcvtToUintSat: + x, lane := instr.ArgWithLane() + arr := ssaLaneToArrangement(lane) + rn := 
m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + m.lowerVfpuToInt(rd, rn, arr, op == ssa.OpcodeVFcvtToSintSat) + case ssa.OpcodeVFcvtFromSint, ssa.OpcodeVFcvtFromUint: + x, lane := instr.ArgWithLane() + arr := ssaLaneToArrangement(lane) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + m.lowerVfpuFromInt(rd, rn, arr, op == ssa.OpcodeVFcvtFromSint) + case ssa.OpcodeSwidenLow, ssa.OpcodeUwidenLow: + x, lane := instr.ArgWithLane() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + + var arr vecArrangement + switch lane { + case ssa.VecLaneI8x16: + arr = vecArrangement8B + case ssa.VecLaneI16x8: + arr = vecArrangement4H + case ssa.VecLaneI32x4: + arr = vecArrangement2S + } + + shll := m.allocateInstr() + if signed := op == ssa.OpcodeSwidenLow; signed { + shll.asVecShiftImm(vecOpSshll, rd, rn, operandShiftImm(0), arr) + } else { + shll.asVecShiftImm(vecOpUshll, rd, rn, operandShiftImm(0), arr) + } + m.insert(shll) + case ssa.OpcodeSwidenHigh, ssa.OpcodeUwidenHigh: + x, lane := instr.ArgWithLane() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + + arr := ssaLaneToArrangement(lane) + + shll := m.allocateInstr() + if signed := op == ssa.OpcodeSwidenHigh; signed { + shll.asVecShiftImm(vecOpSshll, rd, rn, operandShiftImm(0), arr) + } else { + shll.asVecShiftImm(vecOpUshll, rd, rn, operandShiftImm(0), arr) + } + m.insert(shll) + + case ssa.OpcodeSnarrow, ssa.OpcodeUnarrow: + x, y, lane := instr.Arg2WithLane() + var arr, arr2 vecArrangement + switch lane { + case ssa.VecLaneI16x8: // I16x8 + arr = vecArrangement8B + arr2 = vecArrangement16B // Implies sqxtn2. + case ssa.VecLaneI32x4: + arr = vecArrangement4H + arr2 = vecArrangement8H // Implies sqxtn2. 
+ default: + panic("unsupported lane " + lane.String()) + } + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + + tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + + loQxtn := m.allocateInstr() + hiQxtn := m.allocateInstr() + if signed := op == ssa.OpcodeSnarrow; signed { + // Narrow lanes on rn and write them into lower-half of rd. + loQxtn.asVecMisc(vecOpSqxtn, tmp, rn, arr) // low + // Narrow lanes on rm and write them into higher-half of rd. + hiQxtn.asVecMisc(vecOpSqxtn, tmp, rm, arr2) // high (sqxtn2) + } else { + // Narrow lanes on rn and write them into lower-half of rd. + loQxtn.asVecMisc(vecOpSqxtun, tmp, rn, arr) // low + // Narrow lanes on rm and write them into higher-half of rd. + hiQxtn.asVecMisc(vecOpSqxtun, tmp, rm, arr2) // high (sqxtn2) + } + m.insert(loQxtn) + m.insert(hiQxtn) + + mov := m.allocateInstr() + mov.asFpuMov128(rd.nr(), tmp.nr()) + m.insert(mov) + case ssa.OpcodeFvpromoteLow: + x, lane := instr.ArgWithLane() + if lane != ssa.VecLaneF32x4 { + panic("unsupported lane type " + lane.String()) + } + ins := m.allocateInstr() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + ins.asVecMisc(vecOpFcvtl, rd, rn, vecArrangement2S) + m.insert(ins) + case ssa.OpcodeFvdemote: + x, lane := instr.ArgWithLane() + if lane != ssa.VecLaneF64x2 { + panic("unsupported lane type " + lane.String()) + } + ins := m.allocateInstr() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + ins.asVecMisc(vecOpFcvtn, rd, rn, vecArrangement2S) + m.insert(ins) + case ssa.OpcodeExtractlane: + x, index, signed, lane := instr.ExtractlaneData() + + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + + mov := 
m.allocateInstr() + switch lane { + case ssa.VecLaneI8x16: + mov.asMovFromVec(rd, rn, vecArrangementB, vecIndex(index), signed) + case ssa.VecLaneI16x8: + mov.asMovFromVec(rd, rn, vecArrangementH, vecIndex(index), signed) + case ssa.VecLaneI32x4: + mov.asMovFromVec(rd, rn, vecArrangementS, vecIndex(index), signed) + case ssa.VecLaneI64x2: + mov.asMovFromVec(rd, rn, vecArrangementD, vecIndex(index), signed) + case ssa.VecLaneF32x4: + mov.asVecMovElement(rd, rn, vecArrangementS, vecIndex(0), vecIndex(index)) + case ssa.VecLaneF64x2: + mov.asVecMovElement(rd, rn, vecArrangementD, vecIndex(0), vecIndex(index)) + default: + panic("unsupported lane: " + lane.String()) + } + + m.insert(mov) + + case ssa.OpcodeInsertlane: + x, y, index, lane := instr.InsertlaneData() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + + // Initially mov rn to tmp. + mov1 := m.allocateInstr() + mov1.asFpuMov128(tmpReg.nr(), rn.nr()) + m.insert(mov1) + + // movToVec and vecMovElement do not clear the remaining bits to zero, + // thus, we can mov rm in-place to tmp. + mov2 := m.allocateInstr() + switch lane { + case ssa.VecLaneI8x16: + mov2.asMovToVec(tmpReg, rm, vecArrangementB, vecIndex(index)) + case ssa.VecLaneI16x8: + mov2.asMovToVec(tmpReg, rm, vecArrangementH, vecIndex(index)) + case ssa.VecLaneI32x4: + mov2.asMovToVec(tmpReg, rm, vecArrangementS, vecIndex(index)) + case ssa.VecLaneI64x2: + mov2.asMovToVec(tmpReg, rm, vecArrangementD, vecIndex(index)) + case ssa.VecLaneF32x4: + mov2.asVecMovElement(tmpReg, rm, vecArrangementS, vecIndex(index), vecIndex(0)) + case ssa.VecLaneF64x2: + mov2.asVecMovElement(tmpReg, rm, vecArrangementD, vecIndex(index), vecIndex(0)) + } + m.insert(mov2) + + // Finally mov tmp to rd. 
+ mov3 := m.allocateInstr() + mov3.asFpuMov128(rd.nr(), tmpReg.nr()) + m.insert(mov3) + + case ssa.OpcodeSwizzle: + x, y, lane := instr.Arg2WithLane() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + + arr := ssaLaneToArrangement(lane) + + // tbl ., { . }, . + tbl1 := m.allocateInstr() + tbl1.asVecTbl(1, rd, rn, rm, arr) + m.insert(tbl1) + + case ssa.OpcodeShuffle: + x, y, lane1, lane2 := instr.ShuffleData() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + + m.lowerShuffle(rd, rn, rm, lane1, lane2) + + case ssa.OpcodeSplat: + x, lane := instr.ArgWithLane() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rd := operandNR(m.compiler.VRegOf(instr.Return())) + + dup := m.allocateInstr() + switch lane { + case ssa.VecLaneI8x16: + dup.asVecDup(rd, rn, vecArrangement16B) + case ssa.VecLaneI16x8: + dup.asVecDup(rd, rn, vecArrangement8H) + case ssa.VecLaneI32x4: + dup.asVecDup(rd, rn, vecArrangement4S) + case ssa.VecLaneI64x2: + dup.asVecDup(rd, rn, vecArrangement2D) + case ssa.VecLaneF32x4: + dup.asVecDupElement(rd, rn, vecArrangementS, vecIndex(0)) + case ssa.VecLaneF64x2: + dup.asVecDupElement(rd, rn, vecArrangementD, vecIndex(0)) + } + m.insert(dup) + + case ssa.OpcodeWideningPairwiseDotProductS: + x, y := instr.Arg2() + xx, yy := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone), + m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + tmp, tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp, xx, yy, vecArrangement8H)) + m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2, xx, yy, vecArrangement8H)) + 
m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp, tmp, tmp2, vecArrangement4S))

		rd := operandNR(m.compiler.VRegOf(instr.Return()))
		m.insert(m.allocateInstr().asFpuMov128(rd.nr(), tmp.nr()))

	case ssa.OpcodeLoadSplat:
		ptr, offset, lane := instr.LoadSplatData()
		m.lowerLoadSplat(ptr, offset, lane, instr.Return())

	case ssa.OpcodeAtomicRmw:
		m.lowerAtomicRmw(instr)

	case ssa.OpcodeAtomicCas:
		m.lowerAtomicCas(instr)

	case ssa.OpcodeAtomicLoad:
		m.lowerAtomicLoad(instr)

	case ssa.OpcodeAtomicStore:
		m.lowerAtomicStore(instr)

	case ssa.OpcodeFence:
		// Note: this `instr` shadows the switch's *ssa.Instruction; it is a fresh
		// machine instruction holding the DMB barrier.
		instr := m.allocateInstr()
		instr.asDMB()
		m.insert(instr)

	default:
		panic("TODO: lowering " + op.String())
	}
	m.executableContext.FlushPendingInstructions()
}

// lowerShuffle emits a TBL2-based byte shuffle: the 16 result bytes are selected
// out of the concatenation of rn and rm by the 16-byte index mask encoded in
// lane1 (bytes 0-7) and lane2 (bytes 8-15).
func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) {
	// `tbl2` requires 2 consecutive registers, so we arbitrarily pick v29, v30.
	vReg, wReg := v29VReg, v30VReg

	// Initialize v29, v30 to rn, rm.
	movv := m.allocateInstr()
	movv.asFpuMov128(vReg, rn.nr())
	m.insert(movv)

	movw := m.allocateInstr()
	movw.asFpuMov128(wReg, rm.nr())
	m.insert(movw)

	// `lane1`, `lane2` are already encoded as two u64s with the right layout:
	//	lane1 := lane[7]<<56 | ... | lane[1]<<8 | lane[0]
	//	lane2 := lane[15]<<56 | ... | lane[9]<<8 | lane[8]
	// Thus, we can use loadFpuConst128.
	tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
	lfc := m.allocateInstr()
	lfc.asLoadFpuConst128(tmp.nr(), lane1, lane2)
	m.insert(lfc)

	// tbl <rd>.16b, { v29.16b, v30.16b }, <tmp>.16b
	tbl2 := m.allocateInstr()
	tbl2.asVecTbl(2, rd, operandNR(vReg), tmp, vecArrangement16B)
	m.insert(tbl2)
}

// lowerVShift lowers the vector shifts (VIshl, VSshr, VUshr): the shift amount in rm
// is masked modulo the lane width, negated for right shifts (SSHL/USHL shift right when
// the per-lane amount is negative), broadcast across lanes, and applied with sshl/ushl.
func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangement) {
	var modulo byte
	switch arr {
	case vecArrangement16B:
		modulo = 0x7 // Modulo 8.
	case vecArrangement8H:
		modulo = 0xf // Modulo 16.
	case vecArrangement4S:
		modulo = 0x1f // Modulo 32.
	case vecArrangement2D:
		modulo = 0x3f // Modulo 64.
	default:
		// NOTE(review): "arrangment" is a typo in this panic message; left as-is here
		// since a doc-only change must not alter runtime strings.
		panic("unsupported arrangment " + arr.String())
	}

	rtmp := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
	vtmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))

	// Mask the scalar shift amount to the lane-width modulus.
	and := m.allocateInstr()
	and.asALUBitmaskImm(aluOpAnd, rtmp.nr(), rm.nr(), uint64(modulo), true)
	m.insert(and)

	if op != ssa.OpcodeVIshl {
		// Negate the amount to make this as right shift.
		neg := m.allocateInstr()
		neg.asALU(aluOpSub, rtmp, operandNR(xzrVReg), rtmp, true)
		m.insert(neg)
	}

	// Copy the shift amount into a vector register as sshl/ushl requires it to be there.
	dup := m.allocateInstr()
	dup.asVecDup(vtmp, rtmp, arr)
	m.insert(dup)

	// Signed shift for VIshl/VSshr, unsigned shift for VUshr.
	if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr {
		sshl := m.allocateInstr()
		sshl.asVecRRR(vecOpSshl, rd, rn, vtmp, arr)
		m.insert(sshl)
	} else {
		ushl := m.allocateInstr()
		ushl.asVecRRR(vecOpUshl, rd, rn, vtmp, arr)
		m.insert(ushl)
	}
}

// lowerVcheckTrue lowers VanyTrue/VallTrue: it reduces the vector rm to a scalar
// (umaxp for "any", uminv for "all") and sets rd to 1 if that scalar is non-zero,
// 0 otherwise. VallTrue on 2D lanes needs a dedicated sequence because uminv has
// no 2D form.
func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangement) {
	tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))

	// Special case VallTrue for i64x2.
	if op == ssa.OpcodeVallTrue && arr == vecArrangement2D {
		// 	cmeq v3?.2d, v2?.2d, #0
		//	addp v3?.2d, v3?.2d, v3?.2d
		//	fcmp v3?, v3?
		//	cset dst, eq

		ins := m.allocateInstr()
		ins.asVecMisc(vecOpCmeq0, tmp, rm, vecArrangement2D)
		m.insert(ins)

		addp := m.allocateInstr()
		addp.asVecRRR(vecOpAddp, tmp, tmp, tmp, vecArrangement2D)
		m.insert(addp)

		fcmp := m.allocateInstr()
		fcmp.asFpuCmp(tmp, tmp, true)
		m.insert(fcmp)

		cset := m.allocateInstr()
		cset.asCSet(rd.nr(), false, eq)
		m.insert(cset)

		return
	}

	// Create a scalar value with umaxp or uminv, then compare it against zero.
	ins := m.allocateInstr()
	if op == ssa.OpcodeVanyTrue {
		// 	umaxp v4?.16b, v2?.16b, v2?.16b
		ins.asVecRRR(vecOpUmaxp, tmp, rm, rm, vecArrangement16B)
	} else {
		// 	uminv d4?, v2?.4s
		ins.asVecLanes(vecOpUminv, tmp, rm, arr)
	}
	m.insert(ins)

	//	mov x3?, v4?.d[0]
	//	ccmp x3?, #0x0, #0x0, al
	//	cset x3?, ne
	//	mov x0, x3?

	movv := m.allocateInstr()
	movv.asMovFromVec(rd, tmp, vecArrangementD, vecIndex(0), false)
	m.insert(movv)

	fc := m.allocateInstr()
	fc.asCCmpImm(rd, uint64(0), al, 0, true)
	m.insert(fc)

	cset := m.allocateInstr()
	cset.asCSet(rd.nr(), false, ne)
	m.insert(cset)
}

// lowerVhighBits lowers VhighBits (Wasm's bitmask): rd receives an integer whose
// bit i is the sign (most significant) bit of lane i of rm. Each arrangement uses
// a per-lane power-of-two mask ANDed against the sign-extended lanes, followed by
// a horizontal add.
func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
	r0 := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
	v0 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
	v1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))

	switch arr {
	case vecArrangement16B:
		//	sshr v6?.16b, v2?.16b, #7
		//	movz x4?, #0x201, lsl 0
		//	movk x4?, #0x804, lsl 16
		//	movk x4?, #0x2010, lsl 32
		//	movk x4?, #0x8040, lsl 48
		//	dup v5?.2d, x4?
		//	and v6?.16b, v6?.16b, v5?.16b
		//	ext v5?.16b, v6?.16b, v6?.16b, #8
		//	zip1 v5?.16b, v6?.16b, v5?.16b
		//	addv s5?, v5?.8h
		//	umov s3?, v5?.h[0]

		// Right arithmetic shift on the original vector and store the result into v1. So we have:
		// v1[i] = 0xff if vi<0, 0 otherwise.
		sshr := m.allocateInstr()
		sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(7), vecArrangement16B)
		m.insert(sshr)

		// Load the bit mask into r0.
		m.insertMOVZ(r0.nr(), 0x0201, 0, true)
		m.insertMOVK(r0.nr(), 0x0804, 1, true)
		m.insertMOVK(r0.nr(), 0x2010, 2, true)
		m.insertMOVK(r0.nr(), 0x8040, 3, true)

		// dup r0 to v0.
		dup := m.allocateInstr()
		dup.asVecDup(v0, r0, vecArrangement2D)
		m.insert(dup)

		// Lane-wise logical AND with the bit mask, meaning that we have
		// v[i] = (1 << i) if vi<0, 0 otherwise.
		//
		// Below, we use the following notation:
		// wi := (1 << i) if vi<0, 0 otherwise.
		and := m.allocateInstr()
		and.asVecRRR(vecOpAnd, v1, v1, v0, vecArrangement16B)
		m.insert(and)

		// Swap the lower and higher 8 byte elements, and write it into v0, meaning that we have
		// v0[i] = w(i+8) if i < 8, w(i-8) otherwise.
		ext := m.allocateInstr()
		ext.asVecExtract(v0, v1, v1, vecArrangement16B, uint32(8))
		m.insert(ext)

		// v = [w0, w8, ..., w7, w15]
		zip1 := m.allocateInstr()
		zip1.asVecPermute(vecOpZip1, v0, v1, v0, vecArrangement16B)
		m.insert(zip1)

		// v.h[0] = w0 + ... + w15
		addv := m.allocateInstr()
		addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H)
		m.insert(addv)

		// Extract the v.h[0] as the result.
		movfv := m.allocateInstr()
		movfv.asMovFromVec(rd, v0, vecArrangementH, vecIndex(0), false)
		m.insert(movfv)
	case vecArrangement8H:
		//	sshr v6?.8h, v2?.8h, #15
		//	movz x4?, #0x1, lsl 0
		//	movk x4?, #0x2, lsl 16
		//	movk x4?, #0x4, lsl 32
		//	movk x4?, #0x8, lsl 48
		//	dup v5?.2d, x4?
		//	lsl x4?, x4?, 0x4
		//	ins v5?.d[1], x4?
		//	and v5?.16b, v6?.16b, v5?.16b
		//	addv s5?, v5?.8h
		//	umov s3?, v5?.h[0]

		// Right arithmetic shift on the original vector and store the result into v1. So we have:
		// v[i] = 0xffff if vi<0, 0 otherwise.
		sshr := m.allocateInstr()
		sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(15), vecArrangement8H)
		m.insert(sshr)

		// Load the bit mask into r0.
		m.lowerConstantI64(r0.nr(), 0x0008000400020001)

		// dup r0 to vector v0.
		dup := m.allocateInstr()
		dup.asVecDup(v0, r0, vecArrangement2D)
		m.insert(dup)

		lsl := m.allocateInstr()
		lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(4), true)
		m.insert(lsl)

		movv := m.allocateInstr()
		movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1))
		m.insert(movv)

		// Lane-wise logical AND with the bitmask, meaning that we have
		// v[i] = (1 << i)     if vi<0, 0 otherwise for i=0..3
		//      = (1 << (i+4)) if vi<0, 0 otherwise for i=3..7
		and := m.allocateInstr()
		and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B)
		m.insert(and)

		addv := m.allocateInstr()
		addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H)
		m.insert(addv)

		movfv := m.allocateInstr()
		movfv.asMovFromVec(rd, v0, vecArrangementH, vecIndex(0), false)
		m.insert(movfv)
	case vecArrangement4S:
		// (Sketch corrected to match the emitted code below; the previous comment was
		// copy-pasted from the 8H case.)
		// 	sshr v6?.4s, v2?.4s, #31
		//	mov  x4?, #0x0000000200000001
		//	dup  v5?.2d, x4?
		//	lsl  x4?, x4?, 0x2
		//	ins  v5?.d[1], x4?
		//	and  v5?.16b, v6?.16b, v5?.16b
		//	addv s5?, v5?.4s
		//	umov w3?, v5?.s[0]

		// Right arithmetic shift on the original vector and store the result into v1. So we have:
		// v[i] = 0xffffffff if vi<0, 0 otherwise.
		sshr := m.allocateInstr()
		sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(31), vecArrangement4S)
		m.insert(sshr)

		// Load the bit mask into r0.
		m.lowerConstantI64(r0.nr(), 0x0000000200000001)

		// dup r0 to vector v0.
		dup := m.allocateInstr()
		dup.asVecDup(v0, r0, vecArrangement2D)
		m.insert(dup)

		lsl := m.allocateInstr()
		lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(2), true)
		m.insert(lsl)

		movv := m.allocateInstr()
		movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1))
		m.insert(movv)

		// Lane-wise logical AND with the bitmask, meaning that we have
		// v[i] = (1 << i)     if vi<0, 0 otherwise for i in [0, 1]
		//      = (1 << (i+4)) if vi<0, 0 otherwise for i in [2, 3]
		and := m.allocateInstr()
		and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B)
		m.insert(and)

		addv := m.allocateInstr()
		addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement4S)
		m.insert(addv)

		movfv := m.allocateInstr()
		movfv.asMovFromVec(rd, v0, vecArrangementS, vecIndex(0), false)
		m.insert(movfv)
	case vecArrangement2D:
		// 	mov d3?, v2?.d[0]
		//	mov x4?, v2?.d[1]
		//	lsr x4?, x4?, 0x3f
		//	lsr d3?, d3?, 0x3f
		//	add s3?, s3?, w4?, lsl #1

		// Move the lower 64-bit int into result.
		movv0 := m.allocateInstr()
		movv0.asMovFromVec(rd, rm, vecArrangementD, vecIndex(0), false)
		m.insert(movv0)

		// Move the higher 64-bit int into r0.
		movv1 := m.allocateInstr()
		movv1.asMovFromVec(r0, rm, vecArrangementD, vecIndex(1), false)
		m.insert(movv1)

		// Move the sign bit into the least significant bit.
		lsr1 := m.allocateInstr()
		lsr1.asALUShift(aluOpLsr, r0, r0, operandShiftImm(63), true)
		m.insert(lsr1)

		lsr2 := m.allocateInstr()
		lsr2.asALUShift(aluOpLsr, rd, rd, operandShiftImm(63), true)
		m.insert(lsr2)

		// rd = (r0<<1) | rd
		lsl := m.allocateInstr()
		lsl.asALU(aluOpAdd, rd, rd, operandSR(r0.nr(), 1, shiftOpLSL), false)
		m.insert(lsl)
	default:
		panic("Unsupported " + arr.String())
	}
}

// lowerVecMisc is a helper for single-operand vector instructions: it lowers
// `instr` to one vecMisc machine instruction with the given op and the
// arrangement derived from the SSA lane type.
func (m *machine) lowerVecMisc(op vecOp, instr *ssa.Instruction) {
	x, lane := instr.ArgWithLane()
	arr := ssaLaneToArrangement(lane)
	ins := m.allocateInstr()
	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
	rd := operandNR(m.compiler.VRegOf(instr.Return()))
	ins.asVecMisc(op, rd, rn, arr)
	m.insert(ins)
}

// lowerVecRRR is a helper for two-operand vector instructions: rd = op(x, y)
// with the given arrangement.
func (m *machine) lowerVecRRR(op vecOp, x, y, ret ssa.Value, arr vecArrangement) {
	ins := m.allocateInstr()
	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
	rd := operandNR(m.compiler.VRegOf(ret))
	ins.asVecRRR(op, rd, rn, rm, arr)
	m.insert(ins)
}

// lowerVIMul lowers integer vector multiplication. All arrangements except 2D map
// to a single MUL; i64x2 has no MUL on NEON, so it is synthesized from 32-bit
// partial products (rev64/mul/xtn/addp/shll/umlal).
func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
	if arr != vecArrangement2D {
		mul := m.allocateInstr()
		mul.asVecRRR(vecOpMul, rd, rn, rm, arr)
		m.insert(mul)
	} else {
		tmp1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
		tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
		tmp3 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))

		tmpRes := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))

		// Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696
		rev64 := m.allocateInstr()
		rev64.asVecMisc(vecOpRev64, tmp2, rm, vecArrangement4S)
		m.insert(rev64)

		mul := m.allocateInstr()
		mul.asVecRRR(vecOpMul, tmp2, tmp2, rn, vecArrangement4S)
		m.insert(mul)

		xtn1 := m.allocateInstr()
		xtn1.asVecMisc(vecOpXtn, tmp1, rn, vecArrangement2S)
		m.insert(xtn1)

		addp := m.allocateInstr()
		addp.asVecRRR(vecOpAddp, tmp2, tmp2, tmp2, vecArrangement4S)
		m.insert(addp)

		xtn2 := m.allocateInstr()
		xtn2.asVecMisc(vecOpXtn, tmp3, rm, vecArrangement2S)
		m.insert(xtn2)

		// Note: do not write the result directly into result yet. This is the same reason as in bsl.
		// In short, in UMLAL instruction, the result register is also one of the source register, and
		// the value on the result register is significant.
		shll := m.allocateInstr()
		shll.asVecMisc(vecOpShll, tmpRes, tmp2, vecArrangement2S)
		m.insert(shll)

		umlal := m.allocateInstr()
		umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, tmp3, tmp1, vecArrangement2S)
		m.insert(umlal)

		mov := m.allocateInstr()
		mov.asFpuMov128(rd.nr(), tmpRes.nr())
		m.insert(mov)
	}
}

// lowerVMinMaxPseudo lowers the pseudo float min/max (Wasm pmin/pmax semantics):
// fcmgt builds a lane mask, then BSL selects between the operands. `max` selects
// the comparison direction.
func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) {
	x, y, lane := instr.Arg2WithLane()
	arr := ssaLaneToArrangement(lane)

	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)

	// Note: this usage of tmp is important.
	// BSL modifies the destination register, so we need to use a temporary register so that
	// the actual definition of the destination register happens *after* the BSL instruction.
	// That way, we can force the spill instruction to be inserted after the BSL instruction.
	tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))

	fcmgt := m.allocateInstr()
	if max {
		fcmgt.asVecRRR(vecOpFcmgt, tmp, rm, rn, arr)
	} else {
		// If min, swap the args.
		fcmgt.asVecRRR(vecOpFcmgt, tmp, rn, rm, arr)
	}
	m.insert(fcmgt)

	bsl := m.allocateInstr()
	bsl.asVecRRRRewrite(vecOpBsl, tmp, rm, rn, vecArrangement16B)
	m.insert(bsl)

	res := operandNR(m.compiler.VRegOf(instr.Return()))
	mov2 := m.allocateInstr()
	mov2.asFpuMov128(res.nr(), tmp.nr())
	m.insert(mov2)
}

// lowerIRem lowers integer remainder (Srem/Urem): rd = rn - (rn/rm)*rm via
// SDIV/UDIV then MSUB, trapping with IntegerDivisionByZero when rm == 0.
// Note the divide is emitted before the zero check; the exit decision depends
// only on rm.
func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) {
	div := m.allocateInstr()

	if signed {
		div.asALU(aluOpSDiv, rd, rn, rm, _64bit)
	} else {
		div.asALU(aluOpUDiv, rd, rn, rm, _64bit)
	}
	m.insert(div)

	// Check if rm is zero:
	m.exitIfNot(execCtxVReg, registerAsRegNotZeroCond(rm.nr()), _64bit, wazevoapi.ExitCodeIntegerDivisionByZero)

	// rd = rn-rd*rm by MSUB instruction.
	msub := m.allocateInstr()
	msub.asALURRRR(aluOpMSub, rd, rd, rm, rn, _64bit)
	m.insert(msub)
}

// lowerIDiv lowers integer division (Sdiv/Udiv), trapping on division by zero
// and, for signed division, on the MinInt / -1 overflow case.
func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) {
	div := m.allocateInstr()

	if signed {
		div.asALU(aluOpSDiv, rd, rn, rm, _64bit)
	} else {
		div.asALU(aluOpUDiv, rd, rn, rm, _64bit)
	}
	m.insert(div)

	// Check if rm is zero:
	m.exitIfNot(execCtxVReg, registerAsRegNotZeroCond(rm.nr()), _64bit, wazevoapi.ExitCodeIntegerDivisionByZero)

	if signed {
		// We need to check the signed overflow which happens iff "math.MinInt{32,64} / -1"
		minusOneCheck := m.allocateInstr()
		// Sets eq condition if rm == -1.
		minusOneCheck.asALU(aluOpAddS, operandNR(xzrVReg), rm, operandImm12(1, 0), _64bit)
		m.insert(minusOneCheck)

		ccmp := m.allocateInstr()
		// If eq condition is set, sets the flag by the result based on "rn - 1", otherwise clears the flag.
		ccmp.asCCmpImm(rn, 1, eq, 0, _64bit)
		m.insert(ccmp)

		// Check the overflow flag.
		m.exitIfNot(execCtxVReg, vs.invert().asCond(), false, wazevoapi.ExitCodeIntegerOverflow)
	}
}

// exitIfNot emits a conditional branch to exit if the condition is not met.
// If `c` (cond type) is a register, `cond64bit` must be chosen to indicate whether the register is 32-bit or 64-bit.
// Otherwise, `cond64bit` is ignored.
func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, code wazevoapi.ExitCode) {
	execCtxTmp := m.copyToTmp(execCtxVReg)

	// The branch target is not known yet, so allocate/insert it first and patch it
	// with asCondBr once the post-exit label exists.
	cbr := m.allocateInstr()
	m.insert(cbr)
	m.lowerExitWithCode(execCtxTmp, code)
	// Conditional branch target is after exit.
	l := m.insertBrTargetLabel()
	cbr.asCondBr(c, l, cond64bit)
}

// lowerFcopysign lowers Fcopysign: allocates the int/float scratch registers of
// the right width and delegates to lowerFcopysignImpl.
func (m *machine) lowerFcopysign(x, y, ret ssa.Value) {
	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
	var tmpI, tmpF operand
	_64 := x.Type() == ssa.TypeF64
	if _64 {
		tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
		tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
	} else {
		tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF32))
		tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
	}
	rd := m.compiler.VRegOf(ret)
	m.lowerFcopysignImpl(operandNR(rd), rn, rm, tmpI, tmpF, _64)
}

// lowerFcopysignImpl emits rd = copysign(rn, rm): a sign-bit-only mask is built in
// tmpF, and VBIT inserts rm's sign bit into a copy of rn.
func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool) {
	// This is exactly the same code emitted by GCC for "__builtin_copysign":
	//
	//    mov     x0, -9223372036854775808
	//    fmov    d2, x0
	//    vbit    v0.8b, v1.8b, v2.8b
	//

	setMSB := m.allocateInstr()
	if _64bit {
		m.lowerConstantI64(tmpI.nr(), math.MinInt64)
		setMSB.asMovToVec(tmpF, tmpI, vecArrangementD, vecIndex(0))
	} else {
		m.lowerConstantI32(tmpI.nr(), math.MinInt32)
		setMSB.asMovToVec(tmpF, tmpI, vecArrangementS, vecIndex(0))
	}
	m.insert(setMSB)

	tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))

	mov := m.allocateInstr()
	mov.asFpuMov64(tmpReg.nr(), rn.nr())
	m.insert(mov)

	// VBIT rewrites its destination (the copy of rn), inserting rm's bits where
	// the mask (tmpF) has 1s — i.e. only the sign bit.
	vbit := m.allocateInstr()
	vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, tmpF, vecArrangement8B)
	m.insert(vbit)

	movDst := m.allocateInstr()
	movDst.asFpuMov64(rd.nr(), tmpReg.nr())
	m.insert(movDst)
}

// lowerBitcast lowers same-width int<->float reinterpretation via a GPR<->vector
// element move. Int-to-int / float-to-float bitcasts are not handled here.
func (m *machine) lowerBitcast(instr *ssa.Instruction) {
	v, dstType := instr.BitcastData()
	srcType := v.Type()
	rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
	rd := operandNR(m.compiler.VRegOf(instr.Return()))
	srcInt := srcType.IsInt()
	dstInt := dstType.IsInt()
	switch {
	case srcInt && !dstInt: // Int to Float:
		mov := m.allocateInstr()
		var arr vecArrangement
		if srcType.Bits() == 64 {
			arr = vecArrangementD
		} else {
			arr = vecArrangementS
		}
		mov.asMovToVec(rd, rn, arr, vecIndex(0))
		m.insert(mov)
	case !srcInt && dstInt: // Float to Int:
		mov := m.allocateInstr()
		var arr vecArrangement
		if dstType.Bits() == 64 {
			arr = vecArrangementD
		} else {
			arr = vecArrangementS
		}
		mov.asMovFromVec(rd, rn, arr, vecIndex(0), false)
		m.insert(mov)
	default:
		panic("TODO?BUG?")
	}
}

// lowerFpuUniOp is a helper for scalar FP unary ops (neg, sqrt, abs, rounding):
// one fpuRR instruction, width chosen from the input type.
func (m *machine) lowerFpuUniOp(op fpuUniOp, in, out ssa.Value) {
	rn := m.getOperand_NR(m.compiler.ValueDefinition(in), extModeNone)
	rd := operandNR(m.compiler.VRegOf(out))

	neg := m.allocateInstr()
	neg.asFpuRR(op, rd, rn, in.Type().Bits() == 64)
	m.insert(neg)
}

// lowerFpuToInt lowers float-to-int conversion. In the non-saturating (trapping)
// case it clears FPSR first, converts, then inspects FPSR to distinguish a NaN
// input (InvalidConversionToInteger) from an out-of-range one (IntegerOverflow).
func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) {
	if !nonTrapping {
		// First of all, we have to clear the FPU flags.
		flagClear := m.allocateInstr()
		flagClear.asMovToFPSR(xzrVReg)
		m.insert(flagClear)
	}

	// Then, do the conversion which doesn't trap inherently.
	cvt := m.allocateInstr()
	cvt.asFpuToInt(rd, rn, signed, src64bit, dst64bit)
	m.insert(cvt)

	if !nonTrapping {
		tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)

		// After the conversion, check the FPU flags.
		getFlag := m.allocateInstr()
		getFlag.asMovFromFPSR(tmpReg)
		m.insert(getFlag)

		execCtx := m.copyToTmp(ctx)
		_rn := operandNR(m.copyToTmp(rn.nr()))

		// Check if the conversion was undefined by comparing the status with 1.
		// See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register
		alu := m.allocateInstr()
		alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true)
		m.insert(alu)

		// If it is not undefined, we can return the result.
		ok := m.allocateInstr()
		m.insert(ok)

		// Otherwise, we have to choose the status depending on it is overflow or NaN conversion.

		// Comparing itself to check if it is a NaN.
		fpuCmp := m.allocateInstr()
		fpuCmp.asFpuCmp(_rn, _rn, src64bit)
		m.insert(fpuCmp)
		// If the VC flag is not set (== VS flag is set), it is a NaN.
		m.exitIfNot(execCtx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger)
		// Otherwise, it is an overflow.
		m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow)

		// Conditional branch target is after exit.
		l := m.insertBrTargetLabel()
		ok.asCondBr(ne.asCond(), l, false /* ignored */)
	}
}

// lowerIntToFpu lowers int-to-float conversion; scvtf/ucvtf never trap, so this
// is a single instruction.
func (m *machine) lowerIntToFpu(rd, rn operand, signed, src64bit, dst64bit bool) {
	cvt := m.allocateInstr()
	cvt.asIntToFpu(rd, rn, signed, src64bit, dst64bit)
	m.insert(cvt)
}

// lowerFpuBinOp lowers the scalar FP binary ops (Fadd/Fsub/Fmul/Fdiv/Fmax/Fmin)
// to one fpuRRR instruction, width chosen from the first operand's type.
func (m *machine) lowerFpuBinOp(si *ssa.Instruction) {
	instr := m.allocateInstr()
	var op fpuBinOp
	switch si.Opcode() {
	case ssa.OpcodeFadd:
		op = fpuBinOpAdd
	case ssa.OpcodeFsub:
		op = fpuBinOpSub
	case ssa.OpcodeFmul:
		op = fpuBinOpMul
	case ssa.OpcodeFdiv:
		op = fpuBinOpDiv
	case ssa.OpcodeFmax:
		op = fpuBinOpMax
	case ssa.OpcodeFmin:
		op = fpuBinOpMin
	}
	x, y := si.Arg2()
	xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
	rn := m.getOperand_NR(xDef, extModeNone)
	rm := m.getOperand_NR(yDef, extModeNone)
	rd := operandNR(m.compiler.VRegOf(si.Return()))
	instr.asFpuRRR(op, rd, rn, rm, x.Type().Bits() == 64)
	m.insert(instr)
}

func (m *machine) lowerSubOrAdd(si *ssa.Instruction, add bool) {
	x, y := si.Arg2()
	if !x.Type().IsInt() {
		panic("BUG?")
	}

	xDef, yDef
:= m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) + rn := m.getOperand_NR(xDef, extModeNone) + rm, yNegated := m.getOperand_MaybeNegatedImm12_ER_SR_NR(yDef, extModeNone) + + var aop aluOp + switch { + case add && !yNegated: // rn+rm = x+y + aop = aluOpAdd + case add && yNegated: // rn-rm = x-(-y) = x+y + aop = aluOpSub + case !add && !yNegated: // rn-rm = x-y + aop = aluOpSub + case !add && yNegated: // rn+rm = x-(-y) = x-y + aop = aluOpAdd + } + rd := operandNR(m.compiler.VRegOf(si.Return())) + alu := m.allocateInstr() + alu.asALU(aop, rd, rn, rm, x.Type().Bits() == 64) + m.insert(alu) +} + +// InsertMove implements backend.Machine. +func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) { + instr := m.allocateInstr() + switch typ { + case ssa.TypeI32, ssa.TypeI64: + instr.asMove64(dst, src) + case ssa.TypeF32, ssa.TypeF64: + instr.asFpuMov64(dst, src) + case ssa.TypeV128: + instr.asFpuMov128(dst, src) + default: + panic("TODO") + } + m.insert(instr) +} + +func (m *machine) lowerIcmp(si *ssa.Instruction) { + x, y, c := si.IcmpData() + flag := condFlagFromSSAIntegerCmpCond(c) + + in64bit := x.Type().Bits() == 64 + var ext extMode + if in64bit { + if c.Signed() { + ext = extModeSignExtend64 + } else { + ext = extModeZeroExtend64 + } + } else { + if c.Signed() { + ext = extModeSignExtend32 + } else { + ext = extModeZeroExtend32 + } + } + + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) + rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), ext) + alu := m.allocateInstr() + alu.asALU(aluOpSubS, operandNR(xzrVReg), rn, rm, in64bit) + m.insert(alu) + + cset := m.allocateInstr() + cset.asCSet(m.compiler.VRegOf(si.Return()), false, flag) + m.insert(cset) +} + +func (m *machine) lowerVIcmp(si *ssa.Instruction) { + x, y, c, lane := si.VIcmpData() + flag := condFlagFromSSAIntegerCmpCond(c) + arr := ssaLaneToArrangement(lane) + + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := 
m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(si.Return())) + + switch flag { + case eq: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr) + m.insert(cmp) + case ne: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr) + m.insert(cmp) + not := m.allocateInstr() + not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) + m.insert(not) + case ge: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmge, rd, rn, rm, arr) + m.insert(cmp) + case gt: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmgt, rd, rn, rm, arr) + m.insert(cmp) + case le: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmge, rd, rm, rn, arr) // rm, rn are swapped + m.insert(cmp) + case lt: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmgt, rd, rm, rn, arr) // rm, rn are swapped + m.insert(cmp) + case hs: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmhs, rd, rn, rm, arr) + m.insert(cmp) + case hi: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmhi, rd, rn, rm, arr) + m.insert(cmp) + case ls: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmhs, rd, rm, rn, arr) // rm, rn are swapped + m.insert(cmp) + case lo: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpCmhi, rd, rm, rn, arr) // rm, rn are swapped + m.insert(cmp) + } +} + +func (m *machine) lowerVFcmp(si *ssa.Instruction) { + x, y, c, lane := si.VFcmpData() + flag := condFlagFromSSAFloatCmpCond(c) + arr := ssaLaneToArrangement(lane) + + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + rd := operandNR(m.compiler.VRegOf(si.Return())) + + switch flag { + case eq: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr) + m.insert(cmp) + case ne: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr) + m.insert(cmp) + not := m.allocateInstr() + not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) + m.insert(not) + case ge: + cmp := m.allocateInstr() + 
cmp.asVecRRR(vecOpFcmge, rd, rn, rm, arr) + m.insert(cmp) + case gt: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpFcmgt, rd, rn, rm, arr) + m.insert(cmp) + case mi: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpFcmgt, rd, rm, rn, arr) // rm, rn are swapped + m.insert(cmp) + case ls: + cmp := m.allocateInstr() + cmp.asVecRRR(vecOpFcmge, rd, rm, rn, arr) // rm, rn are swapped + m.insert(cmp) + } +} + +func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool) { + cvt := m.allocateInstr() + if signed { + cvt.asVecMisc(vecOpFcvtzs, rd, rn, arr) + } else { + cvt.asVecMisc(vecOpFcvtzu, rd, rn, arr) + } + m.insert(cvt) + + if arr == vecArrangement2D { + narrow := m.allocateInstr() + if signed { + narrow.asVecMisc(vecOpSqxtn, rd, rd, vecArrangement2S) + } else { + narrow.asVecMisc(vecOpUqxtn, rd, rd, vecArrangement2S) + } + m.insert(narrow) + } +} + +func (m *machine) lowerVfpuFromInt(rd, rn operand, arr vecArrangement, signed bool) { + cvt := m.allocateInstr() + if signed { + cvt.asVecMisc(vecOpScvtf, rd, rn, arr) + } else { + cvt.asVecMisc(vecOpUcvtf, rd, rn, arr) + } + m.insert(cvt) +} + +func (m *machine) lowerShifts(si *ssa.Instruction, ext extMode, aluOp aluOp) { + x, amount := si.Arg2() + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) + rm := m.getOperand_ShiftImm_NR(m.compiler.ValueDefinition(amount), ext, x.Type().Bits()) + rd := operandNR(m.compiler.VRegOf(si.Return())) + + alu := m.allocateInstr() + alu.asALUShift(aluOp, rd, rn, rm, x.Type().Bits() == 64) + m.insert(alu) +} + +func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult bool) { + x, y := si.Arg2() + + xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) + rn := m.getOperand_NR(xDef, extModeNone) + + var rd operand + if ignoreResult { + rd = operandNR(xzrVReg) + } else { + rd = operandNR(m.compiler.VRegOf(si.Return())) + } + + _64 := x.Type().Bits() == 64 + alu := m.allocateInstr() + if instr := yDef.Instr; instr != nil 
&& instr.Constant() { + c := instr.ConstantVal() + if isBitMaskImmediate(c, _64) { + // Constant bit wise operations can be lowered to a single instruction. + alu.asALUBitmaskImm(op, rd.nr(), rn.nr(), c, _64) + m.insert(alu) + return + } + } + + rm := m.getOperand_SR_NR(yDef, extModeNone) + alu.asALU(op, rd, rn, rm, _64) + m.insert(alu) +} + +func (m *machine) lowerRotl(si *ssa.Instruction) { + x, y := si.Arg2() + r := si.Return() + _64 := r.Type().Bits() == 64 + + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + var tmp operand + if _64 { + tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + } else { + tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + } + rd := operandNR(m.compiler.VRegOf(r)) + + // Encode rotl as neg + rotr: neg is a sub against the zero-reg. + m.lowerRotlImpl(rd, rn, rm, tmp, _64) +} + +func (m *machine) lowerRotlImpl(rd, rn, rm, tmp operand, is64bit bool) { + // Encode rotl as neg + rotr: neg is a sub against the zero-reg. 
+ neg := m.allocateInstr() + neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rm, is64bit) + m.insert(neg) + alu := m.allocateInstr() + alu.asALU(aluOpRotR, rd, rn, tmp, is64bit) + m.insert(alu) +} + +func (m *machine) lowerRotr(si *ssa.Instruction) { + x, y := si.Arg2() + + xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) + rn := m.getOperand_NR(xDef, extModeNone) + rm := m.getOperand_NR(yDef, extModeNone) + rd := operandNR(m.compiler.VRegOf(si.Return())) + + alu := m.allocateInstr() + alu.asALU(aluOpRotR, rd, rn, rm, si.Return().Type().Bits() == 64) + m.insert(alu) +} + +func (m *machine) lowerExtend(arg, ret ssa.Value, from, to byte, signed bool) { + rd := m.compiler.VRegOf(ret) + def := m.compiler.ValueDefinition(arg) + + if instr := def.Instr; !signed && from == 32 && instr != nil { + // We can optimize out the unsigned extend because: + // Writes to the W register set bits [63:32] of the X register to zero + // https://developer.arm.com/documentation/den0024/a/An-Introduction-to-the-ARMv8-Instruction-Sets/The-ARMv8-instruction-sets/Distinguishing-between-32-bit-and-64-bit-A64-instructions + switch instr.Opcode() { + case + ssa.OpcodeIadd, ssa.OpcodeIsub, ssa.OpcodeLoad, + ssa.OpcodeBand, ssa.OpcodeBor, ssa.OpcodeBnot, + ssa.OpcodeIshl, ssa.OpcodeUshr, ssa.OpcodeSshr, + ssa.OpcodeRotl, ssa.OpcodeRotr, + ssa.OpcodeUload8, ssa.OpcodeUload16, ssa.OpcodeUload32: + // So, if the argument is the result of a 32-bit operation, we can just copy the register. + // It is highly likely that this copy will be optimized out after register allocation. + rn := m.compiler.VRegOf(arg) + mov := m.allocateInstr() + // Note: do not use move32 as it will be lowered to a 32-bit move, which is not copy (that is actually the impl of UExtend). 
+ mov.asMove64(rd, rn) + m.insert(mov) + return + default: + } + } + rn := m.getOperand_NR(def, extModeNone) + + ext := m.allocateInstr() + ext.asExtend(rd, rn.nr(), from, to, signed) + m.insert(ext) +} + +func (m *machine) lowerFcmp(x, y, result ssa.Value, c ssa.FloatCmpCond) { + rn, rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone), m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + + fc := m.allocateInstr() + fc.asFpuCmp(rn, rm, x.Type().Bits() == 64) + m.insert(fc) + + cset := m.allocateInstr() + cset.asCSet(m.compiler.VRegOf(result), false, condFlagFromSSAFloatCmpCond(c)) + m.insert(cset) +} + +func (m *machine) lowerImul(x, y, result ssa.Value) { + rd := m.compiler.VRegOf(result) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + + // TODO: if this comes before Add/Sub, we could merge it by putting it into the place of xzrVReg. + + mul := m.allocateInstr() + mul.asALURRRR(aluOpMAdd, operandNR(rd), rn, rm, operandNR(xzrVReg), x.Type().Bits() == 64) + m.insert(mul) +} + +func (m *machine) lowerClz(x, result ssa.Value) { + rd := m.compiler.VRegOf(result) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + clz := m.allocateInstr() + clz.asBitRR(bitOpClz, rd, rn.nr(), x.Type().Bits() == 64) + m.insert(clz) +} + +func (m *machine) lowerCtz(x, result ssa.Value) { + rd := m.compiler.VRegOf(result) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rbit := m.allocateInstr() + _64 := x.Type().Bits() == 64 + var tmpReg regalloc.VReg + if _64 { + tmpReg = m.compiler.AllocateVReg(ssa.TypeI64) + } else { + tmpReg = m.compiler.AllocateVReg(ssa.TypeI32) + } + rbit.asBitRR(bitOpRbit, tmpReg, rn.nr(), _64) + m.insert(rbit) + + clz := m.allocateInstr() + clz.asBitRR(bitOpClz, rd, tmpReg, _64) + m.insert(clz) +} + +func (m *machine) lowerPopcnt(x, result ssa.Value) { + // arm64 doesn't have an instruction for population count on 
scalar register, + // so we use the vector instruction `cnt`. + // This is exactly what the official Go implements bits.OneCount. + // For example, "func () int { return bits.OneCount(10) }" is compiled as + // + // MOVD $10, R0 ;; Load 10. + // FMOVD R0, F0 + // VCNT V0.B8, V0.B8 + // UADDLV V0.B8, V0 + // + // In aarch64 asm, FMOVD is encoded as `ins`, VCNT is `cnt`, + // and the registers may use different names. In our encoding we use the following + // instructions: + // + // ins v0.d[0], x0 ;; mov from GPR to vec (FMOV above) is encoded as INS + // cnt v0.16b, v0.16b ;; we use vec arrangement 16b + // uaddlv h0, v0.8b ;; h0 is still v0 with the dest width specifier 'H', implied when src arrangement is 8b + // mov x5, v0.d[0] ;; finally we mov the result back to a GPR + // + + rd := operandNR(m.compiler.VRegOf(result)) + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + + rf1 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) + ins := m.allocateInstr() + ins.asMovToVec(rf1, rn, vecArrangementD, vecIndex(0)) + m.insert(ins) + + rf2 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) + cnt := m.allocateInstr() + cnt.asVecMisc(vecOpCnt, rf2, rf1, vecArrangement16B) + m.insert(cnt) + + rf3 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) + uaddlv := m.allocateInstr() + uaddlv.asVecLanes(vecOpUaddlv, rf3, rf2, vecArrangement8B) + m.insert(uaddlv) + + mov := m.allocateInstr() + mov.asMovFromVec(rd, rf3, vecArrangementD, vecIndex(0), false) + m.insert(mov) +} + +// lowerExitWithCode lowers the lowerExitWithCode takes a context pointer as argument. 
+func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.ExitCode) { + tmpReg1 := m.compiler.AllocateVReg(ssa.TypeI32) + loadExitCodeConst := m.allocateInstr() + loadExitCodeConst.asMOVZ(tmpReg1, uint64(code), 0, true) + + setExitCode := m.allocateInstr() + setExitCode.asStore(operandNR(tmpReg1), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), + }, 32) + + // In order to unwind the stack, we also need to push the current stack pointer: + tmp2 := m.compiler.AllocateVReg(ssa.TypeI64) + movSpToTmp := m.allocateInstr() + movSpToTmp.asMove64(tmp2, spVReg) + strSpToExecCtx := m.allocateInstr() + strSpToExecCtx.asStore(operandNR(tmp2), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), + }, 64) + // Also the address of this exit. + tmp3 := m.compiler.AllocateVReg(ssa.TypeI64) + currentAddrToTmp := m.allocateInstr() + currentAddrToTmp.asAdr(tmp3, 0) + storeCurrentAddrToExecCtx := m.allocateInstr() + storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), + }, 64) + + exitSeq := m.allocateInstr() + exitSeq.asExitSequence(execCtxVReg) + + m.insert(loadExitCodeConst) + m.insert(setExitCode) + m.insert(movSpToTmp) + m.insert(strSpToExecCtx) + m.insert(currentAddrToTmp) + m.insert(storeCurrentAddrToExecCtx) + m.insert(exitSeq) +} + +func (m *machine) lowerIcmpToFlag(x, y ssa.Value, signed bool) { + if x.Type() != y.Type() { + panic( + fmt.Sprintf("TODO(maybe): support icmp with different types: v%d=%s != v%d=%s", + x.ID(), x.Type(), y.ID(), y.Type())) + } + + extMod := extModeOf(x.Type(), signed) + + // First operand must be in pure register form. 
+ rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extMod) + // Second operand can be in any of Imm12, ER, SR, or NR form supported by the SUBS instructions. + rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), extMod) + + alu := m.allocateInstr() + // subs zr, rn, rm + alu.asALU( + aluOpSubS, + // We don't need the result, just need to set flags. + operandNR(xzrVReg), + rn, + rm, + x.Type().Bits() == 64, + ) + m.insert(alu) +} + +func (m *machine) lowerFcmpToFlag(x, y ssa.Value) { + if x.Type() != y.Type() { + panic("TODO(maybe): support icmp with different types") + } + + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + cmp := m.allocateInstr() + cmp.asFpuCmp(rn, rm, x.Type().Bits() == 64) + m.insert(cmp) +} + +func (m *machine) lowerExitIfTrueWithCode(execCtxVReg regalloc.VReg, cond ssa.Value, code wazevoapi.ExitCode) { + condDef := m.compiler.ValueDefinition(cond) + if !m.compiler.MatchInstr(condDef, ssa.OpcodeIcmp) { + panic("TODO: OpcodeExitIfTrueWithCode must come after Icmp at the moment: " + condDef.Instr.Opcode().String()) + } + condDef.Instr.MarkLowered() + + cvalInstr := condDef.Instr + x, y, c := cvalInstr.IcmpData() + signed := c.Signed() + + if !m.tryLowerBandToFlag(x, y) { + m.lowerIcmpToFlag(x, y, signed) + } + + // We need to copy the execution context to a temp register, because if it's spilled, + // it might end up being reloaded inside the exiting branch. + execCtxTmp := m.copyToTmp(execCtxVReg) + + // We have to skip the entire exit sequence if the condition is false. + cbr := m.allocateInstr() + m.insert(cbr) + m.lowerExitWithCode(execCtxTmp, code) + // conditional branch target is after exit. 
+ l := m.insertBrTargetLabel() + cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), l, false /* ignored */) +} + +func (m *machine) lowerSelect(c, x, y, result ssa.Value) { + cvalDef := m.compiler.ValueDefinition(c) + + var cc condFlag + switch { + case m.compiler.MatchInstr(cvalDef, ssa.OpcodeIcmp): // This case, we can use the ALU flag set by SUBS instruction. + cvalInstr := cvalDef.Instr + x, y, c := cvalInstr.IcmpData() + cc = condFlagFromSSAIntegerCmpCond(c) + m.lowerIcmpToFlag(x, y, c.Signed()) + cvalDef.Instr.MarkLowered() + case m.compiler.MatchInstr(cvalDef, ssa.OpcodeFcmp): // This case we can use the Fpu flag directly. + cvalInstr := cvalDef.Instr + x, y, c := cvalInstr.FcmpData() + cc = condFlagFromSSAFloatCmpCond(c) + m.lowerFcmpToFlag(x, y) + cvalDef.Instr.MarkLowered() + default: + rn := m.getOperand_NR(cvalDef, extModeNone) + if c.Type() != ssa.TypeI32 && c.Type() != ssa.TypeI64 { + panic("TODO?BUG?: support select with non-integer condition") + } + alu := m.allocateInstr() + // subs zr, rn, zr + alu.asALU( + aluOpSubS, + // We don't need the result, just need to set flags. + operandNR(xzrVReg), + rn, + operandNR(xzrVReg), + c.Type().Bits() == 64, + ) + m.insert(alu) + cc = ne + } + + rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) + rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) + + rd := operandNR(m.compiler.VRegOf(result)) + switch x.Type() { + case ssa.TypeI32, ssa.TypeI64: + // csel rd, rn, rm, cc + csel := m.allocateInstr() + csel.asCSel(rd, rn, rm, cc, x.Type().Bits() == 64) + m.insert(csel) + case ssa.TypeF32, ssa.TypeF64: + // fcsel rd, rn, rm, cc + fcsel := m.allocateInstr() + fcsel.asFpuCSel(rd, rn, rm, cc, x.Type().Bits() == 64) + m.insert(fcsel) + default: + panic("BUG") + } +} + +func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { + // First check if `rc` is zero or not. 
+ checkZero := m.allocateInstr() + checkZero.asALU(aluOpSubS, operandNR(xzrVReg), rc, operandNR(xzrVReg), false) + m.insert(checkZero) + + // Then use CSETM to set all bits to one if `rc` is zero. + allOnesOrZero := m.compiler.AllocateVReg(ssa.TypeI64) + cset := m.allocateInstr() + cset.asCSet(allOnesOrZero, true, ne) + m.insert(cset) + + // Then move the bits to the result vector register. + tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + dup := m.allocateInstr() + dup.asVecDup(tmp2, operandNR(allOnesOrZero), vecArrangement2D) + m.insert(dup) + + // Now that `tmp2` has either all bits one or zero depending on `rc`, + // we can use bsl to select between `rn` and `rm`. + ins := m.allocateInstr() + ins.asVecRRRRewrite(vecOpBsl, tmp2, rn, rm, vecArrangement16B) + m.insert(ins) + + // Finally, move the result to the destination register. + mov2 := m.allocateInstr() + mov2.asFpuMov128(rd.nr(), tmp2.nr()) + m.insert(mov2) +} + +func (m *machine) lowerAtomicRmw(si *ssa.Instruction) { + ssaOp, size := si.AtomicRmwData() + + var op atomicRmwOp + var negateArg bool + var flipArg bool + switch ssaOp { + case ssa.AtomicRmwOpAdd: + op = atomicRmwOpAdd + case ssa.AtomicRmwOpSub: + op = atomicRmwOpAdd + negateArg = true + case ssa.AtomicRmwOpAnd: + op = atomicRmwOpClr + flipArg = true + case ssa.AtomicRmwOpOr: + op = atomicRmwOpSet + case ssa.AtomicRmwOpXor: + op = atomicRmwOpEor + case ssa.AtomicRmwOpXchg: + op = atomicRmwOpSwp + default: + panic(fmt.Sprintf("unknown ssa atomic rmw op: %s", ssaOp)) + } + + addr, val := si.Arg2() + addrDef, valDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(val) + rn := m.getOperand_NR(addrDef, extModeNone) + rt := operandNR(m.compiler.VRegOf(si.Return())) + rs := m.getOperand_NR(valDef, extModeNone) + + _64 := si.Return().Type().Bits() == 64 + var tmp operand + if _64 { + tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + } else { + tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + } + 
m.lowerAtomicRmwImpl(op, rn, rs, rt, tmp, size, negateArg, flipArg, _64) +} + +func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp operand, size uint64, negateArg, flipArg, dst64bit bool) { + switch { + case negateArg: + neg := m.allocateInstr() + neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rs, dst64bit) + m.insert(neg) + case flipArg: + flip := m.allocateInstr() + flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), rs, dst64bit) + m.insert(flip) + default: + tmp = rs + } + + rmw := m.allocateInstr() + rmw.asAtomicRmw(op, rn, tmp, rt, size) + m.insert(rmw) +} + +func (m *machine) lowerAtomicCas(si *ssa.Instruction) { + addr, exp, repl := si.Arg3() + size := si.AtomicTargetSize() + + addrDef, expDef, replDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(exp), m.compiler.ValueDefinition(repl) + rn := m.getOperand_NR(addrDef, extModeNone) + rt := m.getOperand_NR(replDef, extModeNone) + rs := m.getOperand_NR(expDef, extModeNone) + tmp := operandNR(m.compiler.AllocateVReg(si.Return().Type())) + + _64 := si.Return().Type().Bits() == 64 + // rs is overwritten by CAS, so we need to move it to the result register before the instruction + // in case when it is used somewhere else. 
+ mov := m.allocateInstr() + if _64 { + mov.asMove64(tmp.nr(), rs.nr()) + } else { + mov.asMove32(tmp.nr(), rs.nr()) + } + m.insert(mov) + + m.lowerAtomicCasImpl(rn, tmp, rt, size) + + mov2 := m.allocateInstr() + rd := m.compiler.VRegOf(si.Return()) + if _64 { + mov2.asMove64(rd, tmp.nr()) + } else { + mov2.asMove32(rd, tmp.nr()) + } + m.insert(mov2) +} + +func (m *machine) lowerAtomicCasImpl(rn, rs, rt operand, size uint64) { + cas := m.allocateInstr() + cas.asAtomicCas(rn, rs, rt, size) + m.insert(cas) +} + +func (m *machine) lowerAtomicLoad(si *ssa.Instruction) { + addr := si.Arg() + size := si.AtomicTargetSize() + + addrDef := m.compiler.ValueDefinition(addr) + rn := m.getOperand_NR(addrDef, extModeNone) + rt := operandNR(m.compiler.VRegOf(si.Return())) + + m.lowerAtomicLoadImpl(rn, rt, size) +} + +func (m *machine) lowerAtomicLoadImpl(rn, rt operand, size uint64) { + ld := m.allocateInstr() + ld.asAtomicLoad(rn, rt, size) + m.insert(ld) +} + +func (m *machine) lowerAtomicStore(si *ssa.Instruction) { + addr, val := si.Arg2() + size := si.AtomicTargetSize() + + addrDef := m.compiler.ValueDefinition(addr) + valDef := m.compiler.ValueDefinition(val) + rn := m.getOperand_NR(addrDef, extModeNone) + rt := m.getOperand_NR(valDef, extModeNone) + + m.lowerAtomicStoreImpl(rn, rt, size) +} + +func (m *machine) lowerAtomicStoreImpl(rn, rt operand, size uint64) { + ld := m.allocateInstr() + ld.asAtomicStore(rn, rt, size) + m.insert(ld) +} + +// copyToTmp copies the given regalloc.VReg to a temporary register. This is called before cbr to avoid the regalloc issue +// e.g. 
reload happening in the middle of the exit sequence which is not the path the normal path executes +func (m *machine) copyToTmp(v regalloc.VReg) regalloc.VReg { + typ := m.compiler.TypeOf(v) + mov := m.allocateInstr() + tmp := m.compiler.AllocateVReg(typ) + if typ.IsInt() { + mov.asMove64(tmp, v) + } else { + mov.asFpuMov128(tmp, v) + } + m.insert(mov) + return tmp +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go new file mode 100644 index 000000000..d9fbf1789 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go @@ -0,0 +1,350 @@ +package arm64 + +// This file contains the logic to "find and determine operands" for instructions. +// In order to finalize the form of an operand, we might end up merging/eliminating +// the source instructions into an operand whenever possible. + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +type ( + // operand represents an operand of an instruction whose type is determined by the kind. + operand struct { + kind operandKind + data, data2 uint64 + } + operandKind byte +) + +// Here's the list of operand kinds. We use the abbreviation of the kind name not only for these consts, +// but also names of functions which return the operand of the kind. +const ( + // operandKindNR represents "NormalRegister" (NR). This is literally the register without any special operation unlike others. + operandKindNR operandKind = iota + // operandKindSR represents "Shifted Register" (SR). This is a register which is shifted by a constant. + // Some of the arm64 instructions can take this kind of operand. 
+ operandKindSR + // operandKindER represents "Extended Register (ER). This is a register which is sign/zero-extended to a larger size. + // Some of the arm64 instructions can take this kind of operand. + operandKindER + // operandKindImm12 represents "Immediate 12" (Imm12). This is a 12-bit immediate value which can be either shifted by 12 or not. + // See asImm12 function for detail. + operandKindImm12 + // operandKindShiftImm represents "Shifted Immediate" (ShiftImm) used by shift operations. + operandKindShiftImm +) + +// String implements fmt.Stringer for debugging. +func (o operand) format(size byte) string { + switch o.kind { + case operandKindNR: + return formatVRegSized(o.nr(), size) + case operandKindSR: + r, amt, sop := o.sr() + return fmt.Sprintf("%s, %s #%d", formatVRegSized(r, size), sop, amt) + case operandKindER: + r, eop, _ := o.er() + return fmt.Sprintf("%s %s", formatVRegSized(r, size), eop) + case operandKindImm12: + imm12, shiftBit := o.imm12() + if shiftBit == 1 { + return fmt.Sprintf("#%#x", uint64(imm12)<<12) + } else { + return fmt.Sprintf("#%#x", imm12) + } + default: + panic(fmt.Sprintf("unknown operand kind: %d", o.kind)) + } +} + +// operandNR encodes the given VReg as an operand of operandKindNR. +func operandNR(r regalloc.VReg) operand { + return operand{kind: operandKindNR, data: uint64(r)} +} + +// nr decodes the underlying VReg assuming the operand is of operandKindNR. +func (o operand) nr() regalloc.VReg { + return regalloc.VReg(o.data) +} + +// operandER encodes the given VReg as an operand of operandKindER. +func operandER(r regalloc.VReg, eop extendOp, to byte) operand { + if to < 32 { + panic("TODO?BUG?: when we need to extend to less than 32 bits?") + } + return operand{kind: operandKindER, data: uint64(r), data2: uint64(eop)<<32 | uint64(to)} +} + +// er decodes the underlying VReg, extend operation, and the target size assuming the operand is of operandKindER. 
+func (o operand) er() (r regalloc.VReg, eop extendOp, to byte) { + return regalloc.VReg(o.data), extendOp(o.data2>>32) & 0xff, byte(o.data2 & 0xff) +} + +// operandSR encodes the given VReg as an operand of operandKindSR. +func operandSR(r regalloc.VReg, amt byte, sop shiftOp) operand { + return operand{kind: operandKindSR, data: uint64(r), data2: uint64(amt)<<32 | uint64(sop)} +} + +// sr decodes the underlying VReg, shift amount, and shift operation assuming the operand is of operandKindSR. +func (o operand) sr() (r regalloc.VReg, amt byte, sop shiftOp) { + return regalloc.VReg(o.data), byte(o.data2>>32) & 0xff, shiftOp(o.data2) & 0xff +} + +// operandImm12 encodes the given imm12 as an operand of operandKindImm12. +func operandImm12(imm12 uint16, shiftBit byte) operand { + return operand{kind: operandKindImm12, data: uint64(imm12) | uint64(shiftBit)<<32} +} + +// imm12 decodes the underlying imm12 data assuming the operand is of operandKindImm12. +func (o operand) imm12() (v uint16, shiftBit byte) { + return uint16(o.data), byte(o.data >> 32) +} + +// operandShiftImm encodes the given amount as an operand of operandKindShiftImm. +func operandShiftImm(amount byte) operand { + return operand{kind: operandKindShiftImm, data: uint64(amount)} +} + +// shiftImm decodes the underlying shift amount data assuming the operand is of operandKindShiftImm. +func (o operand) shiftImm() byte { + return byte(o.data) +} + +// reg returns the register of the operand if applicable. +func (o operand) reg() regalloc.VReg { + switch o.kind { + case operandKindNR: + return o.nr() + case operandKindSR: + r, _, _ := o.sr() + return r + case operandKindER: + r, _, _ := o.er() + return r + case operandKindImm12: + // Does not have a register. + case operandKindShiftImm: + // Does not have a register. 
+ default: + panic(o.kind) + } + return regalloc.VRegInvalid +} + +func (o operand) realReg() regalloc.RealReg { + return o.nr().RealReg() +} + +func (o operand) assignReg(v regalloc.VReg) operand { + switch o.kind { + case operandKindNR: + return operandNR(v) + case operandKindSR: + _, amt, sop := o.sr() + return operandSR(v, amt, sop) + case operandKindER: + _, eop, to := o.er() + return operandER(v, eop, to) + case operandKindImm12: + // Does not have a register. + case operandKindShiftImm: + // Does not have a register. + } + panic(o.kind) +} + +// ensureValueNR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def). +// +// `mode` is used to extend the operand if the bit length is smaller than mode.bits(). +// If the operand can be expressed as operandKindImm12, `mode` is ignored. +func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { + if def.IsFromBlockParam() { + return operandNR(def.BlkParamVReg) + } + + instr := def.Instr + if instr.Opcode() == ssa.OpcodeIconst { + if imm12Op, ok := asImm12Operand(instr.ConstantVal()); ok { + instr.MarkLowered() + return imm12Op + } + } + return m.getOperand_ER_SR_NR(def, mode) +} + +// getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value. +// If the immediate value is negated, the second return value is true, otherwise always false. 
+func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { + if def.IsFromBlockParam() { + return operandNR(def.BlkParamVReg), false + } + + instr := def.Instr + if instr.Opcode() == ssa.OpcodeIconst { + c := instr.ConstantVal() + if imm12Op, ok := asImm12Operand(c); ok { + instr.MarkLowered() + return imm12Op, false + } + + signExtended := int64(c) + if def.SSAValue().Type().Bits() == 32 { + signExtended = (signExtended << 32) >> 32 + } + negatedWithoutSign := -signExtended + if imm12Op, ok := asImm12Operand(uint64(negatedWithoutSign)); ok { + instr.MarkLowered() + return imm12Op, true + } + } + return m.getOperand_ER_SR_NR(def, mode), false +} + +// ensureValueNR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def). +// +// `mode` is used to extend the operand if the bit length is smaller than mode.bits(). +func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { + if def.IsFromBlockParam() { + return operandNR(def.BlkParamVReg) + } + + if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) { + extInstr := def.Instr + + signed := extInstr.Opcode() == ssa.OpcodeSExtend + innerExtFromBits, innerExtToBits := extInstr.ExtendFromToBits() + modeBits, modeSigned := mode.bits(), mode.signed() + if mode == extModeNone || innerExtToBits == modeBits { + eop := extendOpFrom(signed, innerExtFromBits) + extArg := m.getOperand_NR(m.compiler.ValueDefinition(extInstr.Arg()), extModeNone) + op = operandER(extArg.nr(), eop, innerExtToBits) + extInstr.MarkLowered() + return + } + + if innerExtToBits > modeBits { + panic("BUG?TODO?: need the results of inner extension to be larger than the mode") + } + + switch { + case (!signed && !modeSigned) || (signed && modeSigned): + // Two sign/zero extensions are equivalent to one sign/zero extension for the larger 
size. + eop := extendOpFrom(modeSigned, innerExtFromBits) + op = operandER(m.compiler.VRegOf(extInstr.Arg()), eop, modeBits) + extInstr.MarkLowered() + case (signed && !modeSigned) || (!signed && modeSigned): + // We need to {sign, zero}-extend the result of the {zero,sign} extension. + eop := extendOpFrom(modeSigned, innerExtToBits) + op = operandER(m.compiler.VRegOf(extInstr.Return()), eop, modeBits) + // Note that we failed to merge the inner extension instruction this case. + } + return + } + return m.getOperand_SR_NR(def, mode) +} + +// ensureValueNR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def). +// +// `mode` is used to extend the operand if the bit length is smaller than mode.bits(). +func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { + if def.IsFromBlockParam() { + return operandNR(def.BlkParamVReg) + } + + if m.compiler.MatchInstr(def, ssa.OpcodeIshl) { + // Check if the shift amount is constant instruction. + targetVal, amountVal := def.Instr.Arg2() + targetVReg := m.getOperand_NR(m.compiler.ValueDefinition(targetVal), extModeNone).nr() + amountDef := m.compiler.ValueDefinition(amountVal) + if amountDef.IsFromInstr() && amountDef.Instr.Constant() { + // If that is the case, we can use the shifted register operand (SR). + c := byte(amountDef.Instr.ConstantVal()) & (targetVal.Type().Bits() - 1) // Clears the unnecessary bits. + def.Instr.MarkLowered() + amountDef.Instr.MarkLowered() + return operandSR(targetVReg, c, shiftOpLSL) + } + } + return m.getOperand_NR(def, mode) +} + +// getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def). 
+func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { + if def.IsFromBlockParam() { + return operandNR(def.BlkParamVReg) + } + + instr := def.Instr + if instr.Constant() { + amount := byte(instr.ConstantVal()) & (shiftBitWidth - 1) // Clears the unnecessary bits. + return operandShiftImm(amount) + } + return m.getOperand_NR(def, mode) +} + +// getOperand_NR returns an operand of operandKindNR from the given value (defined by `def`). +// +// `mode` is used to extend the operand if the bit length is smaller than mode.bits(). +func (m *machine) getOperand_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { + var v regalloc.VReg + if def.IsFromBlockParam() { + v = def.BlkParamVReg + } else { + instr := def.Instr + if instr.Constant() { + // We inline all the constant instructions so that we could reduce the register usage. + v = m.lowerConstant(instr) + instr.MarkLowered() + } else { + if n := def.N; n == 0 { + v = m.compiler.VRegOf(instr.Return()) + } else { + _, rs := instr.Returns() + v = m.compiler.VRegOf(rs[n-1]) + } + } + } + + r := v + switch inBits := def.SSAValue().Type().Bits(); { + case mode == extModeNone: + case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32): + case inBits == 32 && mode == extModeZeroExtend64: + extended := m.compiler.AllocateVReg(ssa.TypeI64) + ext := m.allocateInstr() + ext.asExtend(extended, v, 32, 64, false) + m.insert(ext) + r = extended + case inBits == 32 && mode == extModeSignExtend64: + extended := m.compiler.AllocateVReg(ssa.TypeI64) + ext := m.allocateInstr() + ext.asExtend(extended, v, 32, 64, true) + m.insert(ext) + r = extended + case inBits == 64 && (mode == extModeZeroExtend64 || mode == extModeSignExtend64): + } + return operandNR(r) +} + +func asImm12Operand(val uint64) (op operand, ok bool) { + v, shiftBit, ok := asImm12(val) + if !ok { + return operand{}, false + } + return operandImm12(v, shiftBit), true +} + 
+func asImm12(val uint64) (v uint16, shiftBit byte, ok bool) { + const mask1, mask2 uint64 = 0xfff, 0xfff_000 + if val&^mask1 == 0 { + return uint16(val), 0, true + } else if val&^mask2 == 0 { + return uint16(val >> 12), 1, true + } else { + return 0, 0, false + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go new file mode 100644 index 000000000..4842eaa38 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go @@ -0,0 +1,440 @@ +package arm64 + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +type ( + // addressMode represents an ARM64 addressing mode. + // + // https://developer.arm.com/documentation/102374/0101/Loads-and-stores---addressing + // TODO: use the bit-packed layout like operand struct. + addressMode struct { + kind addressModeKind + rn, rm regalloc.VReg + extOp extendOp + imm int64 + } + + // addressModeKind represents the kind of ARM64 addressing mode. + addressModeKind byte +) + +const ( + // addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended, + // and then scaled by bits(type)/8. + // + // e.g. 
+ // - ldrh w1, [x2, w3, SXTW #1] ;; sign-extended and scaled by 2 (== LSL #1) + // - strh w1, [x2, w3, UXTW #1] ;; zero-extended and scaled by 2 (== LSL #1) + // - ldr w1, [x2, w3, SXTW #2] ;; sign-extended and scaled by 4 (== LSL #2) + // - str x1, [x2, w3, UXTW #3] ;; zero-extended and scaled by 8 (== LSL #3) + // + // See the following pages: + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--register---Load-Register-Halfword--register-- + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register-- + addressModeKindRegScaledExtended addressModeKind = iota + + // addressModeKindRegScaled is the same as addressModeKindRegScaledExtended, but without extension factor. + addressModeKindRegScaled + + // addressModeKindRegExtended is the same as addressModeKindRegScaledExtended, but without scale factor. + addressModeKindRegExtended + + // addressModeKindRegReg takes a base register and an index register. The index register is not either scaled or extended. + addressModeKindRegReg + + // addressModeKindRegSignedImm9 takes a base register and a 9-bit "signed" immediate offset (-256 to 255). + // The immediate will be sign-extended, and be added to the base register. + // This is a.k.a. "unscaled" since the immediate is not scaled. + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled-- + addressModeKindRegSignedImm9 + + // addressModeKindRegUnsignedImm12 takes a base register and a 12-bit "unsigned" immediate offset, scaled by + // the size of the type. In other words, the actual offset will be imm12 * bits(type)/8. 
+ // See "Unsigned offset" in the following pages: + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate-- + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + addressModeKindRegUnsignedImm12 + + // addressModeKindPostIndex takes a base register and a 9-bit "signed" immediate offset. + // After the load/store, the base register will be updated by the offset. + // + // Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset. + // + // See "Post-index" in the following pages for examples: + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate-- + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers- + addressModeKindPostIndex + + // addressModeKindPreIndex takes a base register and a 9-bit "signed" immediate offset. + // Before the load/store, the base register will be updated by the offset. + + // Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset. 
+ // + // See "Pre-index" in the following pages for examples: + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate-- + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers- + addressModeKindPreIndex + + // addressModeKindArgStackSpace is used to resolve the address of the argument stack space + // exiting right above the stack pointer. Since we don't know the exact stack space needed for a function + // at a compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above. + addressModeKindArgStackSpace + + // addressModeKindResultStackSpace is used to resolve the address of the result stack space + // exiting right above the stack pointer. Since we don't know the exact stack space needed for a function + // at a compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above. 
+ addressModeKindResultStackSpace +) + +func (a addressMode) format(dstSizeBits byte) (ret string) { + base := formatVRegSized(a.rn, 64) + if rn := a.rn; rn.RegType() != regalloc.RegTypeInt { + panic("invalid base register type: " + a.rn.RegType().String()) + } else if rn.IsRealReg() && v0 <= a.rn.RealReg() && a.rn.RealReg() <= v30 { + panic("BUG: likely a bug in reg alloc or reset behavior") + } + + switch a.kind { + case addressModeKindRegScaledExtended: + amount := a.sizeInBitsToShiftAmount(dstSizeBits) + ret = fmt.Sprintf("[%s, %s, %s #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp, amount) + case addressModeKindRegScaled: + amount := a.sizeInBitsToShiftAmount(dstSizeBits) + ret = fmt.Sprintf("[%s, %s, lsl #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), amount) + case addressModeKindRegExtended: + ret = fmt.Sprintf("[%s, %s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp) + case addressModeKindRegReg: + ret = fmt.Sprintf("[%s, %s]", base, formatVRegSized(a.rm, a.indexRegBits())) + case addressModeKindRegSignedImm9: + if a.imm != 0 { + ret = fmt.Sprintf("[%s, #%#x]", base, a.imm) + } else { + ret = fmt.Sprintf("[%s]", base) + } + case addressModeKindRegUnsignedImm12: + if a.imm != 0 { + ret = fmt.Sprintf("[%s, #%#x]", base, a.imm) + } else { + ret = fmt.Sprintf("[%s]", base) + } + case addressModeKindPostIndex: + ret = fmt.Sprintf("[%s], #%#x", base, a.imm) + case addressModeKindPreIndex: + ret = fmt.Sprintf("[%s, #%#x]!", base, a.imm) + case addressModeKindArgStackSpace: + ret = fmt.Sprintf("[#arg_space, #%#x]", a.imm) + case addressModeKindResultStackSpace: + ret = fmt.Sprintf("[#ret_space, #%#x]", a.imm) + } + return +} + +func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode { + if !offsetFitsInAddressModeKindRegSignedImm9(imm) { + panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm)) + } + if preIndex { + return addressMode{kind: addressModeKindPreIndex, 
rn: rn, imm: imm} + } else { + return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm} + } +} + +func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool { + divisor := int64(dstSizeInBits) / 8 + return 0 < offset && offset%divisor == 0 && offset/divisor < 4096 +} + +func offsetFitsInAddressModeKindRegSignedImm9(offset int64) bool { + return -256 <= offset && offset <= 255 +} + +func (a addressMode) indexRegBits() byte { + bits := a.extOp.srcBits() + if bits != 32 && bits != 64 { + panic("invalid index register for address mode. it must be either 32 or 64 bits") + } + return bits +} + +func (a addressMode) sizeInBitsToShiftAmount(sizeInBits byte) (lsl byte) { + switch sizeInBits { + case 8: + lsl = 0 + case 16: + lsl = 1 + case 32: + lsl = 2 + case 64: + lsl = 3 + } + return +} + +func extLoadSignSize(op ssa.Opcode) (size byte, signed bool) { + switch op { + case ssa.OpcodeUload8: + size, signed = 8, false + case ssa.OpcodeUload16: + size, signed = 16, false + case ssa.OpcodeUload32: + size, signed = 32, false + case ssa.OpcodeSload8: + size, signed = 8, true + case ssa.OpcodeSload16: + size, signed = 16, true + case ssa.OpcodeSload32: + size, signed = 32, true + default: + panic("BUG") + } + return +} + +func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret regalloc.VReg) { + size, signed := extLoadSignSize(op) + amode := m.lowerToAddressMode(ptr, offset, size) + load := m.allocateInstr() + if signed { + load.asSLoad(operandNR(ret), amode, size) + } else { + load.asULoad(operandNR(ret), amode, size) + } + m.insert(load) +} + +func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.Value) { + amode := m.lowerToAddressMode(ptr, offset, typ.Bits()) + + dst := m.compiler.VRegOf(ret) + load := m.allocateInstr() + switch typ { + case ssa.TypeI32, ssa.TypeI64: + load.asULoad(operandNR(dst), amode, typ.Bits()) + case ssa.TypeF32, ssa.TypeF64: + load.asFpuLoad(operandNR(dst), 
amode, typ.Bits()) + case ssa.TypeV128: + load.asFpuLoad(operandNR(dst), amode, 128) + default: + panic("TODO") + } + m.insert(load) +} + +func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane, ret ssa.Value) { + // vecLoad1R has offset address mode (base+imm) only for post index, so we simply add the offset to the base. + base := m.getOperand_NR(m.compiler.ValueDefinition(ptr), extModeNone).nr() + offsetReg := m.compiler.AllocateVReg(ssa.TypeI64) + m.lowerConstantI64(offsetReg, int64(offset)) + addedBase := m.addReg64ToReg64(base, offsetReg) + + rd := operandNR(m.compiler.VRegOf(ret)) + + ld1r := m.allocateInstr() + ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane)) + m.insert(ld1r) +} + +func (m *machine) lowerStore(si *ssa.Instruction) { + // TODO: merge consecutive stores into a single pair store instruction. + value, ptr, offset, storeSizeInBits := si.StoreData() + amode := m.lowerToAddressMode(ptr, offset, storeSizeInBits) + + valueOp := m.getOperand_NR(m.compiler.ValueDefinition(value), extModeNone) + store := m.allocateInstr() + store.asStore(valueOp, amode, storeSizeInBits) + m.insert(store) +} + +// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions. +func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) { + // TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and + // addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed + // to support more efficient address resolution. + + a32s, a64s, offset := m.collectAddends(ptr) + offset += int64(offsetBase) + return m.lowerToAddressModeFromAddends(a32s, a64s, size, offset) +} + +// lowerToAddressModeFromAddends creates an addressMode from a list of addends collected by collectAddends. +// During the construction, this might emit additional instructions. 
+// +// Extracted as a separate function for easy testing. +func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) { + switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); { + case a64sExist && a32sExist: + var base regalloc.VReg + base = a64s.Dequeue() + var a32 addend32 + a32 = a32s.Dequeue() + amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext} + case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset): + var base regalloc.VReg + base = a64s.Dequeue() + amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset} + offset = 0 + case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset): + var base regalloc.VReg + base = a64s.Dequeue() + amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset} + offset = 0 + case a64sExist: + var base regalloc.VReg + base = a64s.Dequeue() + if !a64s.Empty() { + index := a64s.Dequeue() + amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */} + } else { + amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} + } + case a32sExist: + base32 := a32s.Dequeue() + + // First we need 64-bit base. + base := m.compiler.AllocateVReg(ssa.TypeI64) + baseExt := m.allocateInstr() + var signed bool + if base32.ext == extendOpSXTW { + signed = true + } + baseExt.asExtend(base, base32.r, 32, 64, signed) + m.insert(baseExt) + + if !a32s.Empty() { + index := a32s.Dequeue() + amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext} + } else { + amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} + } + default: // Only static offsets. 
+ tmpReg := m.compiler.AllocateVReg(ssa.TypeI64) + m.lowerConstantI64(tmpReg, offset) + amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0} + offset = 0 + } + + baseReg := amode.rn + if offset > 0 { + baseReg = m.addConstToReg64(baseReg, offset) // baseReg += offset + } + + for !a64s.Empty() { + a64 := a64s.Dequeue() + baseReg = m.addReg64ToReg64(baseReg, a64) // baseReg += a64 + } + + for !a32s.Empty() { + a32 := a32s.Dequeue() + baseReg = m.addRegToReg64Ext(baseReg, a32.r, a32.ext) // baseReg += (a32 extended to 64-bit) + } + amode.rn = baseReg + return +} + +var addendsMatchOpcodes = [4]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst} + +func (m *machine) collectAddends(ptr ssa.Value) (addends32 *wazevoapi.Queue[addend32], addends64 *wazevoapi.Queue[regalloc.VReg], offset int64) { + m.addendsWorkQueue.Reset() + m.addends32.Reset() + m.addends64.Reset() + m.addendsWorkQueue.Enqueue(ptr) + + for !m.addendsWorkQueue.Empty() { + v := m.addendsWorkQueue.Dequeue() + + def := m.compiler.ValueDefinition(v) + switch op := m.compiler.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op { + case ssa.OpcodeIadd: + // If the addend is an add, we recursively collect its operands. + x, y := def.Instr.Arg2() + m.addendsWorkQueue.Enqueue(x) + m.addendsWorkQueue.Enqueue(y) + def.Instr.MarkLowered() + case ssa.OpcodeIconst: + // If the addend is constant, we just statically merge it into the offset. + ic := def.Instr + u64 := ic.ConstantVal() + if ic.Return().Type().Bits() == 32 { + offset += int64(int32(u64)) // sign-extend. 
+ } else { + offset += int64(u64) + } + def.Instr.MarkLowered() + case ssa.OpcodeUExtend, ssa.OpcodeSExtend: + input := def.Instr.Arg() + if input.Type().Bits() != 32 { + panic("illegal size: " + input.Type().String()) + } + + var ext extendOp + if op == ssa.OpcodeUExtend { + ext = extendOpUXTW + } else { + ext = extendOpSXTW + } + + inputDef := m.compiler.ValueDefinition(input) + constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant() + switch { + case constInst && ext == extendOpUXTW: + // Zero-extension of a 32-bit constant can be merged into the offset. + offset += int64(uint32(inputDef.Instr.ConstantVal())) + case constInst && ext == extendOpSXTW: + // Sign-extension of a 32-bit constant can be merged into the offset. + offset += int64(int32(inputDef.Instr.ConstantVal())) // sign-extend! + default: + m.addends32.Enqueue(addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext}) + } + def.Instr.MarkLowered() + continue + default: + // If the addend is not one of them, we simply use it as-is (without merging!), optionally zero-extending it. 
+ m.addends64.Enqueue(m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr()) + } + } + return &m.addends32, &m.addends64, offset +} + +func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) { + rd = m.compiler.AllocateVReg(ssa.TypeI64) + alu := m.allocateInstr() + if imm12Op, ok := asImm12Operand(uint64(c)); ok { + alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true) + } else if imm12Op, ok = asImm12Operand(uint64(-c)); ok { + alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true) + } else { + tmp := m.compiler.AllocateVReg(ssa.TypeI64) + m.load64bitConst(c, tmp) + alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true) + } + m.insert(alu) + return +} + +func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) { + rd = m.compiler.AllocateVReg(ssa.TypeI64) + alu := m.allocateInstr() + alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true) + m.insert(alu) + return +} + +func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) { + rd = m.compiler.AllocateVReg(ssa.TypeI64) + alu := m.allocateInstr() + alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true) + m.insert(alu) + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go new file mode 100644 index 000000000..b435d9ba9 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -0,0 +1,515 @@ +package arm64 + +import ( + "context" + "fmt" + "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +type ( + // machine implements 
backend.Machine. + machine struct { + compiler backend.Compiler + executableContext *backend.ExecutableContextT[instruction] + currentABI *backend.FunctionABI + + regAlloc regalloc.Allocator + regAllocFn *backend.RegAllocFunction[*instruction, *machine] + + // addendsWorkQueue is used during address lowering, defined here for reuse. + addendsWorkQueue wazevoapi.Queue[ssa.Value] + addends32 wazevoapi.Queue[addend32] + // addends64 is used during address lowering, defined here for reuse. + addends64 wazevoapi.Queue[regalloc.VReg] + unresolvedAddressModes []*instruction + + // condBrRelocs holds the conditional branches which need offset relocation. + condBrRelocs []condBrReloc + + // jmpTableTargets holds the labels of the jump table targets. + jmpTableTargets [][]uint32 + + // spillSlotSize is the size of the stack slot in bytes used for spilling registers. + // During the execution of the function, the stack looks like: + // + // + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | + // | xxxxx | + // | ReturnAddress | + // +-----------------+ <<-| + // | ........... | | + // | spill slot M | | <--- spillSlotSize + // | ............ | | + // | spill slot 2 | | + // | spill slot 1 | <<-+ + // | clobbered N | + // | ........... | + // | clobbered 1 | + // | clobbered 0 | + // SP---> +-----------------+ + // (low address) + // + // and it represents the size of the space between FP and the first spilled slot. This must be a multiple of 16. + // Also note that this is only known after register allocation. + spillSlotSize int64 + spillSlots map[regalloc.VRegID]int64 // regalloc.VRegID to offset. + // clobberedRegs holds real-register backed VRegs saved at the function prologue, and restored at the epilogue. 
+ clobberedRegs []regalloc.VReg + + maxRequiredStackSizeForCalls int64 + stackBoundsCheckDisabled bool + + regAllocStarted bool + } + + addend32 struct { + r regalloc.VReg + ext extendOp + } + + condBrReloc struct { + cbr *instruction + // currentLabelPos is the labelPosition within which condBr is defined. + currentLabelPos *labelPosition + // Next block's labelPosition. + nextLabel label + offset int64 + } + + labelPosition = backend.LabelPosition[instruction] + label = backend.Label +) + +const ( + labelReturn = backend.LabelReturn + labelInvalid = backend.LabelInvalid +) + +// NewBackend returns a new backend for arm64. +func NewBackend() backend.Machine { + m := &machine{ + spillSlots: make(map[regalloc.VRegID]int64), + executableContext: newExecutableContext(), + regAlloc: regalloc.NewAllocator(regInfo), + } + return m +} + +func newExecutableContext() *backend.ExecutableContextT[instruction] { + return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0) +} + +// ExecutableContext implements backend.Machine. +func (m *machine) ExecutableContext() backend.ExecutableContext { + return m.executableContext +} + +// RegAlloc implements backend.Machine Function. +func (m *machine) RegAlloc() { + rf := m.regAllocFn + for _, pos := range m.executableContext.OrderedBlockLabels { + rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) + } + + m.regAllocStarted = true + m.regAlloc.DoAllocation(rf) + // Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes. + m.spillSlotSize = (m.spillSlotSize + 15) &^ 15 +} + +// Reset implements backend.Machine. 
+func (m *machine) Reset() { + m.clobberedRegs = m.clobberedRegs[:0] + for key := range m.spillSlots { + m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key)) + } + for _, key := range m.clobberedRegs { + delete(m.spillSlots, regalloc.VRegID(key)) + } + m.clobberedRegs = m.clobberedRegs[:0] + m.regAllocStarted = false + m.regAlloc.Reset() + m.regAllocFn.Reset() + m.spillSlotSize = 0 + m.unresolvedAddressModes = m.unresolvedAddressModes[:0] + m.maxRequiredStackSizeForCalls = 0 + m.executableContext.Reset() + m.jmpTableTargets = m.jmpTableTargets[:0] +} + +// SetCurrentABI implements backend.Machine SetCurrentABI. +func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { + m.currentABI = abi +} + +// DisableStackCheck implements backend.Machine DisableStackCheck. +func (m *machine) DisableStackCheck() { + m.stackBoundsCheckDisabled = true +} + +// SetCompiler implements backend.Machine. +func (m *machine) SetCompiler(ctx backend.Compiler) { + m.compiler = ctx + m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, ctx.SSABuilder(), ctx) +} + +func (m *machine) insert(i *instruction) { + ectx := m.executableContext + ectx.PendingInstructions = append(ectx.PendingInstructions, i) +} + +func (m *machine) insertBrTargetLabel() label { + nop, l := m.allocateBrTarget() + m.insert(nop) + return l +} + +func (m *machine) allocateBrTarget() (nop *instruction, l label) { + ectx := m.executableContext + l = ectx.AllocateLabel() + nop = m.allocateInstr() + nop.asNop0WithLabel(l) + pos := ectx.AllocateLabelPosition(l) + pos.Begin, pos.End = nop, nop + ectx.LabelPositions[l] = pos + return +} + +// allocateInstr allocates an instruction. 
+func (m *machine) allocateInstr() *instruction { + instr := m.executableContext.InstructionPool.Allocate() + if !m.regAllocStarted { + instr.addedBeforeRegAlloc = true + } + return instr +} + +func resetInstruction(i *instruction) { + *i = instruction{} +} + +func (m *machine) allocateNop() *instruction { + instr := m.allocateInstr() + instr.asNop0() + return instr +} + +func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) { + amode := &i.amode + switch amode.kind { + case addressModeKindResultStackSpace: + amode.imm += ret0offset + case addressModeKindArgStackSpace: + amode.imm += arg0offset + default: + panic("BUG") + } + + var sizeInBits byte + switch i.kind { + case store8, uLoad8: + sizeInBits = 8 + case store16, uLoad16: + sizeInBits = 16 + case store32, fpuStore32, uLoad32, fpuLoad32: + sizeInBits = 32 + case store64, fpuStore64, uLoad64, fpuLoad64: + sizeInBits = 64 + case fpuStore128, fpuLoad128: + sizeInBits = 128 + default: + panic("BUG") + } + + if offsetFitsInAddressModeKindRegUnsignedImm12(sizeInBits, amode.imm) { + amode.kind = addressModeKindRegUnsignedImm12 + } else { + // This case, we load the offset into the temporary register, + // and then use it as the index register. + newPrev := m.lowerConstantI64AndInsert(i.prev, tmpRegVReg, amode.imm) + linkInstr(newPrev, i) + *amode = addressMode{kind: addressModeKindRegReg, rn: amode.rn, rm: tmpRegVReg, extOp: extendOpUXTX /* indicates rm reg is 64-bit */} + } +} + +// resolveRelativeAddresses resolves the relative addresses before encoding. +func (m *machine) resolveRelativeAddresses(ctx context.Context) { + ectx := m.executableContext + for { + if len(m.unresolvedAddressModes) > 0 { + arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP() + for _, i := range m.unresolvedAddressModes { + m.resolveAddressingMode(arg0offset, ret0offset, i) + } + } + + // Reuse the slice to gather the unresolved conditional branches. 
+ m.condBrRelocs = m.condBrRelocs[:0] + + var fn string + var fnIndex int + var labelToSSABlockID map[label]ssa.BasicBlockID + if wazevoapi.PerfMapEnabled { + fn = wazevoapi.GetCurrentFunctionName(ctx) + labelToSSABlockID = make(map[label]ssa.BasicBlockID) + for i, l := range ectx.SsaBlockIDToLabels { + labelToSSABlockID[l] = ssa.BasicBlockID(i) + } + fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx) + } + + // Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label. + var offset int64 + for i, pos := range ectx.OrderedBlockLabels { + pos.BinaryOffset = offset + var size int64 + for cur := pos.Begin; ; cur = cur.next { + switch cur.kind { + case nop0: + l := cur.nop0Label() + if pos, ok := ectx.LabelPositions[l]; ok { + pos.BinaryOffset = offset + size + } + case condBr: + if !cur.condBrOffsetResolved() { + var nextLabel label + if i < len(ectx.OrderedBlockLabels)-1 { + // Note: this is only used when the block ends with fallthrough, + // therefore can be safely assumed that the next block exists when it's needed. + nextLabel = ectx.OrderedBlockLabels[i+1].L + } + m.condBrRelocs = append(m.condBrRelocs, condBrReloc{ + cbr: cur, currentLabelPos: pos, offset: offset + size, + nextLabel: nextLabel, + }) + } + } + size += cur.size() + if cur == pos.End { + break + } + } + + if wazevoapi.PerfMapEnabled { + if size > 0 { + l := pos.L + var labelStr string + if blkID, ok := labelToSSABlockID[l]; ok { + labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) + } else { + labelStr = l.String() + } + wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) + } + } + offset += size + } + + // Before resolving any offsets, we need to check if all the conditional branches can be resolved. 
+ var needRerun bool + for i := range m.condBrRelocs { + reloc := &m.condBrRelocs[i] + cbr := reloc.cbr + offset := reloc.offset + + target := cbr.condBrLabel() + offsetOfTarget := ectx.LabelPositions[target].BinaryOffset + diff := offsetOfTarget - offset + if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 { + // This case the conditional branch is too huge. We place the trampoline instructions at the end of the current block, + // and jump to it. + m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel) + // Then, we need to recall this function to fix up the label offsets + // as they have changed after the trampoline is inserted. + needRerun = true + } + } + if needRerun { + if wazevoapi.PerfMapEnabled { + wazevoapi.PerfMap.Clear() + } + } else { + break + } + } + + var currentOffset int64 + for cur := ectx.RootInstr; cur != nil; cur = cur.next { + switch cur.kind { + case br: + target := cur.brLabel() + offsetOfTarget := ectx.LabelPositions[target].BinaryOffset + diff := offsetOfTarget - currentOffset + divided := diff >> 2 + if divided < minSignedInt26 || divided > maxSignedInt26 { + // This means the currently compiled single function is extremely large. 
+ panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range") + } + cur.brOffsetResolve(diff) + case condBr: + if !cur.condBrOffsetResolved() { + target := cur.condBrLabel() + offsetOfTarget := ectx.LabelPositions[target].BinaryOffset + diff := offsetOfTarget - currentOffset + if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 { + panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly") + } + cur.condBrOffsetResolve(diff) + } + case brTableSequence: + tableIndex := cur.u1 + targets := m.jmpTableTargets[tableIndex] + for i := range targets { + l := label(targets[i]) + offsetOfTarget := ectx.LabelPositions[l].BinaryOffset + diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin) + targets[i] = uint32(diff) + } + cur.brTableSequenceOffsetsResolved() + case emitSourceOffsetInfo: + m.compiler.AddSourceOffsetInfo(currentOffset, cur.sourceOffsetInfo()) + } + currentOffset += cur.size() + } +} + +const ( + maxSignedInt26 = 1<<25 - 1 + minSignedInt26 = -(1 << 25) + + maxSignedInt19 = 1<<18 - 1 + minSignedInt19 = -(1 << 18) +) + +func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) { + cur := currentBlk.End + originalTarget := cbr.condBrLabel() + endNext := cur.next + + if cur.kind != br { + // If the current block ends with a conditional branch, we can just insert the trampoline after it. + // Otherwise, we need to insert "skip" instruction to skip the trampoline instructions. 
+ skip := m.allocateInstr() + skip.asBr(nextLabel) + cur = linkInstr(cur, skip) + } + + cbrNewTargetInstr, cbrNewTargetLabel := m.allocateBrTarget() + cbr.setCondBrTargets(cbrNewTargetLabel) + cur = linkInstr(cur, cbrNewTargetInstr) + + // Then insert the unconditional branch to the original, which should be possible to get encoded + // as 26-bit offset should be enough for any practical application. + br := m.allocateInstr() + br.asBr(originalTarget) + cur = linkInstr(cur, br) + + // Update the end of the current block. + currentBlk.End = cur + + linkInstr(cur, endNext) +} + +// Format implements backend.Machine. +func (m *machine) Format() string { + ectx := m.executableContext + begins := map[*instruction]label{} + for l, pos := range ectx.LabelPositions { + begins[pos.Begin] = l + } + + irBlocks := map[label]ssa.BasicBlockID{} + for i, l := range ectx.SsaBlockIDToLabels { + irBlocks[l] = ssa.BasicBlockID(i) + } + + var lines []string + for cur := ectx.RootInstr; cur != nil; cur = cur.next { + if l, ok := begins[cur]; ok { + var labelStr string + if blkID, ok := irBlocks[l]; ok { + labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) + } else { + labelStr = fmt.Sprintf("%s:", l) + } + lines = append(lines, labelStr) + } + if cur.kind == nop0 { + continue + } + lines = append(lines, "\t"+cur.String()) + } + return "\n" + strings.Join(lines, "\n") + "\n" +} + +// InsertReturn implements backend.Machine. +func (m *machine) InsertReturn() { + i := m.allocateInstr() + i.asRet() + m.insert(i) +} + +func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 { + offset, ok := m.spillSlots[id] + if !ok { + offset = m.spillSlotSize + // TODO: this should be aligned depending on the `size` to use Imm12 offset load/store as much as possible. + m.spillSlots[id] = offset + m.spillSlotSize += int64(size) + } + return offset + 16 // spill slot starts above the clobbered registers and the frame size. 
+} + +func (m *machine) clobberedRegSlotSize() int64 { + return int64(len(m.clobberedRegs) * 16) +} + +func (m *machine) arg0OffsetFromSP() int64 { + return m.frameSize() + + 16 + // 16-byte aligned return address + 16 // frame size saved below the clobbered registers. +} + +func (m *machine) ret0OffsetFromSP() int64 { + return m.arg0OffsetFromSP() + m.currentABI.ArgStackSize +} + +func (m *machine) requiredStackSize() int64 { + return m.maxRequiredStackSizeForCalls + + m.frameSize() + + 16 + // 16-byte aligned return address. + 16 // frame size saved below the clobbered registers. +} + +func (m *machine) frameSize() int64 { + s := m.clobberedRegSlotSize() + m.spillSlotSize + if s&0xf != 0 { + panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s)) + } + return s +} + +func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { + // TODO: reuse the slice! + labels := make([]uint32, len(targets)) + for j, target := range targets { + labels[j] = uint32(m.executableContext.GetOrAllocateSSABlockLabel(target)) + } + index = len(m.jmpTableTargets) + m.jmpTableTargets = append(m.jmpTableTargets, labels) + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go new file mode 100644 index 000000000..466fac464 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -0,0 +1,469 @@ +package arm64 + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// PostRegAlloc implements backend.Machine. +func (m *machine) PostRegAlloc() { + m.setupPrologue() + m.postRegAlloc() +} + +// setupPrologue initializes the prologue of the function. 
+func (m *machine) setupPrologue() { + ectx := m.executableContext + + cur := ectx.RootInstr + prevInitInst := cur.next + + // + // (high address) (high address) + // SP----> +-----------------+ +------------------+ <----+ + // | ....... | | ....... | | + // | ret Y | | ret Y | | + // | ....... | | ....... | | + // | ret 0 | | ret 0 | | + // | arg X | | arg X | | size_of_arg_ret. + // | ....... | ====> | ....... | | + // | arg 1 | | arg 1 | | + // | arg 0 | | arg 0 | <----+ + // |-----------------| | size_of_arg_ret | + // | return address | + // +------------------+ <---- SP + // (low address) (low address) + + // Saves the return address (lr) and the size_of_arg_ret below the SP. + // size_of_arg_ret is used for stack unwinding. + cur = m.createReturnAddrAndSizeOfArgRetSlot(cur) + + if !m.stackBoundsCheckDisabled { + cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur) + } + + // Decrement SP if spillSlotSize > 0. + if m.spillSlotSize == 0 && len(m.spillSlots) != 0 { + panic(fmt.Sprintf("BUG: spillSlotSize=%d, spillSlots=%v\n", m.spillSlotSize, m.spillSlots)) + } + + if regs := m.clobberedRegs; len(regs) > 0 { + // + // (high address) (high address) + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | size_of_arg_ret | | size_of_arg_ret | + // | ReturnAddress | | ReturnAddress | + // SP----> +-----------------+ ====> +-----------------+ + // (low address) | clobbered M | + // | ............ | + // | clobbered 0 | + // +-----------------+ <----- SP + // (low address) + // + _amode := addressModePreOrPostIndex(spVReg, + -16, // stack pointer must be 16-byte aligned. + true, // Decrement before store. + ) + for _, vr := range regs { + // TODO: pair stores to reduce the number of instructions. 
+ store := m.allocateInstr() + store.asStore(operandNR(vr), _amode, regTypeToRegisterSizeInBits(vr.RegType())) + cur = linkInstr(cur, store) + } + } + + if size := m.spillSlotSize; size > 0 { + // Check if size is 16-byte aligned. + if size&0xf != 0 { + panic(fmt.Errorf("BUG: spill slot size %d is not 16-byte aligned", size)) + } + + cur = m.addsAddOrSubStackPointer(cur, spVReg, size, false) + + // At this point, the stack looks like: + // + // (high address) + // +------------------+ + // | ....... | + // | ret Y | + // | ....... | + // | ret 0 | + // | arg X | + // | ....... | + // | arg 1 | + // | arg 0 | + // | size_of_arg_ret | + // | ReturnAddress | + // +------------------+ + // | clobbered M | + // | ............ | + // | clobbered 0 | + // | spill slot N | + // | ............ | + // | spill slot 2 | + // | spill slot 0 | + // SP----> +------------------+ + // (low address) + } + + // We push the frame size into the stack to make it possible to unwind stack: + // + // + // (high address) (high address) + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | size_of_arg_ret | | size_of_arg_ret | + // | ReturnAddress | | ReturnAddress | + // +-----------------+ ==> +-----------------+ <----+ + // | clobbered M | | clobbered M | | + // | ............ | | ............ | | + // | clobbered 2 | | clobbered 2 | | + // | clobbered 1 | | clobbered 1 | | frame size + // | clobbered 0 | | clobbered 0 | | + // | spill slot N | | spill slot N | | + // | ............ | | ............ | | + // | spill slot 0 | | spill slot 0 | <----+ + // SP---> +-----------------+ | xxxxxx | ;; unused space to make it 16-byte aligned. 
+ // | frame_size | + // +-----------------+ <---- SP + // (low address) + // + cur = m.createFrameSizeSlot(cur, m.frameSize()) + + linkInstr(cur, prevInitInst) +} + +func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruction { + // First we decrement the stack pointer to point the arg0 slot. + var sizeOfArgRetReg regalloc.VReg + s := int64(m.currentABI.AlignedArgResultStackSlotSize()) + if s > 0 { + cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s) + sizeOfArgRetReg = tmpRegVReg + + subSp := m.allocateInstr() + subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true) + cur = linkInstr(cur, subSp) + } else { + sizeOfArgRetReg = xzrVReg + } + + // Saves the return address (lr) and the size_of_arg_ret below the SP. + // size_of_arg_ret is used for stack unwinding. + pstr := m.allocateInstr() + amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */) + pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode) + cur = linkInstr(cur, pstr) + return cur +} + +func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction { + var frameSizeReg regalloc.VReg + if s > 0 { + cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s) + frameSizeReg = tmpRegVReg + } else { + frameSizeReg = xzrVReg + } + _amode := addressModePreOrPostIndex(spVReg, + -16, // stack pointer must be 16-byte aligned. + true, // Decrement before store. + ) + store := m.allocateInstr() + store.asStore(operandNR(frameSizeReg), _amode, 64) + cur = linkInstr(cur, store) + return cur +} + +// postRegAlloc does multiple things while walking through the instructions: +// 1. Removes the redundant copy instruction. +// 2. Inserts the epilogue. 
+func (m *machine) postRegAlloc() { + ectx := m.executableContext + for cur := ectx.RootInstr; cur != nil; cur = cur.next { + switch cur.kind { + case ret: + m.setupEpilogueAfter(cur.prev) + case loadConstBlockArg: + lc := cur + next := lc.next + m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] + m.lowerLoadConstantBlockArgAfterRegAlloc(lc) + for _, instr := range m.executableContext.PendingInstructions { + cur = linkInstr(cur, instr) + } + linkInstr(cur, next) + m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] + default: + // Removes the redundant copy instruction. + if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() { + prev, next := cur.prev, cur.next + // Remove the copy instruction. + prev.next = next + if next != nil { + next.prev = prev + } + } + } + } +} + +func (m *machine) setupEpilogueAfter(cur *instruction) { + prevNext := cur.next + + // We've stored the frame size in the prologue, and now that we are about to return from this function, we won't need it anymore. + cur = m.addsAddOrSubStackPointer(cur, spVReg, 16, true) + + if s := m.spillSlotSize; s > 0 { + // Adjust SP to the original value: + // + // (high address) (high address) + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | xxxxx | | xxxxx | + // | ReturnAddress | | ReturnAddress | + // +-----------------+ ====> +-----------------+ + // | clobbered M | | clobbered M | + // | ............ | | ............ | + // | clobbered 1 | | clobbered 1 | + // | clobbered 0 | | clobbered 0 | + // | spill slot N | +-----------------+ <---- SP + // | ............ 
| + // | spill slot 0 | + // SP---> +-----------------+ + // (low address) + // + cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true) + } + + // First we need to restore the clobbered registers. + if len(m.clobberedRegs) > 0 { + // (high address) + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | xxxxx | | xxxxx | + // | ReturnAddress | | ReturnAddress | + // +-----------------+ ========> +-----------------+ <---- SP + // | clobbered M | + // | ........... | + // | clobbered 1 | + // | clobbered 0 | + // SP---> +-----------------+ + // (low address) + + l := len(m.clobberedRegs) - 1 + for i := range m.clobberedRegs { + vr := m.clobberedRegs[l-i] // reverse order to restore. + load := m.allocateInstr() + amode := addressModePreOrPostIndex(spVReg, + 16, // stack pointer must be 16-byte aligned. + false, // Increment after store. + ) + // TODO: pair loads to reduce the number of instructions. + switch regTypeToRegisterSizeInBits(vr.RegType()) { + case 64: // save int reg. + load.asULoad(operandNR(vr), amode, 64) + case 128: // save vector reg. + load.asFpuLoad(operandNR(vr), amode, 128) + } + cur = linkInstr(cur, load) + } + } + + // Reload the return address (lr). + // + // +-----------------+ +-----------------+ + // | ....... | | ....... | + // | ret Y | | ret Y | + // | ....... | | ....... | + // | ret 0 | | ret 0 | + // | arg X | | arg X | + // | ....... | ===> | ....... | + // | arg 1 | | arg 1 | + // | arg 0 | | arg 0 | + // | xxxxx | +-----------------+ <---- SP + // | ReturnAddress | + // SP----> +-----------------+ + + ldr := m.allocateInstr() + ldr.asULoad(operandNR(lrVReg), + addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. 
*/, false /* increment after loads */), 64) + cur = linkInstr(cur, ldr) + + if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 { + cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true) + } + + linkInstr(cur, prevNext) +} + +// saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient +// stack space left. Basically this is the combination of CalleeSavedRegisters plus argument registers execpt for x0, +// which always points to the execution context whenever the native code is entered from Go. +var saveRequiredRegs = []regalloc.VReg{ + x1VReg, x2VReg, x3VReg, x4VReg, x5VReg, x6VReg, x7VReg, + x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg, lrVReg, + v0VReg, v1VReg, v2VReg, v3VReg, v4VReg, v5VReg, v6VReg, v7VReg, + v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg, +} + +// insertStackBoundsCheck will insert the instructions after `cur` to check the +// stack bounds, and if there's no sufficient spaces required for the function, +// exit the execution and try growing it in Go world. +// +// TODO: we should be able to share the instructions across all the functions to reduce the size of compiled executable. +func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction { + if requiredStackSize%16 != 0 { + panic("BUG") + } + + if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok { + // sub tmp, sp, #requiredStackSize + sub := m.allocateInstr() + sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true) + cur = linkInstr(cur, sub) + } else { + // This case, we first load the requiredStackSize into the temporary register, + cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) + // Then subtract it. 
+ sub := m.allocateInstr() + sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true) + cur = linkInstr(cur, sub) + } + + tmp2 := x11VReg // Caller save, so it is safe to use it here in the prologue. + + // ldr tmp2, [executionContext #StackBottomPtr] + ldr := m.allocateInstr() + ldr.asULoad(operandNR(tmp2), addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: x0VReg, // execution context is always the first argument. + imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(), + }, 64) + cur = linkInstr(cur, ldr) + + // subs xzr, tmp, tmp2 + subs := m.allocateInstr() + subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true) + cur = linkInstr(cur, subs) + + // b.ge #imm + cbr := m.allocateInstr() + cbr.asCondBr(ge.asCond(), labelInvalid, false /* ignored */) + cur = linkInstr(cur, cbr) + + // Set the required stack size and set it to the exec context. + { + // First load the requiredStackSize into the temporary register, + cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) + setRequiredStackSize := m.allocateInstr() + setRequiredStackSize.asStore(operandNR(tmpRegVReg), + addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(), + }, 64) + + cur = linkInstr(cur, setRequiredStackSize) + } + + ldrAddress := m.allocateInstr() + ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: x0VReg, // execution context is always the first argument + imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(), + }, 64) + cur = linkInstr(cur, ldrAddress) + + // Then jumps to the stack grow call sequence's address, meaning + // transferring the control to the code compiled by CompileStackGrowCallSequence. 
+ bl := m.allocateInstr() + bl.asCallIndirect(tmpRegVReg, nil) + cur = linkInstr(cur, bl) + + // Now that we know the entire code, we can finalize how many bytes + // we have to skip when the stack size is sufficient. + var cbrOffset int64 + for _cur := cbr; ; _cur = _cur.next { + cbrOffset += _cur.size() + if _cur == cur { + break + } + } + cbr.condBrOffsetResolve(cbrOffset) + return cur +} + +// CompileStackGrowCallSequence implements backend.Machine. +func (m *machine) CompileStackGrowCallSequence() []byte { + ectx := m.executableContext + + cur := m.allocateInstr() + cur.asNop0() + ectx.RootInstr = cur + + // Save the callee saved and argument registers. + cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs) + + // Save the current stack pointer. + cur = m.saveCurrentStackPointer(cur, x0VReg) + + // Set the exit status on the execution context. + cur = m.setExitCode(cur, x0VReg, wazevoapi.ExitCodeGrowStack) + + // Exit the execution. + cur = m.storeReturnAddressAndExit(cur) + + // After the exit, restore the saved registers. + cur = m.restoreRegistersInExecutionContext(cur, saveRequiredRegs) + + // Then goes back the original address of this stack grow call. 
+ ret := m.allocateInstr() + ret.asRet() + linkInstr(cur, ret) + + m.encode(ectx.RootInstr) + return m.compiler.Buf() +} + +func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction { + ectx := m.executableContext + + ectx.PendingInstructions = ectx.PendingInstructions[:0] + m.insertAddOrSubStackPointer(rd, diff, add) + for _, inserted := range ectx.PendingInstructions { + cur = linkInstr(cur, inserted) + } + return cur +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go new file mode 100644 index 000000000..1c8793b73 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go @@ -0,0 +1,152 @@ +package arm64 + +// This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine. + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +// ClobberedRegisters implements backend.RegAllocFunctionMachine. +func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { + m.clobberedRegs = append(m.clobberedRegs[:0], regs...) +} + +// Swap implements backend.RegAllocFunctionMachine. 
+func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { + prevNext := cur.next + var mov1, mov2, mov3 *instruction + if x1.RegType() == regalloc.RegTypeInt { + if !tmp.Valid() { + tmp = tmpRegVReg + } + mov1 = m.allocateInstr().asMove64(tmp, x1) + mov2 = m.allocateInstr().asMove64(x1, x2) + mov3 = m.allocateInstr().asMove64(x2, tmp) + cur = linkInstr(cur, mov1) + cur = linkInstr(cur, mov2) + cur = linkInstr(cur, mov3) + linkInstr(cur, prevNext) + } else { + if !tmp.Valid() { + r2 := x2.RealReg() + // Temporarily spill x1 to stack. + cur = m.InsertStoreRegisterAt(x1, cur, true).prev + // Then move x2 to x1. + cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2)) + linkInstr(cur, prevNext) + // Then reload the original value on x1 from stack to r2. + m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) + } else { + mov1 = m.allocateInstr().asFpuMov128(tmp, x1) + mov2 = m.allocateInstr().asFpuMov128(x1, x2) + mov3 = m.allocateInstr().asFpuMov128(x2, tmp) + cur = linkInstr(cur, mov1) + cur = linkInstr(cur, mov2) + cur = linkInstr(cur, mov3) + linkInstr(cur, prevNext) + } + } +} + +// InsertMoveBefore implements backend.RegAllocFunctionMachine. +func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { + typ := src.RegType() + if typ != dst.RegType() { + panic("BUG: src and dst must have the same type") + } + + mov := m.allocateInstr() + if typ == regalloc.RegTypeInt { + mov.asMove64(dst, src) + } else { + mov.asFpuMov128(dst, src) + } + + cur := instr.prev + prevNext := cur.next + cur = linkInstr(cur, mov) + linkInstr(cur, prevNext) +} + +// SSABlockLabel implements backend.RegAllocFunctionMachine. +func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { + return m.executableContext.SsaBlockIDToLabels[id] +} + +// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. 
+func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { + if !v.IsRealReg() { + panic("BUG: VReg must be backed by real reg to be stored") + } + + typ := m.compiler.TypeOf(v) + + var prevNext, cur *instruction + if after { + cur, prevNext = instr, instr.next + } else { + cur, prevNext = instr.prev, instr + } + + offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) + var amode addressMode + cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true) + store := m.allocateInstr() + store.asStore(operandNR(v), amode, typ.Bits()) + + cur = linkInstr(cur, store) + return linkInstr(cur, prevNext) +} + +// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. +func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { + if !v.IsRealReg() { + panic("BUG: VReg must be backed by real reg to be stored") + } + + typ := m.compiler.TypeOf(v) + + var prevNext, cur *instruction + if after { + cur, prevNext = instr, instr.next + } else { + cur, prevNext = instr.prev, instr + } + + offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) + var amode addressMode + cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true) + load := m.allocateInstr() + switch typ { + case ssa.TypeI32, ssa.TypeI64: + load.asULoad(operandNR(v), amode, typ.Bits()) + case ssa.TypeF32, ssa.TypeF64: + load.asFpuLoad(operandNR(v), amode, typ.Bits()) + case ssa.TypeV128: + load.asFpuLoad(operandNR(v), amode, 128) + default: + panic("TODO") + } + + cur = linkInstr(cur, load) + return linkInstr(cur, prevNext) +} + +// LastInstrForInsertion implements backend.RegAllocFunctionMachine. 
+func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { + cur := end + for cur.kind == nop0 { + cur = cur.prev + if cur == begin { + return end + } + } + switch cur.kind { + case br: + return cur + default: + return end + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go new file mode 100644 index 000000000..83902d927 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go @@ -0,0 +1,117 @@ +package arm64 + +import ( + "encoding/binary" + "fmt" + "math" + "sort" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" +) + +const ( + // trampolineCallSize is the size of the trampoline instruction sequence for each function in an island. + trampolineCallSize = 4*4 + 4 // Four instructions + 32-bit immediate. + + // Unconditional branch offset is encoded as divided by 4 in imm26. + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en + + maxUnconditionalBranchOffset = maxSignedInt26 * 4 + minUnconditionalBranchOffset = minSignedInt26 * 4 + + // trampolineIslandInterval is the range of the trampoline island. + // Half of the range is used for the trampoline island, and the other half is used for the function. + trampolineIslandInterval = maxUnconditionalBranchOffset / 2 + + // maxNumFunctions explicitly specifies the maximum number of functions that can be allowed in a single executable. + maxNumFunctions = trampolineIslandInterval >> 6 + + // maxFunctionExecutableSize is the maximum size of a function that can exist in a trampoline island. + // Conservatively set to 1/4 of the trampoline island interval. + maxFunctionExecutableSize = trampolineIslandInterval >> 2 +) + +// CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo. 
+func (m *machine) CallTrampolineIslandInfo(numFunctions int) (interval, size int, err error) { + if numFunctions > maxNumFunctions { + return 0, 0, fmt.Errorf("too many functions: %d > %d", numFunctions, maxNumFunctions) + } + return trampolineIslandInterval, trampolineCallSize * numFunctions, nil +} + +// ResolveRelocations implements backend.Machine ResolveRelocations. +func (m *machine) ResolveRelocations( + refToBinaryOffset []int, + executable []byte, + relocations []backend.RelocationInfo, + callTrampolineIslandOffsets []int, +) { + for _, islandOffset := range callTrampolineIslandOffsets { + encodeCallTrampolineIsland(refToBinaryOffset, islandOffset, executable) + } + + for _, r := range relocations { + instrOffset := r.Offset + calleeFnOffset := refToBinaryOffset[r.FuncRef] + diff := int64(calleeFnOffset) - (instrOffset) + // Check if the diff is within the range of the branch instruction. + if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset { + // Find the near trampoline island from callTrampolineIslandOffsets. + islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset)) + islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef) + diff = int64(islandTargetOffset) - (instrOffset) + if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset { + panic("BUG in trampoline placement") + } + } + binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff)) + } +} + +// encodeCallTrampolineIsland encodes a trampoline island for the given functions. +// Each island consists of a trampoline instruction sequence for each function. +// Each trampoline instruction sequence consists of 4 instructions + 32-bit immediate. 
+func encodeCallTrampolineIsland(refToBinaryOffset []int, islandOffset int, executable []byte) { + for i := 0; i < len(refToBinaryOffset); i++ { + trampolineOffset := islandOffset + trampolineCallSize*i + + fnOffset := refToBinaryOffset[i] + diff := fnOffset - (trampolineOffset + 16) + if diff > math.MaxInt32 || diff < math.MinInt32 { + // This case even amd64 can't handle. 4GB is too big. + panic("too big binary") + } + + // The tmpReg, tmpReg2 is safe to overwrite (in fact any caller-saved register is safe to use). + tmpReg, tmpReg2 := regNumberInEncoding[tmpRegVReg.RealReg()], regNumberInEncoding[x11] + + // adr tmpReg, PC+16: load the address of #diff into tmpReg. + binary.LittleEndian.PutUint32(executable[trampolineOffset:], encodeAdr(tmpReg, 16)) + // ldrsw tmpReg2, [tmpReg]: Load #diff into tmpReg2. + binary.LittleEndian.PutUint32(executable[trampolineOffset+4:], + encodeLoadOrStore(sLoad32, tmpReg2, addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpRegVReg})) + // add tmpReg, tmpReg2, tmpReg: add #diff to the address of #diff, getting the absolute address of the function. + binary.LittleEndian.PutUint32(executable[trampolineOffset+8:], + encodeAluRRR(aluOpAdd, tmpReg, tmpReg, tmpReg2, true, false)) + // br tmpReg: branch to the function without overwriting the link register. + binary.LittleEndian.PutUint32(executable[trampolineOffset+12:], encodeUnconditionalBranchReg(tmpReg, false)) + // #diff + binary.LittleEndian.PutUint32(executable[trampolineOffset+16:], uint32(diff)) + } +} + +// searchTrampolineIsland finds the nearest trampoline island from callTrampolineIslandOffsets. +// Note that even if the offset is in the middle of two islands, it returns the latter one. +// That is ok because the island is always placed in the middle of the range. +// +// precondition: callTrampolineIslandOffsets is sorted in ascending order. 
+func searchTrampolineIsland(callTrampolineIslandOffsets []int, offset int) int { + l := len(callTrampolineIslandOffsets) + n := sort.Search(l, func(i int) bool { + return callTrampolineIslandOffsets[i] >= offset + }) + if n == l { + n = l - 1 + } + return callTrampolineIslandOffsets[n] +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/reg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/reg.go new file mode 100644 index 000000000..45737516d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/reg.go @@ -0,0 +1,397 @@ +package arm64 + +import ( + "fmt" + "strconv" + "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" +) + +// Arm64-specific registers. +// +// See https://developer.arm.com/documentation/dui0801/a/Overview-of-AArch64-state/Predeclared-core-register-names-in-AArch64-state + +const ( + // General purpose registers. Note that we do not distinguish wn and xn registers + // because they are the same from the perspective of register allocator, and + // the size can be determined by the type of the instruction. + + x0 = regalloc.RealRegInvalid + 1 + iota + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x20 + x21 + x22 + x23 + x24 + x25 + x26 + x27 + x28 + x29 + x30 + + // Vector registers. Note that we do not distinguish vn and dn, ... registers + // because they are the same from the perspective of register allocator, and + // the size can be determined by the type of the instruction. 
+ + v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9 + v10 + v11 + v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19 + v20 + v21 + v22 + v23 + v24 + v25 + v26 + v27 + v28 + v29 + v30 + v31 + + // Special registers + + xzr + sp + lr = x30 + fp = x29 + tmp = x27 +) + +var ( + x0VReg = regalloc.FromRealReg(x0, regalloc.RegTypeInt) + x1VReg = regalloc.FromRealReg(x1, regalloc.RegTypeInt) + x2VReg = regalloc.FromRealReg(x2, regalloc.RegTypeInt) + x3VReg = regalloc.FromRealReg(x3, regalloc.RegTypeInt) + x4VReg = regalloc.FromRealReg(x4, regalloc.RegTypeInt) + x5VReg = regalloc.FromRealReg(x5, regalloc.RegTypeInt) + x6VReg = regalloc.FromRealReg(x6, regalloc.RegTypeInt) + x7VReg = regalloc.FromRealReg(x7, regalloc.RegTypeInt) + x8VReg = regalloc.FromRealReg(x8, regalloc.RegTypeInt) + x9VReg = regalloc.FromRealReg(x9, regalloc.RegTypeInt) + x10VReg = regalloc.FromRealReg(x10, regalloc.RegTypeInt) + x11VReg = regalloc.FromRealReg(x11, regalloc.RegTypeInt) + x12VReg = regalloc.FromRealReg(x12, regalloc.RegTypeInt) + x13VReg = regalloc.FromRealReg(x13, regalloc.RegTypeInt) + x14VReg = regalloc.FromRealReg(x14, regalloc.RegTypeInt) + x15VReg = regalloc.FromRealReg(x15, regalloc.RegTypeInt) + x16VReg = regalloc.FromRealReg(x16, regalloc.RegTypeInt) + x17VReg = regalloc.FromRealReg(x17, regalloc.RegTypeInt) + x18VReg = regalloc.FromRealReg(x18, regalloc.RegTypeInt) + x19VReg = regalloc.FromRealReg(x19, regalloc.RegTypeInt) + x20VReg = regalloc.FromRealReg(x20, regalloc.RegTypeInt) + x21VReg = regalloc.FromRealReg(x21, regalloc.RegTypeInt) + x22VReg = regalloc.FromRealReg(x22, regalloc.RegTypeInt) + x23VReg = regalloc.FromRealReg(x23, regalloc.RegTypeInt) + x24VReg = regalloc.FromRealReg(x24, regalloc.RegTypeInt) + x25VReg = regalloc.FromRealReg(x25, regalloc.RegTypeInt) + x26VReg = regalloc.FromRealReg(x26, regalloc.RegTypeInt) + x27VReg = regalloc.FromRealReg(x27, regalloc.RegTypeInt) + x28VReg = regalloc.FromRealReg(x28, regalloc.RegTypeInt) + x29VReg = regalloc.FromRealReg(x29, 
regalloc.RegTypeInt) + x30VReg = regalloc.FromRealReg(x30, regalloc.RegTypeInt) + v0VReg = regalloc.FromRealReg(v0, regalloc.RegTypeFloat) + v1VReg = regalloc.FromRealReg(v1, regalloc.RegTypeFloat) + v2VReg = regalloc.FromRealReg(v2, regalloc.RegTypeFloat) + v3VReg = regalloc.FromRealReg(v3, regalloc.RegTypeFloat) + v4VReg = regalloc.FromRealReg(v4, regalloc.RegTypeFloat) + v5VReg = regalloc.FromRealReg(v5, regalloc.RegTypeFloat) + v6VReg = regalloc.FromRealReg(v6, regalloc.RegTypeFloat) + v7VReg = regalloc.FromRealReg(v7, regalloc.RegTypeFloat) + v8VReg = regalloc.FromRealReg(v8, regalloc.RegTypeFloat) + v9VReg = regalloc.FromRealReg(v9, regalloc.RegTypeFloat) + v10VReg = regalloc.FromRealReg(v10, regalloc.RegTypeFloat) + v11VReg = regalloc.FromRealReg(v11, regalloc.RegTypeFloat) + v12VReg = regalloc.FromRealReg(v12, regalloc.RegTypeFloat) + v13VReg = regalloc.FromRealReg(v13, regalloc.RegTypeFloat) + v14VReg = regalloc.FromRealReg(v14, regalloc.RegTypeFloat) + v15VReg = regalloc.FromRealReg(v15, regalloc.RegTypeFloat) + v16VReg = regalloc.FromRealReg(v16, regalloc.RegTypeFloat) + v17VReg = regalloc.FromRealReg(v17, regalloc.RegTypeFloat) + v18VReg = regalloc.FromRealReg(v18, regalloc.RegTypeFloat) + v19VReg = regalloc.FromRealReg(v19, regalloc.RegTypeFloat) + v20VReg = regalloc.FromRealReg(v20, regalloc.RegTypeFloat) + v21VReg = regalloc.FromRealReg(v21, regalloc.RegTypeFloat) + v22VReg = regalloc.FromRealReg(v22, regalloc.RegTypeFloat) + v23VReg = regalloc.FromRealReg(v23, regalloc.RegTypeFloat) + v24VReg = regalloc.FromRealReg(v24, regalloc.RegTypeFloat) + v25VReg = regalloc.FromRealReg(v25, regalloc.RegTypeFloat) + v26VReg = regalloc.FromRealReg(v26, regalloc.RegTypeFloat) + v27VReg = regalloc.FromRealReg(v27, regalloc.RegTypeFloat) + // lr (link register) holds the return address at the function entry. + lrVReg = x30VReg + // tmpReg is used to perform spill/load on large stack offsets, and load large constants. 
+ // Therefore, be cautious to use this register in the middle of the compilation, especially before the register allocation. + // This is the same as golang/go, but it's only described in the source code: + // https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L59 + // https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L13-L15 + tmpRegVReg = regalloc.FromRealReg(tmp, regalloc.RegTypeInt) + v28VReg = regalloc.FromRealReg(v28, regalloc.RegTypeFloat) + v29VReg = regalloc.FromRealReg(v29, regalloc.RegTypeFloat) + v30VReg = regalloc.FromRealReg(v30, regalloc.RegTypeFloat) + v31VReg = regalloc.FromRealReg(v31, regalloc.RegTypeFloat) + xzrVReg = regalloc.FromRealReg(xzr, regalloc.RegTypeInt) + spVReg = regalloc.FromRealReg(sp, regalloc.RegTypeInt) + fpVReg = regalloc.FromRealReg(fp, regalloc.RegTypeInt) +) + +var regNames = [...]string{ + x0: "x0", + x1: "x1", + x2: "x2", + x3: "x3", + x4: "x4", + x5: "x5", + x6: "x6", + x7: "x7", + x8: "x8", + x9: "x9", + x10: "x10", + x11: "x11", + x12: "x12", + x13: "x13", + x14: "x14", + x15: "x15", + x16: "x16", + x17: "x17", + x18: "x18", + x19: "x19", + x20: "x20", + x21: "x21", + x22: "x22", + x23: "x23", + x24: "x24", + x25: "x25", + x26: "x26", + x27: "x27", + x28: "x28", + x29: "x29", + x30: "x30", + xzr: "xzr", + sp: "sp", + v0: "v0", + v1: "v1", + v2: "v2", + v3: "v3", + v4: "v4", + v5: "v5", + v6: "v6", + v7: "v7", + v8: "v8", + v9: "v9", + v10: "v10", + v11: "v11", + v12: "v12", + v13: "v13", + v14: "v14", + v15: "v15", + v16: "v16", + v17: "v17", + v18: "v18", + v19: "v19", + v20: "v20", + v21: "v21", + v22: "v22", + v23: "v23", + v24: "v24", + v25: "v25", + v26: "v26", + v27: "v27", + v28: "v28", + v29: "v29", + v30: "v30", + v31: "v31", +} + +func formatVRegSized(r regalloc.VReg, size byte) (ret string) { + if r.IsRealReg() { + ret = regNames[r.RealReg()] + switch ret[0] { + case 
'x': + switch size { + case 32: + ret = strings.Replace(ret, "x", "w", 1) + case 64: + default: + panic("BUG: invalid register size: " + strconv.Itoa(int(size))) + } + case 'v': + switch size { + case 32: + ret = strings.Replace(ret, "v", "s", 1) + case 64: + ret = strings.Replace(ret, "v", "d", 1) + case 128: + ret = strings.Replace(ret, "v", "q", 1) + default: + panic("BUG: invalid register size") + } + } + } else { + switch r.RegType() { + case regalloc.RegTypeInt: + switch size { + case 32: + ret = fmt.Sprintf("w%d?", r.ID()) + case 64: + ret = fmt.Sprintf("x%d?", r.ID()) + default: + panic("BUG: invalid register size: " + strconv.Itoa(int(size))) + } + case regalloc.RegTypeFloat: + switch size { + case 32: + ret = fmt.Sprintf("s%d?", r.ID()) + case 64: + ret = fmt.Sprintf("d%d?", r.ID()) + case 128: + ret = fmt.Sprintf("q%d?", r.ID()) + default: + panic("BUG: invalid register size") + } + default: + panic(fmt.Sprintf("BUG: invalid register type: %d for %s", r.RegType(), r)) + } + } + return +} + +func formatVRegWidthVec(r regalloc.VReg, width vecArrangement) (ret string) { + var id string + wspec := strings.ToLower(width.String()) + if r.IsRealReg() { + id = regNames[r.RealReg()][1:] + } else { + id = fmt.Sprintf("%d?", r.ID()) + } + ret = fmt.Sprintf("%s%s", wspec, id) + return +} + +func formatVRegVec(r regalloc.VReg, arr vecArrangement, index vecIndex) (ret string) { + id := fmt.Sprintf("v%d?", r.ID()) + if r.IsRealReg() { + id = regNames[r.RealReg()] + } + ret = fmt.Sprintf("%s.%s", id, strings.ToLower(arr.String())) + if index != vecIndexNone { + ret += fmt.Sprintf("[%d]", index) + } + return +} + +func regTypeToRegisterSizeInBits(r regalloc.RegType) byte { + switch r { + case regalloc.RegTypeInt: + return 64 + case regalloc.RegTypeFloat: + return 128 + default: + panic("BUG: invalid register type") + } +} + +var regNumberInEncoding = [...]uint32{ + x0: 0, + x1: 1, + x2: 2, + x3: 3, + x4: 4, + x5: 5, + x6: 6, + x7: 7, + x8: 8, + x9: 9, + x10: 10, + x11: 
11, + x12: 12, + x13: 13, + x14: 14, + x15: 15, + x16: 16, + x17: 17, + x18: 18, + x19: 19, + x20: 20, + x21: 21, + x22: 22, + x23: 23, + x24: 24, + x25: 25, + x26: 26, + x27: 27, + x28: 28, + x29: 29, + x30: 30, + xzr: 31, + sp: 31, + v0: 0, + v1: 1, + v2: 2, + v3: 3, + v4: 4, + v5: 5, + v6: 6, + v7: 7, + v8: 8, + v9: 9, + v10: 10, + v11: 11, + v12: 12, + v13: 13, + v14: 14, + v15: 15, + v16: 16, + v17: 17, + v18: 18, + v19: 19, + v20: 20, + v21: 21, + v22: 22, + v23: 23, + v24: 24, + v25: 25, + v26: 26, + v27: 27, + v28: 28, + v29: 29, + v30: 30, + v31: 31, +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go new file mode 100644 index 000000000..edb0e36e3 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go @@ -0,0 +1,90 @@ +package arm64 + +import ( + "encoding/binary" + "reflect" + "unsafe" + + "github.com/tetratelabs/wazero/internal/wasmdebug" +) + +// UnwindStack implements wazevo.unwindStack. +func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr { + l := int(top - sp) + + var stackBuf []byte + { + // TODO: use unsafe.Slice after floor version is set to Go 1.20. + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf)) + hdr.Data = sp + hdr.Len = l + hdr.Cap = l + } + + for i := uint64(0); i < uint64(l); { + // (high address) + // +-----------------+ + // | ....... | + // | ret Y | <----+ + // | ....... | | + // | ret 0 | | + // | arg X | | size_of_arg_ret + // | ....... | | + // | arg 1 | | + // | arg 0 | <----+ + // | size_of_arg_ret | + // | ReturnAddress | + // +-----------------+ <----+ + // | ........... | | + // | spill slot M | | + // | ............ | | + // | spill slot 2 | | + // | spill slot 1 | | frame size + // | spill slot 1 | | + // | clobbered N | | + // | ............ 
| | + // | clobbered 0 | <----+ + // | xxxxxx | ;; unused space to make it 16-byte aligned. + // | frame_size | + // +-----------------+ <---- SP + // (low address) + + frameSize := binary.LittleEndian.Uint64(stackBuf[i:]) + i += frameSize + + 16 // frame size + aligned space. + retAddr := binary.LittleEndian.Uint64(stackBuf[i:]) + i += 8 // ret addr. + sizeOfArgRet := binary.LittleEndian.Uint64(stackBuf[i:]) + i += 8 + sizeOfArgRet + returnAddresses = append(returnAddresses, uintptr(retAddr)) + if len(returnAddresses) == wasmdebug.MaxFrames { + break + } + } + return returnAddresses +} + +// GoCallStackView implements wazevo.goCallStackView. +func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { + // (high address) + // +-----------------+ <----+ + // | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned. + // ^ | arg[N]/ret[M] | | + // sliceSize | | ............ | | sliceSize + // | | arg[1]/ret[1] | | + // v | arg[0]/ret[0] | <----+ + // | sliceSize | + // | frame_size | + // +-----------------+ <---- stackPointerBeforeGoCall + // (low address) + ptr := unsafe.Pointer(stackPointerBeforeGoCall) + size := *(*uint64)(unsafe.Add(ptr, 8)) + var view []uint64 + { + sh := (*reflect.SliceHeader)(unsafe.Pointer(&view)) + sh.Data = uintptr(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize). 
+ sh.Len = int(size) + sh.Cap = int(size) + } + return view +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go new file mode 100644 index 000000000..54ce89e46 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go @@ -0,0 +1,100 @@ +package backend + +import ( + "context" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +type ( + // Machine is a backend for a specific ISA machine. + Machine interface { + ExecutableContext() ExecutableContext + + // DisableStackCheck disables the stack check for the current compilation for debugging/testing. + DisableStackCheck() + + // SetCurrentABI initializes the FunctionABI for the given signature. + SetCurrentABI(abi *FunctionABI) + + // SetCompiler sets the compilation context used for the lifetime of Machine. + // This is only called once per Machine, i.e. before the first compilation. + SetCompiler(Compiler) + + // LowerSingleBranch is called when the compilation of the given single branch is started. + LowerSingleBranch(b *ssa.Instruction) + + // LowerConditionalBranch is called when the compilation of the given conditional branch is started. + LowerConditionalBranch(b *ssa.Instruction) + + // LowerInstr is called for each instruction in the given block except for the ones marked as already lowered + // via Compiler.MarkLowered. The order is reverse, i.e. from the last instruction to the first one. + // + // Note: this can lower multiple instructions (which produce the inputs) at once whenever it's possible + // for optimization. + LowerInstr(*ssa.Instruction) + + // Reset resets the machine state for the next compilation. 
+ Reset() + + // InsertMove inserts a move instruction from src to dst whose type is typ. + InsertMove(dst, src regalloc.VReg, typ ssa.Type) + + // InsertReturn inserts the return instruction to return from the current function. + InsertReturn() + + // InsertLoadConstantBlockArg inserts the instruction(s) to load the constant value into the given regalloc.VReg. + InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) + + // Format returns the string representation of the currently compiled machine code. + // This is only for testing purpose. + Format() string + + // RegAlloc does the register allocation after lowering. + RegAlloc() + + // PostRegAlloc does the post register allocation, e.g. setting up prologue/epilogue, redundant move elimination, etc. + PostRegAlloc() + + // ResolveRelocations resolves the relocations after emitting machine code. + // * refToBinaryOffset: the map from the function reference (ssa.FuncRef) to the executable offset. + // * executable: the binary to resolve the relocations. + // * relocations: the relocations to resolve. + // * callTrampolineIslandOffsets: the offsets of the trampoline islands in the executable. + ResolveRelocations( + refToBinaryOffset []int, + executable []byte, + relocations []RelocationInfo, + callTrampolineIslandOffsets []int, + ) + + // Encode encodes the machine instructions to the Compiler. + Encode(ctx context.Context) error + + // CompileGoFunctionTrampoline compiles the trampoline function to call a Go function of the given exit code and signature. + CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte + + // CompileStackGrowCallSequence returns the sequence of instructions shared by all functions to + // call the stack grow builtin function. + CompileStackGrowCallSequence() []byte + + // CompileEntryPreamble returns the sequence of instructions shared by multiple functions to + // enter the function from Go. 
+ CompileEntryPreamble(signature *ssa.Signature) []byte + + // LowerParams lowers the given parameters. + LowerParams(params []ssa.Value) + + // LowerReturns lowers the given returns. + LowerReturns(returns []ssa.Value) + + // ArgsResultsRegs returns the registers used for arguments and return values. + ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) + + // CallTrampolineIslandInfo returns the interval of the offset where the trampoline island is placed, and + // the size of the trampoline island. If islandSize is zero, the trampoline island is not used on this machine. + CallTrampolineIslandInfo(numFunctions int) (interval, islandSize int, err error) + } +) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go new file mode 100644 index 000000000..3f36c84e5 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go @@ -0,0 +1,319 @@ +package backend + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +// RegAllocFunctionMachine is the interface for the machine specific logic that will be used in RegAllocFunction. +type RegAllocFunctionMachine[I regalloc.InstrConstraint] interface { + // InsertMoveBefore inserts the move instruction from src to dst before the given instruction. + InsertMoveBefore(dst, src regalloc.VReg, instr I) + // InsertStoreRegisterAt inserts the instruction(s) to store the given virtual register at the given instruction. + // If after is true, the instruction(s) will be inserted after the given instruction, otherwise before. + InsertStoreRegisterAt(v regalloc.VReg, instr I, after bool) I + // InsertReloadRegisterAt inserts the instruction(s) to reload the given virtual register at the given instruction. 
+ // If after is true, the instruction(s) will be inserted after the given instruction, otherwise before. + InsertReloadRegisterAt(v regalloc.VReg, instr I, after bool) I + // ClobberedRegisters is called when the register allocation is done and the clobbered registers are known. + ClobberedRegisters(regs []regalloc.VReg) + // Swap swaps the two virtual registers after the given instruction. + Swap(cur I, x1, x2, tmp regalloc.VReg) + // LastInstrForInsertion implements LastInstrForInsertion of regalloc.Function. See its comment for details. + LastInstrForInsertion(begin, end I) I + // SSABlockLabel returns the label of the given ssa.BasicBlockID. + SSABlockLabel(id ssa.BasicBlockID) Label +} + +type ( + // RegAllocFunction implements regalloc.Function. + RegAllocFunction[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct { + m m + ssb ssa.Builder + c Compiler + // iter is the iterator for reversePostOrderBlocks + iter int + reversePostOrderBlocks []RegAllocBlock[I, m] + // labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks. + labelToRegAllocBlockIndex map[Label]int + loopNestingForestRoots []ssa.BasicBlock + } + + // RegAllocBlock implements regalloc.Block. + RegAllocBlock[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct { + // f is the function this instruction belongs to. Used to reuse the regAllocFunctionImpl.predsSlice slice for Defs() and Uses(). + f *RegAllocFunction[I, m] + sb ssa.BasicBlock + l Label + begin, end I + loopNestingForestChildren []ssa.BasicBlock + cur I + id int + cachedLastInstrForInsertion I + } +) + +// NewRegAllocFunction returns a new RegAllocFunction. +func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] { + return &RegAllocFunction[I, M]{ + m: m, + ssb: ssb, + c: c, + labelToRegAllocBlockIndex: make(map[Label]int), + } +} + +// AddBlock adds a new block to the function. 
+func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end I) { + i := len(f.reversePostOrderBlocks) + f.reversePostOrderBlocks = append(f.reversePostOrderBlocks, RegAllocBlock[I, M]{ + f: f, + sb: sb, + l: l, + begin: begin, + end: end, + id: int(sb.ID()), + }) + f.labelToRegAllocBlockIndex[l] = i +} + +// Reset resets the function for the next compilation. +func (f *RegAllocFunction[I, M]) Reset() { + f.reversePostOrderBlocks = f.reversePostOrderBlocks[:0] + f.iter = 0 +} + +// StoreRegisterAfter implements regalloc.Function StoreRegisterAfter. +func (f *RegAllocFunction[I, M]) StoreRegisterAfter(v regalloc.VReg, instr regalloc.Instr) { + m := f.m + m.InsertStoreRegisterAt(v, instr.(I), true) +} + +// ReloadRegisterBefore implements regalloc.Function ReloadRegisterBefore. +func (f *RegAllocFunction[I, M]) ReloadRegisterBefore(v regalloc.VReg, instr regalloc.Instr) { + m := f.m + m.InsertReloadRegisterAt(v, instr.(I), false) +} + +// ReloadRegisterAfter implements regalloc.Function ReloadRegisterAfter. +func (f *RegAllocFunction[I, M]) ReloadRegisterAfter(v regalloc.VReg, instr regalloc.Instr) { + m := f.m + m.InsertReloadRegisterAt(v, instr.(I), true) +} + +// StoreRegisterBefore implements regalloc.Function StoreRegisterBefore. +func (f *RegAllocFunction[I, M]) StoreRegisterBefore(v regalloc.VReg, instr regalloc.Instr) { + m := f.m + m.InsertStoreRegisterAt(v, instr.(I), false) +} + +// ClobberedRegisters implements regalloc.Function ClobberedRegisters. +func (f *RegAllocFunction[I, M]) ClobberedRegisters(regs []regalloc.VReg) { + f.m.ClobberedRegisters(regs) +} + +// SwapBefore implements regalloc.Function SwapBefore. +func (f *RegAllocFunction[I, M]) SwapBefore(x1, x2, tmp regalloc.VReg, instr regalloc.Instr) { + f.m.Swap(instr.Prev().(I), x1, x2, tmp) +} + +// PostOrderBlockIteratorBegin implements regalloc.Function PostOrderBlockIteratorBegin. 
+func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorBegin() regalloc.Block { + f.iter = len(f.reversePostOrderBlocks) - 1 + return f.PostOrderBlockIteratorNext() +} + +// PostOrderBlockIteratorNext implements regalloc.Function PostOrderBlockIteratorNext. +func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorNext() regalloc.Block { + if f.iter < 0 { + return nil + } + b := &f.reversePostOrderBlocks[f.iter] + f.iter-- + return b +} + +// ReversePostOrderBlockIteratorBegin implements regalloc.Function ReversePostOrderBlockIteratorBegin. +func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorBegin() regalloc.Block { + f.iter = 0 + return f.ReversePostOrderBlockIteratorNext() +} + +// ReversePostOrderBlockIteratorNext implements regalloc.Function ReversePostOrderBlockIteratorNext. +func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorNext() regalloc.Block { + if f.iter >= len(f.reversePostOrderBlocks) { + return nil + } + b := &f.reversePostOrderBlocks[f.iter] + f.iter++ + return b +} + +// LoopNestingForestRoots implements regalloc.Function LoopNestingForestRoots. +func (f *RegAllocFunction[I, M]) LoopNestingForestRoots() int { + f.loopNestingForestRoots = f.ssb.LoopNestingForestRoots() + return len(f.loopNestingForestRoots) +} + +// LoopNestingForestRoot implements regalloc.Function LoopNestingForestRoot. +func (f *RegAllocFunction[I, M]) LoopNestingForestRoot(i int) regalloc.Block { + blk := f.loopNestingForestRoots[i] + l := f.m.SSABlockLabel(blk.ID()) + index := f.labelToRegAllocBlockIndex[l] + return &f.reversePostOrderBlocks[index] +} + +// InsertMoveBefore implements regalloc.Function InsertMoveBefore. +func (f *RegAllocFunction[I, M]) InsertMoveBefore(dst, src regalloc.VReg, instr regalloc.Instr) { + f.m.InsertMoveBefore(dst, src, instr.(I)) +} + +// LowestCommonAncestor implements regalloc.Function LowestCommonAncestor. 
+func (f *RegAllocFunction[I, M]) LowestCommonAncestor(blk1, blk2 regalloc.Block) regalloc.Block { + ret := f.ssb.LowestCommonAncestor(blk1.(*RegAllocBlock[I, M]).sb, blk2.(*RegAllocBlock[I, M]).sb) + l := f.m.SSABlockLabel(ret.ID()) + index := f.labelToRegAllocBlockIndex[l] + return &f.reversePostOrderBlocks[index] +} + +// Idom implements regalloc.Function Idom. +func (f *RegAllocFunction[I, M]) Idom(blk regalloc.Block) regalloc.Block { + builder := f.ssb + idom := builder.Idom(blk.(*RegAllocBlock[I, M]).sb) + if idom == nil { + panic("BUG: idom must not be nil") + } + l := f.m.SSABlockLabel(idom.ID()) + index := f.labelToRegAllocBlockIndex[l] + return &f.reversePostOrderBlocks[index] +} + +// ID implements regalloc.Block. +func (r *RegAllocBlock[I, m]) ID() int32 { return int32(r.id) } + +// BlockParams implements regalloc.Block. +func (r *RegAllocBlock[I, m]) BlockParams(regs *[]regalloc.VReg) []regalloc.VReg { + c := r.f.c + *regs = (*regs)[:0] + for i := 0; i < r.sb.Params(); i++ { + v := c.VRegOf(r.sb.Param(i)) + *regs = append(*regs, v) + } + return *regs +} + +// InstrIteratorBegin implements regalloc.Block. +func (r *RegAllocBlock[I, m]) InstrIteratorBegin() regalloc.Instr { + r.cur = r.begin + return r.cur +} + +// InstrIteratorNext implements regalloc.Block. +func (r *RegAllocBlock[I, m]) InstrIteratorNext() regalloc.Instr { + for { + if r.cur == r.end { + return nil + } + instr := r.cur.Next() + r.cur = instr.(I) + if instr == nil { + return nil + } else if instr.AddedBeforeRegAlloc() { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// InstrRevIteratorBegin implements regalloc.Block. +func (r *RegAllocBlock[I, m]) InstrRevIteratorBegin() regalloc.Instr { + r.cur = r.end + return r.cur +} + +// InstrRevIteratorNext implements regalloc.Block. 
+func (r *RegAllocBlock[I, m]) InstrRevIteratorNext() regalloc.Instr { + for { + if r.cur == r.begin { + return nil + } + instr := r.cur.Prev() + r.cur = instr.(I) + if instr == nil { + return nil + } else if instr.AddedBeforeRegAlloc() { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// FirstInstr implements regalloc.Block. +func (r *RegAllocBlock[I, m]) FirstInstr() regalloc.Instr { + return r.begin +} + +// EndInstr implements regalloc.Block. +func (r *RegAllocBlock[I, m]) EndInstr() regalloc.Instr { + return r.end +} + +// LastInstrForInsertion implements regalloc.Block. +func (r *RegAllocBlock[I, m]) LastInstrForInsertion() regalloc.Instr { + var nil I + if r.cachedLastInstrForInsertion == nil { + r.cachedLastInstrForInsertion = r.f.m.LastInstrForInsertion(r.begin, r.end) + } + return r.cachedLastInstrForInsertion +} + +// Preds implements regalloc.Block. +func (r *RegAllocBlock[I, m]) Preds() int { return r.sb.Preds() } + +// Pred implements regalloc.Block. +func (r *RegAllocBlock[I, m]) Pred(i int) regalloc.Block { + sb := r.sb + pred := sb.Pred(i) + l := r.f.m.SSABlockLabel(pred.ID()) + index := r.f.labelToRegAllocBlockIndex[l] + return &r.f.reversePostOrderBlocks[index] +} + +// Entry implements regalloc.Block. +func (r *RegAllocBlock[I, m]) Entry() bool { return r.sb.EntryBlock() } + +// Succs implements regalloc.Block. +func (r *RegAllocBlock[I, m]) Succs() int { + return r.sb.Succs() +} + +// Succ implements regalloc.Block. +func (r *RegAllocBlock[I, m]) Succ(i int) regalloc.Block { + sb := r.sb + succ := sb.Succ(i) + if succ.ReturnBlock() { + return nil + } + l := r.f.m.SSABlockLabel(succ.ID()) + index := r.f.labelToRegAllocBlockIndex[l] + return &r.f.reversePostOrderBlocks[index] +} + +// LoopHeader implements regalloc.Block. +func (r *RegAllocBlock[I, m]) LoopHeader() bool { + return r.sb.LoopHeader() +} + +// LoopNestingForestChildren implements regalloc.Block. 
+func (r *RegAllocBlock[I, m]) LoopNestingForestChildren() int { + r.loopNestingForestChildren = r.sb.LoopNestingForestChildren() + return len(r.loopNestingForestChildren) +} + +// LoopNestingForestChild implements regalloc.Block. +func (r *RegAllocBlock[I, m]) LoopNestingForestChild(i int) regalloc.Block { + blk := r.loopNestingForestChildren[i] + l := r.f.m.SSABlockLabel(blk.ID()) + index := r.f.labelToRegAllocBlockIndex[l] + return &r.f.reversePostOrderBlocks[index] +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go new file mode 100644 index 000000000..23157b478 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go @@ -0,0 +1,136 @@ +package regalloc + +import "fmt" + +// These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register +// allocators to work on any ISA. +// +// TODO: the interfaces are not stabilized yet, especially x64 will need some changes. E.g. x64 has an addressing mode +// where index can be in memory. That kind of info will be useful to reduce the register pressure, and should be leveraged +// by the register allocators, like https://docs.rs/regalloc2/latest/regalloc2/enum.OperandConstraint.html + +type ( + // Function is the top-level interface to do register allocation, which corresponds to a CFG containing + // Blocks(s). + Function interface { + // PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG. + // In other words, the last blocks in the CFG will be returned first. + PostOrderBlockIteratorBegin() Block + // PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG. + PostOrderBlockIteratorNext() Block + // ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG. 
+ // In other words, the first blocks in the CFG will be returned first. + ReversePostOrderBlockIteratorBegin() Block + // ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG. + ReversePostOrderBlockIteratorNext() Block + // ClobberedRegisters tell the clobbered registers by this function. + ClobberedRegisters([]VReg) + // LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function. + LoopNestingForestRoots() int + // LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function. + LoopNestingForestRoot(i int) Block + // LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree. + LowestCommonAncestor(blk1, blk2 Block) Block + // Idom returns the immediate dominator of the given block. + Idom(blk Block) Block + + // Followings are for rewriting the function. + + // SwapAtEndOfBlock swaps the two virtual registers at the end of the given block. + SwapBefore(x1, x2, tmp VReg, instr Instr) + // StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register. + StoreRegisterBefore(v VReg, instr Instr) + // StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register. + StoreRegisterAfter(v VReg, instr Instr) + // ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register. + ReloadRegisterBefore(v VReg, instr Instr) + // ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register. + ReloadRegisterAfter(v VReg, instr Instr) + // InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers. + InsertMoveBefore(dst, src VReg, instr Instr) + } + + // Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s). 
+ Block interface { + // ID returns the unique identifier of this block which is ordered in the reverse post-order traversal of the CFG. + ID() int32 + // BlockParams returns the virtual registers used as the parameters of this block. + BlockParams(*[]VReg) []VReg + // InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped. + // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr. + InstrIteratorBegin() Instr + // InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped. + // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr. + InstrIteratorNext() Instr + // InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order. + InstrRevIteratorBegin() Instr + // InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order. + InstrRevIteratorNext() Instr + // FirstInstr returns the fist instruction in this block where instructions will be inserted after it. + FirstInstr() Instr + // EndInstr returns the end instruction in this block. + EndInstr() Instr + // LastInstrForInsertion returns the last instruction in this block where instructions will be inserted before it. + // Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges. + // At the time of register allocation, all the critical edges are already split, so there is no need + // to worry about the case where branching instruction has multiple successors. + // Therefore, usually, it is the nop instruction, but if the block ends with an unconditional branching, then it returns + // the unconditional branch, not the nop. In other words it is either nop or unconditional branch. + LastInstrForInsertion() Instr + // Preds returns the number of predecessors of this block in the CFG. 
+ Preds() int + // Pred returns the i-th predecessor of this block in the CFG. + Pred(i int) Block + // Entry returns true if the block is for the entry block. + Entry() bool + // Succs returns the number of successors of this block in the CFG. + Succs() int + // Succ returns the i-th successor of this block in the CFG. + Succ(i int) Block + // LoopHeader returns true if this block is a loop header. + LoopHeader() bool + // LoopNestingForestChildren returns the number of children of this block in the loop nesting forest. + LoopNestingForestChildren() int + // LoopNestingForestChild returns the i-th child of this block in the loop nesting forest. + LoopNestingForestChild(i int) Block + } + + // Instr is an instruction in a block, abstracting away the underlying ISA. + Instr interface { + fmt.Stringer + // Next returns the next instruction in the same block. + Next() Instr + // Prev returns the previous instruction in the same block. + Prev() Instr + // Defs returns the virtual registers defined by this instruction. + Defs(*[]VReg) []VReg + // Uses returns the virtual registers used by this instruction. + // Note: multiple returned []VReg will not be held at the same time, so it's safe to use the same slice for this. + Uses(*[]VReg) []VReg + // AssignUse assigns the RealReg-allocated virtual register used by this instruction at the given index. + AssignUse(index int, v VReg) + // AssignDef assigns a RealReg-allocated virtual register defined by this instruction. + // This only accepts one register because we don't allocate registers for multi-def instructions (i.e. call instruction) + AssignDef(VReg) + // IsCopy returns true if this instruction is a move instruction between two registers. + // If true, the instruction is of the form of dst = src, and if the src and dst do not interfere with each other, + // we could coalesce them, and hence the copy can be eliminated from the final code. 
+ IsCopy() bool + // IsCall returns true if this instruction is a call instruction. The result is used to insert + // caller saved register spills and restores. + IsCall() bool + // IsIndirectCall returns true if this instruction is an indirect call instruction which calls a function pointer. + // The result is used to insert caller saved register spills and restores. + IsIndirectCall() bool + // IsReturn returns true if this instruction is a return instruction. + IsReturn() bool + // AddedBeforeRegAlloc returns true if this instruction is added before register allocation. + AddedBeforeRegAlloc() bool + } + + // InstrConstraint is an interface for arch-specific instruction constraints. + InstrConstraint interface { + comparable + Instr + } +) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go new file mode 100644 index 000000000..46df807e6 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go @@ -0,0 +1,123 @@ +package regalloc + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +// VReg represents a register which is assigned to an SSA value. This is used to represent a register in the backend. +// A VReg may or may not be a physical register, and the info of physical register can be obtained by RealReg. +type VReg uint64 + +// VRegID is the lower 32bit of VReg, which is the pure identifier of VReg without RealReg info. +type VRegID uint32 + +// RealReg returns the RealReg of this VReg. +func (v VReg) RealReg() RealReg { + return RealReg(v >> 32) +} + +// IsRealReg returns true if this VReg is backed by a physical register. +func (v VReg) IsRealReg() bool { + return v.RealReg() != RealRegInvalid +} + +// FromRealReg returns a VReg from the given RealReg and RegType. +// This is used to represent a specific pre-colored register in the backend. 
+func FromRealReg(r RealReg, typ RegType) VReg { + rid := VRegID(r) + if rid > vRegIDReservedForRealNum { + panic(fmt.Sprintf("invalid real reg %d", r)) + } + return VReg(r).SetRealReg(r).SetRegType(typ) +} + +// SetRealReg sets the RealReg of this VReg and returns the updated VReg. +func (v VReg) SetRealReg(r RealReg) VReg { + return VReg(r)<<32 | (v & 0xff_00_ffffffff) +} + +// RegType returns the RegType of this VReg. +func (v VReg) RegType() RegType { + return RegType(v >> 40) +} + +// SetRegType sets the RegType of this VReg and returns the updated VReg. +func (v VReg) SetRegType(t RegType) VReg { + return VReg(t)<<40 | (v & 0x00_ff_ffffffff) +} + +// ID returns the VRegID of this VReg. +func (v VReg) ID() VRegID { + return VRegID(v & 0xffffffff) +} + +// Valid returns true if this VReg is Valid. +func (v VReg) Valid() bool { + return v.ID() != vRegIDInvalid && v.RegType() != RegTypeInvalid +} + +// RealReg represents a physical register. +type RealReg byte + +const RealRegInvalid RealReg = 0 + +const ( + vRegIDInvalid VRegID = 1 << 31 + VRegIDNonReservedBegin = vRegIDReservedForRealNum + vRegIDReservedForRealNum VRegID = 128 + VRegInvalid = VReg(vRegIDInvalid) +) + +// String implements fmt.Stringer. +func (r RealReg) String() string { + switch r { + case RealRegInvalid: + return "invalid" + default: + return fmt.Sprintf("r%d", r) + } +} + +// String implements fmt.Stringer. +func (v VReg) String() string { + if v.IsRealReg() { + return fmt.Sprintf("r%d", v.ID()) + } + return fmt.Sprintf("v%d?", v.ID()) +} + +// RegType represents the type of a register. +type RegType byte + +const ( + RegTypeInvalid RegType = iota + RegTypeInt + RegTypeFloat + NumRegType +) + +// String implements fmt.Stringer. +func (r RegType) String() string { + switch r { + case RegTypeInt: + return "int" + case RegTypeFloat: + return "float" + default: + return "invalid" + } +} + +// RegTypeOf returns the RegType of the given ssa.Type. 
+func RegTypeOf(p ssa.Type) RegType { + switch p { + case ssa.TypeI32, ssa.TypeI64: + return RegTypeInt + case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: + return RegTypeFloat + default: + panic("invalid type") + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go new file mode 100644 index 000000000..b4450d56f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go @@ -0,0 +1,1212 @@ +// Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in +// api.go. +// +// References: +// - https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf +// - https://en.wikipedia.org/wiki/Chaitin%27s_algorithm +// - https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf +// - https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis. +// - https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go +package regalloc + +import ( + "fmt" + "math" + "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// NewAllocator returns a new Allocator. +func NewAllocator(allocatableRegs *RegisterInfo) Allocator { + a := Allocator{ + regInfo: allocatableRegs, + phiDefInstListPool: wazevoapi.NewPool[phiDefInstList](resetPhiDefInstList), + blockStates: wazevoapi.NewIDedPool[blockState](resetBlockState), + } + a.state.vrStates = wazevoapi.NewIDedPool[vrState](resetVrState) + a.state.reset() + for _, regs := range allocatableRegs.AllocatableRegisters { + for _, r := range regs { + a.allocatableSet = a.allocatableSet.add(r) + } + } + return a +} + +type ( + // RegisterInfo holds the statically-known ISA-specific register information. 
+ RegisterInfo struct { + // AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum. + // The order matters: the first element is the most preferred one when allocating. + AllocatableRegisters [NumRegType][]RealReg + CalleeSavedRegisters RegSet + CallerSavedRegisters RegSet + RealRegToVReg []VReg + // RealRegName returns the name of the given RealReg for debugging. + RealRegName func(r RealReg) string + RealRegType func(r RealReg) RegType + } + + // Allocator is a register allocator. + Allocator struct { + // regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator. + regInfo *RegisterInfo + // allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA. + allocatableSet RegSet + allocatedCalleeSavedRegs []VReg + vs []VReg + vs2 []VRegID + phiDefInstListPool wazevoapi.Pool[phiDefInstList] + + // Followings are re-used during various places. + blks []Block + reals []RealReg + currentOccupants regInUseSet + + // Following two fields are updated while iterating the blocks in the reverse postorder. + state state + blockStates wazevoapi.IDedPool[blockState] + } + + // programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg. + programCounter int32 + + state struct { + argRealRegs []VReg + regsInUse regInUseSet + vrStates wazevoapi.IDedPool[vrState] + + currentBlockID int32 + + // allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function. + allocatedRegSet RegSet + } + + blockState struct { + // liveIns is a list of VReg that are live at the beginning of the block. + liveIns []VRegID + // seen is true if the block is visited during the liveness analysis. + seen bool + // visited is true if the block is visited during the allocation phase. 
+ visited bool + startFromPredIndex int + // startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges. + startRegs regInUseSet + // endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges. + endRegs regInUseSet + } + + vrState struct { + v VReg + r RealReg + // defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil. + defInstr Instr + // defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value. + defBlk Block + // lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that + // reloads this value. This is used to determine the spill location. Only valid if spilled=true. + lca Block + // lastUse is the program counter of the last use of this value. This changes while iterating the block, and + // should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID. + lastUse programCounter + lastUseUpdatedAtBlockID int32 + // spilled is true if this value is spilled i.e. the value is reload from the stack somewhere in the program. + // + // Note that this field is used during liveness analysis for different purpose. This is used to determine the + // value is live-in or not. + spilled bool + // isPhi is true if this is a phi value. + isPhi bool + desiredLoc desiredLoc + // phiDefInstList is a list of instructions that defines this phi value. + // This is used to determine the spill location, and only valid if isPhi=true. + *phiDefInstList + } + + // phiDefInstList is a linked list of instructions that defines a phi value. + phiDefInstList struct { + instr Instr + v VReg + next *phiDefInstList + } + + // desiredLoc represents a desired location for a VReg. + desiredLoc uint16 + // desiredLocKind is a kind of desired location for a VReg. 
+ desiredLocKind uint16 +) + +const ( + // desiredLocKindUnspecified is a kind of desired location for a VReg that is not specified. + desiredLocKindUnspecified desiredLocKind = iota + // desiredLocKindStack is a kind of desired location for a VReg that is on the stack, only used for the phi values. + desiredLocKindStack + // desiredLocKindReg is a kind of desired location for a VReg that is in a register. + desiredLocKindReg + desiredLocUnspecified = desiredLoc(desiredLocKindUnspecified) + desiredLocStack = desiredLoc(desiredLocKindStack) +) + +func newDesiredLocReg(r RealReg) desiredLoc { + return desiredLoc(desiredLocKindReg) | desiredLoc(r<<2) +} + +func (d desiredLoc) realReg() RealReg { + return RealReg(d >> 2) +} + +func (d desiredLoc) stack() bool { + return d&3 == desiredLoc(desiredLocKindStack) +} + +func resetPhiDefInstList(l *phiDefInstList) { + l.instr = nil + l.next = nil + l.v = VRegInvalid +} + +func (s *state) dump(info *RegisterInfo) { //nolint:unused + fmt.Println("\t\tstate:") + fmt.Println("\t\t\targRealRegs:", s.argRealRegs) + fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info)) + fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info)) + fmt.Println("\t\t\tused:", s.regsInUse.format(info)) + var strs []string + for i := 0; i <= s.vrStates.MaxIDEncountered(); i++ { + vs := s.vrStates.Get(i) + if vs == nil { + continue + } + if vs.r != RealRegInvalid { + strs = append(strs, fmt.Sprintf("(v%d: %s)", vs.v.ID(), info.RealRegName(vs.r))) + } + } + fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", ")) +} + +func (s *state) reset() { + s.argRealRegs = s.argRealRegs[:0] + s.vrStates.Reset() + s.allocatedRegSet = RegSet(0) + s.regsInUse.reset() + s.currentBlockID = -1 +} + +func (s *state) setVRegState(v VReg, r RealReg) { + id := int(v.ID()) + st := s.vrStates.GetOrAllocate(id) + st.r = r + st.v = v +} + +func resetVrState(vs *vrState) { + vs.v = VRegInvalid + vs.r = RealRegInvalid + vs.defInstr = nil + vs.defBlk = nil + vs.spilled 
= false + vs.lastUse = -1 + vs.lastUseUpdatedAtBlockID = -1 + vs.lca = nil + vs.isPhi = false + vs.phiDefInstList = nil + vs.desiredLoc = desiredLocUnspecified +} + +func (s *state) getVRegState(v VRegID) *vrState { + return s.vrStates.GetOrAllocate(int(v)) +} + +func (s *state) useRealReg(r RealReg, v VReg) { + if s.regsInUse.has(r) { + panic("BUG: useRealReg: the given real register is already used") + } + s.regsInUse.add(r, v) + s.setVRegState(v, r) + s.allocatedRegSet = s.allocatedRegSet.add(r) +} + +func (s *state) releaseRealReg(r RealReg) { + current := s.regsInUse.get(r) + if current.Valid() { + s.regsInUse.remove(r) + s.setVRegState(current, RealRegInvalid) + } +} + +// recordReload records that the given VReg is reloaded in the given block. +// This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value. +func (vs *vrState) recordReload(f Function, blk Block) { + vs.spilled = true + if vs.lca == nil { + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID()) + } + vs.lca = blk + } else { + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID()) + } + vs.lca = f.LowestCommonAncestor(vs.lca, blk) + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("updated lca=%d\n", vs.lca.ID()) + } + } +} + +func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) { + r = RealRegInvalid + // First, check if the preferredMask has any allocatable register. + if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) { + for _, candidateReal := range allocatable { + // TODO: we should ensure the preferred register is in the allocatable set in the first place, + // but right now, just in case, we check it here. 
+ if candidateReal == preferred { + return preferred + } + } + } + + var lastUseAt programCounter + var spillVReg VReg + for _, candidateReal := range allocatable { + if forbiddenMask.has(candidateReal) { + continue + } + + using := s.regsInUse.get(candidateReal) + if using == VRegInvalid { + // This is not used at this point. + return candidateReal + } + + // Real registers in use should not be spilled, so we skip them. + // For example, if the register is used as an argument register, and it might be + // spilled and not reloaded when it ends up being used as a temporary to pass + // stack based argument. + if using.IsRealReg() { + continue + } + + isPreferred := candidateReal == preferred + + // last == -1 means the value won't be used anymore. + if last := s.getVRegState(using.ID()).lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) { + lastUseAt = last + r = candidateReal + spillVReg = using + if isPreferred { + break + } + } + } + + if r == RealRegInvalid { + panic("not found any allocatable register") + } + + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\tspilling v%d when lastUseAt=%d and regsInUse=%s\n", spillVReg.ID(), lastUseAt, s.regsInUse.format(a.regInfo)) + } + s.releaseRealReg(r) + return r +} + +func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg { + for _, r := range allocatable { + if !s.regsInUse.has(r) && !forbiddenMask.has(r) { + return r + } + } + return RealRegInvalid +} + +func (s *state) resetAt(bs *blockState) { + s.regsInUse.range_(func(_ RealReg, vr VReg) { + s.setVRegState(vr, RealRegInvalid) + }) + s.regsInUse.reset() + bs.endRegs.range_(func(r RealReg, v VReg) { + id := int(v.ID()) + st := s.vrStates.GetOrAllocate(id) + if st.lastUseUpdatedAtBlockID == s.currentBlockID && st.lastUse == programCounterLiveIn { + s.regsInUse.add(r, v) + s.setVRegState(v, r) + } + }) +} + +func resetBlockState(b *blockState) { + b.seen = false + b.visited = false + 
b.endRegs.reset() + b.startRegs.reset() + b.startFromPredIndex = -1 + b.liveIns = b.liveIns[:0] +} + +func (b *blockState) dump(a *RegisterInfo) { + fmt.Println("\t\tblockState:") + fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a)) + fmt.Println("\t\t\tendRegs:", b.endRegs.format(a)) + fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex) + fmt.Println("\t\t\tvisited:", b.visited) +} + +// DoAllocation performs register allocation on the given Function. +func (a *Allocator) DoAllocation(f Function) { + a.livenessAnalysis(f) + a.alloc(f) + a.determineCalleeSavedRealRegs(f) +} + +func (a *Allocator) determineCalleeSavedRealRegs(f Function) { + a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0] + a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) { + if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) { + a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg]) + } + }) + f.ClobberedRegisters(a.allocatedCalleeSavedRegs) +} + +func (a *Allocator) getOrAllocateBlockState(blockID int32) *blockState { + return a.blockStates.GetOrAllocate(int(blockID)) +} + +// phiBlk returns the block that defines the given phi value, nil otherwise. +func (s *state) phiBlk(v VRegID) Block { + vs := s.getVRegState(v) + if vs.isPhi { + return vs.defBlk + } + return nil +} + +const ( + programCounterLiveIn = math.MinInt32 + programCounterLiveOut = math.MaxInt32 +) + +// liveAnalysis constructs Allocator.blockLivenessData. +// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2. +func (a *Allocator) livenessAnalysis(f Function) { + s := &a.state + for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter. + + // We should gather phi value data. 
+ for _, p := range blk.BlockParams(&a.vs) { + vs := s.getVRegState(p.ID()) + vs.isPhi = true + vs.defBlk = blk + } + } + + for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { + blkID := blk.ID() + info := a.getOrAllocateBlockState(blkID) + + a.vs2 = a.vs2[:0] + const ( + flagDeleted = false + flagLive = true + ) + ns := blk.Succs() + for i := 0; i < ns; i++ { + succ := blk.Succ(i) + if succ == nil { + continue + } + + succID := succ.ID() + succInfo := a.getOrAllocateBlockState(succID) + if !succInfo.seen { // This means the back edge. + continue + } + + for _, v := range succInfo.liveIns { + if s.phiBlk(v) != succ { + st := s.getVRegState(v) + // We use .spilled field to store the flag. + st.spilled = flagLive + a.vs2 = append(a.vs2, v) + } + } + } + + for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() { + + var use, def VReg + for _, def = range instr.Defs(&a.vs) { + if !def.IsRealReg() { + id := def.ID() + st := s.getVRegState(id) + // We use .spilled field to store the flag. + st.spilled = flagDeleted + a.vs2 = append(a.vs2, id) + } + } + for _, use = range instr.Uses(&a.vs) { + if !use.IsRealReg() { + id := use.ID() + st := s.getVRegState(id) + // We use .spilled field to store the flag. + st.spilled = flagLive + a.vs2 = append(a.vs2, id) + } + } + + if def.Valid() && s.phiBlk(def.ID()) != nil { + if use.Valid() && use.IsRealReg() { + // If the destination is a phi value, and the source is a real register, this is the beginning of the function. + a.state.argRealRegs = append(a.state.argRealRegs, use) + } + } + } + + for _, v := range a.vs2 { + st := s.getVRegState(v) + // We use .spilled field to store the flag. 
+ if st.spilled == flagLive { //nolint:gosimple + info.liveIns = append(info.liveIns, v) + st.spilled = false + } + } + + info.seen = true + } + + nrs := f.LoopNestingForestRoots() + for i := 0; i < nrs; i++ { + root := f.LoopNestingForestRoot(i) + a.loopTreeDFS(root) + } +} + +// loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way. +func (a *Allocator) loopTreeDFS(entry Block) { + a.blks = a.blks[:0] + a.blks = append(a.blks, entry) + + s := &a.state + for len(a.blks) > 0 { + tail := len(a.blks) - 1 + loop := a.blks[tail] + a.blks = a.blks[:tail] + a.vs2 = a.vs2[:0] + const ( + flagDone = false + flagPending = true + ) + info := a.getOrAllocateBlockState(loop.ID()) + for _, v := range info.liveIns { + if s.phiBlk(v) != loop { + a.vs2 = append(a.vs2, v) + st := s.getVRegState(v) + // We use .spilled field to store the flag. + st.spilled = flagPending + } + } + + var siblingAddedView []VRegID + cn := loop.LoopNestingForestChildren() + for i := 0; i < cn; i++ { + child := loop.LoopNestingForestChild(i) + childID := child.ID() + childInfo := a.getOrAllocateBlockState(childID) + + if i == 0 { + begin := len(childInfo.liveIns) + for _, v := range a.vs2 { + st := s.getVRegState(v) + // We use .spilled field to store the flag. + if st.spilled == flagPending { //nolint:gosimple + st.spilled = flagDone + // TODO: deduplicate, though I don't think it has much impact. + childInfo.liveIns = append(childInfo.liveIns, v) + } + } + siblingAddedView = childInfo.liveIns[begin:] + } else { + // TODO: deduplicate, though I don't think it has much impact. + childInfo.liveIns = append(childInfo.liveIns, siblingAddedView...) + } + + if child.LoopHeader() { + a.blks = append(a.blks, child) + } + } + + if cn == 0 { + // If there's no forest child, we haven't cleared the .spilled field at this point. 
+ for _, v := range a.vs2 { + st := s.getVRegState(v) + st.spilled = false + } + } + } +} + +// alloc allocates registers for the given function by iterating the blocks in the reverse postorder. +// The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go +// In short, this is a simply linear scan register allocation where each block inherits the register allocation state from +// one of its predecessors. Each block inherits the selected state and starts allocation from there. +// If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state"). +// The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because +// at the point, we all know where the reloads happen, and therefore we can know the best place to spill the values. More precisely, +// the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value. +// +// All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^. +func (a *Allocator) alloc(f Function) { + // First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block). + for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("========== allocating blk%d ========\n", blk.ID()) + } + if blk.Entry() { + a.finalizeStartReg(blk) + } + a.allocBlock(f, blk) + } + // After the allocation, we all know the start and end state of each block. So we can fix the merge states. 
+ for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { + a.fixMergeState(f, blk) + } + // Finally, we insert the spill instructions as we know all the places where the reloads happen. + a.scheduleSpills(f) +} + +func (a *Allocator) updateLiveInVRState(liveness *blockState) { + currentBlockID := a.state.currentBlockID + for _, v := range liveness.liveIns { + vs := a.state.getVRegState(v) + vs.lastUse = programCounterLiveIn + vs.lastUseUpdatedAtBlockID = currentBlockID + } +} + +func (a *Allocator) finalizeStartReg(blk Block) { + bID := blk.ID() + liveness := a.getOrAllocateBlockState(bID) + s := &a.state + currentBlkState := a.getOrAllocateBlockState(bID) + if currentBlkState.startFromPredIndex > -1 { + return + } + + s.currentBlockID = bID + a.updateLiveInVRState(liveness) + + preds := blk.Preds() + var predState *blockState + switch preds { + case 0: // This is the entry block. + case 1: + predID := blk.Pred(0).ID() + predState = a.getOrAllocateBlockState(predID) + currentBlkState.startFromPredIndex = 0 + default: + // TODO: there should be some better heuristic to choose the predecessor. 
+ for i := 0; i < preds; i++ { + predID := blk.Pred(i).ID() + if _predState := a.getOrAllocateBlockState(predID); _predState.visited { + predState = _predState + currentBlkState.startFromPredIndex = i + break + } + } + } + if predState == nil { + if !blk.Entry() { + panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID())) + } + for _, u := range s.argRealRegs { + s.useRealReg(u.RealReg(), u) + } + currentBlkState.startFromPredIndex = 0 + } else if predState != nil { + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n", + bID, blk.Pred(currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex) + } + s.resetAt(predState) + } + + s.regsInUse.range_(func(allocated RealReg, v VReg) { + currentBlkState.startRegs.add(allocated, v) + }) + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("finalized start reg for blk%d: %s\n", blk.ID(), currentBlkState.startRegs.format(a.regInfo)) + } +} + +func (a *Allocator) allocBlock(f Function, blk Block) { + bID := blk.ID() + s := &a.state + currentBlkState := a.getOrAllocateBlockState(bID) + s.currentBlockID = bID + + if currentBlkState.startFromPredIndex < 0 { + panic("BUG: startFromPredIndex should be set in finalizeStartReg prior to allocBlock") + } + + // Clears the previous state. + s.regsInUse.range_(func(allocatedRealReg RealReg, vr VReg) { + s.setVRegState(vr, RealRegInvalid) + }) + s.regsInUse.reset() + // Then set the start state. + currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) { + s.useRealReg(allocatedRealReg, vr) + }) + + desiredUpdated := a.vs2[:0] + + // Update the last use of each VReg. 
+ var pc programCounter + for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { + var use, def VReg + for _, use = range instr.Uses(&a.vs) { + if !use.IsRealReg() { + s.getVRegState(use.ID()).lastUse = pc + } + } + + if instr.IsCopy() { + def = instr.Defs(&a.vs)[0] + r := def.RealReg() + if r != RealRegInvalid { + useID := use.ID() + vs := s.getVRegState(useID) + if !vs.isPhi { // TODO: no idea why do we need this. + vs.desiredLoc = newDesiredLocReg(r) + desiredUpdated = append(desiredUpdated, useID) + } + } + } + pc++ + } + + // Mark all live-out values by checking live-in of the successors. + // While doing so, we also update the desired register values. + var succ Block + for i, ns := 0, blk.Succs(); i < ns; i++ { + succ = blk.Succ(i) + if succ == nil { + continue + } + + succID := succ.ID() + succState := a.getOrAllocateBlockState(succID) + for _, v := range succState.liveIns { + if s.phiBlk(v) != succ { + st := s.getVRegState(v) + st.lastUse = programCounterLiveOut + } + } + + if succState.startFromPredIndex > -1 { + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo)) + } + succState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) { + vs := s.getVRegState(vr.ID()) + vs.desiredLoc = newDesiredLocReg(allocatedRealReg) + desiredUpdated = append(desiredUpdated, vr.ID()) + }) + for _, p := range succ.BlockParams(&a.vs) { + vs := s.getVRegState(p.ID()) + if vs.desiredLoc.realReg() == RealRegInvalid { + vs.desiredLoc = desiredLocStack + desiredUpdated = append(desiredUpdated, p.ID()) + } + } + } + } + + // Propagate the desired register values from the end of the block to the beginning. 
+ for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() { + if instr.IsCopy() { + def := instr.Defs(&a.vs)[0] + defState := s.getVRegState(def.ID()) + desired := defState.desiredLoc.realReg() + if desired == RealRegInvalid { + continue + } + + use := instr.Uses(&a.vs)[0] + useID := use.ID() + useState := s.getVRegState(useID) + if s.phiBlk(useID) != succ && useState.desiredLoc == desiredLocUnspecified { + useState.desiredLoc = newDesiredLocReg(desired) + desiredUpdated = append(desiredUpdated, useID) + } + } + } + + pc = 0 + for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { + if wazevoapi.RegAllocLoggingEnabled { + fmt.Println(instr) + } + + var currentUsedSet RegSet + killSet := a.reals[:0] + + // Gather the set of registers that will be used in the current instruction. + for _, use := range instr.Uses(&a.vs) { + if use.IsRealReg() { + r := use.RealReg() + currentUsedSet = currentUsedSet.add(r) + if a.allocatableSet.has(r) { + killSet = append(killSet, r) + } + } else { + vs := s.getVRegState(use.ID()) + if r := vs.r; r != RealRegInvalid { + currentUsedSet = currentUsedSet.add(r) + } + } + } + + for i, use := range instr.Uses(&a.vs) { + if !use.IsRealReg() { + vs := s.getVRegState(use.ID()) + killed := vs.lastUse == pc + r := vs.r + + if r == RealRegInvalid { + r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet, + // Prefer the desired register if it's available. 
+ vs.desiredLoc.realReg()) + vs.recordReload(f, blk) + f.ReloadRegisterBefore(use.SetRealReg(r), instr) + s.useRealReg(r, use) + } + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r)) + } + instr.AssignUse(i, use.SetRealReg(r)) + currentUsedSet = currentUsedSet.add(r) + if killed { + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r)) + } + killSet = append(killSet, r) + } + } + } + + isIndirect := instr.IsIndirectCall() + call := instr.IsCall() || isIndirect + if call { + addr := RealRegInvalid + if instr.IsIndirectCall() { + addr = a.vs[0].RealReg() + } + a.releaseCallerSavedRegs(addr) + } + + for _, r := range killSet { + s.releaseRealReg(r) + } + a.reals = killSet + + defs := instr.Defs(&a.vs) + switch { + case len(defs) > 1: + // Some instructions define multiple values on real registers. + // E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx. + // + // Note that currently I assume that such instructions define only the pre colored real registers, not the VRegs + // that require allocations. If we need to support such case, we need to add the logic to handle it here, + // though is there any such instruction? 
+ for _, def := range defs { + if !def.IsRealReg() { + panic("BUG: multiple defs should be on real registers") + } + r := def.RealReg() + if s.regsInUse.has(r) { + s.releaseRealReg(r) + } + s.useRealReg(r, def) + } + case len(defs) == 1: + def := defs[0] + if def.IsRealReg() { + r := def.RealReg() + if a.allocatableSet.has(r) { + if s.regsInUse.has(r) { + s.releaseRealReg(r) + } + s.useRealReg(r, def) + } + } else { + vState := s.getVRegState(def.ID()) + r := vState.r + + if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid { + if r != desired { + if (vState.isPhi && vState.defBlk == succ) || + // If this is not a phi and it's already assigned a real reg, + // this value has multiple definitions, hence we cannot assign the desired register. + (!s.regsInUse.has(desired) && r == RealRegInvalid) { + // If the phi value is passed via a real register, we force the value to be in the desired register. + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d is phi and desiredReg=%s\n", def.ID(), a.regInfo.RealRegName(desired)) + } + if r != RealRegInvalid { + // If the value is already in a different real register, we release it to change the state. + // Otherwise, multiple registers might have the same values at the end, which results in + // messing up the merge state reconciliation. + s.releaseRealReg(r) + } + r = desired + s.releaseRealReg(r) + s.useRealReg(r, def) + } + } + } + + // Allocate a new real register if `def` is not currently assigned one. + // It can happen when multiple instructions define the same VReg (e.g. const loads). 
+ if r == RealRegInvalid { + if instr.IsCopy() { + copySrc := instr.Uses(&a.vs)[0].RealReg() + if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) { + r = copySrc + } + } + if r == RealRegInvalid { + typ := def.RegType() + r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid) + } + s.useRealReg(r, def) + } + dr := def.SetRealReg(r) + instr.AssignDef(dr) + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r)) + } + if vState.isPhi { + if vState.desiredLoc.stack() { // Stack based phi value. + f.StoreRegisterAfter(dr, instr) + // Release the real register as it's not used anymore. + s.releaseRealReg(r) + } else { + // Only the register based phis are necessary to track the defining instructions + // since the stack-based phis are already having stores inserted ^. + n := a.phiDefInstListPool.Allocate() + n.instr = instr + n.next = vState.phiDefInstList + n.v = dr + vState.phiDefInstList = n + } + } else { + vState.defInstr = instr + vState.defBlk = blk + } + } + } + if wazevoapi.RegAllocLoggingEnabled { + fmt.Println(instr) + } + pc++ + } + + s.regsInUse.range_(func(allocated RealReg, v VReg) { + currentBlkState.endRegs.add(allocated, v) + }) + + currentBlkState.visited = true + if wazevoapi.RegAllocLoggingEnabled { + currentBlkState.dump(a.regInfo) + } + + // Reset the desired end location. + for _, v := range desiredUpdated { + vs := s.getVRegState(v) + vs.desiredLoc = desiredLocUnspecified + } + a.vs2 = desiredUpdated[:0] + + for i := 0; i < blk.Succs(); i++ { + succ := blk.Succ(i) + if succ == nil { + continue + } + // If the successor is not visited yet, finalize the start state. + a.finalizeStartReg(succ) + } +} + +func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) { + s := &a.state + + for i := 0; i < 64; i++ { + allocated := RealReg(i) + if allocated == addrReg { // If this is the call indirect, we should not touch the addr register. 
+ continue + } + if v := s.regsInUse.get(allocated); v.Valid() { + if v.IsRealReg() { + continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg. + } + if !a.regInfo.CallerSavedRegisters.has(allocated) { + // If this is not a caller-saved register, it is safe to keep it across the call. + continue + } + s.releaseRealReg(allocated) + } + } +} + +func (a *Allocator) fixMergeState(f Function, blk Block) { + preds := blk.Preds() + if preds <= 1 { + return + } + + s := &a.state + + // Restores the state at the beginning of the block. + bID := blk.ID() + blkSt := a.getOrAllocateBlockState(bID) + desiredOccupants := &blkSt.startRegs + aliveOnRegVRegs := make(map[VReg]RealReg) + for i := 0; i < 64; i++ { + r := RealReg(i) + if v := blkSt.startRegs.get(r); v.Valid() { + aliveOnRegVRegs[v] = r + } + } + + if wazevoapi.RegAllocLoggingEnabled { + fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo)) + } + + s.currentBlockID = bID + a.updateLiveInVRState(a.getOrAllocateBlockState(bID)) + + currentOccupants := &a.currentOccupants + for i := 0; i < preds; i++ { + currentOccupants.reset() + if i == blkSt.startFromPredIndex { + continue + } + + currentOccupantsRev := make(map[VReg]RealReg) + pred := blk.Pred(i) + predSt := a.getOrAllocateBlockState(pred.ID()) + for ii := 0; ii < 64; ii++ { + r := RealReg(ii) + if v := predSt.endRegs.get(r); v.Valid() { + if _, ok := aliveOnRegVRegs[v]; !ok { + continue + } + currentOccupants.add(r, v) + currentOccupantsRev[v] = r + } + } + + s.resetAt(predSt) + + // Finds the free registers if any. 
+ intTmp, floatTmp := VRegInvalid, VRegInvalid + if intFree := s.findAllocatable( + a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set, + ); intFree != RealRegInvalid { + intTmp = FromRealReg(intFree, RegTypeInt) + } + if floatFree := s.findAllocatable( + a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set, + ); floatFree != RealRegInvalid { + floatTmp = FromRealReg(floatFree, RegTypeFloat) + } + + if wazevoapi.RegAllocLoggingEnabled { + fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) + } + + for ii := 0; ii < 64; ii++ { + r := RealReg(ii) + desiredVReg := desiredOccupants.get(r) + if !desiredVReg.Valid() { + continue + } + + currentVReg := currentOccupants.get(r) + if desiredVReg.ID() == currentVReg.ID() { + continue + } + + typ := desiredVReg.RegType() + var tmpRealReg VReg + if typ == RegTypeInt { + tmpRealReg = intTmp + } else { + tmpRealReg = floatTmp + } + a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ) + } + } +} + +func (a *Allocator) reconcileEdge(f Function, + r RealReg, + pred Block, + currentOccupants *regInUseSet, + currentOccupantsRev map[VReg]RealReg, + currentVReg, desiredVReg VReg, + freeReg VReg, + typ RegType, +) { + s := &a.state + if currentVReg.Valid() { + // Both are on reg. + er, ok := currentOccupantsRev[desiredVReg] + if !ok { + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n", + desiredVReg.ID(), a.regInfo.RealRegName(r), + ) + } + // This case is that the desired value is on the stack, but currentVReg is on the target register. + // We need to move the current value to the stack, and reload the desired value. + // TODO: we can do better here. 
+ f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion()) + delete(currentOccupantsRev, currentVReg) + + s.getVRegState(desiredVReg.ID()).recordReload(f, pred) + f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) + currentOccupants.add(r, desiredVReg) + currentOccupantsRev[desiredVReg] = r + return + } + + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", + desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), + ) + } + f.SwapBefore( + currentVReg.SetRealReg(r), + desiredVReg.SetRealReg(er), + freeReg, + pred.LastInstrForInsertion(), + ) + s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) + currentOccupantsRev[desiredVReg] = r + currentOccupantsRev[currentVReg] = er + currentOccupants.add(r, desiredVReg) + currentOccupants.add(er, currentVReg) + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) + } + } else { + // Desired is on reg, but currently the target register is not used. 
+ if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d is desired to be on %s, current not used\n", + desiredVReg.ID(), a.regInfo.RealRegName(r), + ) + } + if currentReg, ok := currentOccupantsRev[desiredVReg]; ok { + f.InsertMoveBefore( + FromRealReg(r, typ), + desiredVReg.SetRealReg(currentReg), + pred.LastInstrForInsertion(), + ) + currentOccupants.remove(currentReg) + } else { + s.getVRegState(desiredVReg.ID()).recordReload(f, pred) + f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) + } + currentOccupantsRev[desiredVReg] = r + currentOccupants.add(r, desiredVReg) + } + + if wazevoapi.RegAllocLoggingEnabled { + fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) + } +} + +func (a *Allocator) scheduleSpills(f Function) { + states := a.state.vrStates + for i := 0; i <= states.MaxIDEncountered(); i++ { + vs := states.Get(i) + if vs == nil { + continue + } + if vs.spilled { + a.scheduleSpill(f, vs) + } + } +} + +func (a *Allocator) scheduleSpill(f Function, vs *vrState) { + v := vs.v + // If the value is the phi value, we need to insert a spill after each phi definition. + if vs.isPhi { + for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next { + f.StoreRegisterAfter(defInstr.v, defInstr.instr) + } + return + } + + pos := vs.lca + definingBlk := vs.defBlk + r := RealRegInvalid + if definingBlk == nil { + panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String())) + } + if pos == nil { + panic(fmt.Sprintf("BUG: pos should not be nil for %s. 
This is likley a bug in backend lowering logic", vs.v.String())) + } + + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID()) + } + for pos != definingBlk { + st := a.getOrAllocateBlockState(pos.ID()) + for ii := 0; ii < 64; ii++ { + rr := RealReg(ii) + if st.startRegs.get(rr) == v { + r = rr + // Already in the register, so we can place the spill at the beginning of the block. + break + } + } + + if r != RealRegInvalid { + break + } + + pos = f.Idom(pos) + } + + if pos == definingBlk { + defInstr := vs.defInstr + defInstr.Defs(&a.vs) + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr) + } + f.StoreRegisterAfter(a.vs[0], defInstr) + } else { + // Found an ancestor block that holds the value in the register at the beginning of the block. + // We need to insert a spill before the last use. + first := pos.FirstInstr() + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("schedule spill v%d before %v\n", v.ID(), first) + } + f.StoreRegisterAfter(v.SetRealReg(r), first) + } +} + +// Reset resets the allocator's internal state so that it can be reused. +func (a *Allocator) Reset() { + a.state.reset() + a.blockStates.Reset() + a.phiDefInstListPool.Reset() + a.vs = a.vs[:0] +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go new file mode 100644 index 000000000..e9bf60661 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go @@ -0,0 +1,108 @@ +package regalloc + +import ( + "fmt" + "strings" +) + +// NewRegSet returns a new RegSet with the given registers. +func NewRegSet(regs ...RealReg) RegSet { + var ret RegSet + for _, r := range regs { + ret = ret.add(r) + } + return ret +} + +// RegSet represents a set of registers. 
+type RegSet uint64 + +func (rs RegSet) format(info *RegisterInfo) string { //nolint:unused + var ret []string + for i := 0; i < 64; i++ { + if rs&(1<= 64 { + return rs + } + return rs | 1<v%d)", info.RealRegName(RealReg(i)), vr.ID())) + } + } + return strings.Join(ret, ", ") +} + +func (rs *regInUseSet) has(r RealReg) bool { + if r >= 64 { + return false + } + return rs.set&(1<= 64 { + return VRegInvalid + } + return rs.vrs[r] +} + +func (rs *regInUseSet) remove(r RealReg) { + if r >= 64 { + return + } + rs.set &= ^(1 << uint(r)) + rs.vrs[r] = VRegInvalid +} + +func (rs *regInUseSet) add(r RealReg, vr VReg) { + if r >= 64 { + return + } + rs.set |= 1 << uint(r) + rs.vrs[r] = vr +} + +func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) { + for i := 0; i < 64; i++ { + if rs.set&(1< stackSize { + stackSize = required + } + return stackSize +} + +func (c *callEngine) init() { + stackSize := c.requiredInitialStackSize() + if wazevoapi.StackGuardCheckEnabled { + stackSize += wazevoapi.StackGuardCheckGuardPageSize + } + c.stack = make([]byte, stackSize) + c.stackTop = alignedStackTop(c.stack) + if wazevoapi.StackGuardCheckEnabled { + c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize] + } else { + c.execCtx.stackBottomPtr = &c.stack[0] + } + c.execCtxPtr = uintptr(unsafe.Pointer(&c.execCtx)) +} + +// alignedStackTop returns 16-bytes aligned stack top of given stack. +// 16 bytes should be good for all platform (arm64/amd64). +func alignedStackTop(s []byte) uintptr { + stackAddr := uintptr(unsafe.Pointer(&s[len(s)-1])) + return stackAddr - (stackAddr & (16 - 1)) +} + +// Definition implements api.Function. +func (c *callEngine) Definition() api.FunctionDefinition { + return c.parent.module.Source.FunctionDefinition(c.indexInModule) +} + +// Call implements api.Function. 
+func (c *callEngine) Call(ctx context.Context, params ...uint64) ([]uint64, error) { + if c.requiredParams != len(params) { + return nil, fmt.Errorf("expected %d params, but passed %d", c.requiredParams, len(params)) + } + paramResultSlice := make([]uint64, c.sizeOfParamResultSlice) + copy(paramResultSlice, params) + if err := c.callWithStack(ctx, paramResultSlice); err != nil { + return nil, err + } + return paramResultSlice[:c.numberOfResults], nil +} + +func (c *callEngine) addFrame(builder wasmdebug.ErrorBuilder, addr uintptr) (def api.FunctionDefinition, listener experimental.FunctionListener) { + eng := c.parent.parent.parent + cm := eng.compiledModuleOfAddr(addr) + if cm == nil { + // This case, the module might have been closed and deleted from the engine. + // We fall back to searching the imported modules that can be referenced from this callEngine. + + // First, we check itself. + if checkAddrInBytes(addr, c.parent.parent.executable) { + cm = c.parent.parent + } else { + // Otherwise, search all imported modules. TODO: maybe recursive, but not sure it's useful in practice. + p := c.parent + for i := range p.importedFunctions { + candidate := p.importedFunctions[i].me.parent + if checkAddrInBytes(addr, candidate.executable) { + cm = candidate + break + } + } + } + } + + if cm != nil { + index := cm.functionIndexOf(addr) + def = cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index) + var sources []string + if dw := cm.module.DWARFLines; dw != nil { + sourceOffset := cm.getSourceOffset(addr) + sources = dw.Line(sourceOffset) + } + builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources) + if len(cm.listeners) > 0 { + listener = cm.listeners[index] + } + } + return +} + +// CallWithStack implements api.Function. 
+func (c *callEngine) CallWithStack(ctx context.Context, paramResultStack []uint64) (err error) { + if c.sizeOfParamResultSlice > len(paramResultStack) { + return fmt.Errorf("need %d params, but stack size is %d", c.sizeOfParamResultSlice, len(paramResultStack)) + } + return c.callWithStack(ctx, paramResultStack) +} + +// CallWithStack implements api.Function. +func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint64) (err error) { + snapshotEnabled := ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil + if snapshotEnabled { + ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, c) + } + + if wazevoapi.StackGuardCheckEnabled { + defer func() { + wazevoapi.CheckStackGuardPage(c.stack) + }() + } + + p := c.parent + ensureTermination := p.parent.ensureTermination + m := p.module + if ensureTermination { + select { + case <-ctx.Done(): + // If the provided context is already done, close the module and return the error. + m.CloseWithCtxErr(ctx) + return m.FailIfClosed() + default: + } + } + + var paramResultPtr *uint64 + if len(paramResultStack) > 0 { + paramResultPtr = ¶mResultStack[0] + } + defer func() { + r := recover() + if s, ok := r.(*snapshot); ok { + // A snapshot that wasn't handled was created by a different call engine possibly from a nested wasm invocation, + // let it propagate up to be handled by the caller. 
+ panic(s) + } + if r != nil { + type listenerForAbort struct { + def api.FunctionDefinition + lsn experimental.FunctionListener + } + + var listeners []listenerForAbort + builder := wasmdebug.NewErrorBuilder() + def, lsn := c.addFrame(builder, uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress))) + if lsn != nil { + listeners = append(listeners, listenerForAbort{def, lsn}) + } + returnAddrs := unwindStack( + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), + c.execCtx.framePointerBeforeGoCall, + c.stackTop, + nil, + ) + for _, retAddr := range returnAddrs[:len(returnAddrs)-1] { // the last return addr is the trampoline, so we skip it. + def, lsn = c.addFrame(builder, retAddr) + if lsn != nil { + listeners = append(listeners, listenerForAbort{def, lsn}) + } + } + err = builder.FromRecovered(r) + + for _, lsn := range listeners { + lsn.lsn.Abort(ctx, m, lsn.def, err) + } + } else { + if err != wasmruntime.ErrRuntimeStackOverflow { // Stackoverflow case shouldn't be panic (to avoid extreme stack unwinding). + err = c.parent.module.FailIfClosed() + } + } + + if err != nil { + // Ensures that we can reuse this callEngine even after an error. 
+ c.execCtx.exitCode = wazevoapi.ExitCodeOK + } + }() + + if ensureTermination { + done := m.CloseModuleOnCanceledOrTimeout(ctx) + defer done() + } + + if c.stackTop&(16-1) != 0 { + panic("BUG: stack must be aligned to 16 bytes") + } + entrypoint(c.preambleExecutable, c.executable, c.execCtxPtr, c.parent.opaquePtr, paramResultPtr, c.stackTop) + for { + switch ec := c.execCtx.exitCode; ec & wazevoapi.ExitCodeMask { + case wazevoapi.ExitCodeOK: + return nil + case wazevoapi.ExitCodeGrowStack: + oldsp := uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)) + oldTop := c.stackTop + oldStack := c.stack + var newsp, newfp uintptr + if wazevoapi.StackGuardCheckEnabled { + newsp, newfp, err = c.growStackWithGuarded() + } else { + newsp, newfp, err = c.growStack() + } + if err != nil { + return err + } + adjustClonedStack(oldsp, oldTop, newsp, newfp, c.stackTop) + // Old stack must be alive until the new stack is adjusted. + runtime.KeepAlive(oldStack) + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, newsp, newfp) + case wazevoapi.ExitCodeGrowMemory: + mod := c.callerModuleInstance() + mem := mod.MemoryInstance + s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + argRes := &s[0] + if res, ok := mem.Grow(uint32(*argRes)); !ok { + *argRes = uint64(0xffffffff) // = -1 in signed 32-bit integer. + } else { + *argRes = uint64(res) + calleeOpaque := opaqueViewFromPtr(uintptr(unsafe.Pointer(c.execCtx.callerModuleContextPtr))) + if mod.Source.MemorySection != nil { // Local memory. + putLocalMemory(calleeOpaque, 8 /* local memory begins at 8 */, mem) + } else { + // Imported memory's owner at offset 16 of the callerModuleContextPtr. 
+ opaquePtr := uintptr(binary.LittleEndian.Uint64(calleeOpaque[16:])) + importedMemOwner := opaqueViewFromPtr(opaquePtr) + putLocalMemory(importedMemOwner, 8 /* local memory begins at 8 */, mem) + } + } + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeTableGrow: + mod := c.callerModuleInstance() + s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + tableIndex, num, ref := uint32(s[0]), uint32(s[1]), uintptr(s[2]) + table := mod.Tables[tableIndex] + s[0] = uint64(uint32(int32(table.Grow(num, ref)))) + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeCallGoFunction: + index := wazevoapi.GoFunctionIndexFromExitCode(ec) + f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque) + func() { + if snapshotEnabled { + defer snapshotRecoverFn(c) + } + f.Call(ctx, goCallStackView(c.execCtx.stackPointerBeforeGoCall)) + }() + // Back to the native code. + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeCallGoFunctionWithListener: + index := wazevoapi.GoFunctionIndexFromExitCode(ec) + f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque) + listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque) + s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + // Call Listener.Before. 
+ callerModule := c.callerModuleInstance() + listener := listeners[index] + hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque) + def := hostModule.FunctionDefinition(wasm.Index(index)) + listener.Before(ctx, callerModule, def, s, c.stackIterator(true)) + // Call into the Go function. + func() { + if snapshotEnabled { + defer snapshotRecoverFn(c) + } + f.Call(ctx, s) + }() + // Call Listener.After. + listener.After(ctx, callerModule, def, s) + // Back to the native code. + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeCallGoModuleFunction: + index := wazevoapi.GoFunctionIndexFromExitCode(ec) + f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque) + mod := c.callerModuleInstance() + func() { + if snapshotEnabled { + defer snapshotRecoverFn(c) + } + f.Call(ctx, mod, goCallStackView(c.execCtx.stackPointerBeforeGoCall)) + }() + // Back to the native code. + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeCallGoModuleFunctionWithListener: + index := wazevoapi.GoFunctionIndexFromExitCode(ec) + f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque) + listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque) + s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + // Call Listener.Before. 
+ callerModule := c.callerModuleInstance() + listener := listeners[index] + hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque) + def := hostModule.FunctionDefinition(wasm.Index(index)) + listener.Before(ctx, callerModule, def, s, c.stackIterator(true)) + // Call into the Go function. + func() { + if snapshotEnabled { + defer snapshotRecoverFn(c) + } + f.Call(ctx, callerModule, s) + }() + // Call Listener.After. + listener.After(ctx, callerModule, def, s) + // Back to the native code. + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeCallListenerBefore: + stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + index := wasm.Index(stack[0]) + mod := c.callerModuleInstance() + listener := mod.Engine.(*moduleEngine).listeners[index] + def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount) + listener.Before(ctx, mod, def, stack[1:], c.stackIterator(false)) + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeCallListenerAfter: + stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + index := wasm.Index(stack[0]) + mod := c.callerModuleInstance() + listener := mod.Engine.(*moduleEngine).listeners[index] + def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount) + listener.After(ctx, mod, def, stack[1:]) + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeCheckModuleExitCode: + // Note: this operation must be 
done in Go, not native code. The reason is that + // native code cannot be preempted and that means it can block forever if there are not + // enough OS threads (which we don't have control over). + if err := m.FailIfClosed(); err != nil { + panic(err) + } + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeRefFunc: + mod := c.callerModuleInstance() + s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + funcIndex := wasm.Index(s[0]) + ref := mod.Engine.FunctionInstanceReference(funcIndex) + s[0] = uint64(ref) + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeMemoryWait32: + mod := c.callerModuleInstance() + mem := mod.MemoryInstance + if !mem.Shared { + panic(wasmruntime.ErrRuntimeExpectedSharedMemory) + } + + s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + timeout, exp, addr := int64(s[0]), uint32(s[1]), uintptr(s[2]) + base := uintptr(unsafe.Pointer(&mem.Buffer[0])) + + offset := uint32(addr - base) + res := mem.Wait32(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 { + addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset) + return atomic.LoadUint32((*uint32)(addr)) + }) + s[0] = res + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeMemoryWait64: + mod := c.callerModuleInstance() + mem := mod.MemoryInstance + if !mem.Shared { + panic(wasmruntime.ErrRuntimeExpectedSharedMemory) + } + + s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + 
timeout, exp, addr := int64(s[0]), uint64(s[1]), uintptr(s[2]) + base := uintptr(unsafe.Pointer(&mem.Buffer[0])) + + offset := uint32(addr - base) + res := mem.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 { + addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset) + return atomic.LoadUint64((*uint64)(addr)) + }) + s[0] = uint64(res) + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeMemoryNotify: + mod := c.callerModuleInstance() + mem := mod.MemoryInstance + + s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) + count, addr := uint32(s[0]), s[1] + offset := uint32(uintptr(addr) - uintptr(unsafe.Pointer(&mem.Buffer[0]))) + res := mem.Notify(offset, count) + s[0] = uint64(res) + c.execCtx.exitCode = wazevoapi.ExitCodeOK + afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, + uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) + case wazevoapi.ExitCodeUnreachable: + panic(wasmruntime.ErrRuntimeUnreachable) + case wazevoapi.ExitCodeMemoryOutOfBounds: + panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) + case wazevoapi.ExitCodeTableOutOfBounds: + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + case wazevoapi.ExitCodeIndirectCallNullPointer: + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + case wazevoapi.ExitCodeIndirectCallTypeMismatch: + panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) + case wazevoapi.ExitCodeIntegerOverflow: + panic(wasmruntime.ErrRuntimeIntegerOverflow) + case wazevoapi.ExitCodeIntegerDivisionByZero: + panic(wasmruntime.ErrRuntimeIntegerDivideByZero) + case wazevoapi.ExitCodeInvalidConversionToInteger: + panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) + case wazevoapi.ExitCodeUnalignedAtomic: + 
panic(wasmruntime.ErrRuntimeUnalignedAtomic) + default: + panic("BUG") + } + } +} + +func (c *callEngine) callerModuleInstance() *wasm.ModuleInstance { + return moduleInstanceFromOpaquePtr(c.execCtx.callerModuleContextPtr) +} + +func opaqueViewFromPtr(ptr uintptr) []byte { + var opaque []byte + sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaque)) + sh.Data = ptr + setSliceLimits(sh, 24, 24) + return opaque +} + +const callStackCeiling = uintptr(50000000) // in uint64 (8 bytes) == 400000000 bytes in total == 400mb. + +func (c *callEngine) growStackWithGuarded() (newSP uintptr, newFP uintptr, err error) { + if wazevoapi.StackGuardCheckEnabled { + wazevoapi.CheckStackGuardPage(c.stack) + } + newSP, newFP, err = c.growStack() + if err != nil { + return + } + if wazevoapi.StackGuardCheckEnabled { + c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize] + } + return +} + +// growStack grows the stack, and returns the new stack pointer. +func (c *callEngine) growStack() (newSP, newFP uintptr, err error) { + currentLen := uintptr(len(c.stack)) + if callStackCeiling < currentLen { + err = wasmruntime.ErrRuntimeStackOverflow + return + } + + newLen := 2*currentLen + c.execCtx.stackGrowRequiredSize + 16 // Stack might be aligned to 16 bytes, so add 16 bytes just in case. + newSP, newFP, c.stackTop, c.stack = c.cloneStack(newLen) + c.execCtx.stackBottomPtr = &c.stack[0] + return +} + +func (c *callEngine) cloneStack(l uintptr) (newSP, newFP, newTop uintptr, newStack []byte) { + newStack = make([]byte, l) + + relSp := c.stackTop - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)) + relFp := c.stackTop - c.execCtx.framePointerBeforeGoCall + + // Copy the existing contents in the previous Go-allocated stack into the new one. 
+ var prevStackAligned, newStackAligned []byte + { + sh := (*reflect.SliceHeader)(unsafe.Pointer(&prevStackAligned)) + sh.Data = c.stackTop - relSp + setSliceLimits(sh, relSp, relSp) + } + newTop = alignedStackTop(newStack) + { + newSP = newTop - relSp + newFP = newTop - relFp + sh := (*reflect.SliceHeader)(unsafe.Pointer(&newStackAligned)) + sh.Data = newSP + setSliceLimits(sh, relSp, relSp) + } + copy(newStackAligned, prevStackAligned) + return +} + +func (c *callEngine) stackIterator(onHostCall bool) experimental.StackIterator { + c.stackIteratorImpl.reset(c, onHostCall) + return &c.stackIteratorImpl +} + +// stackIterator implements experimental.StackIterator. +type stackIterator struct { + retAddrs []uintptr + retAddrCursor int + eng *engine + pc uint64 + + currentDef *wasm.FunctionDefinition +} + +func (si *stackIterator) reset(c *callEngine, onHostCall bool) { + if onHostCall { + si.retAddrs = append(si.retAddrs[:0], uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress))) + } else { + si.retAddrs = si.retAddrs[:0] + } + si.retAddrs = unwindStack(uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall, c.stackTop, si.retAddrs) + si.retAddrs = si.retAddrs[:len(si.retAddrs)-1] // the last return addr is the trampoline, so we skip it. + si.retAddrCursor = 0 + si.eng = c.parent.parent.parent +} + +// Next implements the same method as documented on experimental.StackIterator. +func (si *stackIterator) Next() bool { + if si.retAddrCursor >= len(si.retAddrs) { + return false + } + + addr := si.retAddrs[si.retAddrCursor] + cm := si.eng.compiledModuleOfAddr(addr) + if cm != nil { + index := cm.functionIndexOf(addr) + def := cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index) + si.currentDef = def + si.retAddrCursor++ + si.pc = uint64(addr) + return true + } + return false +} + +// ProgramCounter implements the same method as documented on experimental.StackIterator. 
+func (si *stackIterator) ProgramCounter() experimental.ProgramCounter { + return experimental.ProgramCounter(si.pc) +} + +// Function implements the same method as documented on experimental.StackIterator. +func (si *stackIterator) Function() experimental.InternalFunction { + return si +} + +// Definition implements the same method as documented on experimental.InternalFunction. +func (si *stackIterator) Definition() api.FunctionDefinition { + return si.currentDef +} + +// SourceOffsetForPC implements the same method as documented on experimental.InternalFunction. +func (si *stackIterator) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 { + upc := uintptr(pc) + cm := si.eng.compiledModuleOfAddr(upc) + return cm.getSourceOffset(upc) +} + +// snapshot implements experimental.Snapshot +type snapshot struct { + sp, fp, top uintptr + returnAddress *byte + stack []byte + savedRegisters [64][2]uint64 + ret []uint64 + c *callEngine +} + +// Snapshot implements the same method as documented on experimental.Snapshotter. +func (c *callEngine) Snapshot() experimental.Snapshot { + returnAddress := c.execCtx.goCallReturnAddress + oldTop, oldSp := c.stackTop, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)) + newSP, newFP, newTop, newStack := c.cloneStack(uintptr(len(c.stack)) + 16) + adjustClonedStack(oldSp, oldTop, newSP, newFP, newTop) + return &snapshot{ + sp: newSP, + fp: newFP, + top: newTop, + savedRegisters: c.execCtx.savedRegisters, + returnAddress: returnAddress, + stack: newStack, + c: c, + } +} + +// Restore implements the same method as documented on experimental.Snapshot. 
+func (s *snapshot) Restore(ret []uint64) { + s.ret = ret + panic(s) +} + +func (s *snapshot) doRestore() { + spp := *(**uint64)(unsafe.Pointer(&s.sp)) + view := goCallStackView(spp) + copy(view, s.ret) + + c := s.c + c.stack = s.stack + c.stackTop = s.top + ec := &c.execCtx + ec.stackBottomPtr = &c.stack[0] + ec.stackPointerBeforeGoCall = spp + ec.framePointerBeforeGoCall = s.fp + ec.goCallReturnAddress = s.returnAddress + ec.savedRegisters = s.savedRegisters +} + +// Error implements the same method on error. +func (s *snapshot) Error() string { + return "unhandled snapshot restore, this generally indicates restore was called from a different " + + "exported function invocation than snapshot" +} + +func snapshotRecoverFn(c *callEngine) { + if r := recover(); r != nil { + if s, ok := r.(*snapshot); ok && s.c == c { + s.doRestore() + } else { + panic(r) + } + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go new file mode 100644 index 000000000..f02b905fc --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go @@ -0,0 +1,843 @@ +package wazevo + +import ( + "context" + "encoding/hex" + "errors" + "fmt" + "runtime" + "sort" + "sync" + "unsafe" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/frontend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" + "github.com/tetratelabs/wazero/internal/filecache" + "github.com/tetratelabs/wazero/internal/platform" + "github.com/tetratelabs/wazero/internal/version" + "github.com/tetratelabs/wazero/internal/wasm" +) + +type ( + // engine implements wasm.Engine. 
+ engine struct { + wazeroVersion string + fileCache filecache.Cache + compiledModules map[wasm.ModuleID]*compiledModule + // sortedCompiledModules is a list of compiled modules sorted by the initial address of the executable. + sortedCompiledModules []*compiledModule + mux sync.RWMutex + // sharedFunctions is compiled functions shared by all modules. + sharedFunctions *sharedFunctions + // setFinalizer defaults to runtime.SetFinalizer, but overridable for tests. + setFinalizer func(obj interface{}, finalizer interface{}) + + // The followings are reused for compiling shared functions. + machine backend.Machine + be backend.Compiler + } + + sharedFunctions struct { + // memoryGrowExecutable is a compiled trampoline executable for memory.grow builtin function. + memoryGrowExecutable []byte + // checkModuleExitCode is a compiled trampoline executable for checking module instance exit code. This + // is used when ensureTermination is true. + checkModuleExitCode []byte + // stackGrowExecutable is a compiled executable for growing stack builtin function. + stackGrowExecutable []byte + // tableGrowExecutable is a compiled trampoline executable for table.grow builtin function. + tableGrowExecutable []byte + // refFuncExecutable is a compiled trampoline executable for ref.func builtin function. + refFuncExecutable []byte + // memoryWait32Executable is a compiled trampoline executable for memory.wait32 builtin function + memoryWait32Executable []byte + // memoryWait64Executable is a compiled trampoline executable for memory.wait64 builtin function + memoryWait64Executable []byte + // memoryNotifyExecutable is a compiled trampoline executable for memory.notify builtin function + memoryNotifyExecutable []byte + listenerBeforeTrampolines map[*wasm.FunctionType][]byte + listenerAfterTrampolines map[*wasm.FunctionType][]byte + } + + // compiledModule is a compiled variant of a wasm.Module and ready to be used for instantiation. 
+ compiledModule struct { + *executables + // functionOffsets maps a local function index to the offset in the executable. + functionOffsets []int + parent *engine + module *wasm.Module + ensureTermination bool + listeners []experimental.FunctionListener + listenerBeforeTrampolines []*byte + listenerAfterTrampolines []*byte + + // The followings are only available for non host modules. + + offsets wazevoapi.ModuleContextOffsetData + sharedFunctions *sharedFunctions + sourceMap sourceMap + } + + executables struct { + executable []byte + entryPreambles [][]byte + } +) + +// sourceMap is a mapping from the offset of the executable to the offset of the original wasm binary. +type sourceMap struct { + // executableOffsets is a sorted list of offsets of the executable. This is index-correlated with wasmBinaryOffsets, + // in other words executableOffsets[i] is the offset of the executable which corresponds to the offset of a Wasm + // binary pointed by wasmBinaryOffsets[i]. + executableOffsets []uintptr + // wasmBinaryOffsets is the counterpart of executableOffsets. + wasmBinaryOffsets []uint64 +} + +var _ wasm.Engine = (*engine)(nil) + +// NewEngine returns the implementation of wasm.Engine. +func NewEngine(ctx context.Context, _ api.CoreFeatures, fc filecache.Cache) wasm.Engine { + machine := newMachine() + be := backend.NewCompiler(ctx, machine, ssa.NewBuilder()) + e := &engine{ + compiledModules: make(map[wasm.ModuleID]*compiledModule), + setFinalizer: runtime.SetFinalizer, + machine: machine, + be: be, + fileCache: fc, + wazeroVersion: version.GetWazeroVersion(), + } + e.compileSharedFunctions() + return e +} + +// CompileModule implements wasm.Engine. 
+func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (err error) { + if wazevoapi.PerfMapEnabled { + wazevoapi.PerfMap.Lock() + defer wazevoapi.PerfMap.Unlock() + } + + if _, ok, err := e.getCompiledModule(module, listeners, ensureTermination); ok { // cache hit! + return nil + } else if err != nil { + return err + } + + if wazevoapi.DeterministicCompilationVerifierEnabled { + ctx = wazevoapi.NewDeterministicCompilationVerifierContext(ctx, len(module.CodeSection)) + } + cm, err := e.compileModule(ctx, module, listeners, ensureTermination) + if err != nil { + return err + } + if err = e.addCompiledModule(module, cm); err != nil { + return err + } + + if wazevoapi.DeterministicCompilationVerifierEnabled { + for i := 0; i < wazevoapi.DeterministicCompilationVerifyingIter; i++ { + _, err := e.compileModule(ctx, module, listeners, ensureTermination) + if err != nil { + return err + } + } + } + + if len(listeners) > 0 { + cm.listeners = listeners + cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection)) + cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection)) + for i := range module.TypeSection { + typ := &module.TypeSection[i] + before, after := e.getListenerTrampolineForType(typ) + cm.listenerBeforeTrampolines[i] = before + cm.listenerAfterTrampolines[i] = after + } + } + return nil +} + +func (exec *executables) compileEntryPreambles(m *wasm.Module, machine backend.Machine, be backend.Compiler) { + exec.entryPreambles = make([][]byte, len(m.TypeSection)) + for i := range m.TypeSection { + typ := &m.TypeSection[i] + sig := frontend.SignatureForWasmFunctionType(typ) + be.Init() + buf := machine.CompileEntryPreamble(&sig) + executable := mmapExecutable(buf) + exec.entryPreambles[i] = executable + + if wazevoapi.PerfMapEnabled { + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&executable[0])), + uint64(len(executable)), 
fmt.Sprintf("entry_preamble::type=%s", typ.String())) + } + } +} + +func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (*compiledModule, error) { + withListener := len(listeners) > 0 + cm := &compiledModule{ + offsets: wazevoapi.NewModuleContextOffsetData(module, withListener), parent: e, module: module, + ensureTermination: ensureTermination, + executables: &executables{}, + } + + if module.IsHostModule { + return e.compileHostModule(ctx, module, listeners) + } + + importedFns, localFns := int(module.ImportFunctionCount), len(module.FunctionSection) + if localFns == 0 { + return cm, nil + } + + rels := make([]backend.RelocationInfo, 0) + refToBinaryOffset := make([]int, importedFns+localFns) + + if wazevoapi.DeterministicCompilationVerifierEnabled { + // The compilation must be deterministic regardless of the order of functions being compiled. + wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx) + } + + needSourceInfo := module.DWARFLines != nil + + // Creates new compiler instances which are reused for each function. + ssaBuilder := ssa.NewBuilder() + fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo) + machine := newMachine() + be := backend.NewCompiler(ctx, machine, ssaBuilder) + + cm.executables.compileEntryPreambles(module, machine, be) + + totalSize := 0 // Total binary size of the executable. + cm.functionOffsets = make([]int, localFns) + bodies := make([][]byte, localFns) + + // Trampoline relocation related variables. + trampolineInterval, callTrampolineIslandSize, err := machine.CallTrampolineIslandInfo(localFns) + if err != nil { + return nil, err + } + needCallTrampoline := callTrampolineIslandSize > 0 + var callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands. 
+ + for i := range module.CodeSection { + if wazevoapi.DeterministicCompilationVerifierEnabled { + i = wazevoapi.DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx, i) + } + + fidx := wasm.Index(i + importedFns) + + if wazevoapi.NeedFunctionNameInContext { + def := module.FunctionDefinition(fidx) + name := def.DebugName() + if len(def.ExportNames()) > 0 { + name = def.ExportNames()[0] + } + ctx = wazevoapi.SetCurrentFunctionName(ctx, i, fmt.Sprintf("[%d/%d]%s", i, len(module.CodeSection)-1, name)) + } + + needListener := len(listeners) > 0 && listeners[i] != nil + body, relsPerFunc, err := e.compileLocalWasmFunction(ctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener) + if err != nil { + return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err) + } + + // Align 16-bytes boundary. + totalSize = (totalSize + 15) &^ 15 + cm.functionOffsets[i] = totalSize + + if needSourceInfo { + // At the beginning of the function, we add the offset of the function body so that + // we can resolve the source location of the call site of before listener call. + cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)) + cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[i].BodyOffsetInCodeSection) + + for _, info := range be.SourceOffsetInfo() { + cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)+uintptr(info.ExecutableOffset)) + cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset)) + } + } + + fref := frontend.FunctionIndexToFuncRef(fidx) + refToBinaryOffset[fref] = totalSize + + // At this point, relocation offsets are relative to the start of the function body, + // so we adjust it to the start of the executable. 
+ for _, r := range relsPerFunc { + r.Offset += int64(totalSize) + rels = append(rels, r) + } + + bodies[i] = body + totalSize += len(body) + if wazevoapi.PrintMachineCodeHexPerFunction { + fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body)) + } + + if needCallTrampoline { + // If the total size exceeds the trampoline interval, we need to add a trampoline island. + if totalSize/trampolineInterval > len(callTrampolineIslandOffsets) { + callTrampolineIslandOffsets = append(callTrampolineIslandOffsets, totalSize) + totalSize += callTrampolineIslandSize + } + } + } + + // Allocate executable memory and then copy the generated machine code. + executable, err := platform.MmapCodeSegment(totalSize) + if err != nil { + panic(err) + } + cm.executable = executable + + for i, b := range bodies { + offset := cm.functionOffsets[i] + copy(executable[offset:], b) + } + + if wazevoapi.PerfMapEnabled { + wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets) + } + + if needSourceInfo { + for i := range cm.sourceMap.executableOffsets { + cm.sourceMap.executableOffsets[i] += uintptr(unsafe.Pointer(&cm.executable[0])) + } + } + + // Resolve relocations for local function calls. + if len(rels) > 0 { + machine.ResolveRelocations(refToBinaryOffset, executable, rels, callTrampolineIslandOffsets) + } + + if runtime.GOARCH == "arm64" { + // On arm64, we cannot give all of rwx at the same time, so we change it to exec. 
+ if err = platform.MprotectRX(executable); err != nil { + return nil, err + } + } + cm.sharedFunctions = e.sharedFunctions + e.setFinalizer(cm.executables, executablesFinalizer) + return cm, nil +} + +func (e *engine) compileLocalWasmFunction( + ctx context.Context, + module *wasm.Module, + localFunctionIndex wasm.Index, + fe *frontend.Compiler, + ssaBuilder ssa.Builder, + be backend.Compiler, + needListener bool, +) (body []byte, rels []backend.RelocationInfo, err error) { + typIndex := module.FunctionSection[localFunctionIndex] + typ := &module.TypeSection[typIndex] + codeSeg := &module.CodeSection[localFunctionIndex] + + // Initializes both frontend and backend compilers. + fe.Init(localFunctionIndex, typIndex, typ, codeSeg.LocalTypes, codeSeg.Body, needListener, codeSeg.BodyOffsetInCodeSection) + be.Init() + + // Lower Wasm to SSA. + fe.LowerToSSA() + if wazevoapi.PrintSSA && wazevoapi.PrintEnabledIndex(ctx) { + fmt.Printf("[[[SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format()) + } + + if wazevoapi.DeterministicCompilationVerifierEnabled { + wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "SSA", ssaBuilder.Format()) + } + + // Run SSA-level optimization passes. + ssaBuilder.RunPasses() + + if wazevoapi.PrintOptimizedSSA && wazevoapi.PrintEnabledIndex(ctx) { + fmt.Printf("[[[Optimized SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format()) + } + + if wazevoapi.DeterministicCompilationVerifierEnabled { + wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "Optimized SSA", ssaBuilder.Format()) + } + + // Now our ssaBuilder contains the necessary information to further lower them to + // machine code. + original, rels, err := be.Compile(ctx) + if err != nil { + return nil, nil, fmt.Errorf("ssa->machine code: %v", err) + } + + // TODO: optimize as zero copy. 
+ copied := make([]byte, len(original)) + copy(copied, original) + return copied, rels, nil +} + +func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener) (*compiledModule, error) { + machine := newMachine() + be := backend.NewCompiler(ctx, machine, ssa.NewBuilder()) + + num := len(module.CodeSection) + cm := &compiledModule{module: module, listeners: listeners, executables: &executables{}} + cm.functionOffsets = make([]int, num) + totalSize := 0 // Total binary size of the executable. + bodies := make([][]byte, num) + var sig ssa.Signature + for i := range module.CodeSection { + totalSize = (totalSize + 15) &^ 15 + cm.functionOffsets[i] = totalSize + + typIndex := module.FunctionSection[i] + typ := &module.TypeSection[typIndex] + + // We can relax until the index fits together in ExitCode as we do in wazevoapi.ExitCodeCallGoModuleFunctionWithIndex. + // However, 1 << 16 should be large enough for a real use case. + const hostFunctionNumMaximum = 1 << 16 + if i >= hostFunctionNumMaximum { + return nil, fmt.Errorf("too many host functions (maximum %d)", hostFunctionNumMaximum) + } + + sig.ID = ssa.SignatureID(typIndex) // This is important since we reuse the `machine` which caches the ABI based on the SignatureID. + sig.Params = append(sig.Params[:0], + ssa.TypeI64, // First argument must be exec context. + ssa.TypeI64, // The second argument is the moduleContextOpaque of this host module. 
+ ) + for _, t := range typ.Params { + sig.Params = append(sig.Params, frontend.WasmTypeToSSAType(t)) + } + + sig.Results = sig.Results[:0] + for _, t := range typ.Results { + sig.Results = append(sig.Results, frontend.WasmTypeToSSAType(t)) + } + + c := &module.CodeSection[i] + if c.GoFunc == nil { + panic("BUG: GoFunc must be set for host module") + } + + withListener := len(listeners) > 0 && listeners[i] != nil + var exitCode wazevoapi.ExitCode + fn := c.GoFunc + switch fn.(type) { + case api.GoModuleFunction: + exitCode = wazevoapi.ExitCodeCallGoModuleFunctionWithIndex(i, withListener) + case api.GoFunction: + exitCode = wazevoapi.ExitCodeCallGoFunctionWithIndex(i, withListener) + } + + be.Init() + machine.CompileGoFunctionTrampoline(exitCode, &sig, true) + if err := be.Finalize(ctx); err != nil { + return nil, err + } + body := be.Buf() + + if wazevoapi.PerfMapEnabled { + name := module.FunctionDefinition(wasm.Index(i)).DebugName() + wazevoapi.PerfMap.AddModuleEntry(i, + int64(totalSize), + uint64(len(body)), + fmt.Sprintf("trampoline:%s", name)) + } + + // TODO: optimize as zero copy. + copied := make([]byte, len(body)) + copy(copied, body) + bodies[i] = copied + totalSize += len(body) + } + + if totalSize == 0 { + // Empty module. + return cm, nil + } + + // Allocate executable memory and then copy the generated machine code. + executable, err := platform.MmapCodeSegment(totalSize) + if err != nil { + panic(err) + } + cm.executable = executable + + for i, b := range bodies { + offset := cm.functionOffsets[i] + copy(executable[offset:], b) + } + + if wazevoapi.PerfMapEnabled { + wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets) + } + + if runtime.GOARCH == "arm64" { + // On arm64, we cannot give all of rwx at the same time, so we change it to exec. 
+ if err = platform.MprotectRX(executable); err != nil { + return nil, err + } + } + e.setFinalizer(cm.executables, executablesFinalizer) + return cm, nil +} + +// Close implements wasm.Engine. +func (e *engine) Close() (err error) { + e.mux.Lock() + defer e.mux.Unlock() + e.sortedCompiledModules = nil + e.compiledModules = nil + e.sharedFunctions = nil + return nil +} + +// CompiledModuleCount implements wasm.Engine. +func (e *engine) CompiledModuleCount() uint32 { + e.mux.RLock() + defer e.mux.RUnlock() + return uint32(len(e.compiledModules)) +} + +// DeleteCompiledModule implements wasm.Engine. +func (e *engine) DeleteCompiledModule(m *wasm.Module) { + e.mux.Lock() + defer e.mux.Unlock() + cm, ok := e.compiledModules[m.ID] + if ok { + if len(cm.executable) > 0 { + e.deleteCompiledModuleFromSortedList(cm) + } + delete(e.compiledModules, m.ID) + } +} + +func (e *engine) addCompiledModuleToSortedList(cm *compiledModule) { + ptr := uintptr(unsafe.Pointer(&cm.executable[0])) + + index := sort.Search(len(e.sortedCompiledModules), func(i int) bool { + return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr + }) + e.sortedCompiledModules = append(e.sortedCompiledModules, nil) + copy(e.sortedCompiledModules[index+1:], e.sortedCompiledModules[index:]) + e.sortedCompiledModules[index] = cm +} + +func (e *engine) deleteCompiledModuleFromSortedList(cm *compiledModule) { + ptr := uintptr(unsafe.Pointer(&cm.executable[0])) + + index := sort.Search(len(e.sortedCompiledModules), func(i int) bool { + return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr + }) + if index >= len(e.sortedCompiledModules) { + return + } + copy(e.sortedCompiledModules[index:], e.sortedCompiledModules[index+1:]) + e.sortedCompiledModules = e.sortedCompiledModules[:len(e.sortedCompiledModules)-1] +} + +func (e *engine) compiledModuleOfAddr(addr uintptr) *compiledModule { + e.mux.RLock() + defer e.mux.RUnlock() + + index := 
sort.Search(len(e.sortedCompiledModules), func(i int) bool { + return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) > addr + }) + index -= 1 + if index < 0 { + return nil + } + candidate := e.sortedCompiledModules[index] + if checkAddrInBytes(addr, candidate.executable) { + // If a module is already deleted, the found module may have been wrong. + return candidate + } + return nil +} + +func checkAddrInBytes(addr uintptr, b []byte) bool { + return uintptr(unsafe.Pointer(&b[0])) <= addr && addr <= uintptr(unsafe.Pointer(&b[len(b)-1])) +} + +// NewModuleEngine implements wasm.Engine. +func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.ModuleEngine, error) { + me := &moduleEngine{} + + // Note: imported functions are resolved in moduleEngine.ResolveImportedFunction. + me.importedFunctions = make([]importedFunction, m.ImportFunctionCount) + + compiled, ok := e.getCompiledModuleFromMemory(m) + if !ok { + return nil, errors.New("source module must be compiled before instantiation") + } + me.parent = compiled + me.module = mi + me.listeners = compiled.listeners + + if m.IsHostModule { + me.opaque = buildHostModuleOpaque(m, compiled.listeners) + me.opaquePtr = &me.opaque[0] + } else { + if size := compiled.offsets.TotalSize; size != 0 { + opaque := newAlignedOpaque(size) + me.opaque = opaque + me.opaquePtr = &opaque[0] + } + } + return me, nil +} + +func (e *engine) compileSharedFunctions() { + e.sharedFunctions = &sharedFunctions{ + listenerBeforeTrampolines: make(map[*wasm.FunctionType][]byte), + listenerAfterTrampolines: make(map[*wasm.FunctionType][]byte), + } + + e.be.Init() + { + src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{ + Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32}, + Results: []ssa.Type{ssa.TypeI32}, + }, false) + e.sharedFunctions.memoryGrowExecutable = mmapExecutable(src) + if wazevoapi.PerfMapEnabled { + exe := 
e.sharedFunctions.memoryGrowExecutable + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_grow_trampoline") + } + } + + e.be.Init() + { + src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{ + Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */}, + Results: []ssa.Type{ssa.TypeI32}, + }, false) + e.sharedFunctions.tableGrowExecutable = mmapExecutable(src) + if wazevoapi.PerfMapEnabled { + exe := e.sharedFunctions.tableGrowExecutable + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "table_grow_trampoline") + } + } + + e.be.Init() + { + src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{ + Params: []ssa.Type{ssa.TypeI32 /* exec context */}, + Results: []ssa.Type{ssa.TypeI32}, + }, false) + e.sharedFunctions.checkModuleExitCode = mmapExecutable(src) + if wazevoapi.PerfMapEnabled { + exe := e.sharedFunctions.checkModuleExitCode + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "check_module_exit_code_trampoline") + } + } + + e.be.Init() + { + src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{ + Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* function index */}, + Results: []ssa.Type{ssa.TypeI64}, // returns the function reference. 
+ }, false) + e.sharedFunctions.refFuncExecutable = mmapExecutable(src) + if wazevoapi.PerfMapEnabled { + exe := e.sharedFunctions.refFuncExecutable + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "ref_func_trampoline") + } + } + + e.be.Init() + { + src := e.machine.CompileStackGrowCallSequence() + e.sharedFunctions.stackGrowExecutable = mmapExecutable(src) + if wazevoapi.PerfMapEnabled { + exe := e.sharedFunctions.stackGrowExecutable + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "stack_grow_trampoline") + } + } + + e.be.Init() + { + src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{ + // exec context, timeout, expected, addr + Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, + // Returns the status. + Results: []ssa.Type{ssa.TypeI32}, + }, false) + e.sharedFunctions.memoryWait32Executable = mmapExecutable(src) + if wazevoapi.PerfMapEnabled { + exe := e.sharedFunctions.memoryWait32Executable + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait32_trampoline") + } + } + + e.be.Init() + { + src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{ + // exec context, timeout, expected, addr + Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64}, + // Returns the status. + Results: []ssa.Type{ssa.TypeI32}, + }, false) + e.sharedFunctions.memoryWait64Executable = mmapExecutable(src) + if wazevoapi.PerfMapEnabled { + exe := e.sharedFunctions.memoryWait64Executable + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait64_trampoline") + } + } + + e.be.Init() + { + src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{ + // exec context, count, addr + Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, + // Returns the number notified. 
+ Results: []ssa.Type{ssa.TypeI32}, + }, false) + e.sharedFunctions.memoryNotifyExecutable = mmapExecutable(src) + if wazevoapi.PerfMapEnabled { + exe := e.sharedFunctions.memoryNotifyExecutable + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_notify_trampoline") + } + } + + e.setFinalizer(e.sharedFunctions, sharedFunctionsFinalizer) +} + +func sharedFunctionsFinalizer(sf *sharedFunctions) { + if err := platform.MunmapCodeSegment(sf.memoryGrowExecutable); err != nil { + panic(err) + } + if err := platform.MunmapCodeSegment(sf.checkModuleExitCode); err != nil { + panic(err) + } + if err := platform.MunmapCodeSegment(sf.stackGrowExecutable); err != nil { + panic(err) + } + if err := platform.MunmapCodeSegment(sf.tableGrowExecutable); err != nil { + panic(err) + } + if err := platform.MunmapCodeSegment(sf.refFuncExecutable); err != nil { + panic(err) + } + if err := platform.MunmapCodeSegment(sf.memoryWait32Executable); err != nil { + panic(err) + } + if err := platform.MunmapCodeSegment(sf.memoryWait64Executable); err != nil { + panic(err) + } + if err := platform.MunmapCodeSegment(sf.memoryNotifyExecutable); err != nil { + panic(err) + } + for _, f := range sf.listenerBeforeTrampolines { + if err := platform.MunmapCodeSegment(f); err != nil { + panic(err) + } + } + for _, f := range sf.listenerAfterTrampolines { + if err := platform.MunmapCodeSegment(f); err != nil { + panic(err) + } + } + + sf.memoryGrowExecutable = nil + sf.checkModuleExitCode = nil + sf.stackGrowExecutable = nil + sf.tableGrowExecutable = nil + sf.refFuncExecutable = nil + sf.memoryWait32Executable = nil + sf.memoryWait64Executable = nil + sf.memoryNotifyExecutable = nil + sf.listenerBeforeTrampolines = nil + sf.listenerAfterTrampolines = nil +} + +func executablesFinalizer(exec *executables) { + if len(exec.executable) > 0 { + if err := platform.MunmapCodeSegment(exec.executable); err != nil { + panic(err) + } + } + exec.executable = nil + + for _, f := 
range exec.entryPreambles { + if err := platform.MunmapCodeSegment(f); err != nil { + panic(err) + } + } + exec.entryPreambles = nil +} + +func mmapExecutable(src []byte) []byte { + executable, err := platform.MmapCodeSegment(len(src)) + if err != nil { + panic(err) + } + + copy(executable, src) + + if runtime.GOARCH == "arm64" { + // On arm64, we cannot give all of rwx at the same time, so we change it to exec. + if err = platform.MprotectRX(executable); err != nil { + panic(err) + } + } + return executable +} + +func (cm *compiledModule) functionIndexOf(addr uintptr) wasm.Index { + addr -= uintptr(unsafe.Pointer(&cm.executable[0])) + offset := cm.functionOffsets + index := sort.Search(len(offset), func(i int) bool { + return offset[i] > int(addr) + }) + index-- + if index < 0 { + panic("BUG") + } + return wasm.Index(index) +} + +func (e *engine) getListenerTrampolineForType(functionType *wasm.FunctionType) (before, after *byte) { + e.mux.Lock() + defer e.mux.Unlock() + + beforeBuf, ok := e.sharedFunctions.listenerBeforeTrampolines[functionType] + afterBuf := e.sharedFunctions.listenerAfterTrampolines[functionType] + if ok { + return &beforeBuf[0], &afterBuf[0] + } + + beforeSig, afterSig := frontend.SignatureForListener(functionType) + + e.be.Init() + buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false) + beforeBuf = mmapExecutable(buf) + + e.be.Init() + buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false) + afterBuf = mmapExecutable(buf) + + e.sharedFunctions.listenerBeforeTrampolines[functionType] = beforeBuf + e.sharedFunctions.listenerAfterTrampolines[functionType] = afterBuf + return &beforeBuf[0], &afterBuf[0] +} + +func (cm *compiledModule) getSourceOffset(pc uintptr) uint64 { + offsets := cm.sourceMap.executableOffsets + if len(offsets) == 0 { + return 0 + } + + index := sort.Search(len(offsets), func(i int) bool { + return offsets[i] >= pc + }) + + index-- + if 
index < 0 { + return 0 + } + return cm.sourceMap.wasmBinaryOffsets[index] +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go new file mode 100644 index 000000000..f7c0450ae --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go @@ -0,0 +1,296 @@ +package wazevo + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/binary" + "fmt" + "hash/crc32" + "io" + "runtime" + "unsafe" + + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" + "github.com/tetratelabs/wazero/internal/filecache" + "github.com/tetratelabs/wazero/internal/platform" + "github.com/tetratelabs/wazero/internal/u32" + "github.com/tetratelabs/wazero/internal/u64" + "github.com/tetratelabs/wazero/internal/wasm" +) + +var crc = crc32.MakeTable(crc32.Castagnoli) + +// fileCacheKey returns a key for the file cache. +// In order to avoid collisions with the existing compiler, we do not use m.ID directly, +// but instead we rehash it with magic. 
+func fileCacheKey(m *wasm.Module) (ret filecache.Key) { + s := sha256.New() + s.Write(m.ID[:]) + s.Write(magic) + s.Sum(ret[:0]) + return +} + +func (e *engine) addCompiledModule(module *wasm.Module, cm *compiledModule) (err error) { + e.addCompiledModuleToMemory(module, cm) + if !module.IsHostModule && e.fileCache != nil { + err = e.addCompiledModuleToCache(module, cm) + } + return +} + +func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (cm *compiledModule, ok bool, err error) { + cm, ok = e.getCompiledModuleFromMemory(module) + if ok { + return + } + cm, ok, err = e.getCompiledModuleFromCache(module) + if ok { + cm.parent = e + cm.module = module + cm.sharedFunctions = e.sharedFunctions + cm.ensureTermination = ensureTermination + cm.offsets = wazevoapi.NewModuleContextOffsetData(module, len(listeners) > 0) + if len(listeners) > 0 { + cm.listeners = listeners + cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection)) + cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection)) + for i := range module.TypeSection { + typ := &module.TypeSection[i] + before, after := e.getListenerTrampolineForType(typ) + cm.listenerBeforeTrampolines[i] = before + cm.listenerAfterTrampolines[i] = after + } + } + e.addCompiledModuleToMemory(module, cm) + ssaBuilder := ssa.NewBuilder() + machine := newMachine() + be := backend.NewCompiler(context.Background(), machine, ssaBuilder) + cm.executables.compileEntryPreambles(module, machine, be) + + // Set the finalizer. 
+ e.setFinalizer(cm.executables, executablesFinalizer) + } + return +} + +func (e *engine) addCompiledModuleToMemory(m *wasm.Module, cm *compiledModule) { + e.mux.Lock() + defer e.mux.Unlock() + e.compiledModules[m.ID] = cm + if len(cm.executable) > 0 { + e.addCompiledModuleToSortedList(cm) + } +} + +func (e *engine) getCompiledModuleFromMemory(module *wasm.Module) (cm *compiledModule, ok bool) { + e.mux.RLock() + defer e.mux.RUnlock() + cm, ok = e.compiledModules[module.ID] + return +} + +func (e *engine) addCompiledModuleToCache(module *wasm.Module, cm *compiledModule) (err error) { + if e.fileCache == nil || module.IsHostModule { + return + } + err = e.fileCache.Add(fileCacheKey(module), serializeCompiledModule(e.wazeroVersion, cm)) + return +} + +func (e *engine) getCompiledModuleFromCache(module *wasm.Module) (cm *compiledModule, hit bool, err error) { + if e.fileCache == nil || module.IsHostModule { + return + } + + // Check if the entries exist in the external cache. + var cached io.ReadCloser + cached, hit, err = e.fileCache.Get(fileCacheKey(module)) + if !hit || err != nil { + return + } + + // Otherwise, we hit the cache on external cache. + // We retrieve *code structures from `cached`. + var staleCache bool + // Note: cached.Close is ensured to be called in deserializeCodes. + cm, staleCache, err = deserializeCompiledModule(e.wazeroVersion, cached) + if err != nil { + hit = false + return + } else if staleCache { + return nil, false, e.fileCache.Delete(fileCacheKey(module)) + } + return +} + +var magic = []byte{'W', 'A', 'Z', 'E', 'V', 'O'} + +func serializeCompiledModule(wazeroVersion string, cm *compiledModule) io.Reader { + buf := bytes.NewBuffer(nil) + // First 6 byte: WAZEVO header. + buf.Write(magic) + // Next 1 byte: length of version: + buf.WriteByte(byte(len(wazeroVersion))) + // Version of wazero. + buf.WriteString(wazeroVersion) + // Number of *code (== locally defined functions in the module): 4 bytes. 
+ buf.Write(u32.LeBytes(uint32(len(cm.functionOffsets)))) + for _, offset := range cm.functionOffsets { + // The offset of this function in the executable (8 bytes). + buf.Write(u64.LeBytes(uint64(offset))) + } + // The length of code segment (8 bytes). + buf.Write(u64.LeBytes(uint64(len(cm.executable)))) + // Append the native code. + buf.Write(cm.executable) + // Append checksum. + checksum := crc32.Checksum(cm.executable, crc) + buf.Write(u32.LeBytes(checksum)) + if sm := cm.sourceMap; len(sm.executableOffsets) > 0 { + buf.WriteByte(1) // indicates that source map is present. + l := len(sm.wasmBinaryOffsets) + buf.Write(u64.LeBytes(uint64(l))) + executableAddr := uintptr(unsafe.Pointer(&cm.executable[0])) + for i := 0; i < l; i++ { + buf.Write(u64.LeBytes(sm.wasmBinaryOffsets[i])) + // executableOffsets is absolute address, so we need to subtract executableAddr. + buf.Write(u64.LeBytes(uint64(sm.executableOffsets[i] - executableAddr))) + } + } else { + buf.WriteByte(0) // indicates that source map is not present. + } + return bytes.NewReader(buf.Bytes()) +} + +func deserializeCompiledModule(wazeroVersion string, reader io.ReadCloser) (cm *compiledModule, staleCache bool, err error) { + defer reader.Close() + cacheHeaderSize := len(magic) + 1 /* version size */ + len(wazeroVersion) + 4 /* number of functions */ + + // Read the header before the native code. + header := make([]byte, cacheHeaderSize) + n, err := reader.Read(header) + if err != nil { + return nil, false, fmt.Errorf("compilationcache: error reading header: %v", err) + } + + if n != cacheHeaderSize { + return nil, false, fmt.Errorf("compilationcache: invalid header length: %d", n) + } + + if !bytes.Equal(header[:len(magic)], magic) { + return nil, false, fmt.Errorf( + "compilationcache: invalid magic number: got %s but want %s", magic, header[:len(magic)]) + } + + // Check the version compatibility. 
+ versionSize := int(header[len(magic)]) + + cachedVersionBegin, cachedVersionEnd := len(magic)+1, len(magic)+1+versionSize + if cachedVersionEnd >= len(header) { + staleCache = true + return + } else if cachedVersion := string(header[cachedVersionBegin:cachedVersionEnd]); cachedVersion != wazeroVersion { + staleCache = true + return + } + + functionsNum := binary.LittleEndian.Uint32(header[len(header)-4:]) + cm = &compiledModule{functionOffsets: make([]int, functionsNum), executables: &executables{}} + + var eightBytes [8]byte + for i := uint32(0); i < functionsNum; i++ { + // Read the offset of each function in the executable. + var offset uint64 + if offset, err = readUint64(reader, &eightBytes); err != nil { + err = fmt.Errorf("compilationcache: error reading func[%d] executable offset: %v", i, err) + return + } + cm.functionOffsets[i] = int(offset) + } + + executableLen, err := readUint64(reader, &eightBytes) + if err != nil { + err = fmt.Errorf("compilationcache: error reading executable size: %v", err) + return + } + + if executableLen > 0 { + executable, err := platform.MmapCodeSegment(int(executableLen)) + if err != nil { + err = fmt.Errorf("compilationcache: error mmapping executable (len=%d): %v", executableLen, err) + return nil, false, err + } + + _, err = io.ReadFull(reader, executable) + if err != nil { + err = fmt.Errorf("compilationcache: error reading executable (len=%d): %v", executableLen, err) + return nil, false, err + } + + expected := crc32.Checksum(executable, crc) + if _, err = io.ReadFull(reader, eightBytes[:4]); err != nil { + return nil, false, fmt.Errorf("compilationcache: could not read checksum: %v", err) + } else if checksum := binary.LittleEndian.Uint32(eightBytes[:4]); expected != checksum { + return nil, false, fmt.Errorf("compilationcache: checksum mismatch (expected %d, got %d)", expected, checksum) + } + + if runtime.GOARCH == "arm64" { + // On arm64, we cannot give all of rwx at the same time, so we change it to exec. 
+ if err = platform.MprotectRX(executable); err != nil { + return nil, false, err + } + } + cm.executable = executable + } + + if _, err := io.ReadFull(reader, eightBytes[:1]); err != nil { + return nil, false, fmt.Errorf("compilationcache: error reading source map presence: %v", err) + } + + if eightBytes[0] == 1 { + sm := &cm.sourceMap + sourceMapLen, err := readUint64(reader, &eightBytes) + if err != nil { + err = fmt.Errorf("compilationcache: error reading source map length: %v", err) + return nil, false, err + } + executableOffset := uintptr(unsafe.Pointer(&cm.executable[0])) + for i := uint64(0); i < sourceMapLen; i++ { + wasmBinaryOffset, err := readUint64(reader, &eightBytes) + if err != nil { + err = fmt.Errorf("compilationcache: error reading source map[%d] wasm binary offset: %v", i, err) + return nil, false, err + } + executableRelativeOffset, err := readUint64(reader, &eightBytes) + if err != nil { + err = fmt.Errorf("compilationcache: error reading source map[%d] executable offset: %v", i, err) + return nil, false, err + } + sm.wasmBinaryOffsets = append(sm.wasmBinaryOffsets, wasmBinaryOffset) + // executableOffsets is absolute address, so we need to add executableOffset. + sm.executableOffsets = append(sm.executableOffsets, uintptr(executableRelativeOffset)+executableOffset) + } + } + return +} + +// readUint64 strictly reads an uint64 in little-endian byte order, using the +// given array as a buffer. This returns io.EOF if less than 8 bytes were read. +func readUint64(reader io.Reader, b *[8]byte) (uint64, error) { + s := b[0:8] + n, err := reader.Read(s) + if err != nil { + return 0, err + } else if n < 8 { // more strict than reader.Read + return 0, io.EOF + } + + // Read the u64 from the underlying buffer. 
+ ret := binary.LittleEndian.Uint64(s) + return ret, nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_amd64.go new file mode 100644 index 000000000..18f60af3a --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_amd64.go @@ -0,0 +1,15 @@ +//go:build amd64 && !tinygo + +package wazevo + +import _ "unsafe" + +// entrypoint is implemented by the backend. +// +//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.entrypoint +func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr) + +// entrypoint is implemented by the backend. +// +//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.afterGoFunctionCallEntrypoint +func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_arm64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_arm64.go new file mode 100644 index 000000000..e16d64f65 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_arm64.go @@ -0,0 +1,15 @@ +//go:build arm64 && !tinygo + +package wazevo + +import _ "unsafe" + +// entrypoint is implemented by the backend. +// +//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.entrypoint +func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr) + +// entrypoint is implemented by the backend. 
+// +//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.afterGoFunctionCallEntrypoint +func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_others.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_others.go new file mode 100644 index 000000000..8f9d64b2b --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_others.go @@ -0,0 +1,15 @@ +//go:build (!arm64 && !amd64) || tinygo + +package wazevo + +import ( + "runtime" +) + +func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr) { + panic(runtime.GOARCH) +} + +func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) { + panic(runtime.GOARCH) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go new file mode 100644 index 000000000..873a35a55 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go @@ -0,0 +1,594 @@ +// Package frontend implements the translation of WebAssembly to SSA IR using the ssa package. +package frontend + +import ( + "bytes" + "math" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" + "github.com/tetratelabs/wazero/internal/wasm" +) + +// Compiler is in charge of lowering Wasm to SSA IR, and does the optimization +// on top of it in architecture-independent way. +type Compiler struct { + // Per-module data that is used across all functions. 
+ + m *wasm.Module + offset *wazevoapi.ModuleContextOffsetData + // ssaBuilder is a ssa.Builder used by this frontend. + ssaBuilder ssa.Builder + signatures map[*wasm.FunctionType]*ssa.Signature + listenerSignatures map[*wasm.FunctionType][2]*ssa.Signature + memoryGrowSig ssa.Signature + memoryWait32Sig ssa.Signature + memoryWait64Sig ssa.Signature + memoryNotifySig ssa.Signature + checkModuleExitCodeSig ssa.Signature + tableGrowSig ssa.Signature + refFuncSig ssa.Signature + memmoveSig ssa.Signature + ensureTermination bool + + // Followings are reset by per function. + + // wasmLocalToVariable maps the index (considered as wasm.Index of locals) + // to the corresponding ssa.Variable. + wasmLocalToVariable [] /* local index to */ ssa.Variable + wasmLocalFunctionIndex wasm.Index + wasmFunctionTypeIndex wasm.Index + wasmFunctionTyp *wasm.FunctionType + wasmFunctionLocalTypes []wasm.ValueType + wasmFunctionBody []byte + wasmFunctionBodyOffsetInCodeSection uint64 + memoryBaseVariable, memoryLenVariable ssa.Variable + needMemory bool + memoryShared bool + globalVariables []ssa.Variable + globalVariablesTypes []ssa.Type + mutableGlobalVariablesIndexes []wasm.Index // index to ^. + needListener bool + needSourceOffsetInfo bool + // br is reused during lowering. + br *bytes.Reader + loweringState loweringState + + knownSafeBounds [] /* ssa.ValueID to */ knownSafeBound + knownSafeBoundsSet []ssa.ValueID + + knownSafeBoundsAtTheEndOfBlocks [] /* ssa.BlockID to */ knownSafeBoundsAtTheEndOfBlock + varLengthKnownSafeBoundWithIDPool wazevoapi.VarLengthPool[knownSafeBoundWithID] + + execCtxPtrValue, moduleCtxPtrValue ssa.Value + + // Following are reused for the known safe bounds analysis. + + pointers []int + bounds [][]knownSafeBoundWithID +} + +type ( + // knownSafeBound represents a known safe bound for a value. + knownSafeBound struct { + // bound is a constant upper bound for the value. + bound uint64 + // absoluteAddr is the absolute address of the value. 
+ absoluteAddr ssa.Value + } + // knownSafeBoundWithID is a knownSafeBound with the ID of the value. + knownSafeBoundWithID struct { + knownSafeBound + id ssa.ValueID + } + knownSafeBoundsAtTheEndOfBlock = wazevoapi.VarLength[knownSafeBoundWithID] +) + +var knownSafeBoundsAtTheEndOfBlockNil = wazevoapi.NewNilVarLength[knownSafeBoundWithID]() + +// NewFrontendCompiler returns a frontend Compiler. +func NewFrontendCompiler(m *wasm.Module, ssaBuilder ssa.Builder, offset *wazevoapi.ModuleContextOffsetData, ensureTermination bool, listenerOn bool, sourceInfo bool) *Compiler { + c := &Compiler{ + m: m, + ssaBuilder: ssaBuilder, + br: bytes.NewReader(nil), + offset: offset, + ensureTermination: ensureTermination, + needSourceOffsetInfo: sourceInfo, + varLengthKnownSafeBoundWithIDPool: wazevoapi.NewVarLengthPool[knownSafeBoundWithID](), + } + c.declareSignatures(listenerOn) + return c +} + +func (c *Compiler) declareSignatures(listenerOn bool) { + m := c.m + c.signatures = make(map[*wasm.FunctionType]*ssa.Signature, len(m.TypeSection)+2) + if listenerOn { + c.listenerSignatures = make(map[*wasm.FunctionType][2]*ssa.Signature, len(m.TypeSection)) + } + for i := range m.TypeSection { + wasmSig := &m.TypeSection[i] + sig := SignatureForWasmFunctionType(wasmSig) + sig.ID = ssa.SignatureID(i) + c.signatures[wasmSig] = &sig + c.ssaBuilder.DeclareSignature(&sig) + + if listenerOn { + beforeSig, afterSig := SignatureForListener(wasmSig) + beforeSig.ID = ssa.SignatureID(i) + ssa.SignatureID(len(m.TypeSection)) + afterSig.ID = ssa.SignatureID(i) + ssa.SignatureID(len(m.TypeSection))*2 + c.listenerSignatures[wasmSig] = [2]*ssa.Signature{beforeSig, afterSig} + c.ssaBuilder.DeclareSignature(beforeSig) + c.ssaBuilder.DeclareSignature(afterSig) + } + } + + begin := ssa.SignatureID(len(m.TypeSection)) + if listenerOn { + begin *= 3 + } + c.memoryGrowSig = ssa.Signature{ + ID: begin, + // Takes execution context and the page size to grow. 
+ Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32}, + // Returns the previous page size. + Results: []ssa.Type{ssa.TypeI32}, + } + c.ssaBuilder.DeclareSignature(&c.memoryGrowSig) + + c.checkModuleExitCodeSig = ssa.Signature{ + ID: c.memoryGrowSig.ID + 1, + // Only takes execution context. + Params: []ssa.Type{ssa.TypeI64}, + } + c.ssaBuilder.DeclareSignature(&c.checkModuleExitCodeSig) + + c.tableGrowSig = ssa.Signature{ + ID: c.checkModuleExitCodeSig.ID + 1, + Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */}, + // Returns the previous size. + Results: []ssa.Type{ssa.TypeI32}, + } + c.ssaBuilder.DeclareSignature(&c.tableGrowSig) + + c.refFuncSig = ssa.Signature{ + ID: c.tableGrowSig.ID + 1, + Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* func index */}, + // Returns the function reference. + Results: []ssa.Type{ssa.TypeI64}, + } + c.ssaBuilder.DeclareSignature(&c.refFuncSig) + + c.memmoveSig = ssa.Signature{ + ID: c.refFuncSig.ID + 1, + // dst, src, and the byte count. + Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64}, + } + + c.ssaBuilder.DeclareSignature(&c.memmoveSig) + + c.memoryWait32Sig = ssa.Signature{ + ID: c.memmoveSig.ID + 1, + // exec context, timeout, expected, addr + Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, + // Returns the status. + Results: []ssa.Type{ssa.TypeI32}, + } + c.ssaBuilder.DeclareSignature(&c.memoryWait32Sig) + + c.memoryWait64Sig = ssa.Signature{ + ID: c.memoryWait32Sig.ID + 1, + // exec context, timeout, expected, addr + Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64}, + // Returns the status. + Results: []ssa.Type{ssa.TypeI32}, + } + c.ssaBuilder.DeclareSignature(&c.memoryWait64Sig) + + c.memoryNotifySig = ssa.Signature{ + ID: c.memoryWait64Sig.ID + 1, + // exec context, count, addr + Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, + // Returns the number notified. 
+ Results: []ssa.Type{ssa.TypeI32}, + } + c.ssaBuilder.DeclareSignature(&c.memoryNotifySig) +} + +// SignatureForWasmFunctionType returns the ssa.Signature for the given wasm.FunctionType. +func SignatureForWasmFunctionType(typ *wasm.FunctionType) ssa.Signature { + sig := ssa.Signature{ + // +2 to pass moduleContextPtr and executionContextPtr. See the inline comment LowerToSSA. + Params: make([]ssa.Type, len(typ.Params)+2), + Results: make([]ssa.Type, len(typ.Results)), + } + sig.Params[0] = executionContextPtrTyp + sig.Params[1] = moduleContextPtrTyp + for j, typ := range typ.Params { + sig.Params[j+2] = WasmTypeToSSAType(typ) + } + for j, typ := range typ.Results { + sig.Results[j] = WasmTypeToSSAType(typ) + } + return sig +} + +// Init initializes the state of frontendCompiler and make it ready for a next function. +func (c *Compiler) Init(idx, typIndex wasm.Index, typ *wasm.FunctionType, localTypes []wasm.ValueType, body []byte, needListener bool, bodyOffsetInCodeSection uint64) { + c.ssaBuilder.Init(c.signatures[typ]) + c.loweringState.reset() + + c.wasmFunctionTypeIndex = typIndex + c.wasmLocalFunctionIndex = idx + c.wasmFunctionTyp = typ + c.wasmFunctionLocalTypes = localTypes + c.wasmFunctionBody = body + c.wasmFunctionBodyOffsetInCodeSection = bodyOffsetInCodeSection + c.needListener = needListener + c.clearSafeBounds() + c.varLengthKnownSafeBoundWithIDPool.Reset() + c.knownSafeBoundsAtTheEndOfBlocks = c.knownSafeBoundsAtTheEndOfBlocks[:0] +} + +// Note: this assumes 64-bit platform (I believe we won't have 32-bit backend ;)). +const executionContextPtrTyp, moduleContextPtrTyp = ssa.TypeI64, ssa.TypeI64 + +// LowerToSSA lowers the current function to SSA function which will be held by ssaBuilder. +// After calling this, the caller will be able to access the SSA info in *Compiler.ssaBuilder. +// +// Note that this only does the naive lowering, and do not do any optimization, instead the caller is expected to do so. 
+func (c *Compiler) LowerToSSA() { + builder := c.ssaBuilder + + // Set up the entry block. + entryBlock := builder.AllocateBasicBlock() + builder.SetCurrentBlock(entryBlock) + + // Functions always take two parameters in addition to Wasm-level parameters: + // + // 1. executionContextPtr: pointer to the *executionContext in wazevo package. + // This will be used to exit the execution in the face of trap, plus used for host function calls. + // + // 2. moduleContextPtr: pointer to the *moduleContextOpaque in wazevo package. + // This will be used to access memory, etc. Also, this will be used during host function calls. + // + // Note: it's clear that sometimes a function won't need them. For example, + // if the function doesn't trap and doesn't make function call, then + // we might be able to eliminate the parameter. However, if that function + // can be called via call_indirect, then we cannot eliminate because the + // signature won't match with the expected one. + // TODO: maybe there's some way to do this optimization without glitches, but so far I have no clue about the feasibility. + // + // Note: In Wasmtime or many other runtimes, moduleContextPtr is called "vmContext". Also note that `moduleContextPtr` + // is wazero-specific since other runtimes can naturally use the OS-level signal to do this job thanks to the fact that + // they can use native stack vs wazero cannot use Go-routine stack and have to use Go-runtime allocated []byte as a stack. 
+ c.execCtxPtrValue = entryBlock.AddParam(builder, executionContextPtrTyp) + c.moduleCtxPtrValue = entryBlock.AddParam(builder, moduleContextPtrTyp) + builder.AnnotateValue(c.execCtxPtrValue, "exec_ctx") + builder.AnnotateValue(c.moduleCtxPtrValue, "module_ctx") + + for i, typ := range c.wasmFunctionTyp.Params { + st := WasmTypeToSSAType(typ) + variable := builder.DeclareVariable(st) + value := entryBlock.AddParam(builder, st) + builder.DefineVariable(variable, value, entryBlock) + c.setWasmLocalVariable(wasm.Index(i), variable) + } + c.declareWasmLocals(entryBlock) + c.declareNecessaryVariables() + + c.lowerBody(entryBlock) +} + +// localVariable returns the SSA variable for the given Wasm local index. +func (c *Compiler) localVariable(index wasm.Index) ssa.Variable { + return c.wasmLocalToVariable[index] +} + +func (c *Compiler) setWasmLocalVariable(index wasm.Index, variable ssa.Variable) { + idx := int(index) + if idx >= len(c.wasmLocalToVariable) { + c.wasmLocalToVariable = append(c.wasmLocalToVariable, make([]ssa.Variable, idx+1-len(c.wasmLocalToVariable))...) + } + c.wasmLocalToVariable[idx] = variable +} + +// declareWasmLocals declares the SSA variables for the Wasm locals. 
+func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) { + localCount := wasm.Index(len(c.wasmFunctionTyp.Params)) + for i, typ := range c.wasmFunctionLocalTypes { + st := WasmTypeToSSAType(typ) + variable := c.ssaBuilder.DeclareVariable(st) + c.setWasmLocalVariable(wasm.Index(i)+localCount, variable) + + zeroInst := c.ssaBuilder.AllocateInstruction() + switch st { + case ssa.TypeI32: + zeroInst.AsIconst32(0) + case ssa.TypeI64: + zeroInst.AsIconst64(0) + case ssa.TypeF32: + zeroInst.AsF32const(0) + case ssa.TypeF64: + zeroInst.AsF64const(0) + case ssa.TypeV128: + zeroInst.AsVconst(0, 0) + default: + panic("TODO: " + wasm.ValueTypeName(typ)) + } + + c.ssaBuilder.InsertInstruction(zeroInst) + value := zeroInst.Return() + c.ssaBuilder.DefineVariable(variable, value, entry) + } +} + +func (c *Compiler) declareNecessaryVariables() { + if c.needMemory = c.m.MemorySection != nil; c.needMemory { + c.memoryShared = c.m.MemorySection.IsShared + } else if c.needMemory = c.m.ImportMemoryCount > 0; c.needMemory { + for _, imp := range c.m.ImportSection { + if imp.Type == wasm.ExternTypeMemory { + c.memoryShared = imp.DescMem.IsShared + break + } + } + } + + if c.needMemory { + c.memoryBaseVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64) + c.memoryLenVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64) + } + + c.globalVariables = c.globalVariables[:0] + c.mutableGlobalVariablesIndexes = c.mutableGlobalVariablesIndexes[:0] + c.globalVariablesTypes = c.globalVariablesTypes[:0] + for _, imp := range c.m.ImportSection { + if imp.Type == wasm.ExternTypeGlobal { + desc := imp.DescGlobal + c.declareWasmGlobal(desc.ValType, desc.Mutable) + } + } + for _, g := range c.m.GlobalSection { + desc := g.Type + c.declareWasmGlobal(desc.ValType, desc.Mutable) + } + + // TODO: add tables. 
+} + +func (c *Compiler) declareWasmGlobal(typ wasm.ValueType, mutable bool) { + var st ssa.Type + switch typ { + case wasm.ValueTypeI32: + st = ssa.TypeI32 + case wasm.ValueTypeI64, + // Both externref and funcref are represented as I64 since we only support 64-bit platforms. + wasm.ValueTypeExternref, wasm.ValueTypeFuncref: + st = ssa.TypeI64 + case wasm.ValueTypeF32: + st = ssa.TypeF32 + case wasm.ValueTypeF64: + st = ssa.TypeF64 + case wasm.ValueTypeV128: + st = ssa.TypeV128 + default: + panic("TODO: " + wasm.ValueTypeName(typ)) + } + v := c.ssaBuilder.DeclareVariable(st) + index := wasm.Index(len(c.globalVariables)) + c.globalVariables = append(c.globalVariables, v) + c.globalVariablesTypes = append(c.globalVariablesTypes, st) + if mutable { + c.mutableGlobalVariablesIndexes = append(c.mutableGlobalVariablesIndexes, index) + } +} + +// WasmTypeToSSAType converts wasm.ValueType to ssa.Type. +func WasmTypeToSSAType(vt wasm.ValueType) ssa.Type { + switch vt { + case wasm.ValueTypeI32: + return ssa.TypeI32 + case wasm.ValueTypeI64, + // Both externref and funcref are represented as I64 since we only support 64-bit platforms. + wasm.ValueTypeExternref, wasm.ValueTypeFuncref: + return ssa.TypeI64 + case wasm.ValueTypeF32: + return ssa.TypeF32 + case wasm.ValueTypeF64: + return ssa.TypeF64 + case wasm.ValueTypeV128: + return ssa.TypeV128 + default: + panic("TODO: " + wasm.ValueTypeName(vt)) + } +} + +// addBlockParamsFromWasmTypes adds the block parameters to the given block. +func (c *Compiler) addBlockParamsFromWasmTypes(tps []wasm.ValueType, blk ssa.BasicBlock) { + for _, typ := range tps { + st := WasmTypeToSSAType(typ) + blk.AddParam(c.ssaBuilder, st) + } +} + +// formatBuilder outputs the constructed SSA function as a string with a source information. +func (c *Compiler) formatBuilder() string { + return c.ssaBuilder.Format() +} + +// SignatureForListener returns the signatures for the listener functions. 
+func SignatureForListener(wasmSig *wasm.FunctionType) (*ssa.Signature, *ssa.Signature) { + beforeSig := &ssa.Signature{} + beforeSig.Params = make([]ssa.Type, len(wasmSig.Params)+2) + beforeSig.Params[0] = ssa.TypeI64 // Execution context. + beforeSig.Params[1] = ssa.TypeI32 // Function index. + for i, p := range wasmSig.Params { + beforeSig.Params[i+2] = WasmTypeToSSAType(p) + } + afterSig := &ssa.Signature{} + afterSig.Params = make([]ssa.Type, len(wasmSig.Results)+2) + afterSig.Params[0] = ssa.TypeI64 // Execution context. + afterSig.Params[1] = ssa.TypeI32 // Function index. + for i, p := range wasmSig.Results { + afterSig.Params[i+2] = WasmTypeToSSAType(p) + } + return beforeSig, afterSig +} + +// isBoundSafe returns true if the given value is known to be safe to access up to the given bound. +func (c *Compiler) getKnownSafeBound(v ssa.ValueID) *knownSafeBound { + if int(v) >= len(c.knownSafeBounds) { + return nil + } + return &c.knownSafeBounds[v] +} + +// recordKnownSafeBound records the given safe bound for the given value. +func (c *Compiler) recordKnownSafeBound(v ssa.ValueID, safeBound uint64, absoluteAddr ssa.Value) { + if int(v) >= len(c.knownSafeBounds) { + c.knownSafeBounds = append(c.knownSafeBounds, make([]knownSafeBound, v+1)...) + } + + if exiting := c.knownSafeBounds[v]; exiting.bound == 0 { + c.knownSafeBounds[v] = knownSafeBound{ + bound: safeBound, + absoluteAddr: absoluteAddr, + } + c.knownSafeBoundsSet = append(c.knownSafeBoundsSet, v) + } else if safeBound > exiting.bound { + c.knownSafeBounds[v].bound = safeBound + } +} + +// clearSafeBounds clears the known safe bounds. +func (c *Compiler) clearSafeBounds() { + for _, v := range c.knownSafeBoundsSet { + ptr := &c.knownSafeBounds[v] + ptr.bound = 0 + ptr.absoluteAddr = ssa.ValueInvalid + } + c.knownSafeBoundsSet = c.knownSafeBoundsSet[:0] +} + +// resetAbsoluteAddressInSafeBounds resets the absolute addresses recorded in the known safe bounds. 
// resetAbsoluteAddressInSafeBounds resets the absolute addresses recorded in the known safe bounds.
func (c *Compiler) resetAbsoluteAddressInSafeBounds() {
	// Only slots listed in knownSafeBoundsSet can carry an absolute address,
	// so walking the set (rather than the whole table) is sufficient.
	for _, v := range c.knownSafeBoundsSet {
		ptr := &c.knownSafeBounds[v]
		ptr.absoluteAddr = ssa.ValueInvalid
	}
}

// valid reports whether k holds a usable bound: a nil receiver or a zero
// bound both mean "no known safe bound".
func (k *knownSafeBound) valid() bool {
	return k != nil && k.bound > 0
}

// allocateVarLengthValues allocates an ssa.Values with capacity _cap from the
// builder's var-length pool and appends the given values to it.
func (c *Compiler) allocateVarLengthValues(_cap int, vs ...ssa.Value) ssa.Values {
	builder := c.ssaBuilder
	pool := builder.VarLengthPool()
	args := pool.Allocate(_cap)
	args = args.Append(builder.VarLengthPool(), vs...)
	return args
}

// finalizeKnownSafeBoundsAtTheEndOfBlock snapshots the currently-known safe
// bounds as the end-of-block state for the given basic block, sorted by value
// ID, then clears the working set so the next block starts fresh.
func (c *Compiler) finalizeKnownSafeBoundsAtTheEndOfBlock(bID ssa.BasicBlockID) {
	_bID := int(bID)
	// Grow the per-block table on demand, filling new slots with the nil sentinel.
	if l := len(c.knownSafeBoundsAtTheEndOfBlocks); _bID >= l {
		c.knownSafeBoundsAtTheEndOfBlocks = append(c.knownSafeBoundsAtTheEndOfBlocks,
			make([]knownSafeBoundsAtTheEndOfBlock, _bID+1-len(c.knownSafeBoundsAtTheEndOfBlocks))...)
		for i := l; i < len(c.knownSafeBoundsAtTheEndOfBlocks); i++ {
			c.knownSafeBoundsAtTheEndOfBlocks[i] = knownSafeBoundsAtTheEndOfBlockNil
		}
	}
	p := &c.varLengthKnownSafeBoundWithIDPool
	size := len(c.knownSafeBoundsSet)
	allocated := c.varLengthKnownSafeBoundWithIDPool.Allocate(size)
	// Sort the known safe bounds by the value ID so that we can use the intersection algorithm in initializeCurrentBlockKnownBounds.
	sortSSAValueIDs(c.knownSafeBoundsSet)
	for _, vID := range c.knownSafeBoundsSet {
		kb := c.knownSafeBounds[vID]
		allocated = allocated.Append(p, knownSafeBoundWithID{
			knownSafeBound: kb,
			id:             vID,
		})
	}
	c.knownSafeBoundsAtTheEndOfBlocks[bID] = allocated
	c.clearSafeBounds()
}

// initializeCurrentBlockKnownBounds seeds the known safe bounds for the block
// currently being compiled from the end-of-block snapshots of its predecessors:
//   - zero predecessors: nothing is known;
//   - one predecessor: inherit its bounds (absolute addresses only if the block is sealed);
//   - multiple predecessors: intersect the snapshots, keeping the minimum bound per value.
func (c *Compiler) initializeCurrentBlockKnownBounds() {
	currentBlk := c.ssaBuilder.CurrentBlock()
	switch preds := currentBlk.Preds(); preds {
	case 0:
	case 1:
		pred := currentBlk.Pred(0).ID()
		for _, kb := range c.getKnownSafeBoundsAtTheEndOfBlocks(pred).View() {
			// Unless the block is sealed, we cannot assume the absolute address is valid:
			// later we might add another predecessor that has no visibility of that value.
			addr := ssa.ValueInvalid
			if currentBlk.Sealed() {
				addr = kb.absoluteAddr
			}
			c.recordKnownSafeBound(kb.id, kb.bound, addr)
		}
	default:
		// One sorted bound list plus one cursor per predecessor.
		c.pointers = c.pointers[:0]
		c.bounds = c.bounds[:0]
		for i := 0; i < preds; i++ {
			c.bounds = append(c.bounds, c.getKnownSafeBoundsAtTheEndOfBlocks(currentBlk.Pred(i).ID()).View())
			c.pointers = append(c.pointers, 0)
		}

		// If there are multiple predecessors, we need to find the intersection of the known safe bounds.
		// The snapshots are sorted by value ID (see finalizeKnownSafeBoundsAtTheEndOfBlock),
		// so this is a k-way sorted-list intersection driven by the smallest current ID.

	outer:
		for {
			smallestID := ssa.ValueID(math.MaxUint32)
			for i, ptr := range c.pointers {
				// An exhausted list means no further ID can be common to all.
				if ptr >= len(c.bounds[i]) {
					break outer
				}
				cb := &c.bounds[i][ptr]
				if id := cb.id; id < smallestID {
					smallestID = cb.id
				}
			}

			// Check if current elements are the same across all lists.
			same := true
			minBound := uint64(math.MaxUint64)
			for i := 0; i < preds; i++ {
				cb := &c.bounds[i][c.pointers[i]]
				if cb.id != smallestID {
					same = false
					break
				} else {
					if cb.bound < minBound {
						minBound = cb.bound
					}
				}
			}

			if same { // All elements are the same.
				// Absolute address cannot be used in the intersection since the value might be only defined in one of the predecessors.
				c.recordKnownSafeBound(smallestID, minBound, ssa.ValueInvalid)
			}

			// Move pointer(s) for the smallest ID forward (if same, move all).
			for i := 0; i < preds; i++ {
				cb := &c.bounds[i][c.pointers[i]]
				if cb.id == smallestID {
					c.pointers[i]++
				}
			}
		}
	}
}

// getKnownSafeBoundsAtTheEndOfBlocks returns the end-of-block bounds snapshot
// for the given block, or the nil sentinel if none was ever recorded.
func (c *Compiler) getKnownSafeBoundsAtTheEndOfBlocks(id ssa.BasicBlockID) knownSafeBoundsAtTheEndOfBlock {
	if int(id) >= len(c.knownSafeBoundsAtTheEndOfBlocks) {
		return knownSafeBoundsAtTheEndOfBlockNil
	}
	return c.knownSafeBoundsAtTheEndOfBlocks[id]
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go
new file mode 100644
index 000000000..5096a6365
--- /dev/null
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go
@@ -0,0 +1,4268 @@
package frontend

import (
	"encoding/binary"
	"fmt"
	"math"
	"runtime"
	"strings"

	"github.com/tetratelabs/wazero/api"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
	"github.com/tetratelabs/wazero/internal/leb128"
	"github.com/tetratelabs/wazero/internal/wasm"
)

type (
	// loweringState is used to keep the state of lowering.
	loweringState struct {
		// values holds the values on the Wasm stack.
		values []ssa.Value
		// controlFrames is the stack of enclosing control frames (function, block, loop, if).
		controlFrames []controlFrame
		// unreachable is true while the lowering position is inside unreachable
		// code; opcode handlers skip their work while it is set.
		unreachable bool
		// unreachableDepth — NOTE(review): appears to count control frames entered
		// while unreachable; confirm at the sites that modify it.
		unreachableDepth int
		// tmpForBrTable is scratch space, presumably for decoding br_table
		// targets — TODO confirm at the use site.
		tmpForBrTable []uint32
		// pc is the current read offset within the Wasm function body.
		pc int
	}
	controlFrame struct {
		kind controlFrameKind
		// originalStackLen holds the number of values on the Wasm stack
		// when start executing this control frame minus params for the block.
		originalStackLenWithoutParam int
		// blk is the loop header if this is loop, and is the else-block if this is an if frame.
		blk,
		// followingBlock is the basic block we enter if we reach "end" of block.
		followingBlock ssa.BasicBlock
		// blockType is the Wasm type of this block (params/results).
		blockType *wasm.FunctionType
		// clonedArgs hold the arguments to Else block.
		clonedArgs ssa.Values
	}

	controlFrameKind byte
)

// String implements fmt.Stringer for debugging.
+func (l *loweringState) String() string { + var str []string + for _, v := range l.values { + str = append(str, fmt.Sprintf("v%v", v.ID())) + } + var frames []string + for i := range l.controlFrames { + frames = append(frames, l.controlFrames[i].kind.String()) + } + return fmt.Sprintf("\n\tunreachable=%v(depth=%d)\n\tstack: %s\n\tcontrol frames: %s", + l.unreachable, l.unreachableDepth, + strings.Join(str, ", "), + strings.Join(frames, ", "), + ) +} + +const ( + controlFrameKindFunction = iota + 1 + controlFrameKindLoop + controlFrameKindIfWithElse + controlFrameKindIfWithoutElse + controlFrameKindBlock +) + +// String implements fmt.Stringer for debugging. +func (k controlFrameKind) String() string { + switch k { + case controlFrameKindFunction: + return "function" + case controlFrameKindLoop: + return "loop" + case controlFrameKindIfWithElse: + return "if_with_else" + case controlFrameKindIfWithoutElse: + return "if_without_else" + case controlFrameKindBlock: + return "block" + default: + panic(k) + } +} + +// isLoop returns true if this is a loop frame. +func (ctrl *controlFrame) isLoop() bool { + return ctrl.kind == controlFrameKindLoop +} + +// reset resets the state of loweringState for reuse. +func (l *loweringState) reset() { + l.values = l.values[:0] + l.controlFrames = l.controlFrames[:0] + l.pc = 0 + l.unreachable = false + l.unreachableDepth = 0 +} + +func (l *loweringState) peek() (ret ssa.Value) { + tail := len(l.values) - 1 + return l.values[tail] +} + +func (l *loweringState) pop() (ret ssa.Value) { + tail := len(l.values) - 1 + ret = l.values[tail] + l.values = l.values[:tail] + return +} + +func (l *loweringState) push(ret ssa.Value) { + l.values = append(l.values, ret) +} + +func (c *Compiler) nPeekDup(n int) ssa.Values { + if n == 0 { + return ssa.ValuesNil + } + + l := c.state() + tail := len(l.values) + + args := c.allocateVarLengthValues(n) + args = args.Append(c.ssaBuilder.VarLengthPool(), l.values[tail-n:tail]...) 
+ return args +} + +func (l *loweringState) ctrlPop() (ret controlFrame) { + tail := len(l.controlFrames) - 1 + ret = l.controlFrames[tail] + l.controlFrames = l.controlFrames[:tail] + return +} + +func (l *loweringState) ctrlPush(ret controlFrame) { + l.controlFrames = append(l.controlFrames, ret) +} + +func (l *loweringState) ctrlPeekAt(n int) (ret *controlFrame) { + tail := len(l.controlFrames) - 1 + return &l.controlFrames[tail-n] +} + +// lowerBody lowers the body of the Wasm function to the SSA form. +func (c *Compiler) lowerBody(entryBlk ssa.BasicBlock) { + c.ssaBuilder.Seal(entryBlk) + + if c.needListener { + c.callListenerBefore() + } + + // Pushes the empty control frame which corresponds to the function return. + c.loweringState.ctrlPush(controlFrame{ + kind: controlFrameKindFunction, + blockType: c.wasmFunctionTyp, + followingBlock: c.ssaBuilder.ReturnBlock(), + }) + + for c.loweringState.pc < len(c.wasmFunctionBody) { + blkBeforeLowering := c.ssaBuilder.CurrentBlock() + c.lowerCurrentOpcode() + blkAfterLowering := c.ssaBuilder.CurrentBlock() + if blkBeforeLowering != blkAfterLowering { + // In Wasm, once a block exits, that means we've done compiling the block. + // Therefore, we finalize the known bounds at the end of the block for the exiting block. + c.finalizeKnownSafeBoundsAtTheEndOfBlock(blkBeforeLowering.ID()) + // After that, we initialize the known bounds for the new compilation target block. 
+ c.initializeCurrentBlockKnownBounds() + } + } +} + +func (c *Compiler) state() *loweringState { + return &c.loweringState +} + +func (c *Compiler) lowerCurrentOpcode() { + op := c.wasmFunctionBody[c.loweringState.pc] + + if c.needSourceOffsetInfo { + c.ssaBuilder.SetCurrentSourceOffset( + ssa.SourceOffset(c.loweringState.pc) + ssa.SourceOffset(c.wasmFunctionBodyOffsetInCodeSection), + ) + } + + builder := c.ssaBuilder + state := c.state() + switch op { + case wasm.OpcodeI32Const: + c := c.readI32s() + if state.unreachable { + break + } + + iconst := builder.AllocateInstruction().AsIconst32(uint32(c)).Insert(builder) + value := iconst.Return() + state.push(value) + case wasm.OpcodeI64Const: + c := c.readI64s() + if state.unreachable { + break + } + iconst := builder.AllocateInstruction().AsIconst64(uint64(c)).Insert(builder) + value := iconst.Return() + state.push(value) + case wasm.OpcodeF32Const: + f32 := c.readF32() + if state.unreachable { + break + } + f32const := builder.AllocateInstruction(). + AsF32const(f32). + Insert(builder). + Return() + state.push(f32const) + case wasm.OpcodeF64Const: + f64 := c.readF64() + if state.unreachable { + break + } + f64const := builder.AllocateInstruction(). + AsF64const(f64). + Insert(builder). 
+ Return() + state.push(f64const) + case wasm.OpcodeI32Add, wasm.OpcodeI64Add: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + iadd := builder.AllocateInstruction() + iadd.AsIadd(x, y) + builder.InsertInstruction(iadd) + value := iadd.Return() + state.push(value) + case wasm.OpcodeI32Sub, wasm.OpcodeI64Sub: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + isub := builder.AllocateInstruction() + isub.AsIsub(x, y) + builder.InsertInstruction(isub) + value := isub.Return() + state.push(value) + case wasm.OpcodeF32Add, wasm.OpcodeF64Add: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + iadd := builder.AllocateInstruction() + iadd.AsFadd(x, y) + builder.InsertInstruction(iadd) + value := iadd.Return() + state.push(value) + case wasm.OpcodeI32Mul, wasm.OpcodeI64Mul: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + imul := builder.AllocateInstruction() + imul.AsImul(x, y) + builder.InsertInstruction(imul) + value := imul.Return() + state.push(value) + case wasm.OpcodeF32Sub, wasm.OpcodeF64Sub: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + isub := builder.AllocateInstruction() + isub.AsFsub(x, y) + builder.InsertInstruction(isub) + value := isub.Return() + state.push(value) + case wasm.OpcodeF32Mul, wasm.OpcodeF64Mul: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + isub := builder.AllocateInstruction() + isub.AsFmul(x, y) + builder.InsertInstruction(isub) + value := isub.Return() + state.push(value) + case wasm.OpcodeF32Div, wasm.OpcodeF64Div: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + isub := builder.AllocateInstruction() + isub.AsFdiv(x, y) + builder.InsertInstruction(isub) + value := isub.Return() + state.push(value) + case wasm.OpcodeF32Max, wasm.OpcodeF64Max: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + isub := builder.AllocateInstruction() + isub.AsFmax(x, 
y) + builder.InsertInstruction(isub) + value := isub.Return() + state.push(value) + case wasm.OpcodeF32Min, wasm.OpcodeF64Min: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + isub := builder.AllocateInstruction() + isub.AsFmin(x, y) + builder.InsertInstruction(isub) + value := isub.Return() + state.push(value) + case wasm.OpcodeI64Extend8S: + if state.unreachable { + break + } + c.insertIntegerExtend(true, 8, 64) + case wasm.OpcodeI64Extend16S: + if state.unreachable { + break + } + c.insertIntegerExtend(true, 16, 64) + case wasm.OpcodeI64Extend32S, wasm.OpcodeI64ExtendI32S: + if state.unreachable { + break + } + c.insertIntegerExtend(true, 32, 64) + case wasm.OpcodeI64ExtendI32U: + if state.unreachable { + break + } + c.insertIntegerExtend(false, 32, 64) + case wasm.OpcodeI32Extend8S: + if state.unreachable { + break + } + c.insertIntegerExtend(true, 8, 32) + case wasm.OpcodeI32Extend16S: + if state.unreachable { + break + } + c.insertIntegerExtend(true, 16, 32) + case wasm.OpcodeI32Eqz, wasm.OpcodeI64Eqz: + if state.unreachable { + break + } + x := state.pop() + zero := builder.AllocateInstruction() + if op == wasm.OpcodeI32Eqz { + zero.AsIconst32(0) + } else { + zero.AsIconst64(0) + } + builder.InsertInstruction(zero) + icmp := builder.AllocateInstruction(). + AsIcmp(x, zero.Return(), ssa.IntegerCmpCondEqual). + Insert(builder). 
+ Return() + state.push(icmp) + case wasm.OpcodeI32Eq, wasm.OpcodeI64Eq: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondEqual) + case wasm.OpcodeI32Ne, wasm.OpcodeI64Ne: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondNotEqual) + case wasm.OpcodeI32LtS, wasm.OpcodeI64LtS: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondSignedLessThan) + case wasm.OpcodeI32LtU, wasm.OpcodeI64LtU: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondUnsignedLessThan) + case wasm.OpcodeI32GtS, wasm.OpcodeI64GtS: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondSignedGreaterThan) + case wasm.OpcodeI32GtU, wasm.OpcodeI64GtU: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondUnsignedGreaterThan) + case wasm.OpcodeI32LeS, wasm.OpcodeI64LeS: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondSignedLessThanOrEqual) + case wasm.OpcodeI32LeU, wasm.OpcodeI64LeU: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondUnsignedLessThanOrEqual) + case wasm.OpcodeI32GeS, wasm.OpcodeI64GeS: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondSignedGreaterThanOrEqual) + case wasm.OpcodeI32GeU, wasm.OpcodeI64GeU: + if state.unreachable { + break + } + c.insertIcmp(ssa.IntegerCmpCondUnsignedGreaterThanOrEqual) + + case wasm.OpcodeF32Eq, wasm.OpcodeF64Eq: + if state.unreachable { + break + } + c.insertFcmp(ssa.FloatCmpCondEqual) + case wasm.OpcodeF32Ne, wasm.OpcodeF64Ne: + if state.unreachable { + break + } + c.insertFcmp(ssa.FloatCmpCondNotEqual) + case wasm.OpcodeF32Lt, wasm.OpcodeF64Lt: + if state.unreachable { + break + } + c.insertFcmp(ssa.FloatCmpCondLessThan) + case wasm.OpcodeF32Gt, wasm.OpcodeF64Gt: + if state.unreachable { + break + } + c.insertFcmp(ssa.FloatCmpCondGreaterThan) + case wasm.OpcodeF32Le, wasm.OpcodeF64Le: + if state.unreachable { + break + } + c.insertFcmp(ssa.FloatCmpCondLessThanOrEqual) + case 
wasm.OpcodeF32Ge, wasm.OpcodeF64Ge: + if state.unreachable { + break + } + c.insertFcmp(ssa.FloatCmpCondGreaterThanOrEqual) + case wasm.OpcodeF32Neg, wasm.OpcodeF64Neg: + if state.unreachable { + break + } + x := state.pop() + v := builder.AllocateInstruction().AsFneg(x).Insert(builder).Return() + state.push(v) + case wasm.OpcodeF32Sqrt, wasm.OpcodeF64Sqrt: + if state.unreachable { + break + } + x := state.pop() + v := builder.AllocateInstruction().AsSqrt(x).Insert(builder).Return() + state.push(v) + case wasm.OpcodeF32Abs, wasm.OpcodeF64Abs: + if state.unreachable { + break + } + x := state.pop() + v := builder.AllocateInstruction().AsFabs(x).Insert(builder).Return() + state.push(v) + case wasm.OpcodeF32Copysign, wasm.OpcodeF64Copysign: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + v := builder.AllocateInstruction().AsFcopysign(x, y).Insert(builder).Return() + state.push(v) + + case wasm.OpcodeF32Ceil, wasm.OpcodeF64Ceil: + if state.unreachable { + break + } + x := state.pop() + v := builder.AllocateInstruction().AsCeil(x).Insert(builder).Return() + state.push(v) + case wasm.OpcodeF32Floor, wasm.OpcodeF64Floor: + if state.unreachable { + break + } + x := state.pop() + v := builder.AllocateInstruction().AsFloor(x).Insert(builder).Return() + state.push(v) + case wasm.OpcodeF32Trunc, wasm.OpcodeF64Trunc: + if state.unreachable { + break + } + x := state.pop() + v := builder.AllocateInstruction().AsTrunc(x).Insert(builder).Return() + state.push(v) + case wasm.OpcodeF32Nearest, wasm.OpcodeF64Nearest: + if state.unreachable { + break + } + x := state.pop() + v := builder.AllocateInstruction().AsNearest(x).Insert(builder).Return() + state.push(v) + case wasm.OpcodeI64TruncF64S, wasm.OpcodeI64TruncF32S, + wasm.OpcodeI32TruncF64S, wasm.OpcodeI32TruncF32S, + wasm.OpcodeI64TruncF64U, wasm.OpcodeI64TruncF32U, + wasm.OpcodeI32TruncF64U, wasm.OpcodeI32TruncF32U: + if state.unreachable { + break + } + ret := builder.AllocateInstruction().AsFcvtToInt( 
+ state.pop(), + c.execCtxPtrValue, + op == wasm.OpcodeI64TruncF64S || op == wasm.OpcodeI64TruncF32S || op == wasm.OpcodeI32TruncF32S || op == wasm.OpcodeI32TruncF64S, + op == wasm.OpcodeI64TruncF64S || op == wasm.OpcodeI64TruncF32S || op == wasm.OpcodeI64TruncF64U || op == wasm.OpcodeI64TruncF32U, + false, + ).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeMiscPrefix: + state.pc++ + // A misc opcode is encoded as an unsigned variable 32-bit integer. + miscOpUint, num, err := leb128.LoadUint32(c.wasmFunctionBody[state.pc:]) + if err != nil { + // In normal conditions this should never happen because the function has passed validation. + panic(fmt.Sprintf("failed to read misc opcode: %v", err)) + } + state.pc += int(num - 1) + miscOp := wasm.OpcodeMisc(miscOpUint) + switch miscOp { + case wasm.OpcodeMiscI64TruncSatF64S, wasm.OpcodeMiscI64TruncSatF32S, + wasm.OpcodeMiscI32TruncSatF64S, wasm.OpcodeMiscI32TruncSatF32S, + wasm.OpcodeMiscI64TruncSatF64U, wasm.OpcodeMiscI64TruncSatF32U, + wasm.OpcodeMiscI32TruncSatF64U, wasm.OpcodeMiscI32TruncSatF32U: + if state.unreachable { + break + } + ret := builder.AllocateInstruction().AsFcvtToInt( + state.pop(), + c.execCtxPtrValue, + miscOp == wasm.OpcodeMiscI64TruncSatF64S || miscOp == wasm.OpcodeMiscI64TruncSatF32S || miscOp == wasm.OpcodeMiscI32TruncSatF32S || miscOp == wasm.OpcodeMiscI32TruncSatF64S, + miscOp == wasm.OpcodeMiscI64TruncSatF64S || miscOp == wasm.OpcodeMiscI64TruncSatF32S || miscOp == wasm.OpcodeMiscI64TruncSatF64U || miscOp == wasm.OpcodeMiscI64TruncSatF32U, + true, + ).Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeMiscTableSize: + tableIndex := c.readI32u() + if state.unreachable { + break + } + + // Load the table. 
+ loadTableInstancePtr := builder.AllocateInstruction() + loadTableInstancePtr.AsLoad(c.moduleCtxPtrValue, c.offset.TableOffset(int(tableIndex)).U32(), ssa.TypeI64) + builder.InsertInstruction(loadTableInstancePtr) + tableInstancePtr := loadTableInstancePtr.Return() + + // Load the table's length. + loadTableLen := builder.AllocateInstruction(). + AsLoad(tableInstancePtr, tableInstanceLenOffset, ssa.TypeI32). + Insert(builder) + state.push(loadTableLen.Return()) + + case wasm.OpcodeMiscTableGrow: + tableIndex := c.readI32u() + if state.unreachable { + break + } + + c.storeCallerModuleContext() + + tableIndexVal := builder.AllocateInstruction().AsIconst32(tableIndex).Insert(builder).Return() + + num := state.pop() + r := state.pop() + + tableGrowPtr := builder.AllocateInstruction(). + AsLoad(c.execCtxPtrValue, + wazevoapi.ExecutionContextOffsetTableGrowTrampolineAddress.U32(), + ssa.TypeI64, + ).Insert(builder).Return() + + args := c.allocateVarLengthValues(4, c.execCtxPtrValue, tableIndexVal, num, r) + callGrowRet := builder. + AllocateInstruction(). + AsCallIndirect(tableGrowPtr, &c.tableGrowSig, args). + Insert(builder).Return() + state.push(callGrowRet) + + case wasm.OpcodeMiscTableCopy: + dstTableIndex := c.readI32u() + srcTableIndex := c.readI32u() + if state.unreachable { + break + } + + copySize := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + srcOffset := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + dstOffset := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + + // Out of bounds check. 
+ dstTableInstancePtr := c.boundsCheckInTable(dstTableIndex, dstOffset, copySize) + srcTableInstancePtr := c.boundsCheckInTable(srcTableIndex, srcOffset, copySize) + + dstTableBaseAddr := c.loadTableBaseAddr(dstTableInstancePtr) + srcTableBaseAddr := c.loadTableBaseAddr(srcTableInstancePtr) + + three := builder.AllocateInstruction().AsIconst64(3).Insert(builder).Return() + + dstOffsetInBytes := builder.AllocateInstruction().AsIshl(dstOffset, three).Insert(builder).Return() + dstAddr := builder.AllocateInstruction().AsIadd(dstTableBaseAddr, dstOffsetInBytes).Insert(builder).Return() + srcOffsetInBytes := builder.AllocateInstruction().AsIshl(srcOffset, three).Insert(builder).Return() + srcAddr := builder.AllocateInstruction().AsIadd(srcTableBaseAddr, srcOffsetInBytes).Insert(builder).Return() + + copySizeInBytes := builder.AllocateInstruction().AsIshl(copySize, three).Insert(builder).Return() + c.callMemmove(dstAddr, srcAddr, copySizeInBytes) + + case wasm.OpcodeMiscMemoryCopy: + state.pc += 2 // +2 to skip two memory indexes which are fixed to zero. + if state.unreachable { + break + } + + copySize := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + srcOffset := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + dstOffset := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + + // Out of bounds check. 
+ memLen := c.getMemoryLenValue(false) + c.boundsCheckInMemory(memLen, dstOffset, copySize) + c.boundsCheckInMemory(memLen, srcOffset, copySize) + + memBase := c.getMemoryBaseValue(false) + dstAddr := builder.AllocateInstruction().AsIadd(memBase, dstOffset).Insert(builder).Return() + srcAddr := builder.AllocateInstruction().AsIadd(memBase, srcOffset).Insert(builder).Return() + + c.callMemmove(dstAddr, srcAddr, copySize) + + case wasm.OpcodeMiscTableFill: + tableIndex := c.readI32u() + if state.unreachable { + break + } + fillSize := state.pop() + value := state.pop() + offset := state.pop() + + fillSizeExt := builder. + AllocateInstruction().AsUExtend(fillSize, 32, 64).Insert(builder).Return() + offsetExt := builder. + AllocateInstruction().AsUExtend(offset, 32, 64).Insert(builder).Return() + tableInstancePtr := c.boundsCheckInTable(tableIndex, offsetExt, fillSizeExt) + + three := builder.AllocateInstruction().AsIconst64(3).Insert(builder).Return() + offsetInBytes := builder.AllocateInstruction().AsIshl(offsetExt, three).Insert(builder).Return() + fillSizeInBytes := builder.AllocateInstruction().AsIshl(fillSizeExt, three).Insert(builder).Return() + + // Calculate the base address of the table. + tableBaseAddr := c.loadTableBaseAddr(tableInstancePtr) + addr := builder.AllocateInstruction().AsIadd(tableBaseAddr, offsetInBytes).Insert(builder).Return() + + // Prepare the loop and following block. + beforeLoop := builder.AllocateBasicBlock() + loopBlk := builder.AllocateBasicBlock() + loopVar := loopBlk.AddParam(builder, ssa.TypeI64) + followingBlk := builder.AllocateBasicBlock() + + // Uses the copy trick for faster filling buffer like memory.fill, but in this case we copy 8 bytes at a time. + // buf := memoryInst.Buffer[offset : offset+fillSize] + // buf[0:8] = value + // for i := 8; i < fillSize; i *= 2 { Begin with 8 bytes. 
+ // copy(buf[i:], buf[:i]) + // } + + // Insert the jump to the beforeLoop block; If the fillSize is zero, then jump to the following block to skip entire logics. + zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() + ifFillSizeZero := builder.AllocateInstruction().AsIcmp(fillSizeExt, zero, ssa.IntegerCmpCondEqual). + Insert(builder).Return() + builder.AllocateInstruction().AsBrnz(ifFillSizeZero, ssa.ValuesNil, followingBlk).Insert(builder) + c.insertJumpToBlock(ssa.ValuesNil, beforeLoop) + + // buf[0:8] = value + builder.SetCurrentBlock(beforeLoop) + builder.AllocateInstruction().AsStore(ssa.OpcodeStore, value, addr, 0).Insert(builder) + initValue := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return() + c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk) + + builder.SetCurrentBlock(loopBlk) + dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return() + + // If loopVar*2 > fillSizeInBytes, then count must be fillSizeInBytes-loopVar. + var count ssa.Value + { + loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return() + loopVarDoubledLargerThanFillSize := builder. + AllocateInstruction().AsIcmp(loopVarDoubled, fillSizeInBytes, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual). + Insert(builder).Return() + diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return() + count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return() + } + + c.callMemmove(dstAddr, addr, count) + + shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() + newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return() + loopVarLessThanFillSize := builder.AllocateInstruction(). + AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return() + + builder.AllocateInstruction(). 
+ AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk). + Insert(builder) + + c.insertJumpToBlock(ssa.ValuesNil, followingBlk) + builder.SetCurrentBlock(followingBlk) + + builder.Seal(beforeLoop) + builder.Seal(loopBlk) + builder.Seal(followingBlk) + + case wasm.OpcodeMiscMemoryFill: + state.pc++ // Skip the memory index which is fixed to zero. + if state.unreachable { + break + } + + fillSize := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + value := state.pop() + offset := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + + // Out of bounds check. + c.boundsCheckInMemory(c.getMemoryLenValue(false), offset, fillSize) + + // Calculate the base address: + addr := builder.AllocateInstruction().AsIadd(c.getMemoryBaseValue(false), offset).Insert(builder).Return() + + // Uses the copy trick for faster filling buffer: https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d + // buf := memoryInst.Buffer[offset : offset+fillSize] + // buf[0] = value + // for i := 1; i < fillSize; i *= 2 { + // copy(buf[i:], buf[:i]) + // } + + // Prepare the loop and following block. + beforeLoop := builder.AllocateBasicBlock() + loopBlk := builder.AllocateBasicBlock() + loopVar := loopBlk.AddParam(builder, ssa.TypeI64) + followingBlk := builder.AllocateBasicBlock() + + // Insert the jump to the beforeLoop block; If the fillSize is zero, then jump to the following block to skip entire logics. + zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() + ifFillSizeZero := builder.AllocateInstruction().AsIcmp(fillSize, zero, ssa.IntegerCmpCondEqual). 
+ Insert(builder).Return() + builder.AllocateInstruction().AsBrnz(ifFillSizeZero, ssa.ValuesNil, followingBlk).Insert(builder) + c.insertJumpToBlock(ssa.ValuesNil, beforeLoop) + + // buf[0] = value + builder.SetCurrentBlock(beforeLoop) + builder.AllocateInstruction().AsStore(ssa.OpcodeIstore8, value, addr, 0).Insert(builder) + initValue := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() + c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk) + + builder.SetCurrentBlock(loopBlk) + dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return() + + // If loopVar*2 > fillSizeExt, then count must be fillSizeExt-loopVar. + var count ssa.Value + { + loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return() + loopVarDoubledLargerThanFillSize := builder. + AllocateInstruction().AsIcmp(loopVarDoubled, fillSize, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual). + Insert(builder).Return() + diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return() + count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return() + } + + c.callMemmove(dstAddr, addr, count) + + shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() + newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return() + loopVarLessThanFillSize := builder.AllocateInstruction(). + AsIcmp(newLoopVar, fillSize, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return() + + builder.AllocateInstruction(). + AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk). 
+ Insert(builder) + + c.insertJumpToBlock(ssa.ValuesNil, followingBlk) + builder.SetCurrentBlock(followingBlk) + + builder.Seal(beforeLoop) + builder.Seal(loopBlk) + builder.Seal(followingBlk) + + case wasm.OpcodeMiscMemoryInit: + index := c.readI32u() + state.pc++ // Skip the memory index which is fixed to zero. + if state.unreachable { + break + } + + copySize := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + offsetInDataInstance := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + offsetInMemory := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + + dataInstPtr := c.dataOrElementInstanceAddr(index, c.offset.DataInstances1stElement) + + // Bounds check. + c.boundsCheckInMemory(c.getMemoryLenValue(false), offsetInMemory, copySize) + c.boundsCheckInDataOrElementInstance(dataInstPtr, offsetInDataInstance, copySize, wazevoapi.ExitCodeMemoryOutOfBounds) + + dataInstBaseAddr := builder.AllocateInstruction().AsLoad(dataInstPtr, 0, ssa.TypeI64).Insert(builder).Return() + srcAddr := builder.AllocateInstruction().AsIadd(dataInstBaseAddr, offsetInDataInstance).Insert(builder).Return() + + memBase := c.getMemoryBaseValue(false) + dstAddr := builder.AllocateInstruction().AsIadd(memBase, offsetInMemory).Insert(builder).Return() + + c.callMemmove(dstAddr, srcAddr, copySize) + + case wasm.OpcodeMiscTableInit: + elemIndex := c.readI32u() + tableIndex := c.readI32u() + if state.unreachable { + break + } + + copySize := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + offsetInElementInstance := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + offsetInTable := builder. + AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() + + elemInstPtr := c.dataOrElementInstanceAddr(elemIndex, c.offset.ElementInstances1stElement) + + // Bounds check. 
+ tableInstancePtr := c.boundsCheckInTable(tableIndex, offsetInTable, copySize) + c.boundsCheckInDataOrElementInstance(elemInstPtr, offsetInElementInstance, copySize, wazevoapi.ExitCodeTableOutOfBounds) + + three := builder.AllocateInstruction().AsIconst64(3).Insert(builder).Return() + // Calculates the destination address in the table. + tableOffsetInBytes := builder.AllocateInstruction().AsIshl(offsetInTable, three).Insert(builder).Return() + tableBaseAddr := c.loadTableBaseAddr(tableInstancePtr) + dstAddr := builder.AllocateInstruction().AsIadd(tableBaseAddr, tableOffsetInBytes).Insert(builder).Return() + + // Calculates the source address in the element instance. + srcOffsetInBytes := builder.AllocateInstruction().AsIshl(offsetInElementInstance, three).Insert(builder).Return() + elemInstBaseAddr := builder.AllocateInstruction().AsLoad(elemInstPtr, 0, ssa.TypeI64).Insert(builder).Return() + srcAddr := builder.AllocateInstruction().AsIadd(elemInstBaseAddr, srcOffsetInBytes).Insert(builder).Return() + + copySizeInBytes := builder.AllocateInstruction().AsIshl(copySize, three).Insert(builder).Return() + c.callMemmove(dstAddr, srcAddr, copySizeInBytes) + + case wasm.OpcodeMiscElemDrop: + index := c.readI32u() + if state.unreachable { + break + } + + c.dropDataOrElementInstance(index, c.offset.ElementInstances1stElement) + + case wasm.OpcodeMiscDataDrop: + index := c.readI32u() + if state.unreachable { + break + } + c.dropDataOrElementInstance(index, c.offset.DataInstances1stElement) + + default: + panic("Unknown MiscOp " + wasm.MiscInstructionName(miscOp)) + } + + case wasm.OpcodeI32ReinterpretF32: + if state.unreachable { + break + } + reinterpret := builder.AllocateInstruction(). + AsBitcast(state.pop(), ssa.TypeI32). + Insert(builder).Return() + state.push(reinterpret) + + case wasm.OpcodeI64ReinterpretF64: + if state.unreachable { + break + } + reinterpret := builder.AllocateInstruction(). + AsBitcast(state.pop(), ssa.TypeI64). 
+ Insert(builder).Return() + state.push(reinterpret) + + case wasm.OpcodeF32ReinterpretI32: + if state.unreachable { + break + } + reinterpret := builder.AllocateInstruction(). + AsBitcast(state.pop(), ssa.TypeF32). + Insert(builder).Return() + state.push(reinterpret) + + case wasm.OpcodeF64ReinterpretI64: + if state.unreachable { + break + } + reinterpret := builder.AllocateInstruction(). + AsBitcast(state.pop(), ssa.TypeF64). + Insert(builder).Return() + state.push(reinterpret) + + case wasm.OpcodeI32DivS, wasm.OpcodeI64DivS: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + result := builder.AllocateInstruction().AsSDiv(x, y, c.execCtxPtrValue).Insert(builder).Return() + state.push(result) + + case wasm.OpcodeI32DivU, wasm.OpcodeI64DivU: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + result := builder.AllocateInstruction().AsUDiv(x, y, c.execCtxPtrValue).Insert(builder).Return() + state.push(result) + + case wasm.OpcodeI32RemS, wasm.OpcodeI64RemS: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + result := builder.AllocateInstruction().AsSRem(x, y, c.execCtxPtrValue).Insert(builder).Return() + state.push(result) + + case wasm.OpcodeI32RemU, wasm.OpcodeI64RemU: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + result := builder.AllocateInstruction().AsURem(x, y, c.execCtxPtrValue).Insert(builder).Return() + state.push(result) + + case wasm.OpcodeI32And, wasm.OpcodeI64And: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + and := builder.AllocateInstruction() + and.AsBand(x, y) + builder.InsertInstruction(and) + value := and.Return() + state.push(value) + case wasm.OpcodeI32Or, wasm.OpcodeI64Or: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + or := builder.AllocateInstruction() + or.AsBor(x, y) + builder.InsertInstruction(or) + value := or.Return() + state.push(value) + case wasm.OpcodeI32Xor, wasm.OpcodeI64Xor: + if 
state.unreachable { + break + } + y, x := state.pop(), state.pop() + xor := builder.AllocateInstruction() + xor.AsBxor(x, y) + builder.InsertInstruction(xor) + value := xor.Return() + state.push(value) + case wasm.OpcodeI32Shl, wasm.OpcodeI64Shl: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + ishl := builder.AllocateInstruction() + ishl.AsIshl(x, y) + builder.InsertInstruction(ishl) + value := ishl.Return() + state.push(value) + case wasm.OpcodeI32ShrU, wasm.OpcodeI64ShrU: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + ishl := builder.AllocateInstruction() + ishl.AsUshr(x, y) + builder.InsertInstruction(ishl) + value := ishl.Return() + state.push(value) + case wasm.OpcodeI32ShrS, wasm.OpcodeI64ShrS: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + ishl := builder.AllocateInstruction() + ishl.AsSshr(x, y) + builder.InsertInstruction(ishl) + value := ishl.Return() + state.push(value) + case wasm.OpcodeI32Rotl, wasm.OpcodeI64Rotl: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + rotl := builder.AllocateInstruction() + rotl.AsRotl(x, y) + builder.InsertInstruction(rotl) + value := rotl.Return() + state.push(value) + case wasm.OpcodeI32Rotr, wasm.OpcodeI64Rotr: + if state.unreachable { + break + } + y, x := state.pop(), state.pop() + rotr := builder.AllocateInstruction() + rotr.AsRotr(x, y) + builder.InsertInstruction(rotr) + value := rotr.Return() + state.push(value) + case wasm.OpcodeI32Clz, wasm.OpcodeI64Clz: + if state.unreachable { + break + } + x := state.pop() + clz := builder.AllocateInstruction() + clz.AsClz(x) + builder.InsertInstruction(clz) + value := clz.Return() + state.push(value) + case wasm.OpcodeI32Ctz, wasm.OpcodeI64Ctz: + if state.unreachable { + break + } + x := state.pop() + ctz := builder.AllocateInstruction() + ctz.AsCtz(x) + builder.InsertInstruction(ctz) + value := ctz.Return() + state.push(value) + case wasm.OpcodeI32Popcnt, 
wasm.OpcodeI64Popcnt: + if state.unreachable { + break + } + x := state.pop() + popcnt := builder.AllocateInstruction() + popcnt.AsPopcnt(x) + builder.InsertInstruction(popcnt) + value := popcnt.Return() + state.push(value) + + case wasm.OpcodeI32WrapI64: + if state.unreachable { + break + } + x := state.pop() + wrap := builder.AllocateInstruction().AsIreduce(x, ssa.TypeI32).Insert(builder).Return() + state.push(wrap) + case wasm.OpcodeGlobalGet: + index := c.readI32u() + if state.unreachable { + break + } + v := c.getWasmGlobalValue(index, false) + state.push(v) + case wasm.OpcodeGlobalSet: + index := c.readI32u() + if state.unreachable { + break + } + v := state.pop() + c.setWasmGlobalValue(index, v) + case wasm.OpcodeLocalGet: + index := c.readI32u() + if state.unreachable { + break + } + variable := c.localVariable(index) + if _, ok := c.m.NonStaticLocals[c.wasmLocalFunctionIndex][index]; ok { + state.push(builder.MustFindValue(variable)) + } else { + // If a local is static, we can simply find it in the entry block which is either a function param + // or a zero value. This fast pass helps to avoid the overhead of searching the entire function plus + // avoid adding unnecessary block arguments. + // TODO: I think this optimization should be done in a SSA pass like passRedundantPhiEliminationOpt, + // but somehow there's some corner cases that it fails to optimize. 
+ state.push(builder.MustFindValueInBlk(variable, c.ssaBuilder.EntryBlock())) + } + case wasm.OpcodeLocalSet: + index := c.readI32u() + if state.unreachable { + break + } + variable := c.localVariable(index) + newValue := state.pop() + builder.DefineVariableInCurrentBB(variable, newValue) + + case wasm.OpcodeLocalTee: + index := c.readI32u() + if state.unreachable { + break + } + variable := c.localVariable(index) + newValue := state.peek() + builder.DefineVariableInCurrentBB(variable, newValue) + + case wasm.OpcodeSelect, wasm.OpcodeTypedSelect: + if op == wasm.OpcodeTypedSelect { + state.pc += 2 // ignores the type which is only needed during validation. + } + + if state.unreachable { + break + } + + cond := state.pop() + v2 := state.pop() + v1 := state.pop() + + sl := builder.AllocateInstruction(). + AsSelect(cond, v1, v2). + Insert(builder). + Return() + state.push(sl) + + case wasm.OpcodeMemorySize: + state.pc++ // skips the memory index. + if state.unreachable { + break + } + + var memSizeInBytes ssa.Value + if c.offset.LocalMemoryBegin < 0 { + memInstPtr := builder.AllocateInstruction(). + AsLoad(c.moduleCtxPtrValue, c.offset.ImportedMemoryBegin.U32(), ssa.TypeI64). + Insert(builder). + Return() + + memSizeInBytes = builder.AllocateInstruction(). + AsLoad(memInstPtr, memoryInstanceBufSizeOffset, ssa.TypeI32). + Insert(builder). + Return() + } else { + memSizeInBytes = builder.AllocateInstruction(). + AsLoad(c.moduleCtxPtrValue, c.offset.LocalMemoryLen().U32(), ssa.TypeI32). + Insert(builder). + Return() + } + + amount := builder.AllocateInstruction() + amount.AsIconst32(uint32(wasm.MemoryPageSizeInBits)) + builder.InsertInstruction(amount) + memSize := builder.AllocateInstruction(). + AsUshr(memSizeInBytes, amount.Return()). + Insert(builder). + Return() + state.push(memSize) + + case wasm.OpcodeMemoryGrow: + state.pc++ // skips the memory index. 
+ if state.unreachable { + break + } + + c.storeCallerModuleContext() + + pages := state.pop() + memoryGrowPtr := builder.AllocateInstruction(). + AsLoad(c.execCtxPtrValue, + wazevoapi.ExecutionContextOffsetMemoryGrowTrampolineAddress.U32(), + ssa.TypeI64, + ).Insert(builder).Return() + + args := c.allocateVarLengthValues(1, c.execCtxPtrValue, pages) + callGrowRet := builder. + AllocateInstruction(). + AsCallIndirect(memoryGrowPtr, &c.memoryGrowSig, args). + Insert(builder).Return() + state.push(callGrowRet) + + // After the memory grow, reload the cached memory base and len. + c.reloadMemoryBaseLen() + + case wasm.OpcodeI32Store, + wasm.OpcodeI64Store, + wasm.OpcodeF32Store, + wasm.OpcodeF64Store, + wasm.OpcodeI32Store8, + wasm.OpcodeI32Store16, + wasm.OpcodeI64Store8, + wasm.OpcodeI64Store16, + wasm.OpcodeI64Store32: + + _, offset := c.readMemArg() + if state.unreachable { + break + } + var opSize uint64 + var opcode ssa.Opcode + switch op { + case wasm.OpcodeI32Store, wasm.OpcodeF32Store: + opcode = ssa.OpcodeStore + opSize = 4 + case wasm.OpcodeI64Store, wasm.OpcodeF64Store: + opcode = ssa.OpcodeStore + opSize = 8 + case wasm.OpcodeI32Store8, wasm.OpcodeI64Store8: + opcode = ssa.OpcodeIstore8 + opSize = 1 + case wasm.OpcodeI32Store16, wasm.OpcodeI64Store16: + opcode = ssa.OpcodeIstore16 + opSize = 2 + case wasm.OpcodeI64Store32: + opcode = ssa.OpcodeIstore32 + opSize = 4 + default: + panic("BUG") + } + + value := state.pop() + baseAddr := state.pop() + addr := c.memOpSetup(baseAddr, uint64(offset), opSize) + builder.AllocateInstruction(). + AsStore(opcode, value, addr, offset). 
+ Insert(builder) + + case wasm.OpcodeI32Load, + wasm.OpcodeI64Load, + wasm.OpcodeF32Load, + wasm.OpcodeF64Load, + wasm.OpcodeI32Load8S, + wasm.OpcodeI32Load8U, + wasm.OpcodeI32Load16S, + wasm.OpcodeI32Load16U, + wasm.OpcodeI64Load8S, + wasm.OpcodeI64Load8U, + wasm.OpcodeI64Load16S, + wasm.OpcodeI64Load16U, + wasm.OpcodeI64Load32S, + wasm.OpcodeI64Load32U: + _, offset := c.readMemArg() + if state.unreachable { + break + } + + var opSize uint64 + switch op { + case wasm.OpcodeI32Load, wasm.OpcodeF32Load: + opSize = 4 + case wasm.OpcodeI64Load, wasm.OpcodeF64Load: + opSize = 8 + case wasm.OpcodeI32Load8S, wasm.OpcodeI32Load8U: + opSize = 1 + case wasm.OpcodeI32Load16S, wasm.OpcodeI32Load16U: + opSize = 2 + case wasm.OpcodeI64Load8S, wasm.OpcodeI64Load8U: + opSize = 1 + case wasm.OpcodeI64Load16S, wasm.OpcodeI64Load16U: + opSize = 2 + case wasm.OpcodeI64Load32S, wasm.OpcodeI64Load32U: + opSize = 4 + default: + panic("BUG") + } + + baseAddr := state.pop() + addr := c.memOpSetup(baseAddr, uint64(offset), opSize) + load := builder.AllocateInstruction() + switch op { + case wasm.OpcodeI32Load: + load.AsLoad(addr, offset, ssa.TypeI32) + case wasm.OpcodeI64Load: + load.AsLoad(addr, offset, ssa.TypeI64) + case wasm.OpcodeF32Load: + load.AsLoad(addr, offset, ssa.TypeF32) + case wasm.OpcodeF64Load: + load.AsLoad(addr, offset, ssa.TypeF64) + case wasm.OpcodeI32Load8S: + load.AsExtLoad(ssa.OpcodeSload8, addr, offset, false) + case wasm.OpcodeI32Load8U: + load.AsExtLoad(ssa.OpcodeUload8, addr, offset, false) + case wasm.OpcodeI32Load16S: + load.AsExtLoad(ssa.OpcodeSload16, addr, offset, false) + case wasm.OpcodeI32Load16U: + load.AsExtLoad(ssa.OpcodeUload16, addr, offset, false) + case wasm.OpcodeI64Load8S: + load.AsExtLoad(ssa.OpcodeSload8, addr, offset, true) + case wasm.OpcodeI64Load8U: + load.AsExtLoad(ssa.OpcodeUload8, addr, offset, true) + case wasm.OpcodeI64Load16S: + load.AsExtLoad(ssa.OpcodeSload16, addr, offset, true) + case wasm.OpcodeI64Load16U: + 
load.AsExtLoad(ssa.OpcodeUload16, addr, offset, true) + case wasm.OpcodeI64Load32S: + load.AsExtLoad(ssa.OpcodeSload32, addr, offset, true) + case wasm.OpcodeI64Load32U: + load.AsExtLoad(ssa.OpcodeUload32, addr, offset, true) + default: + panic("BUG") + } + builder.InsertInstruction(load) + state.push(load.Return()) + case wasm.OpcodeBlock: + // Note: we do not need to create a BB for this as that would always have only one predecessor + // which is the current BB, and therefore it's always ok to merge them in any way. + + bt := c.readBlockType() + + if state.unreachable { + state.unreachableDepth++ + break + } + + followingBlk := builder.AllocateBasicBlock() + c.addBlockParamsFromWasmTypes(bt.Results, followingBlk) + + state.ctrlPush(controlFrame{ + kind: controlFrameKindBlock, + originalStackLenWithoutParam: len(state.values) - len(bt.Params), + followingBlock: followingBlk, + blockType: bt, + }) + case wasm.OpcodeLoop: + bt := c.readBlockType() + + if state.unreachable { + state.unreachableDepth++ + break + } + + loopHeader, afterLoopBlock := builder.AllocateBasicBlock(), builder.AllocateBasicBlock() + c.addBlockParamsFromWasmTypes(bt.Params, loopHeader) + c.addBlockParamsFromWasmTypes(bt.Results, afterLoopBlock) + + originalLen := len(state.values) - len(bt.Params) + state.ctrlPush(controlFrame{ + originalStackLenWithoutParam: originalLen, + kind: controlFrameKindLoop, + blk: loopHeader, + followingBlock: afterLoopBlock, + blockType: bt, + }) + + args := c.allocateVarLengthValues(originalLen) + args = args.Append(builder.VarLengthPool(), state.values[originalLen:]...) + + // Insert the jump to the header of loop. + br := builder.AllocateInstruction() + br.AsJump(args, loopHeader) + builder.InsertInstruction(br) + + c.switchTo(originalLen, loopHeader) + + if c.ensureTermination { + checkModuleExitCodePtr := builder.AllocateInstruction(). 
+ AsLoad(c.execCtxPtrValue, + wazevoapi.ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress.U32(), + ssa.TypeI64, + ).Insert(builder).Return() + + args := c.allocateVarLengthValues(1, c.execCtxPtrValue) + builder.AllocateInstruction(). + AsCallIndirect(checkModuleExitCodePtr, &c.checkModuleExitCodeSig, args). + Insert(builder) + } + case wasm.OpcodeIf: + bt := c.readBlockType() + + if state.unreachable { + state.unreachableDepth++ + break + } + + v := state.pop() + thenBlk, elseBlk, followingBlk := builder.AllocateBasicBlock(), builder.AllocateBasicBlock(), builder.AllocateBasicBlock() + + // We do not make the Wasm-level block parameters as SSA-level block params for if-else blocks + // since they won't be PHI and the definition is unique. + + // On the other hand, the following block after if-else-end will likely have + // multiple definitions (one in Then and another in Else blocks). + c.addBlockParamsFromWasmTypes(bt.Results, followingBlk) + + args := c.allocateVarLengthValues(len(bt.Params)) + args = args.Append(builder.VarLengthPool(), state.values[len(state.values)-len(bt.Params):]...) + + // Insert the conditional jump to the Else block. + brz := builder.AllocateInstruction() + brz.AsBrz(v, ssa.ValuesNil, elseBlk) + builder.InsertInstruction(brz) + + // Then, insert the jump to the Then block. + br := builder.AllocateInstruction() + br.AsJump(ssa.ValuesNil, thenBlk) + builder.InsertInstruction(br) + + state.ctrlPush(controlFrame{ + kind: controlFrameKindIfWithoutElse, + originalStackLenWithoutParam: len(state.values) - len(bt.Params), + blk: elseBlk, + followingBlock: followingBlk, + blockType: bt, + clonedArgs: args, + }) + + builder.SetCurrentBlock(thenBlk) + + // Then and Else (if exists) have only one predecessor. 
+ builder.Seal(thenBlk) + builder.Seal(elseBlk) + case wasm.OpcodeElse: + ifctrl := state.ctrlPeekAt(0) + if unreachable := state.unreachable; unreachable && state.unreachableDepth > 0 { + // If it is currently in unreachable and is a nested if, + // we just remove the entire else block. + break + } + + ifctrl.kind = controlFrameKindIfWithElse + if !state.unreachable { + // If this Then block is currently reachable, we have to insert the branching to the following BB. + followingBlk := ifctrl.followingBlock // == the BB after if-then-else. + args := c.nPeekDup(len(ifctrl.blockType.Results)) + c.insertJumpToBlock(args, followingBlk) + } else { + state.unreachable = false + } + + // Reset the stack so that we can correctly handle the else block. + state.values = state.values[:ifctrl.originalStackLenWithoutParam] + elseBlk := ifctrl.blk + for _, arg := range ifctrl.clonedArgs.View() { + state.push(arg) + } + + builder.SetCurrentBlock(elseBlk) + + case wasm.OpcodeEnd: + if state.unreachableDepth > 0 { + state.unreachableDepth-- + break + } + + ctrl := state.ctrlPop() + followingBlk := ctrl.followingBlock + + unreachable := state.unreachable + if !unreachable { + // Top n-th args will be used as a result of the current control frame. + args := c.nPeekDup(len(ctrl.blockType.Results)) + + // Insert the unconditional branch to the target. + c.insertJumpToBlock(args, followingBlk) + } else { // recover from the unreachable state. + state.unreachable = false + } + + switch ctrl.kind { + case controlFrameKindFunction: + break // This is the very end of function. + case controlFrameKindLoop: + // Loop header block can be reached from any br/br_table contained in the loop, + // so now that we've reached End of it, we can seal it. + builder.Seal(ctrl.blk) + case controlFrameKindIfWithoutElse: + // If this is the end of Then block, we have to emit the empty Else block. 
+ elseBlk := ctrl.blk + builder.SetCurrentBlock(elseBlk) + c.insertJumpToBlock(ctrl.clonedArgs, followingBlk) + } + + builder.Seal(followingBlk) + + // Ready to start translating the following block. + c.switchTo(ctrl.originalStackLenWithoutParam, followingBlk) + + case wasm.OpcodeBr: + labelIndex := c.readI32u() + if state.unreachable { + break + } + + targetBlk, argNum := state.brTargetArgNumFor(labelIndex) + args := c.nPeekDup(argNum) + c.insertJumpToBlock(args, targetBlk) + + state.unreachable = true + + case wasm.OpcodeBrIf: + labelIndex := c.readI32u() + if state.unreachable { + break + } + + v := state.pop() + + targetBlk, argNum := state.brTargetArgNumFor(labelIndex) + args := c.nPeekDup(argNum) + var sealTargetBlk bool + if c.needListener && targetBlk.ReturnBlock() { // In this case, we have to call the listener before returning. + // Save the currently active block. + current := builder.CurrentBlock() + + // Allocate the trampoline block to the return where we call the listener. + targetBlk = builder.AllocateBasicBlock() + builder.SetCurrentBlock(targetBlk) + sealTargetBlk = true + + c.callListenerAfter() + + instr := builder.AllocateInstruction() + instr.AsReturn(args) + builder.InsertInstruction(instr) + + args = ssa.ValuesNil + + // Revert the current block. + builder.SetCurrentBlock(current) + } + + // Insert the conditional jump to the target block. + brnz := builder.AllocateInstruction() + brnz.AsBrnz(v, args, targetBlk) + builder.InsertInstruction(brnz) + + if sealTargetBlk { + builder.Seal(targetBlk) + } + + // Insert the unconditional jump to the Else block which corresponds to after br_if. + elseBlk := builder.AllocateBasicBlock() + c.insertJumpToBlock(ssa.ValuesNil, elseBlk) + + // Now start translating the instructions after br_if. + builder.Seal(elseBlk) // Else of br_if has the current block as the only one successor. 
+ builder.SetCurrentBlock(elseBlk) + + case wasm.OpcodeBrTable: + labels := state.tmpForBrTable + labels = labels[:0] + labelCount := c.readI32u() + for i := 0; i < int(labelCount); i++ { + labels = append(labels, c.readI32u()) + } + labels = append(labels, c.readI32u()) // default label. + if state.unreachable { + break + } + + index := state.pop() + if labelCount == 0 { // If this br_table is empty, we can just emit the unconditional jump. + targetBlk, argNum := state.brTargetArgNumFor(labels[0]) + args := c.nPeekDup(argNum) + c.insertJumpToBlock(args, targetBlk) + } else { + c.lowerBrTable(labels, index) + } + state.unreachable = true + + case wasm.OpcodeNop: + case wasm.OpcodeReturn: + if state.unreachable { + break + } + if c.needListener { + c.callListenerAfter() + } + + results := c.nPeekDup(c.results()) + instr := builder.AllocateInstruction() + + instr.AsReturn(results) + builder.InsertInstruction(instr) + state.unreachable = true + + case wasm.OpcodeUnreachable: + if state.unreachable { + break + } + exit := builder.AllocateInstruction() + exit.AsExitWithCode(c.execCtxPtrValue, wazevoapi.ExitCodeUnreachable) + builder.InsertInstruction(exit) + state.unreachable = true + + case wasm.OpcodeCallIndirect: + typeIndex := c.readI32u() + tableIndex := c.readI32u() + if state.unreachable { + break + } + c.lowerCallIndirect(typeIndex, tableIndex) + + case wasm.OpcodeCall: + fnIndex := c.readI32u() + if state.unreachable { + break + } + + var typIndex wasm.Index + if fnIndex < c.m.ImportFunctionCount { + // Before transfer the control to the callee, we have to store the current module's moduleContextPtr + // into execContext.callerModuleContextPtr in case when the callee is a Go function. 
+ c.storeCallerModuleContext() + var fi int + for i := range c.m.ImportSection { + imp := &c.m.ImportSection[i] + if imp.Type == wasm.ExternTypeFunc { + if fi == int(fnIndex) { + typIndex = imp.DescFunc + break + } + fi++ + } + } + } else { + typIndex = c.m.FunctionSection[fnIndex-c.m.ImportFunctionCount] + } + typ := &c.m.TypeSection[typIndex] + + argN := len(typ.Params) + tail := len(state.values) - argN + vs := state.values[tail:] + state.values = state.values[:tail] + args := c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue) + + sig := c.signatures[typ] + call := builder.AllocateInstruction() + if fnIndex >= c.m.ImportFunctionCount { + args = args.Append(builder.VarLengthPool(), c.moduleCtxPtrValue) // This case the callee module is itself. + args = args.Append(builder.VarLengthPool(), vs...) + call.AsCall(FunctionIndexToFuncRef(fnIndex), sig, args) + builder.InsertInstruction(call) + } else { + // This case we have to read the address of the imported function from the module context. + moduleCtx := c.moduleCtxPtrValue + loadFuncPtr, loadModuleCtxPtr := builder.AllocateInstruction(), builder.AllocateInstruction() + funcPtrOffset, moduleCtxPtrOffset, _ := c.offset.ImportedFunctionOffset(fnIndex) + loadFuncPtr.AsLoad(moduleCtx, funcPtrOffset.U32(), ssa.TypeI64) + loadModuleCtxPtr.AsLoad(moduleCtx, moduleCtxPtrOffset.U32(), ssa.TypeI64) + builder.InsertInstruction(loadFuncPtr) + builder.InsertInstruction(loadModuleCtxPtr) + + args = args.Append(builder.VarLengthPool(), loadModuleCtxPtr.Return()) + args = args.Append(builder.VarLengthPool(), vs...) 
+ call.AsCallIndirect(loadFuncPtr.Return(), sig, args) + builder.InsertInstruction(call) + } + + first, rest := call.Returns() + if first.Valid() { + state.push(first) + } + for _, v := range rest { + state.push(v) + } + + c.reloadAfterCall() + + case wasm.OpcodeDrop: + if state.unreachable { + break + } + _ = state.pop() + case wasm.OpcodeF64ConvertI32S, wasm.OpcodeF64ConvertI64S, wasm.OpcodeF64ConvertI32U, wasm.OpcodeF64ConvertI64U: + if state.unreachable { + break + } + result := builder.AllocateInstruction().AsFcvtFromInt( + state.pop(), + op == wasm.OpcodeF64ConvertI32S || op == wasm.OpcodeF64ConvertI64S, + true, + ).Insert(builder).Return() + state.push(result) + case wasm.OpcodeF32ConvertI32S, wasm.OpcodeF32ConvertI64S, wasm.OpcodeF32ConvertI32U, wasm.OpcodeF32ConvertI64U: + if state.unreachable { + break + } + result := builder.AllocateInstruction().AsFcvtFromInt( + state.pop(), + op == wasm.OpcodeF32ConvertI32S || op == wasm.OpcodeF32ConvertI64S, + false, + ).Insert(builder).Return() + state.push(result) + case wasm.OpcodeF32DemoteF64: + if state.unreachable { + break + } + cvt := builder.AllocateInstruction() + cvt.AsFdemote(state.pop()) + builder.InsertInstruction(cvt) + state.push(cvt.Return()) + case wasm.OpcodeF64PromoteF32: + if state.unreachable { + break + } + cvt := builder.AllocateInstruction() + cvt.AsFpromote(state.pop()) + builder.InsertInstruction(cvt) + state.push(cvt.Return()) + + case wasm.OpcodeVecPrefix: + state.pc++ + vecOp := c.wasmFunctionBody[state.pc] + switch vecOp { + case wasm.OpcodeVecV128Const: + state.pc++ + lo := binary.LittleEndian.Uint64(c.wasmFunctionBody[state.pc:]) + state.pc += 8 + hi := binary.LittleEndian.Uint64(c.wasmFunctionBody[state.pc:]) + state.pc += 7 + if state.unreachable { + break + } + ret := builder.AllocateInstruction().AsVconst(lo, hi).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128Load: + _, offset := c.readMemArg() + if state.unreachable { + break + } + baseAddr := state.pop() + 
addr := c.memOpSetup(baseAddr, uint64(offset), 16) + load := builder.AllocateInstruction() + load.AsLoad(addr, offset, ssa.TypeV128) + builder.InsertInstruction(load) + state.push(load.Return()) + case wasm.OpcodeVecV128Load8Lane, wasm.OpcodeVecV128Load16Lane, wasm.OpcodeVecV128Load32Lane: + _, offset := c.readMemArg() + state.pc++ + if state.unreachable { + break + } + var lane ssa.VecLane + var loadOp ssa.Opcode + var opSize uint64 + switch vecOp { + case wasm.OpcodeVecV128Load8Lane: + loadOp, lane, opSize = ssa.OpcodeUload8, ssa.VecLaneI8x16, 1 + case wasm.OpcodeVecV128Load16Lane: + loadOp, lane, opSize = ssa.OpcodeUload16, ssa.VecLaneI16x8, 2 + case wasm.OpcodeVecV128Load32Lane: + loadOp, lane, opSize = ssa.OpcodeUload32, ssa.VecLaneI32x4, 4 + } + laneIndex := c.wasmFunctionBody[state.pc] + vector := state.pop() + baseAddr := state.pop() + addr := c.memOpSetup(baseAddr, uint64(offset), opSize) + load := builder.AllocateInstruction(). + AsExtLoad(loadOp, addr, offset, false). + Insert(builder).Return() + ret := builder.AllocateInstruction(). + AsInsertlane(vector, load, laneIndex, lane). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128Load64Lane: + _, offset := c.readMemArg() + state.pc++ + if state.unreachable { + break + } + laneIndex := c.wasmFunctionBody[state.pc] + vector := state.pop() + baseAddr := state.pop() + addr := c.memOpSetup(baseAddr, uint64(offset), 8) + load := builder.AllocateInstruction(). + AsLoad(addr, offset, ssa.TypeI64). + Insert(builder).Return() + ret := builder.AllocateInstruction(). + AsInsertlane(vector, load, laneIndex, ssa.VecLaneI64x2). 
+ Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecV128Load32zero, wasm.OpcodeVecV128Load64zero: + _, offset := c.readMemArg() + if state.unreachable { + break + } + + var scalarType ssa.Type + switch vecOp { + case wasm.OpcodeVecV128Load32zero: + scalarType = ssa.TypeF32 + case wasm.OpcodeVecV128Load64zero: + scalarType = ssa.TypeF64 + } + + baseAddr := state.pop() + addr := c.memOpSetup(baseAddr, uint64(offset), uint64(scalarType.Size())) + + ret := builder.AllocateInstruction(). + AsVZeroExtLoad(addr, offset, scalarType). + Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecV128Load8x8u, wasm.OpcodeVecV128Load8x8s, + wasm.OpcodeVecV128Load16x4u, wasm.OpcodeVecV128Load16x4s, + wasm.OpcodeVecV128Load32x2u, wasm.OpcodeVecV128Load32x2s: + _, offset := c.readMemArg() + if state.unreachable { + break + } + var lane ssa.VecLane + var signed bool + switch vecOp { + case wasm.OpcodeVecV128Load8x8s: + signed = true + fallthrough + case wasm.OpcodeVecV128Load8x8u: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecV128Load16x4s: + signed = true + fallthrough + case wasm.OpcodeVecV128Load16x4u: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecV128Load32x2s: + signed = true + fallthrough + case wasm.OpcodeVecV128Load32x2u: + lane = ssa.VecLaneI32x4 + } + baseAddr := state.pop() + addr := c.memOpSetup(baseAddr, uint64(offset), 8) + load := builder.AllocateInstruction(). + AsLoad(addr, offset, ssa.TypeF64). + Insert(builder).Return() + ret := builder.AllocateInstruction(). + AsWiden(load, lane, signed, true). 
+ Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128Load8Splat, wasm.OpcodeVecV128Load16Splat, + wasm.OpcodeVecV128Load32Splat, wasm.OpcodeVecV128Load64Splat: + _, offset := c.readMemArg() + if state.unreachable { + break + } + var lane ssa.VecLane + var opSize uint64 + switch vecOp { + case wasm.OpcodeVecV128Load8Splat: + lane, opSize = ssa.VecLaneI8x16, 1 + case wasm.OpcodeVecV128Load16Splat: + lane, opSize = ssa.VecLaneI16x8, 2 + case wasm.OpcodeVecV128Load32Splat: + lane, opSize = ssa.VecLaneI32x4, 4 + case wasm.OpcodeVecV128Load64Splat: + lane, opSize = ssa.VecLaneI64x2, 8 + } + baseAddr := state.pop() + addr := c.memOpSetup(baseAddr, uint64(offset), opSize) + ret := builder.AllocateInstruction(). + AsLoadSplat(addr, offset, lane). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128Store: + _, offset := c.readMemArg() + if state.unreachable { + break + } + value := state.pop() + baseAddr := state.pop() + addr := c.memOpSetup(baseAddr, uint64(offset), 16) + builder.AllocateInstruction(). + AsStore(ssa.OpcodeStore, value, addr, offset). 
+ Insert(builder) + case wasm.OpcodeVecV128Store8Lane, wasm.OpcodeVecV128Store16Lane, + wasm.OpcodeVecV128Store32Lane, wasm.OpcodeVecV128Store64Lane: + _, offset := c.readMemArg() + state.pc++ + if state.unreachable { + break + } + laneIndex := c.wasmFunctionBody[state.pc] + var storeOp ssa.Opcode + var lane ssa.VecLane + var opSize uint64 + switch vecOp { + case wasm.OpcodeVecV128Store8Lane: + storeOp, lane, opSize = ssa.OpcodeIstore8, ssa.VecLaneI8x16, 1 + case wasm.OpcodeVecV128Store16Lane: + storeOp, lane, opSize = ssa.OpcodeIstore16, ssa.VecLaneI16x8, 2 + case wasm.OpcodeVecV128Store32Lane: + storeOp, lane, opSize = ssa.OpcodeIstore32, ssa.VecLaneI32x4, 4 + case wasm.OpcodeVecV128Store64Lane: + storeOp, lane, opSize = ssa.OpcodeStore, ssa.VecLaneI64x2, 8 + } + vector := state.pop() + baseAddr := state.pop() + addr := c.memOpSetup(baseAddr, uint64(offset), opSize) + value := builder.AllocateInstruction(). + AsExtractlane(vector, laneIndex, lane, false). + Insert(builder).Return() + builder.AllocateInstruction(). + AsStore(storeOp, value, addr, offset). 
+ Insert(builder) + case wasm.OpcodeVecV128Not: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVbnot(v1).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128And: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVband(v1, v2).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128AndNot: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVbandnot(v1, v2).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128Or: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVbor(v1, v2).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128Xor: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVbxor(v1, v2).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128Bitselect: + if state.unreachable { + break + } + c := state.pop() + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVbitselect(c, v1, v2).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecV128AnyTrue: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVanyTrue(v1).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16AllTrue, wasm.OpcodeVecI16x8AllTrue, wasm.OpcodeVecI32x4AllTrue, wasm.OpcodeVecI64x2AllTrue: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16AllTrue: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8AllTrue: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4AllTrue: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2AllTrue: + lane = ssa.VecLaneI64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVallTrue(v1, lane).Insert(builder).Return() + 
state.push(ret) + case wasm.OpcodeVecI8x16BitMask, wasm.OpcodeVecI16x8BitMask, wasm.OpcodeVecI32x4BitMask, wasm.OpcodeVecI64x2BitMask: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16BitMask: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8BitMask: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4BitMask: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2BitMask: + lane = ssa.VecLaneI64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVhighBits(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16Abs, wasm.OpcodeVecI16x8Abs, wasm.OpcodeVecI32x4Abs, wasm.OpcodeVecI64x2Abs: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16Abs: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8Abs: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4Abs: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2Abs: + lane = ssa.VecLaneI64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVIabs(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16Neg, wasm.OpcodeVecI16x8Neg, wasm.OpcodeVecI32x4Neg, wasm.OpcodeVecI64x2Neg: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16Neg: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8Neg: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4Neg: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2Neg: + lane = ssa.VecLaneI64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVIneg(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16Popcnt: + if state.unreachable { + break + } + lane := ssa.VecLaneI8x16 + v1 := state.pop() + + ret := builder.AllocateInstruction().AsVIpopcnt(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16Add, wasm.OpcodeVecI16x8Add, wasm.OpcodeVecI32x4Add, wasm.OpcodeVecI64x2Add: + if state.unreachable { 
+ break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16Add: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8Add: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4Add: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2Add: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVIadd(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16AddSatS, wasm.OpcodeVecI16x8AddSatS: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16AddSatS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8AddSatS: + lane = ssa.VecLaneI16x8 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVSaddSat(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16AddSatU, wasm.OpcodeVecI16x8AddSatU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16AddSatU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8AddSatU: + lane = ssa.VecLaneI16x8 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVUaddSat(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16SubSatS, wasm.OpcodeVecI16x8SubSatS: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16SubSatS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8SubSatS: + lane = ssa.VecLaneI16x8 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVSsubSat(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16SubSatU, wasm.OpcodeVecI16x8SubSatU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16SubSatU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8SubSatU: + lane = ssa.VecLaneI16x8 + } + v2 := state.pop() + v1 := state.pop() + ret := 
builder.AllocateInstruction().AsVUsubSat(v1, v2, lane).Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecI8x16Sub, wasm.OpcodeVecI16x8Sub, wasm.OpcodeVecI32x4Sub, wasm.OpcodeVecI64x2Sub: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16Sub: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8Sub: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4Sub: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2Sub: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVIsub(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16MinS, wasm.OpcodeVecI16x8MinS, wasm.OpcodeVecI32x4MinS: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16MinS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8MinS: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4MinS: + lane = ssa.VecLaneI32x4 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVImin(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16MinU, wasm.OpcodeVecI16x8MinU, wasm.OpcodeVecI32x4MinU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16MinU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8MinU: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4MinU: + lane = ssa.VecLaneI32x4 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVUmin(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16MaxS, wasm.OpcodeVecI16x8MaxS, wasm.OpcodeVecI32x4MaxS: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16MaxS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8MaxS: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4MaxS: + lane = ssa.VecLaneI32x4 + } + v2 := state.pop() + v1 := state.pop() + 
ret := builder.AllocateInstruction().AsVImax(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16MaxU, wasm.OpcodeVecI16x8MaxU, wasm.OpcodeVecI32x4MaxU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16MaxU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8MaxU: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4MaxU: + lane = ssa.VecLaneI32x4 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVUmax(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16AvgrU, wasm.OpcodeVecI16x8AvgrU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16AvgrU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8AvgrU: + lane = ssa.VecLaneI16x8 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVAvgRound(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI16x8Mul, wasm.OpcodeVecI32x4Mul, wasm.OpcodeVecI64x2Mul: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI16x8Mul: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4Mul: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2Mul: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVImul(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI16x8Q15mulrSatS: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsSqmulRoundSat(v1, v2, ssa.VecLaneI16x8).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16Eq, wasm.OpcodeVecI16x8Eq, wasm.OpcodeVecI32x4Eq, wasm.OpcodeVecI64x2Eq: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16Eq: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8Eq: + lane = ssa.VecLaneI16x8 + case 
wasm.OpcodeVecI32x4Eq: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2Eq: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVIcmp(v1, v2, ssa.IntegerCmpCondEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16Ne, wasm.OpcodeVecI16x8Ne, wasm.OpcodeVecI32x4Ne, wasm.OpcodeVecI64x2Ne: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16Ne: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8Ne: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4Ne: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2Ne: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVIcmp(v1, v2, ssa.IntegerCmpCondNotEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16LtS, wasm.OpcodeVecI16x8LtS, wasm.OpcodeVecI32x4LtS, wasm.OpcodeVecI64x2LtS: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16LtS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8LtS: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4LtS: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2LtS: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVIcmp(v1, v2, ssa.IntegerCmpCondSignedLessThan, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16LtU, wasm.OpcodeVecI16x8LtU, wasm.OpcodeVecI32x4LtU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16LtU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8LtU: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4LtU: + lane = ssa.VecLaneI32x4 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). 
+ AsVIcmp(v1, v2, ssa.IntegerCmpCondUnsignedLessThan, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16LeS, wasm.OpcodeVecI16x8LeS, wasm.OpcodeVecI32x4LeS, wasm.OpcodeVecI64x2LeS: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16LeS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8LeS: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4LeS: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2LeS: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVIcmp(v1, v2, ssa.IntegerCmpCondSignedLessThanOrEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16LeU, wasm.OpcodeVecI16x8LeU, wasm.OpcodeVecI32x4LeU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16LeU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8LeU: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4LeU: + lane = ssa.VecLaneI32x4 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVIcmp(v1, v2, ssa.IntegerCmpCondUnsignedLessThanOrEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16GtS, wasm.OpcodeVecI16x8GtS, wasm.OpcodeVecI32x4GtS, wasm.OpcodeVecI64x2GtS: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16GtS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8GtS: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4GtS: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2GtS: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). 
+ AsVIcmp(v1, v2, ssa.IntegerCmpCondSignedGreaterThan, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16GtU, wasm.OpcodeVecI16x8GtU, wasm.OpcodeVecI32x4GtU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16GtU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8GtU: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4GtU: + lane = ssa.VecLaneI32x4 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVIcmp(v1, v2, ssa.IntegerCmpCondUnsignedGreaterThan, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16GeS, wasm.OpcodeVecI16x8GeS, wasm.OpcodeVecI32x4GeS, wasm.OpcodeVecI64x2GeS: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16GeS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8GeS: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4GeS: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2GeS: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVIcmp(v1, v2, ssa.IntegerCmpCondSignedGreaterThanOrEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16GeU, wasm.OpcodeVecI16x8GeU, wasm.OpcodeVecI32x4GeU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16GeU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8GeU: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4GeU: + lane = ssa.VecLaneI32x4 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). 
+ AsVIcmp(v1, v2, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Max, wasm.OpcodeVecF64x2Max: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Max: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Max: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVFmax(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Abs, wasm.OpcodeVecF64x2Abs: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Abs: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Abs: + lane = ssa.VecLaneF64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVFabs(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Min, wasm.OpcodeVecF64x2Min: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Min: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Min: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVFmin(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Neg, wasm.OpcodeVecF64x2Neg: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Neg: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Neg: + lane = ssa.VecLaneF64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVFneg(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Sqrt, wasm.OpcodeVecF64x2Sqrt: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Sqrt: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Sqrt: + lane = ssa.VecLaneF64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVSqrt(v1, lane).Insert(builder).Return() + 
state.push(ret) + + case wasm.OpcodeVecF32x4Add, wasm.OpcodeVecF64x2Add: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Add: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Add: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVFadd(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Sub, wasm.OpcodeVecF64x2Sub: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Sub: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Sub: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVFsub(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Mul, wasm.OpcodeVecF64x2Mul: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Mul: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Mul: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVFmul(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Div, wasm.OpcodeVecF64x2Div: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Div: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Div: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVFdiv(v1, v2, lane).Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S, wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U: + if state.unreachable { + break + } + v := state.pop() + signed := vecOp == wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S + ret := builder.AllocateInstruction().AsExtIaddPairwise(v, ssa.VecLaneI8x16, signed).Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S, 
wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U: + if state.unreachable { + break + } + v := state.pop() + signed := vecOp == wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S + ret := builder.AllocateInstruction().AsExtIaddPairwise(v, ssa.VecLaneI16x8, signed).Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecI16x8ExtMulLowI8x16S, wasm.OpcodeVecI16x8ExtMulLowI8x16U: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := c.lowerExtMul( + v1, v2, + ssa.VecLaneI8x16, ssa.VecLaneI16x8, + vecOp == wasm.OpcodeVecI16x8ExtMulLowI8x16S, true) + state.push(ret) + + case wasm.OpcodeVecI16x8ExtMulHighI8x16S, wasm.OpcodeVecI16x8ExtMulHighI8x16U: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := c.lowerExtMul( + v1, v2, + ssa.VecLaneI8x16, ssa.VecLaneI16x8, + vecOp == wasm.OpcodeVecI16x8ExtMulHighI8x16S, false) + state.push(ret) + + case wasm.OpcodeVecI32x4ExtMulLowI16x8S, wasm.OpcodeVecI32x4ExtMulLowI16x8U: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := c.lowerExtMul( + v1, v2, + ssa.VecLaneI16x8, ssa.VecLaneI32x4, + vecOp == wasm.OpcodeVecI32x4ExtMulLowI16x8S, true) + state.push(ret) + + case wasm.OpcodeVecI32x4ExtMulHighI16x8S, wasm.OpcodeVecI32x4ExtMulHighI16x8U: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := c.lowerExtMul( + v1, v2, + ssa.VecLaneI16x8, ssa.VecLaneI32x4, + vecOp == wasm.OpcodeVecI32x4ExtMulHighI16x8S, false) + state.push(ret) + case wasm.OpcodeVecI64x2ExtMulLowI32x4S, wasm.OpcodeVecI64x2ExtMulLowI32x4U: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := c.lowerExtMul( + v1, v2, + ssa.VecLaneI32x4, ssa.VecLaneI64x2, + vecOp == wasm.OpcodeVecI64x2ExtMulLowI32x4S, true) + state.push(ret) + + case wasm.OpcodeVecI64x2ExtMulHighI32x4S, wasm.OpcodeVecI64x2ExtMulHighI32x4U: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := c.lowerExtMul( + v1, v2, + 
ssa.VecLaneI32x4, ssa.VecLaneI64x2, + vecOp == wasm.OpcodeVecI64x2ExtMulHighI32x4S, false) + state.push(ret) + + case wasm.OpcodeVecI32x4DotI16x8S: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + + ret := builder.AllocateInstruction().AsWideningPairwiseDotProductS(v1, v2).Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecF32x4Eq, wasm.OpcodeVecF64x2Eq: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Eq: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Eq: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVFcmp(v1, v2, ssa.FloatCmpCondEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Ne, wasm.OpcodeVecF64x2Ne: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Ne: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Ne: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVFcmp(v1, v2, ssa.FloatCmpCondNotEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Lt, wasm.OpcodeVecF64x2Lt: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Lt: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Lt: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVFcmp(v1, v2, ssa.FloatCmpCondLessThan, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Le, wasm.OpcodeVecF64x2Le: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Le: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Le: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). 
+ AsVFcmp(v1, v2, ssa.FloatCmpCondLessThanOrEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Gt, wasm.OpcodeVecF64x2Gt: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Gt: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Gt: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVFcmp(v1, v2, ssa.FloatCmpCondGreaterThan, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Ge, wasm.OpcodeVecF64x2Ge: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Ge: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Ge: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVFcmp(v1, v2, ssa.FloatCmpCondGreaterThanOrEqual, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Ceil, wasm.OpcodeVecF64x2Ceil: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Ceil: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Ceil: + lane = ssa.VecLaneF64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVCeil(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Floor, wasm.OpcodeVecF64x2Floor: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Floor: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Floor: + lane = ssa.VecLaneF64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVFloor(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Trunc, wasm.OpcodeVecF64x2Trunc: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Trunc: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Trunc: + lane = ssa.VecLaneF64x2 + } + v1 := state.pop() + ret := 
builder.AllocateInstruction().AsVTrunc(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Nearest, wasm.OpcodeVecF64x2Nearest: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Nearest: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Nearest: + lane = ssa.VecLaneF64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsVNearest(v1, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Pmin, wasm.OpcodeVecF64x2Pmin: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Pmin: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Pmin: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVMinPseudo(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4Pmax, wasm.OpcodeVecF64x2Pmax: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecF32x4Pmax: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Pmax: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVMaxPseudo(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI32x4TruncSatF32x4S, wasm.OpcodeVecI32x4TruncSatF32x4U: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVFcvtToIntSat(v1, ssa.VecLaneF32x4, vecOp == wasm.OpcodeVecI32x4TruncSatF32x4S).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI32x4TruncSatF64x2SZero, wasm.OpcodeVecI32x4TruncSatF64x2UZero: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). 
+ AsVFcvtToIntSat(v1, ssa.VecLaneF64x2, vecOp == wasm.OpcodeVecI32x4TruncSatF64x2SZero).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4ConvertI32x4S, wasm.OpcodeVecF32x4ConvertI32x4U: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsVFcvtFromInt(v1, ssa.VecLaneF32x4, vecOp == wasm.OpcodeVecF32x4ConvertI32x4S).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF64x2ConvertLowI32x4S, wasm.OpcodeVecF64x2ConvertLowI32x4U: + if state.unreachable { + break + } + v1 := state.pop() + if runtime.GOARCH == "arm64" { + // TODO: this is weird. fix. + v1 = builder.AllocateInstruction(). + AsWiden(v1, ssa.VecLaneI32x4, vecOp == wasm.OpcodeVecF64x2ConvertLowI32x4S, true).Insert(builder).Return() + } + ret := builder.AllocateInstruction(). + AsVFcvtFromInt(v1, ssa.VecLaneF64x2, vecOp == wasm.OpcodeVecF64x2ConvertLowI32x4S). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16NarrowI16x8S, wasm.OpcodeVecI8x16NarrowI16x8U: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsNarrow(v1, v2, ssa.VecLaneI16x8, vecOp == wasm.OpcodeVecI8x16NarrowI16x8S). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI16x8NarrowI32x4S, wasm.OpcodeVecI16x8NarrowI32x4U: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsNarrow(v1, v2, ssa.VecLaneI32x4, vecOp == wasm.OpcodeVecI16x8NarrowI32x4S). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI16x8ExtendLowI8x16S, wasm.OpcodeVecI16x8ExtendLowI8x16U: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsWiden(v1, ssa.VecLaneI8x16, vecOp == wasm.OpcodeVecI16x8ExtendLowI8x16S, true). 
+ Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI16x8ExtendHighI8x16S, wasm.OpcodeVecI16x8ExtendHighI8x16U: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsWiden(v1, ssa.VecLaneI8x16, vecOp == wasm.OpcodeVecI16x8ExtendHighI8x16S, false). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI32x4ExtendLowI16x8S, wasm.OpcodeVecI32x4ExtendLowI16x8U: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsWiden(v1, ssa.VecLaneI16x8, vecOp == wasm.OpcodeVecI32x4ExtendLowI16x8S, true). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI32x4ExtendHighI16x8S, wasm.OpcodeVecI32x4ExtendHighI16x8U: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsWiden(v1, ssa.VecLaneI16x8, vecOp == wasm.OpcodeVecI32x4ExtendHighI16x8S, false). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI64x2ExtendLowI32x4S, wasm.OpcodeVecI64x2ExtendLowI32x4U: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsWiden(v1, ssa.VecLaneI32x4, vecOp == wasm.OpcodeVecI64x2ExtendLowI32x4S, true). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI64x2ExtendHighI32x4S, wasm.OpcodeVecI64x2ExtendHighI32x4U: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsWiden(v1, ssa.VecLaneI32x4, vecOp == wasm.OpcodeVecI64x2ExtendHighI32x4S, false). + Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecF64x2PromoteLowF32x4Zero: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsFvpromoteLow(v1, ssa.VecLaneF32x4). + Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecF32x4DemoteF64x2Zero: + if state.unreachable { + break + } + v1 := state.pop() + ret := builder.AllocateInstruction(). + AsFvdemote(v1, ssa.VecLaneF64x2). 
+ Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16Shl, wasm.OpcodeVecI16x8Shl, wasm.OpcodeVecI32x4Shl, wasm.OpcodeVecI64x2Shl: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16Shl: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8Shl: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4Shl: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2Shl: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVIshl(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16ShrS, wasm.OpcodeVecI16x8ShrS, wasm.OpcodeVecI32x4ShrS, wasm.OpcodeVecI64x2ShrS: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16ShrS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8ShrS: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4ShrS: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2ShrS: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVSshr(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16ShrU, wasm.OpcodeVecI16x8ShrU, wasm.OpcodeVecI32x4ShrU, wasm.OpcodeVecI64x2ShrU: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16ShrU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8ShrU: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4ShrU: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2ShrU: + lane = ssa.VecLaneI64x2 + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsVUshr(v1, v2, lane).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeVecI8x16ExtractLaneS, wasm.OpcodeVecI16x8ExtractLaneS: + state.pc++ + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16ExtractLaneS: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8ExtractLaneS: + 
lane = ssa.VecLaneI16x8 + } + v1 := state.pop() + index := c.wasmFunctionBody[state.pc] + ext := builder.AllocateInstruction().AsExtractlane(v1, index, lane, true).Insert(builder).Return() + state.push(ext) + case wasm.OpcodeVecI8x16ExtractLaneU, wasm.OpcodeVecI16x8ExtractLaneU, + wasm.OpcodeVecI32x4ExtractLane, wasm.OpcodeVecI64x2ExtractLane, + wasm.OpcodeVecF32x4ExtractLane, wasm.OpcodeVecF64x2ExtractLane: + state.pc++ // Skip the immediate value. + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16ExtractLaneU: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8ExtractLaneU: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4ExtractLane: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2ExtractLane: + lane = ssa.VecLaneI64x2 + case wasm.OpcodeVecF32x4ExtractLane: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2ExtractLane: + lane = ssa.VecLaneF64x2 + } + v1 := state.pop() + index := c.wasmFunctionBody[state.pc] + ext := builder.AllocateInstruction().AsExtractlane(v1, index, lane, false).Insert(builder).Return() + state.push(ext) + case wasm.OpcodeVecI8x16ReplaceLane, wasm.OpcodeVecI16x8ReplaceLane, + wasm.OpcodeVecI32x4ReplaceLane, wasm.OpcodeVecI64x2ReplaceLane, + wasm.OpcodeVecF32x4ReplaceLane, wasm.OpcodeVecF64x2ReplaceLane: + state.pc++ + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16ReplaceLane: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8ReplaceLane: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4ReplaceLane: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2ReplaceLane: + lane = ssa.VecLaneI64x2 + case wasm.OpcodeVecF32x4ReplaceLane: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2ReplaceLane: + lane = ssa.VecLaneF64x2 + } + v2 := state.pop() + v1 := state.pop() + index := c.wasmFunctionBody[state.pc] + ret := builder.AllocateInstruction().AsInsertlane(v1, v2, index, lane).Insert(builder).Return() + state.push(ret) + case 
wasm.OpcodeVecV128i8x16Shuffle: + state.pc++ + laneIndexes := c.wasmFunctionBody[state.pc : state.pc+16] + state.pc += 15 + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsShuffle(v1, v2, laneIndexes).Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecI8x16Swizzle: + if state.unreachable { + break + } + v2 := state.pop() + v1 := state.pop() + ret := builder.AllocateInstruction().AsSwizzle(v1, v2, ssa.VecLaneI8x16).Insert(builder).Return() + state.push(ret) + + case wasm.OpcodeVecI8x16Splat, + wasm.OpcodeVecI16x8Splat, + wasm.OpcodeVecI32x4Splat, + wasm.OpcodeVecI64x2Splat, + wasm.OpcodeVecF32x4Splat, + wasm.OpcodeVecF64x2Splat: + if state.unreachable { + break + } + var lane ssa.VecLane + switch vecOp { + case wasm.OpcodeVecI8x16Splat: + lane = ssa.VecLaneI8x16 + case wasm.OpcodeVecI16x8Splat: + lane = ssa.VecLaneI16x8 + case wasm.OpcodeVecI32x4Splat: + lane = ssa.VecLaneI32x4 + case wasm.OpcodeVecI64x2Splat: + lane = ssa.VecLaneI64x2 + case wasm.OpcodeVecF32x4Splat: + lane = ssa.VecLaneF32x4 + case wasm.OpcodeVecF64x2Splat: + lane = ssa.VecLaneF64x2 + } + v1 := state.pop() + ret := builder.AllocateInstruction().AsSplat(v1, lane).Insert(builder).Return() + state.push(ret) + + default: + panic("TODO: unsupported vector instruction: " + wasm.VectorInstructionName(vecOp)) + } + case wasm.OpcodeAtomicPrefix: + state.pc++ + atomicOp := c.wasmFunctionBody[state.pc] + switch atomicOp { + case wasm.OpcodeAtomicMemoryWait32, wasm.OpcodeAtomicMemoryWait64: + _, offset := c.readMemArg() + if state.unreachable { + break + } + + c.storeCallerModuleContext() + + var opSize uint64 + var trampoline wazevoapi.Offset + var sig *ssa.Signature + switch atomicOp { + case wasm.OpcodeAtomicMemoryWait32: + opSize = 4 + trampoline = wazevoapi.ExecutionContextOffsetMemoryWait32TrampolineAddress + sig = &c.memoryWait32Sig + case wasm.OpcodeAtomicMemoryWait64: + opSize = 8 + trampoline = 
wazevoapi.ExecutionContextOffsetMemoryWait64TrampolineAddress + sig = &c.memoryWait64Sig + } + + timeout := state.pop() + exp := state.pop() + baseAddr := state.pop() + addr := c.atomicMemOpSetup(baseAddr, uint64(offset), opSize) + + memoryWaitPtr := builder.AllocateInstruction(). + AsLoad(c.execCtxPtrValue, + trampoline.U32(), + ssa.TypeI64, + ).Insert(builder).Return() + + args := c.allocateVarLengthValues(3, c.execCtxPtrValue, timeout, exp, addr) + memoryWaitRet := builder.AllocateInstruction(). + AsCallIndirect(memoryWaitPtr, sig, args). + Insert(builder).Return() + state.push(memoryWaitRet) + case wasm.OpcodeAtomicMemoryNotify: + _, offset := c.readMemArg() + if state.unreachable { + break + } + + c.storeCallerModuleContext() + count := state.pop() + baseAddr := state.pop() + addr := c.atomicMemOpSetup(baseAddr, uint64(offset), 4) + + memoryNotifyPtr := builder.AllocateInstruction(). + AsLoad(c.execCtxPtrValue, + wazevoapi.ExecutionContextOffsetMemoryNotifyTrampolineAddress.U32(), + ssa.TypeI64, + ).Insert(builder).Return() + args := c.allocateVarLengthValues(2, c.execCtxPtrValue, count, addr) + memoryNotifyRet := builder.AllocateInstruction(). + AsCallIndirect(memoryNotifyPtr, &c.memoryNotifySig, args). 
+ Insert(builder).Return() + state.push(memoryNotifyRet) + case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI64Load, wasm.OpcodeAtomicI32Load8U, wasm.OpcodeAtomicI32Load16U, wasm.OpcodeAtomicI64Load8U, wasm.OpcodeAtomicI64Load16U, wasm.OpcodeAtomicI64Load32U: + _, offset := c.readMemArg() + if state.unreachable { + break + } + + baseAddr := state.pop() + + var size uint64 + switch atomicOp { + case wasm.OpcodeAtomicI64Load: + size = 8 + case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI64Load32U: + size = 4 + case wasm.OpcodeAtomicI32Load16U, wasm.OpcodeAtomicI64Load16U: + size = 2 + case wasm.OpcodeAtomicI32Load8U, wasm.OpcodeAtomicI64Load8U: + size = 1 + } + + var typ ssa.Type + switch atomicOp { + case wasm.OpcodeAtomicI64Load, wasm.OpcodeAtomicI64Load32U, wasm.OpcodeAtomicI64Load16U, wasm.OpcodeAtomicI64Load8U: + typ = ssa.TypeI64 + case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI32Load16U, wasm.OpcodeAtomicI32Load8U: + typ = ssa.TypeI32 + } + + addr := c.atomicMemOpSetup(baseAddr, uint64(offset), size) + res := builder.AllocateInstruction().AsAtomicLoad(addr, size, typ).Insert(builder).Return() + state.push(res) + case wasm.OpcodeAtomicI32Store, wasm.OpcodeAtomicI64Store, wasm.OpcodeAtomicI32Store8, wasm.OpcodeAtomicI32Store16, wasm.OpcodeAtomicI64Store8, wasm.OpcodeAtomicI64Store16, wasm.OpcodeAtomicI64Store32: + _, offset := c.readMemArg() + if state.unreachable { + break + } + + val := state.pop() + baseAddr := state.pop() + + var size uint64 + switch atomicOp { + case wasm.OpcodeAtomicI64Store: + size = 8 + case wasm.OpcodeAtomicI32Store, wasm.OpcodeAtomicI64Store32: + size = 4 + case wasm.OpcodeAtomicI32Store16, wasm.OpcodeAtomicI64Store16: + size = 2 + case wasm.OpcodeAtomicI32Store8, wasm.OpcodeAtomicI64Store8: + size = 1 + } + + addr := c.atomicMemOpSetup(baseAddr, uint64(offset), size) + builder.AllocateInstruction().AsAtomicStore(addr, val, size).Insert(builder) + case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI64RmwAdd, 
wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI64Rmw8AddU, wasm.OpcodeAtomicI64Rmw16AddU, wasm.OpcodeAtomicI64Rmw32AddU, + wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI64RmwSub, wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI64Rmw8SubU, wasm.OpcodeAtomicI64Rmw16SubU, wasm.OpcodeAtomicI64Rmw32SubU, + wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI64RmwAnd, wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI64Rmw8AndU, wasm.OpcodeAtomicI64Rmw16AndU, wasm.OpcodeAtomicI64Rmw32AndU, + wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI64RmwOr, wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI64Rmw8OrU, wasm.OpcodeAtomicI64Rmw16OrU, wasm.OpcodeAtomicI64Rmw32OrU, + wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI64RmwXor, wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI64Rmw8XorU, wasm.OpcodeAtomicI64Rmw16XorU, wasm.OpcodeAtomicI64Rmw32XorU, + wasm.OpcodeAtomicI32RmwXchg, wasm.OpcodeAtomicI64RmwXchg, wasm.OpcodeAtomicI32Rmw8XchgU, wasm.OpcodeAtomicI32Rmw16XchgU, wasm.OpcodeAtomicI64Rmw8XchgU, wasm.OpcodeAtomicI64Rmw16XchgU, wasm.OpcodeAtomicI64Rmw32XchgU: + _, offset := c.readMemArg() + if state.unreachable { + break + } + + val := state.pop() + baseAddr := state.pop() + + var rmwOp ssa.AtomicRmwOp + var size uint64 + switch atomicOp { + case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI64RmwAdd, wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI64Rmw8AddU, wasm.OpcodeAtomicI64Rmw16AddU, wasm.OpcodeAtomicI64Rmw32AddU: + rmwOp = ssa.AtomicRmwOpAdd + switch atomicOp { + case wasm.OpcodeAtomicI64RmwAdd: + size = 8 + case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI64Rmw32AddU: + size = 4 + case wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI64Rmw16AddU: + size = 2 + case wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI64Rmw8AddU: + size = 1 + } + case wasm.OpcodeAtomicI32RmwSub, 
wasm.OpcodeAtomicI64RmwSub, wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI64Rmw8SubU, wasm.OpcodeAtomicI64Rmw16SubU, wasm.OpcodeAtomicI64Rmw32SubU: + rmwOp = ssa.AtomicRmwOpSub + switch atomicOp { + case wasm.OpcodeAtomicI64RmwSub: + size = 8 + case wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI64Rmw32SubU: + size = 4 + case wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI64Rmw16SubU: + size = 2 + case wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI64Rmw8SubU: + size = 1 + } + case wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI64RmwAnd, wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI64Rmw8AndU, wasm.OpcodeAtomicI64Rmw16AndU, wasm.OpcodeAtomicI64Rmw32AndU: + rmwOp = ssa.AtomicRmwOpAnd + switch atomicOp { + case wasm.OpcodeAtomicI64RmwAnd: + size = 8 + case wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI64Rmw32AndU: + size = 4 + case wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI64Rmw16AndU: + size = 2 + case wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI64Rmw8AndU: + size = 1 + } + case wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI64RmwOr, wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI64Rmw8OrU, wasm.OpcodeAtomicI64Rmw16OrU, wasm.OpcodeAtomicI64Rmw32OrU: + rmwOp = ssa.AtomicRmwOpOr + switch atomicOp { + case wasm.OpcodeAtomicI64RmwOr: + size = 8 + case wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI64Rmw32OrU: + size = 4 + case wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI64Rmw16OrU: + size = 2 + case wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI64Rmw8OrU: + size = 1 + } + case wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI64RmwXor, wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI64Rmw8XorU, wasm.OpcodeAtomicI64Rmw16XorU, wasm.OpcodeAtomicI64Rmw32XorU: + rmwOp = ssa.AtomicRmwOpXor + switch atomicOp { + case wasm.OpcodeAtomicI64RmwXor: + size = 8 + case wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI64Rmw32XorU: + size = 4 + case 
wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI64Rmw16XorU: + size = 2 + case wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI64Rmw8XorU: + size = 1 + } + case wasm.OpcodeAtomicI32RmwXchg, wasm.OpcodeAtomicI64RmwXchg, wasm.OpcodeAtomicI32Rmw8XchgU, wasm.OpcodeAtomicI32Rmw16XchgU, wasm.OpcodeAtomicI64Rmw8XchgU, wasm.OpcodeAtomicI64Rmw16XchgU, wasm.OpcodeAtomicI64Rmw32XchgU: + rmwOp = ssa.AtomicRmwOpXchg + switch atomicOp { + case wasm.OpcodeAtomicI64RmwXchg: + size = 8 + case wasm.OpcodeAtomicI32RmwXchg, wasm.OpcodeAtomicI64Rmw32XchgU: + size = 4 + case wasm.OpcodeAtomicI32Rmw16XchgU, wasm.OpcodeAtomicI64Rmw16XchgU: + size = 2 + case wasm.OpcodeAtomicI32Rmw8XchgU, wasm.OpcodeAtomicI64Rmw8XchgU: + size = 1 + } + } + + addr := c.atomicMemOpSetup(baseAddr, uint64(offset), size) + res := builder.AllocateInstruction().AsAtomicRmw(rmwOp, addr, val, size).Insert(builder).Return() + state.push(res) + case wasm.OpcodeAtomicI32RmwCmpxchg, wasm.OpcodeAtomicI64RmwCmpxchg, wasm.OpcodeAtomicI32Rmw8CmpxchgU, wasm.OpcodeAtomicI32Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw8CmpxchgU, wasm.OpcodeAtomicI64Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw32CmpxchgU: + _, offset := c.readMemArg() + if state.unreachable { + break + } + + repl := state.pop() + exp := state.pop() + baseAddr := state.pop() + + var size uint64 + switch atomicOp { + case wasm.OpcodeAtomicI64RmwCmpxchg: + size = 8 + case wasm.OpcodeAtomicI32RmwCmpxchg, wasm.OpcodeAtomicI64Rmw32CmpxchgU: + size = 4 + case wasm.OpcodeAtomicI32Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw16CmpxchgU: + size = 2 + case wasm.OpcodeAtomicI32Rmw8CmpxchgU, wasm.OpcodeAtomicI64Rmw8CmpxchgU: + size = 1 + } + addr := c.atomicMemOpSetup(baseAddr, uint64(offset), size) + res := builder.AllocateInstruction().AsAtomicCas(addr, exp, repl, size).Insert(builder).Return() + state.push(res) + case wasm.OpcodeAtomicFence: + order := c.readByte() + if state.unreachable { + break + } + if c.needMemory { + builder.AllocateInstruction().AsFence(order).Insert(builder) + } + 
default: + panic("TODO: unsupported atomic instruction: " + wasm.AtomicInstructionName(atomicOp)) + } + case wasm.OpcodeRefFunc: + funcIndex := c.readI32u() + if state.unreachable { + break + } + + c.storeCallerModuleContext() + + funcIndexVal := builder.AllocateInstruction().AsIconst32(funcIndex).Insert(builder).Return() + + refFuncPtr := builder.AllocateInstruction(). + AsLoad(c.execCtxPtrValue, + wazevoapi.ExecutionContextOffsetRefFuncTrampolineAddress.U32(), + ssa.TypeI64, + ).Insert(builder).Return() + + args := c.allocateVarLengthValues(2, c.execCtxPtrValue, funcIndexVal) + refFuncRet := builder. + AllocateInstruction(). + AsCallIndirect(refFuncPtr, &c.refFuncSig, args). + Insert(builder).Return() + state.push(refFuncRet) + + case wasm.OpcodeRefNull: + c.loweringState.pc++ // skips the reference type as we treat both of them as i64(0). + if state.unreachable { + break + } + ret := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() + state.push(ret) + case wasm.OpcodeRefIsNull: + if state.unreachable { + break + } + r := state.pop() + zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder) + icmp := builder.AllocateInstruction(). + AsIcmp(r, zero.Return(), ssa.IntegerCmpCondEqual). + Insert(builder). 
+ Return() + state.push(icmp) + case wasm.OpcodeTableSet: + tableIndex := c.readI32u() + if state.unreachable { + break + } + r := state.pop() + targetOffsetInTable := state.pop() + + elementAddr := c.lowerAccessTableWithBoundsCheck(tableIndex, targetOffsetInTable) + builder.AllocateInstruction().AsStore(ssa.OpcodeStore, r, elementAddr, 0).Insert(builder) + + case wasm.OpcodeTableGet: + tableIndex := c.readI32u() + if state.unreachable { + break + } + targetOffsetInTable := state.pop() + elementAddr := c.lowerAccessTableWithBoundsCheck(tableIndex, targetOffsetInTable) + loaded := builder.AllocateInstruction().AsLoad(elementAddr, 0, ssa.TypeI64).Insert(builder).Return() + state.push(loaded) + default: + panic("TODO: unsupported in wazevo yet: " + wasm.InstructionName(op)) + } + + if wazevoapi.FrontEndLoggingEnabled { + fmt.Println("--------- Translated " + wasm.InstructionName(op) + " --------") + fmt.Println("state: " + c.loweringState.String()) + fmt.Println(c.formatBuilder()) + fmt.Println("--------------------------") + } + c.loweringState.pc++ +} + +func (c *Compiler) lowerExtMul(v1, v2 ssa.Value, from, to ssa.VecLane, signed, low bool) ssa.Value { + // TODO: The sequence `Widen; Widen; VIMul` can be substituted for a single instruction on some ISAs. + builder := c.ssaBuilder + + v1lo := builder.AllocateInstruction().AsWiden(v1, from, signed, low).Insert(builder).Return() + v2lo := builder.AllocateInstruction().AsWiden(v2, from, signed, low).Insert(builder).Return() + + return builder.AllocateInstruction().AsVImul(v1lo, v2lo, to).Insert(builder).Return() +} + +const ( + tableInstanceBaseAddressOffset = 0 + tableInstanceLenOffset = tableInstanceBaseAddressOffset + 8 +) + +func (c *Compiler) lowerAccessTableWithBoundsCheck(tableIndex uint32, elementOffsetInTable ssa.Value) (elementAddress ssa.Value) { + builder := c.ssaBuilder + + // Load the table. 
+ loadTableInstancePtr := builder.AllocateInstruction() + loadTableInstancePtr.AsLoad(c.moduleCtxPtrValue, c.offset.TableOffset(int(tableIndex)).U32(), ssa.TypeI64) + builder.InsertInstruction(loadTableInstancePtr) + tableInstancePtr := loadTableInstancePtr.Return() + + // Load the table's length. + loadTableLen := builder.AllocateInstruction() + loadTableLen.AsLoad(tableInstancePtr, tableInstanceLenOffset, ssa.TypeI32) + builder.InsertInstruction(loadTableLen) + tableLen := loadTableLen.Return() + + // Compare the length and the target, and trap if out of bounds. + checkOOB := builder.AllocateInstruction() + checkOOB.AsIcmp(elementOffsetInTable, tableLen, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual) + builder.InsertInstruction(checkOOB) + exitIfOOB := builder.AllocateInstruction() + exitIfOOB.AsExitIfTrueWithCode(c.execCtxPtrValue, checkOOB.Return(), wazevoapi.ExitCodeTableOutOfBounds) + builder.InsertInstruction(exitIfOOB) + + // Get the base address of wasm.TableInstance.References. + loadTableBaseAddress := builder.AllocateInstruction() + loadTableBaseAddress.AsLoad(tableInstancePtr, tableInstanceBaseAddressOffset, ssa.TypeI64) + builder.InsertInstruction(loadTableBaseAddress) + tableBase := loadTableBaseAddress.Return() + + // Calculate the address of the target function. First we need to multiply targetOffsetInTable by 8 (pointer size). 
+ multiplyBy8 := builder.AllocateInstruction() + three := builder.AllocateInstruction() + three.AsIconst64(3) + builder.InsertInstruction(three) + multiplyBy8.AsIshl(elementOffsetInTable, three.Return()) + builder.InsertInstruction(multiplyBy8) + targetOffsetInTableMultipliedBy8 := multiplyBy8.Return() + + // Then add the multiplied value to the base which results in the address of the target function (*wazevo.functionInstance) + calcElementAddressInTable := builder.AllocateInstruction() + calcElementAddressInTable.AsIadd(tableBase, targetOffsetInTableMultipliedBy8) + builder.InsertInstruction(calcElementAddressInTable) + return calcElementAddressInTable.Return() +} + +func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) { + builder := c.ssaBuilder + state := c.state() + + elementOffsetInTable := state.pop() + functionInstancePtrAddress := c.lowerAccessTableWithBoundsCheck(tableIndex, elementOffsetInTable) + loadFunctionInstancePtr := builder.AllocateInstruction() + loadFunctionInstancePtr.AsLoad(functionInstancePtrAddress, 0, ssa.TypeI64) + builder.InsertInstruction(loadFunctionInstancePtr) + functionInstancePtr := loadFunctionInstancePtr.Return() + + // Check if it is not the null pointer. + zero := builder.AllocateInstruction() + zero.AsIconst64(0) + builder.InsertInstruction(zero) + checkNull := builder.AllocateInstruction() + checkNull.AsIcmp(functionInstancePtr, zero.Return(), ssa.IntegerCmpCondEqual) + builder.InsertInstruction(checkNull) + exitIfNull := builder.AllocateInstruction() + exitIfNull.AsExitIfTrueWithCode(c.execCtxPtrValue, checkNull.Return(), wazevoapi.ExitCodeIndirectCallNullPointer) + builder.InsertInstruction(exitIfNull) + + // We need to do the type check. First, load the target function instance's typeID. 
+ loadTypeID := builder.AllocateInstruction() + loadTypeID.AsLoad(functionInstancePtr, wazevoapi.FunctionInstanceTypeIDOffset, ssa.TypeI32) + builder.InsertInstruction(loadTypeID) + actualTypeID := loadTypeID.Return() + + // Next, we load the expected TypeID: + loadTypeIDsBegin := builder.AllocateInstruction() + loadTypeIDsBegin.AsLoad(c.moduleCtxPtrValue, c.offset.TypeIDs1stElement.U32(), ssa.TypeI64) + builder.InsertInstruction(loadTypeIDsBegin) + typeIDsBegin := loadTypeIDsBegin.Return() + + loadExpectedTypeID := builder.AllocateInstruction() + loadExpectedTypeID.AsLoad(typeIDsBegin, uint32(typeIndex)*4 /* size of wasm.FunctionTypeID */, ssa.TypeI32) + builder.InsertInstruction(loadExpectedTypeID) + expectedTypeID := loadExpectedTypeID.Return() + + // Check if the type ID matches. + checkTypeID := builder.AllocateInstruction() + checkTypeID.AsIcmp(actualTypeID, expectedTypeID, ssa.IntegerCmpCondNotEqual) + builder.InsertInstruction(checkTypeID) + exitIfNotMatch := builder.AllocateInstruction() + exitIfNotMatch.AsExitIfTrueWithCode(c.execCtxPtrValue, checkTypeID.Return(), wazevoapi.ExitCodeIndirectCallTypeMismatch) + builder.InsertInstruction(exitIfNotMatch) + + // Now ready to call the function. Load the executable and moduleContextOpaquePtr from the function instance. 
+ loadExecutablePtr := builder.AllocateInstruction() + loadExecutablePtr.AsLoad(functionInstancePtr, wazevoapi.FunctionInstanceExecutableOffset, ssa.TypeI64) + builder.InsertInstruction(loadExecutablePtr) + executablePtr := loadExecutablePtr.Return() + loadModuleContextOpaquePtr := builder.AllocateInstruction() + loadModuleContextOpaquePtr.AsLoad(functionInstancePtr, wazevoapi.FunctionInstanceModuleContextOpaquePtrOffset, ssa.TypeI64) + builder.InsertInstruction(loadModuleContextOpaquePtr) + moduleContextOpaquePtr := loadModuleContextOpaquePtr.Return() + + typ := &c.m.TypeSection[typeIndex] + tail := len(state.values) - len(typ.Params) + vs := state.values[tail:] + state.values = state.values[:tail] + args := c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue, moduleContextOpaquePtr) + args = args.Append(builder.VarLengthPool(), vs...) + + // Before transfer the control to the callee, we have to store the current module's moduleContextPtr + // into execContext.callerModuleContextPtr in case when the callee is a Go function. + c.storeCallerModuleContext() + + call := builder.AllocateInstruction() + call.AsCallIndirect(executablePtr, c.signatures[typ], args) + builder.InsertInstruction(call) + + first, rest := call.Returns() + if first.Valid() { + state.push(first) + } + for _, v := range rest { + state.push(v) + } + + c.reloadAfterCall() +} + +// memOpSetup inserts the bounds check and calculates the address of the memory operation (loads/stores). +func (c *Compiler) memOpSetup(baseAddr ssa.Value, constOffset, operationSizeInBytes uint64) (address ssa.Value) { + address = ssa.ValueInvalid + builder := c.ssaBuilder + + baseAddrID := baseAddr.ID() + ceil := constOffset + operationSizeInBytes + if known := c.getKnownSafeBound(baseAddrID); known.valid() { + // We reuse the calculated absolute address even if the bound is not known to be safe. 
+ address = known.absoluteAddr + if ceil <= known.bound { + if !address.Valid() { + // This means that, the bound is known to be safe, but the memory base might have changed. + // So, we re-calculate the address. + memBase := c.getMemoryBaseValue(false) + extBaseAddr := builder.AllocateInstruction(). + AsUExtend(baseAddr, 32, 64). + Insert(builder). + Return() + address = builder.AllocateInstruction(). + AsIadd(memBase, extBaseAddr).Insert(builder).Return() + known.absoluteAddr = address // Update the absolute address for the subsequent memory access. + } + return + } + } + + ceilConst := builder.AllocateInstruction() + ceilConst.AsIconst64(ceil) + builder.InsertInstruction(ceilConst) + + // We calculate the offset in 64-bit space. + extBaseAddr := builder.AllocateInstruction(). + AsUExtend(baseAddr, 32, 64). + Insert(builder). + Return() + + // Note: memLen is already zero extended to 64-bit space at the load time. + memLen := c.getMemoryLenValue(false) + + // baseAddrPlusCeil = baseAddr + ceil + baseAddrPlusCeil := builder.AllocateInstruction() + baseAddrPlusCeil.AsIadd(extBaseAddr, ceilConst.Return()) + builder.InsertInstruction(baseAddrPlusCeil) + + // Check for out of bounds memory access: `memLen >= baseAddrPlusCeil`. + cmp := builder.AllocateInstruction() + cmp.AsIcmp(memLen, baseAddrPlusCeil.Return(), ssa.IntegerCmpCondUnsignedLessThan) + builder.InsertInstruction(cmp) + exitIfNZ := builder.AllocateInstruction() + exitIfNZ.AsExitIfTrueWithCode(c.execCtxPtrValue, cmp.Return(), wazevoapi.ExitCodeMemoryOutOfBounds) + builder.InsertInstruction(exitIfNZ) + + // Load the value from memBase + extBaseAddr. + if address == ssa.ValueInvalid { // Reuse the value if the memBase is already calculated at this point. + memBase := c.getMemoryBaseValue(false) + address = builder.AllocateInstruction(). 
+ AsIadd(memBase, extBaseAddr).Insert(builder).Return() + } + + // Record the bound ceil for this baseAddr is known to be safe for the subsequent memory access in the same block. + c.recordKnownSafeBound(baseAddrID, ceil, address) + return +} + +// atomicMemOpSetup inserts the bounds check and calculates the address of the memory operation (loads/stores), including +// the constant offset and performs an alignment check on the final address. +func (c *Compiler) atomicMemOpSetup(baseAddr ssa.Value, constOffset, operationSizeInBytes uint64) (address ssa.Value) { + builder := c.ssaBuilder + + addrWithoutOffset := c.memOpSetup(baseAddr, constOffset, operationSizeInBytes) + var addr ssa.Value + if constOffset == 0 { + addr = addrWithoutOffset + } else { + offset := builder.AllocateInstruction().AsIconst64(constOffset).Insert(builder).Return() + addr = builder.AllocateInstruction().AsIadd(addrWithoutOffset, offset).Insert(builder).Return() + } + + c.memAlignmentCheck(addr, operationSizeInBytes) + + return addr +} + +func (c *Compiler) memAlignmentCheck(addr ssa.Value, operationSizeInBytes uint64) { + if operationSizeInBytes == 1 { + return // No alignment restrictions when accessing a byte + } + var checkBits uint64 + switch operationSizeInBytes { + case 2: + checkBits = 0b1 + case 4: + checkBits = 0b11 + case 8: + checkBits = 0b111 + } + + builder := c.ssaBuilder + + mask := builder.AllocateInstruction().AsIconst64(checkBits).Insert(builder).Return() + masked := builder.AllocateInstruction().AsBand(addr, mask).Insert(builder).Return() + zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() + cmp := builder.AllocateInstruction().AsIcmp(masked, zero, ssa.IntegerCmpCondNotEqual).Insert(builder).Return() + builder.AllocateInstruction().AsExitIfTrueWithCode(c.execCtxPtrValue, cmp, wazevoapi.ExitCodeUnalignedAtomic).Insert(builder) +} + +func (c *Compiler) callMemmove(dst, src, size ssa.Value) { + args := c.allocateVarLengthValues(3, dst, src, size) + 
if size.Type() != ssa.TypeI64 { + panic("TODO: memmove size must be i64") + } + + builder := c.ssaBuilder + memmovePtr := builder.AllocateInstruction(). + AsLoad(c.execCtxPtrValue, + wazevoapi.ExecutionContextOffsetMemmoveAddress.U32(), + ssa.TypeI64, + ).Insert(builder).Return() + builder.AllocateInstruction().AsCallGoRuntimeMemmove(memmovePtr, &c.memmoveSig, args).Insert(builder) +} + +func (c *Compiler) reloadAfterCall() { + // Note that when these are not used in the following instructions, they will be optimized out. + // So in any ways, we define them! + + // After calling any function, memory buffer might have changed. So we need to re-define the variable. + // However, if the memory is shared, we don't need to reload the memory base and length as the base will never change. + if c.needMemory && !c.memoryShared { + c.reloadMemoryBaseLen() + } + + // Also, any mutable Global can change. + for _, index := range c.mutableGlobalVariablesIndexes { + _ = c.getWasmGlobalValue(index, true) + } +} + +func (c *Compiler) reloadMemoryBaseLen() { + _ = c.getMemoryBaseValue(true) + _ = c.getMemoryLenValue(true) + + // This function being called means that the memory base might have changed. + // Therefore, we need to clear the absolute addresses recorded in the known safe bounds + // because we cache the absolute address of the memory access per each base offset. 
+ c.resetAbsoluteAddressInSafeBounds() +} + +func (c *Compiler) setWasmGlobalValue(index wasm.Index, v ssa.Value) { + variable := c.globalVariables[index] + opaqueOffset := c.offset.GlobalInstanceOffset(index) + + builder := c.ssaBuilder + if index < c.m.ImportGlobalCount { + loadGlobalInstPtr := builder.AllocateInstruction() + loadGlobalInstPtr.AsLoad(c.moduleCtxPtrValue, uint32(opaqueOffset), ssa.TypeI64) + builder.InsertInstruction(loadGlobalInstPtr) + + store := builder.AllocateInstruction() + store.AsStore(ssa.OpcodeStore, v, loadGlobalInstPtr.Return(), uint32(0)) + builder.InsertInstruction(store) + + } else { + store := builder.AllocateInstruction() + store.AsStore(ssa.OpcodeStore, v, c.moduleCtxPtrValue, uint32(opaqueOffset)) + builder.InsertInstruction(store) + } + + // The value has changed to `v`, so we record it. + builder.DefineVariableInCurrentBB(variable, v) +} + +func (c *Compiler) getWasmGlobalValue(index wasm.Index, forceLoad bool) ssa.Value { + variable := c.globalVariables[index] + typ := c.globalVariablesTypes[index] + opaqueOffset := c.offset.GlobalInstanceOffset(index) + + builder := c.ssaBuilder + if !forceLoad { + if v := builder.FindValueInLinearPath(variable); v.Valid() { + return v + } + } + + var load *ssa.Instruction + if index < c.m.ImportGlobalCount { + loadGlobalInstPtr := builder.AllocateInstruction() + loadGlobalInstPtr.AsLoad(c.moduleCtxPtrValue, uint32(opaqueOffset), ssa.TypeI64) + builder.InsertInstruction(loadGlobalInstPtr) + load = builder.AllocateInstruction(). + AsLoad(loadGlobalInstPtr.Return(), uint32(0), typ) + } else { + load = builder.AllocateInstruction(). 
+ AsLoad(c.moduleCtxPtrValue, uint32(opaqueOffset), typ) + } + + v := load.Insert(builder).Return() + builder.DefineVariableInCurrentBB(variable, v) + return v +} + +const ( + memoryInstanceBufOffset = 0 + memoryInstanceBufSizeOffset = memoryInstanceBufOffset + 8 +) + +func (c *Compiler) getMemoryBaseValue(forceReload bool) ssa.Value { + builder := c.ssaBuilder + variable := c.memoryBaseVariable + if !forceReload { + if v := builder.FindValueInLinearPath(variable); v.Valid() { + return v + } + } + + var ret ssa.Value + if c.offset.LocalMemoryBegin < 0 { + loadMemInstPtr := builder.AllocateInstruction() + loadMemInstPtr.AsLoad(c.moduleCtxPtrValue, c.offset.ImportedMemoryBegin.U32(), ssa.TypeI64) + builder.InsertInstruction(loadMemInstPtr) + memInstPtr := loadMemInstPtr.Return() + + loadBufPtr := builder.AllocateInstruction() + loadBufPtr.AsLoad(memInstPtr, memoryInstanceBufOffset, ssa.TypeI64) + builder.InsertInstruction(loadBufPtr) + ret = loadBufPtr.Return() + } else { + load := builder.AllocateInstruction() + load.AsLoad(c.moduleCtxPtrValue, c.offset.LocalMemoryBase().U32(), ssa.TypeI64) + builder.InsertInstruction(load) + ret = load.Return() + } + + builder.DefineVariableInCurrentBB(variable, ret) + return ret +} + +func (c *Compiler) getMemoryLenValue(forceReload bool) ssa.Value { + variable := c.memoryLenVariable + builder := c.ssaBuilder + if !forceReload && !c.memoryShared { + if v := builder.FindValueInLinearPath(variable); v.Valid() { + return v + } + } + + var ret ssa.Value + if c.offset.LocalMemoryBegin < 0 { + loadMemInstPtr := builder.AllocateInstruction() + loadMemInstPtr.AsLoad(c.moduleCtxPtrValue, c.offset.ImportedMemoryBegin.U32(), ssa.TypeI64) + builder.InsertInstruction(loadMemInstPtr) + memInstPtr := loadMemInstPtr.Return() + + loadBufSizePtr := builder.AllocateInstruction() + if c.memoryShared { + sizeOffset := builder.AllocateInstruction().AsIconst64(memoryInstanceBufSizeOffset).Insert(builder).Return() + addr := 
builder.AllocateInstruction().AsIadd(memInstPtr, sizeOffset).Insert(builder).Return() + loadBufSizePtr.AsAtomicLoad(addr, 8, ssa.TypeI64) + } else { + loadBufSizePtr.AsLoad(memInstPtr, memoryInstanceBufSizeOffset, ssa.TypeI64) + } + builder.InsertInstruction(loadBufSizePtr) + + ret = loadBufSizePtr.Return() + } else { + load := builder.AllocateInstruction() + if c.memoryShared { + lenOffset := builder.AllocateInstruction().AsIconst64(c.offset.LocalMemoryLen().U64()).Insert(builder).Return() + addr := builder.AllocateInstruction().AsIadd(c.moduleCtxPtrValue, lenOffset).Insert(builder).Return() + load.AsAtomicLoad(addr, 8, ssa.TypeI64) + } else { + load.AsExtLoad(ssa.OpcodeUload32, c.moduleCtxPtrValue, c.offset.LocalMemoryLen().U32(), true) + } + builder.InsertInstruction(load) + ret = load.Return() + } + + builder.DefineVariableInCurrentBB(variable, ret) + return ret +} + +func (c *Compiler) insertIcmp(cond ssa.IntegerCmpCond) { + state, builder := c.state(), c.ssaBuilder + y, x := state.pop(), state.pop() + cmp := builder.AllocateInstruction() + cmp.AsIcmp(x, y, cond) + builder.InsertInstruction(cmp) + value := cmp.Return() + state.push(value) +} + +func (c *Compiler) insertFcmp(cond ssa.FloatCmpCond) { + state, builder := c.state(), c.ssaBuilder + y, x := state.pop(), state.pop() + cmp := builder.AllocateInstruction() + cmp.AsFcmp(x, y, cond) + builder.InsertInstruction(cmp) + value := cmp.Return() + state.push(value) +} + +// storeCallerModuleContext stores the current module's moduleContextPtr into execContext.callerModuleContextPtr. 
+func (c *Compiler) storeCallerModuleContext() { + builder := c.ssaBuilder + execCtx := c.execCtxPtrValue + store := builder.AllocateInstruction() + store.AsStore(ssa.OpcodeStore, + c.moduleCtxPtrValue, execCtx, wazevoapi.ExecutionContextOffsetCallerModuleContextPtr.U32()) + builder.InsertInstruction(store) +} + +func (c *Compiler) readByte() byte { + v := c.wasmFunctionBody[c.loweringState.pc+1] + c.loweringState.pc++ + return v +} + +func (c *Compiler) readI32u() uint32 { + v, n, err := leb128.LoadUint32(c.wasmFunctionBody[c.loweringState.pc+1:]) + if err != nil { + panic(err) // shouldn't be reached since compilation comes after validation. + } + c.loweringState.pc += int(n) + return v +} + +func (c *Compiler) readI32s() int32 { + v, n, err := leb128.LoadInt32(c.wasmFunctionBody[c.loweringState.pc+1:]) + if err != nil { + panic(err) // shouldn't be reached since compilation comes after validation. + } + c.loweringState.pc += int(n) + return v +} + +func (c *Compiler) readI64s() int64 { + v, n, err := leb128.LoadInt64(c.wasmFunctionBody[c.loweringState.pc+1:]) + if err != nil { + panic(err) // shouldn't be reached since compilation comes after validation. + } + c.loweringState.pc += int(n) + return v +} + +func (c *Compiler) readF32() float32 { + v := math.Float32frombits(binary.LittleEndian.Uint32(c.wasmFunctionBody[c.loweringState.pc+1:])) + c.loweringState.pc += 4 + return v +} + +func (c *Compiler) readF64() float64 { + v := math.Float64frombits(binary.LittleEndian.Uint64(c.wasmFunctionBody[c.loweringState.pc+1:])) + c.loweringState.pc += 8 + return v +} + +// readBlockType reads the block type from the current position of the bytecode reader. +func (c *Compiler) readBlockType() *wasm.FunctionType { + state := c.state() + + c.br.Reset(c.wasmFunctionBody[state.pc+1:]) + bt, num, err := wasm.DecodeBlockType(c.m.TypeSection, c.br, api.CoreFeaturesV2) + if err != nil { + panic(err) // shouldn't be reached since compilation comes after validation. 
+ } + state.pc += int(num) + + return bt +} + +func (c *Compiler) readMemArg() (align, offset uint32) { + state := c.state() + + align, num, err := leb128.LoadUint32(c.wasmFunctionBody[state.pc+1:]) + if err != nil { + panic(fmt.Errorf("read memory align: %v", err)) + } + + state.pc += int(num) + offset, num, err = leb128.LoadUint32(c.wasmFunctionBody[state.pc+1:]) + if err != nil { + panic(fmt.Errorf("read memory offset: %v", err)) + } + + state.pc += int(num) + return align, offset +} + +// insertJumpToBlock inserts a jump instruction to the given block in the current block. +func (c *Compiler) insertJumpToBlock(args ssa.Values, targetBlk ssa.BasicBlock) { + if targetBlk.ReturnBlock() { + if c.needListener { + c.callListenerAfter() + } + } + + builder := c.ssaBuilder + jmp := builder.AllocateInstruction() + jmp.AsJump(args, targetBlk) + builder.InsertInstruction(jmp) +} + +func (c *Compiler) insertIntegerExtend(signed bool, from, to byte) { + state := c.state() + builder := c.ssaBuilder + v := state.pop() + extend := builder.AllocateInstruction() + if signed { + extend.AsSExtend(v, from, to) + } else { + extend.AsUExtend(v, from, to) + } + builder.InsertInstruction(extend) + value := extend.Return() + state.push(value) +} + +func (c *Compiler) switchTo(originalStackLen int, targetBlk ssa.BasicBlock) { + if targetBlk.Preds() == 0 { + c.loweringState.unreachable = true + } + + // Now we should adjust the stack and start translating the continuation block. + c.loweringState.values = c.loweringState.values[:originalStackLen] + + c.ssaBuilder.SetCurrentBlock(targetBlk) + + // At this point, blocks params consist only of the Wasm-level parameters, + // (since it's added only when we are trying to resolve variable *inside* this block). + for i := 0; i < targetBlk.Params(); i++ { + value := targetBlk.Param(i) + c.loweringState.push(value) + } +} + +// results returns the number of results of the current function. 
+func (c *Compiler) results() int { + return len(c.wasmFunctionTyp.Results) +} + +func (c *Compiler) lowerBrTable(labels []uint32, index ssa.Value) { + state := c.state() + builder := c.ssaBuilder + + f := state.ctrlPeekAt(int(labels[0])) + var numArgs int + if f.isLoop() { + numArgs = len(f.blockType.Params) + } else { + numArgs = len(f.blockType.Results) + } + + targets := make([]ssa.BasicBlock, len(labels)) + + // We need trampoline blocks since depending on the target block structure, we might end up inserting moves before jumps, + // which cannot be done with br_table. Instead, we can do such per-block moves in the trampoline blocks. + // At the linking phase (very end of the backend), we can remove the unnecessary jumps, and therefore no runtime overhead. + currentBlk := builder.CurrentBlock() + for i, l := range labels { + // Args are always on the top of the stack. Note that we should not share the args slice + // among the jump instructions since the args are modified during passes (e.g. redundant phi elimination). + args := c.nPeekDup(numArgs) + targetBlk, _ := state.brTargetArgNumFor(l) + trampoline := builder.AllocateBasicBlock() + builder.SetCurrentBlock(trampoline) + c.insertJumpToBlock(args, targetBlk) + targets[i] = trampoline + } + builder.SetCurrentBlock(currentBlk) + + // If the target block has no arguments, we can just jump to the target block. 
+ brTable := builder.AllocateInstruction() + brTable.AsBrTable(index, targets) + builder.InsertInstruction(brTable) + + for _, trampoline := range targets { + builder.Seal(trampoline) + } +} + +func (l *loweringState) brTargetArgNumFor(labelIndex uint32) (targetBlk ssa.BasicBlock, argNum int) { + targetFrame := l.ctrlPeekAt(int(labelIndex)) + if targetFrame.isLoop() { + targetBlk, argNum = targetFrame.blk, len(targetFrame.blockType.Params) + } else { + targetBlk, argNum = targetFrame.followingBlock, len(targetFrame.blockType.Results) + } + return +} + +func (c *Compiler) callListenerBefore() { + c.storeCallerModuleContext() + + builder := c.ssaBuilder + beforeListeners1stElement := builder.AllocateInstruction(). + AsLoad(c.moduleCtxPtrValue, + c.offset.BeforeListenerTrampolines1stElement.U32(), + ssa.TypeI64, + ).Insert(builder).Return() + + beforeListenerPtr := builder.AllocateInstruction(). + AsLoad(beforeListeners1stElement, uint32(c.wasmFunctionTypeIndex)*8 /* 8 bytes per index */, ssa.TypeI64).Insert(builder).Return() + + entry := builder.EntryBlock() + ps := entry.Params() + + args := c.allocateVarLengthValues(ps, c.execCtxPtrValue, + builder.AllocateInstruction().AsIconst32(c.wasmLocalFunctionIndex).Insert(builder).Return()) + for i := 2; i < ps; i++ { + args = args.Append(builder.VarLengthPool(), entry.Param(i)) + } + + beforeSig := c.listenerSignatures[c.wasmFunctionTyp][0] + builder.AllocateInstruction(). + AsCallIndirect(beforeListenerPtr, beforeSig, args). + Insert(builder) +} + +func (c *Compiler) callListenerAfter() { + c.storeCallerModuleContext() + + builder := c.ssaBuilder + afterListeners1stElement := builder.AllocateInstruction(). + AsLoad(c.moduleCtxPtrValue, + c.offset.AfterListenerTrampolines1stElement.U32(), + ssa.TypeI64, + ).Insert(builder).Return() + + afterListenerPtr := builder.AllocateInstruction(). + AsLoad(afterListeners1stElement, + uint32(c.wasmFunctionTypeIndex)*8 /* 8 bytes per index */, ssa.TypeI64). + Insert(builder). 
+ Return() + + afterSig := c.listenerSignatures[c.wasmFunctionTyp][1] + args := c.allocateVarLengthValues( + c.results()+2, + c.execCtxPtrValue, + builder.AllocateInstruction().AsIconst32(c.wasmLocalFunctionIndex).Insert(builder).Return(), + ) + + l := c.state() + tail := len(l.values) + args = args.Append(c.ssaBuilder.VarLengthPool(), l.values[tail-c.results():tail]...) + builder.AllocateInstruction(). + AsCallIndirect(afterListenerPtr, afterSig, args). + Insert(builder) +} + +const ( + elementOrDataInstanceLenOffset = 8 + elementOrDataInstanceSize = 24 +) + +// dropInstance inserts instructions to drop the element/data instance specified by the given index. +func (c *Compiler) dropDataOrElementInstance(index uint32, firstItemOffset wazevoapi.Offset) { + builder := c.ssaBuilder + instPtr := c.dataOrElementInstanceAddr(index, firstItemOffset) + + zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() + + // Clear the instance. + builder.AllocateInstruction().AsStore(ssa.OpcodeStore, zero, instPtr, 0).Insert(builder) + builder.AllocateInstruction().AsStore(ssa.OpcodeStore, zero, instPtr, elementOrDataInstanceLenOffset).Insert(builder) + builder.AllocateInstruction().AsStore(ssa.OpcodeStore, zero, instPtr, elementOrDataInstanceLenOffset+8).Insert(builder) +} + +func (c *Compiler) dataOrElementInstanceAddr(index uint32, firstItemOffset wazevoapi.Offset) ssa.Value { + builder := c.ssaBuilder + + _1stItemPtr := builder. + AllocateInstruction(). + AsLoad(c.moduleCtxPtrValue, firstItemOffset.U32(), ssa.TypeI64). + Insert(builder).Return() + + // Each data/element instance is a slice, so we need to multiply index by 16 to get the offset of the target instance. + index = index * elementOrDataInstanceSize + indexExt := builder.AllocateInstruction().AsIconst64(uint64(index)).Insert(builder).Return() + // Then, add the offset to the address of the instance. 
+ instPtr := builder.AllocateInstruction().AsIadd(_1stItemPtr, indexExt).Insert(builder).Return() + return instPtr +} + +func (c *Compiler) boundsCheckInDataOrElementInstance(instPtr, offsetInInstance, copySize ssa.Value, exitCode wazevoapi.ExitCode) { + builder := c.ssaBuilder + dataInstLen := builder.AllocateInstruction(). + AsLoad(instPtr, elementOrDataInstanceLenOffset, ssa.TypeI64). + Insert(builder).Return() + ceil := builder.AllocateInstruction().AsIadd(offsetInInstance, copySize).Insert(builder).Return() + cmp := builder.AllocateInstruction(). + AsIcmp(dataInstLen, ceil, ssa.IntegerCmpCondUnsignedLessThan). + Insert(builder). + Return() + builder.AllocateInstruction(). + AsExitIfTrueWithCode(c.execCtxPtrValue, cmp, exitCode). + Insert(builder) +} + +func (c *Compiler) boundsCheckInTable(tableIndex uint32, offset, size ssa.Value) (tableInstancePtr ssa.Value) { + builder := c.ssaBuilder + dstCeil := builder.AllocateInstruction().AsIadd(offset, size).Insert(builder).Return() + + // Load the table. + tableInstancePtr = builder.AllocateInstruction(). + AsLoad(c.moduleCtxPtrValue, c.offset.TableOffset(int(tableIndex)).U32(), ssa.TypeI64). + Insert(builder).Return() + + // Load the table's length. + tableLen := builder.AllocateInstruction(). + AsLoad(tableInstancePtr, tableInstanceLenOffset, ssa.TypeI32).Insert(builder).Return() + tableLenExt := builder.AllocateInstruction().AsUExtend(tableLen, 32, 64).Insert(builder).Return() + + // Compare the length and the target, and trap if out of bounds. 
+ checkOOB := builder.AllocateInstruction() + checkOOB.AsIcmp(tableLenExt, dstCeil, ssa.IntegerCmpCondUnsignedLessThan) + builder.InsertInstruction(checkOOB) + exitIfOOB := builder.AllocateInstruction() + exitIfOOB.AsExitIfTrueWithCode(c.execCtxPtrValue, checkOOB.Return(), wazevoapi.ExitCodeTableOutOfBounds) + builder.InsertInstruction(exitIfOOB) + return +} + +func (c *Compiler) loadTableBaseAddr(tableInstancePtr ssa.Value) ssa.Value { + builder := c.ssaBuilder + loadTableBaseAddress := builder. + AllocateInstruction(). + AsLoad(tableInstancePtr, tableInstanceBaseAddressOffset, ssa.TypeI64). + Insert(builder) + return loadTableBaseAddress.Return() +} + +func (c *Compiler) boundsCheckInMemory(memLen, offset, size ssa.Value) { + builder := c.ssaBuilder + ceil := builder.AllocateInstruction().AsIadd(offset, size).Insert(builder).Return() + cmp := builder.AllocateInstruction(). + AsIcmp(memLen, ceil, ssa.IntegerCmpCondUnsignedLessThan). + Insert(builder). + Return() + builder.AllocateInstruction(). + AsExitIfTrueWithCode(c.execCtxPtrValue, cmp, wazevoapi.ExitCodeMemoryOutOfBounds). 
+ Insert(builder) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/misc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/misc.go new file mode 100644 index 000000000..2db2b892c --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/misc.go @@ -0,0 +1,10 @@ +package frontend + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func FunctionIndexToFuncRef(idx wasm.Index) ssa.FuncRef { + return ssa.FuncRef(idx) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go new file mode 100644 index 000000000..1296706f5 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go @@ -0,0 +1,15 @@ +//go:build go1.21 + +package frontend + +import ( + "slices" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +func sortSSAValueIDs(IDs []ssa.ValueID) { + slices.SortFunc(IDs, func(i, j ssa.ValueID) int { + return int(i) - int(j) + }) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id_old.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id_old.go new file mode 100644 index 000000000..2e786a160 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id_old.go @@ -0,0 +1,17 @@ +//go:build !go1.21 + +// TODO: delete after the floor Go version is 1.21 + +package frontend + +import ( + "sort" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +) + +func sortSSAValueIDs(IDs []ssa.ValueID) { + sort.SliceStable(IDs, func(i, j int) bool { + return int(IDs[i]) < int(IDs[j]) + }) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go 
b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go new file mode 100644 index 000000000..8da7347a9 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go @@ -0,0 +1,82 @@ +package wazevo + +import ( + "encoding/binary" + "reflect" + "unsafe" + + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionListener) moduleContextOpaque { + size := len(m.CodeSection)*16 + 32 + ret := newAlignedOpaque(size) + + binary.LittleEndian.PutUint64(ret[0:], uint64(uintptr(unsafe.Pointer(m)))) + + if len(listeners) > 0 { + sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&listeners)) + binary.LittleEndian.PutUint64(ret[8:], uint64(sliceHeader.Data)) + binary.LittleEndian.PutUint64(ret[16:], uint64(sliceHeader.Len)) + binary.LittleEndian.PutUint64(ret[24:], uint64(sliceHeader.Cap)) + } + + offset := 32 + for i := range m.CodeSection { + goFn := m.CodeSection[i].GoFunc + writeIface(goFn, ret[offset:]) + offset += 16 + } + return ret +} + +func hostModuleFromOpaque(opaqueBegin uintptr) *wasm.Module { + var opaqueViewOverSlice []byte + sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice)) + sh.Data = opaqueBegin + sh.Len = 32 + sh.Cap = 32 + return *(**wasm.Module)(unsafe.Pointer(&opaqueViewOverSlice[0])) +} + +func hostModuleListenersSliceFromOpaque(opaqueBegin uintptr) []experimental.FunctionListener { + var opaqueViewOverSlice []byte + sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice)) + sh.Data = opaqueBegin + sh.Len = 32 + sh.Cap = 32 + + b := binary.LittleEndian.Uint64(opaqueViewOverSlice[8:]) + l := binary.LittleEndian.Uint64(opaqueViewOverSlice[16:]) + c := binary.LittleEndian.Uint64(opaqueViewOverSlice[24:]) + var ret []experimental.FunctionListener + sh = (*reflect.SliceHeader)(unsafe.Pointer(&ret)) + sh.Data = uintptr(b) + setSliceLimits(sh, uintptr(l), 
uintptr(c)) + return ret +} + +func hostModuleGoFuncFromOpaque[T any](index int, opaqueBegin uintptr) T { + offset := uintptr(index*16) + 32 + ptr := opaqueBegin + offset + + var opaqueViewOverFunction []byte + sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverFunction)) + sh.Data = ptr + sh.Len = 16 + sh.Cap = 16 + return readIface(opaqueViewOverFunction).(T) +} + +func writeIface(goFn interface{}, buf []byte) { + goFnIface := *(*[2]uint64)(unsafe.Pointer(&goFn)) + binary.LittleEndian.PutUint64(buf, goFnIface[0]) + binary.LittleEndian.PutUint64(buf[8:], goFnIface[1]) +} + +func readIface(buf []byte) interface{} { + b := binary.LittleEndian.Uint64(buf) + s := binary.LittleEndian.Uint64(buf[8:]) + return *(*interface{})(unsafe.Pointer(&[2]uint64{b, s})) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_amd64.go new file mode 100644 index 000000000..da27cc108 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_amd64.go @@ -0,0 +1,30 @@ +//go:build amd64 + +package wazevo + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64" +) + +func newMachine() backend.Machine { + return amd64.NewBackend() +} + +// unwindStack is a function to unwind the stack, and appends return addresses to `returnAddresses` slice. +// The implementation must be aligned with the ABI/Calling convention. +func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr { + return amd64.UnwindStack(sp, fp, top, returnAddresses) +} + +// goCallStackView is a function to get a view of the stack before a Go call, which +// is the view of the stack allocated in CompileGoFunctionTrampoline. 
+func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { + return amd64.GoCallStackView(stackPointerBeforeGoCall) +} + +// adjustClonedStack is a function to adjust the stack after it is grown. +// More precisely, absolute addresses (frame pointers) in the stack must be adjusted. +func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) { + amd64.AdjustClonedStack(oldsp, oldTop, sp, fp, top) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_arm64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_arm64.go new file mode 100644 index 000000000..e7a846548 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_arm64.go @@ -0,0 +1,32 @@ +//go:build arm64 + +package wazevo + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64" +) + +func newMachine() backend.Machine { + return arm64.NewBackend() +} + +// unwindStack is a function to unwind the stack, and appends return addresses to `returnAddresses` slice. +// The implementation must be aligned with the ABI/Calling convention. +func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr { + return arm64.UnwindStack(sp, fp, top, returnAddresses) +} + +// goCallStackView is a function to get a view of the stack before a Go call, which +// is the view of the stack allocated in CompileGoFunctionTrampoline. +func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { + return arm64.GoCallStackView(stackPointerBeforeGoCall) +} + +// adjustClonedStack is a function to adjust the stack after it is grown. +// More precisely, absolute addresses (frame pointers) in the stack must be adjusted. +func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) { + // TODO: currently, the frame pointers are not used, and saved old sps are relative to the current stack pointer, + // so no need to adjustment on arm64. 
However, when we make it absolute, which in my opinion is better perf-wise + // at the expense of slightly costly stack growth, we need to adjust the pushed frame pointers. +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_other.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_other.go new file mode 100644 index 000000000..c5afc6314 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_other.go @@ -0,0 +1,29 @@ +//go:build !(amd64 || arm64) + +package wazevo + +import ( + "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" +) + +func newMachine() backend.Machine { + panic("unsupported architecture") +} + +// unwindStack is a function to unwind the stack, and appends return addresses to `returnAddresses` slice. +// The implementation must be aligned with the ABI/Calling convention. +func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr { + panic("unsupported architecture") +} + +// goCallStackView is a function to get a view of the stack before a Go call, which +// is the view of the stack allocated in CompileGoFunctionTrampoline. +func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { + panic("unsupported architecture") +} + +// adjustClonedStack is a function to adjust the stack after it is grown. +// More precisely, absolute addresses (frame pointers) in the stack must be adjusted. 
+func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) { + panic("unsupported architecture") +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/memmove.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/memmove.go new file mode 100644 index 000000000..889922107 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/memmove.go @@ -0,0 +1,11 @@ +package wazevo + +import ( + "reflect" + "unsafe" +) + +//go:linkname memmove runtime.memmove +func memmove(_, _ unsafe.Pointer, _ uintptr) + +var memmovPtr = reflect.ValueOf(memmove).Pointer() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go new file mode 100644 index 000000000..ba8f546c0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go @@ -0,0 +1,344 @@ +package wazevo + +import ( + "encoding/binary" + "unsafe" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" + "github.com/tetratelabs/wazero/internal/wasm" + "github.com/tetratelabs/wazero/internal/wasmruntime" +) + +type ( + // moduleEngine implements wasm.ModuleEngine. + moduleEngine struct { + // opaquePtr equals &opaque[0]. + opaquePtr *byte + parent *compiledModule + module *wasm.ModuleInstance + opaque moduleContextOpaque + localFunctionInstances []*functionInstance + importedFunctions []importedFunction + listeners []experimental.FunctionListener + } + + functionInstance struct { + executable *byte + moduleContextOpaquePtr *byte + typeID wasm.FunctionTypeID + indexInModule wasm.Index + } + + importedFunction struct { + me *moduleEngine + indexInModule wasm.Index + } + + // moduleContextOpaque is the opaque byte slice of Module instance specific contents whose size + // is only Wasm-compile-time known, hence dynamic. 
Its contents are basically the pointers to the module instance, + // specific objects as well as functions. This is sometimes called "VMContext" in other Wasm runtimes. + // + // Internally, the buffer is structured as follows: + // + // type moduleContextOpaque struct { + // moduleInstance *wasm.ModuleInstance + // localMemoryBufferPtr *byte (optional) + // localMemoryLength uint64 (optional) + // importedMemoryInstance *wasm.MemoryInstance (optional) + // importedMemoryOwnerOpaqueCtx *byte (optional) + // importedFunctions [# of importedFunctions]functionInstance + // importedGlobals []ImportedGlobal (optional) + // localGlobals []Global (optional) + // typeIDsBegin &wasm.ModuleInstance.TypeIDs[0] (optional) + // tables []*wasm.TableInstance (optional) + // beforeListenerTrampolines1stElement **byte (optional) + // afterListenerTrampolines1stElement **byte (optional) + // dataInstances1stElement []wasm.DataInstance (optional) + // elementInstances1stElement []wasm.ElementInstance (optional) + // } + // + // type ImportedGlobal struct { + // *Global + // _ uint64 // padding + // } + // + // type Global struct { + // Val, ValHi uint64 + // } + // + // See wazevoapi.NewModuleContextOffsetData for the details of the offsets. + // + // Note that for host modules, the structure is entirely different. See buildHostModuleOpaque. + moduleContextOpaque []byte +) + +func newAlignedOpaque(size int) moduleContextOpaque { + // Check if the size is a multiple of 16. + if size%16 != 0 { + panic("size must be a multiple of 16") + } + buf := make([]byte, size+16) + // Align the buffer to 16 bytes. 
+ rem := uintptr(unsafe.Pointer(&buf[0])) % 16 + buf = buf[16-rem:] + return buf +} + +func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) { + s := uint64(len(mem.Buffer)) + var b uint64 + if len(mem.Buffer) > 0 { + b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0]))) + } + binary.LittleEndian.PutUint64(opaque[offset:], b) + binary.LittleEndian.PutUint64(opaque[offset+8:], s) +} + +func (m *moduleEngine) setupOpaque() { + inst := m.module + offsets := &m.parent.offsets + opaque := m.opaque + + binary.LittleEndian.PutUint64(opaque[offsets.ModuleInstanceOffset:], + uint64(uintptr(unsafe.Pointer(m.module))), + ) + + if lm := offsets.LocalMemoryBegin; lm >= 0 { + putLocalMemory(opaque, lm, inst.MemoryInstance) + } + + // Note: imported memory is resolved in ResolveImportedFunction. + + // Note: imported functions are resolved in ResolveImportedFunction. + + if globalOffset := offsets.GlobalsBegin; globalOffset >= 0 { + for i, g := range inst.Globals { + if i < int(inst.Source.ImportGlobalCount) { + importedME := g.Me.(*moduleEngine) + offset := importedME.parent.offsets.GlobalInstanceOffset(g.Index) + importedMEOpaque := importedME.opaque + binary.LittleEndian.PutUint64(opaque[globalOffset:], + uint64(uintptr(unsafe.Pointer(&importedMEOpaque[offset])))) + } else { + binary.LittleEndian.PutUint64(opaque[globalOffset:], g.Val) + binary.LittleEndian.PutUint64(opaque[globalOffset+8:], g.ValHi) + } + globalOffset += 16 + } + } + + if tableOffset := offsets.TablesBegin; tableOffset >= 0 { + // First we write the first element's address of typeIDs. + if len(inst.TypeIDs) > 0 { + binary.LittleEndian.PutUint64(opaque[offsets.TypeIDs1stElement:], uint64(uintptr(unsafe.Pointer(&inst.TypeIDs[0])))) + } + + // Then we write the table addresses. 
+ for _, table := range inst.Tables { + binary.LittleEndian.PutUint64(opaque[tableOffset:], uint64(uintptr(unsafe.Pointer(table)))) + tableOffset += 8 + } + } + + if beforeListenerOffset := offsets.BeforeListenerTrampolines1stElement; beforeListenerOffset >= 0 { + binary.LittleEndian.PutUint64(opaque[beforeListenerOffset:], uint64(uintptr(unsafe.Pointer(&m.parent.listenerBeforeTrampolines[0])))) + } + if afterListenerOffset := offsets.AfterListenerTrampolines1stElement; afterListenerOffset >= 0 { + binary.LittleEndian.PutUint64(opaque[afterListenerOffset:], uint64(uintptr(unsafe.Pointer(&m.parent.listenerAfterTrampolines[0])))) + } + if len(inst.DataInstances) > 0 { + binary.LittleEndian.PutUint64(opaque[offsets.DataInstances1stElement:], uint64(uintptr(unsafe.Pointer(&inst.DataInstances[0])))) + } + if len(inst.ElementInstances) > 0 { + binary.LittleEndian.PutUint64(opaque[offsets.ElementInstances1stElement:], uint64(uintptr(unsafe.Pointer(&inst.ElementInstances[0])))) + } +} + +// NewFunction implements wasm.ModuleEngine. 
+func (m *moduleEngine) NewFunction(index wasm.Index) api.Function { + if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { + panic("When PrintMachineCodeHexPerFunctionDisassemblable enabled, functions must not be called") + } + + localIndex := index + if importedFnCount := m.module.Source.ImportFunctionCount; index < importedFnCount { + imported := &m.importedFunctions[index] + return imported.me.NewFunction(imported.indexInModule) + } else { + localIndex -= importedFnCount + } + + src := m.module.Source + typIndex := src.FunctionSection[localIndex] + typ := src.TypeSection[typIndex] + sizeOfParamResultSlice := typ.ResultNumInUint64 + if ps := typ.ParamNumInUint64; ps > sizeOfParamResultSlice { + sizeOfParamResultSlice = ps + } + p := m.parent + offset := p.functionOffsets[localIndex] + + ce := &callEngine{ + indexInModule: index, + executable: &p.executable[offset], + parent: m, + preambleExecutable: &m.parent.entryPreambles[typIndex][0], + sizeOfParamResultSlice: sizeOfParamResultSlice, + requiredParams: typ.ParamNumInUint64, + numberOfResults: typ.ResultNumInUint64, + } + + ce.execCtx.memoryGrowTrampolineAddress = &m.parent.sharedFunctions.memoryGrowExecutable[0] + ce.execCtx.stackGrowCallTrampolineAddress = &m.parent.sharedFunctions.stackGrowExecutable[0] + ce.execCtx.checkModuleExitCodeTrampolineAddress = &m.parent.sharedFunctions.checkModuleExitCode[0] + ce.execCtx.tableGrowTrampolineAddress = &m.parent.sharedFunctions.tableGrowExecutable[0] + ce.execCtx.refFuncTrampolineAddress = &m.parent.sharedFunctions.refFuncExecutable[0] + ce.execCtx.memoryWait32TrampolineAddress = &m.parent.sharedFunctions.memoryWait32Executable[0] + ce.execCtx.memoryWait64TrampolineAddress = &m.parent.sharedFunctions.memoryWait64Executable[0] + ce.execCtx.memoryNotifyTrampolineAddress = &m.parent.sharedFunctions.memoryNotifyExecutable[0] + ce.execCtx.memmoveAddress = memmovPtr + ce.init() + return ce +} + +// GetGlobalValue implements the same method as documented on 
wasm.ModuleEngine. +func (m *moduleEngine) GetGlobalValue(i wasm.Index) (lo, hi uint64) { + offset := m.parent.offsets.GlobalInstanceOffset(i) + buf := m.opaque[offset:] + if i < m.module.Source.ImportGlobalCount { + panic("GetGlobalValue should not be called for imported globals") + } + return binary.LittleEndian.Uint64(buf), binary.LittleEndian.Uint64(buf[8:]) +} + +// SetGlobalValue implements the same method as documented on wasm.ModuleEngine. +func (m *moduleEngine) SetGlobalValue(i wasm.Index, lo, hi uint64) { + offset := m.parent.offsets.GlobalInstanceOffset(i) + buf := m.opaque[offset:] + if i < m.module.Source.ImportGlobalCount { + panic("GetGlobalValue should not be called for imported globals") + } + binary.LittleEndian.PutUint64(buf, lo) + binary.LittleEndian.PutUint64(buf[8:], hi) +} + +// OwnsGlobals implements the same method as documented on wasm.ModuleEngine. +func (m *moduleEngine) OwnsGlobals() bool { return true } + +// ResolveImportedFunction implements wasm.ModuleEngine. +func (m *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) { + executableOffset, moduleCtxOffset, typeIDOffset := m.parent.offsets.ImportedFunctionOffset(index) + importedME := importedModuleEngine.(*moduleEngine) + + if int(indexInImportedModule) >= len(importedME.importedFunctions) { + indexInImportedModule -= wasm.Index(len(importedME.importedFunctions)) + } else { + imported := &importedME.importedFunctions[indexInImportedModule] + m.ResolveImportedFunction(index, imported.indexInModule, imported.me) + return // Recursively resolve the imported function. + } + + offset := importedME.parent.functionOffsets[indexInImportedModule] + typeID := getTypeIDOf(indexInImportedModule, importedME.module) + executable := &importedME.parent.executable[offset] + // Write functionInstance. 
+ binary.LittleEndian.PutUint64(m.opaque[executableOffset:], uint64(uintptr(unsafe.Pointer(executable)))) + binary.LittleEndian.PutUint64(m.opaque[moduleCtxOffset:], uint64(uintptr(unsafe.Pointer(importedME.opaquePtr)))) + binary.LittleEndian.PutUint64(m.opaque[typeIDOffset:], uint64(typeID)) + + // Write importedFunction so that it can be used by NewFunction. + m.importedFunctions[index] = importedFunction{me: importedME, indexInModule: indexInImportedModule} +} + +func getTypeIDOf(funcIndex wasm.Index, m *wasm.ModuleInstance) wasm.FunctionTypeID { + source := m.Source + + var typeIndex wasm.Index + if funcIndex >= source.ImportFunctionCount { + funcIndex -= source.ImportFunctionCount + typeIndex = source.FunctionSection[funcIndex] + } else { + var cnt wasm.Index + for i := range source.ImportSection { + if source.ImportSection[i].Type == wasm.ExternTypeFunc { + if cnt == funcIndex { + typeIndex = source.ImportSection[i].DescFunc + break + } + cnt++ + } + } + } + return m.TypeIDs[typeIndex] +} + +// ResolveImportedMemory implements wasm.ModuleEngine. +func (m *moduleEngine) ResolveImportedMemory(importedModuleEngine wasm.ModuleEngine) { + importedME := importedModuleEngine.(*moduleEngine) + inst := importedME.module + + var memInstPtr uint64 + var memOwnerOpaquePtr uint64 + if offs := importedME.parent.offsets; offs.ImportedMemoryBegin >= 0 { + offset := offs.ImportedMemoryBegin + memInstPtr = binary.LittleEndian.Uint64(importedME.opaque[offset:]) + memOwnerOpaquePtr = binary.LittleEndian.Uint64(importedME.opaque[offset+8:]) + } else { + memInstPtr = uint64(uintptr(unsafe.Pointer(inst.MemoryInstance))) + memOwnerOpaquePtr = uint64(uintptr(unsafe.Pointer(importedME.opaquePtr))) + } + offset := m.parent.offsets.ImportedMemoryBegin + binary.LittleEndian.PutUint64(m.opaque[offset:], memInstPtr) + binary.LittleEndian.PutUint64(m.opaque[offset+8:], memOwnerOpaquePtr) +} + +// DoneInstantiation implements wasm.ModuleEngine. 
+func (m *moduleEngine) DoneInstantiation() { + if !m.module.Source.IsHostModule { + m.setupOpaque() + } +} + +// FunctionInstanceReference implements wasm.ModuleEngine. +func (m *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference { + if funcIndex < m.module.Source.ImportFunctionCount { + begin, _, _ := m.parent.offsets.ImportedFunctionOffset(funcIndex) + return uintptr(unsafe.Pointer(&m.opaque[begin])) + } + localIndex := funcIndex - m.module.Source.ImportFunctionCount + p := m.parent + executable := &p.executable[p.functionOffsets[localIndex]] + typeID := m.module.TypeIDs[m.module.Source.FunctionSection[localIndex]] + + lf := &functionInstance{ + executable: executable, + moduleContextOpaquePtr: m.opaquePtr, + typeID: typeID, + indexInModule: funcIndex, + } + m.localFunctionInstances = append(m.localFunctionInstances, lf) + return uintptr(unsafe.Pointer(lf)) +} + +// LookupFunction implements wasm.ModuleEngine. +func (m *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) { + if tableOffset >= uint32(len(t.References)) || t.Type != wasm.RefTypeFuncref { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + rawPtr := t.References[tableOffset] + if rawPtr == 0 { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + + tf := wazevoapi.PtrFromUintptr[functionInstance](rawPtr) + if tf.typeID != typeId { + panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) + } + return moduleInstanceFromOpaquePtr(tf.moduleContextOpaquePtr), tf.indexInModule +} + +func moduleInstanceFromOpaquePtr(ptr *byte) *wasm.ModuleInstance { + return *(**wasm.ModuleInstance)(unsafe.Pointer(ptr)) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go new file mode 100644 index 000000000..6a03fc65c --- /dev/null +++ 
b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go @@ -0,0 +1,11 @@ +//go:build !tinygo + +package wazevo + +import "reflect" + +// setSliceLimits sets both Cap and Len for the given reflected slice. +func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) { + s.Len = int(l) + s.Cap = int(c) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go new file mode 100644 index 000000000..eda3e706a --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go @@ -0,0 +1,11 @@ +//go:build tinygo + +package wazevo + +import "reflect" + +// setSliceLimits sets both Cap and Len for the given reflected slice. +func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) { + s.Len = l + s.Cap = c +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go new file mode 100644 index 000000000..10b6b4b62 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go @@ -0,0 +1,407 @@ +package ssa + +import ( + "fmt" + "strconv" + "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// BasicBlock represents the Basic Block of an SSA function. +// Each BasicBlock always ends with branching instructions (e.g. Branch, Return, etc.), +// and at most two branches are allowed. If there's two branches, these two are placed together at the end of the block. +// In other words, there's no branching instruction in the middle of the block. +// +// Note: we use the "block argument" variant of SSA, instead of PHI functions. See the package level doc comments. +// +// Note: we use "parameter/param" as a placeholder which represents a variant of PHI, and "argument/arg" as an actual +// Value passed to that "parameter/param". 
+type BasicBlock interface { + // ID returns the unique ID of this block. + ID() BasicBlockID + + // Name returns the unique string ID of this block. e.g. blk0, blk1, ... + Name() string + + // AddParam adds the parameter to the block whose type specified by `t`. + AddParam(b Builder, t Type) Value + + // Params returns the number of parameters to this block. + Params() int + + // Param returns (Variable, Value) which corresponds to the i-th parameter of this block. + // The returned Value is the definition of the param in this block. + Param(i int) Value + + // InsertInstruction inserts an instruction that implements Value into the tail of this block. + InsertInstruction(raw *Instruction) + + // Root returns the root instruction of this block. + Root() *Instruction + + // Tail returns the tail instruction of this block. + Tail() *Instruction + + // EntryBlock returns true if this block represents the function entry. + EntryBlock() bool + + // ReturnBlock returns ture if this block represents the function return. + ReturnBlock() bool + + // FormatHeader returns the debug string of this block, not including instruction. + FormatHeader(b Builder) string + + // Valid is true if this block is still valid even after optimizations. + Valid() bool + + // Sealed is true if this block has been sealed. + Sealed() bool + + // BeginPredIterator returns the first predecessor of this block. + BeginPredIterator() BasicBlock + + // NextPredIterator returns the next predecessor of this block. + NextPredIterator() BasicBlock + + // Preds returns the number of predecessors of this block. + Preds() int + + // Pred returns the i-th predecessor of this block. + Pred(i int) BasicBlock + + // Succs returns the number of successors of this block. + Succs() int + + // Succ returns the i-th successor of this block. + Succ(i int) BasicBlock + + // LoopHeader returns true if this block is a loop header. 
+ LoopHeader() bool + + // LoopNestingForestChildren returns the children of this block in the loop nesting forest. + LoopNestingForestChildren() []BasicBlock +} + +type ( + // basicBlock is a basic block in a SSA-transformed function. + basicBlock struct { + id BasicBlockID + rootInstr, currentInstr *Instruction + params []blockParam + predIter int + preds []basicBlockPredecessorInfo + success []*basicBlock + // singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called. + singlePred *basicBlock + // lastDefinitions maps Variable to its last definition in this block. + lastDefinitions map[Variable]Value + // unknownsValues are used in builder.findValue. The usage is well-described in the paper. + unknownValues []unknownValue + // invalid is true if this block is made invalid during optimizations. + invalid bool + // sealed is true if this is sealed (all the predecessors are known). + sealed bool + // loopHeader is true if this block is a loop header: + // + // > A loop header (sometimes called the entry point of the loop) is a dominator that is the target + // > of a loop-forming back edge. The loop header dominates all blocks in the loop body. + // > A block may be a loop header for more than one loop. A loop may have multiple entry points, + // > in which case it has no "loop header". + // + // See https://en.wikipedia.org/wiki/Control-flow_graph for more details. + // + // This is modified during the subPassLoopDetection pass. + loopHeader bool + + // loopNestingForestChildren holds the children of this block in the loop nesting forest. + // Non-empty if and only if this block is a loop header (i.e. loopHeader=true) + loopNestingForestChildren []BasicBlock + + // reversePostOrder is used to sort all the blocks in the function in reverse post order. + // This is used in builder.LayoutBlocks. + reversePostOrder int + + // child and sibling are the ones in the dominator tree. 
+ child, sibling *basicBlock + } + // BasicBlockID is the unique ID of a basicBlock. + BasicBlockID uint32 + + // blockParam implements Value and represents a parameter to a basicBlock. + blockParam struct { + // value is the Value that corresponds to the parameter in this block, + // and can be considered as an output of PHI instruction in traditional SSA. + value Value + // typ is the type of the parameter. + typ Type + } + + unknownValue struct { + // variable is the variable that this unknownValue represents. + variable Variable + // value is the value that this unknownValue represents. + value Value + } +) + +const basicBlockIDReturnBlock = 0xffffffff + +// Name implements BasicBlock.Name. +func (bb *basicBlock) Name() string { + if bb.id == basicBlockIDReturnBlock { + return "blk_ret" + } else { + return fmt.Sprintf("blk%d", bb.id) + } +} + +// String implements fmt.Stringer for debugging. +func (bid BasicBlockID) String() string { + if bid == basicBlockIDReturnBlock { + return "blk_ret" + } else { + return fmt.Sprintf("blk%d", bid) + } +} + +// ID implements BasicBlock.ID. +func (bb *basicBlock) ID() BasicBlockID { + return bb.id +} + +// basicBlockPredecessorInfo is the information of a predecessor of a basicBlock. +// predecessor is determined by a pair of block and the branch instruction used to jump to the successor. +type basicBlockPredecessorInfo struct { + blk *basicBlock + branch *Instruction +} + +// EntryBlock implements BasicBlock.EntryBlock. +func (bb *basicBlock) EntryBlock() bool { + return bb.id == 0 +} + +// ReturnBlock implements BasicBlock.ReturnBlock. +func (bb *basicBlock) ReturnBlock() bool { + return bb.id == basicBlockIDReturnBlock +} + +// AddParam implements BasicBlock.AddParam. 
+func (bb *basicBlock) AddParam(b Builder, typ Type) Value { + paramValue := b.allocateValue(typ) + bb.params = append(bb.params, blockParam{typ: typ, value: paramValue}) + return paramValue +} + +// addParamOn adds a parameter to this block whose value is already allocated. +func (bb *basicBlock) addParamOn(typ Type, value Value) { + bb.params = append(bb.params, blockParam{typ: typ, value: value}) +} + +// Params implements BasicBlock.Params. +func (bb *basicBlock) Params() int { + return len(bb.params) +} + +// Param implements BasicBlock.Param. +func (bb *basicBlock) Param(i int) Value { + p := &bb.params[i] + return p.value +} + +// Valid implements BasicBlock.Valid. +func (bb *basicBlock) Valid() bool { + return !bb.invalid +} + +// Sealed implements BasicBlock.Sealed. +func (bb *basicBlock) Sealed() bool { + return bb.sealed +} + +// InsertInstruction implements BasicBlock.InsertInstruction. +func (bb *basicBlock) InsertInstruction(next *Instruction) { + current := bb.currentInstr + if current != nil { + current.next = next + next.prev = current + } else { + bb.rootInstr = next + } + bb.currentInstr = next + + switch next.opcode { + case OpcodeJump, OpcodeBrz, OpcodeBrnz: + target := next.blk.(*basicBlock) + target.addPred(bb, next) + case OpcodeBrTable: + for _, _target := range next.targets { + target := _target.(*basicBlock) + target.addPred(bb, next) + } + } +} + +// NumPreds implements BasicBlock.NumPreds. +func (bb *basicBlock) NumPreds() int { + return len(bb.preds) +} + +// BeginPredIterator implements BasicBlock.BeginPredIterator. +func (bb *basicBlock) BeginPredIterator() BasicBlock { + bb.predIter = 0 + return bb.NextPredIterator() +} + +// NextPredIterator implements BasicBlock.NextPredIterator. +func (bb *basicBlock) NextPredIterator() BasicBlock { + if bb.predIter >= len(bb.preds) { + return nil + } + pred := bb.preds[bb.predIter].blk + bb.predIter++ + return pred +} + +// Preds implements BasicBlock.Preds. 
+func (bb *basicBlock) Preds() int { + return len(bb.preds) +} + +// Pred implements BasicBlock.Pred. +func (bb *basicBlock) Pred(i int) BasicBlock { + return bb.preds[i].blk +} + +// Succs implements BasicBlock.Succs. +func (bb *basicBlock) Succs() int { + return len(bb.success) +} + +// Succ implements BasicBlock.Succ. +func (bb *basicBlock) Succ(i int) BasicBlock { + return bb.success[i] +} + +// Root implements BasicBlock.Root. +func (bb *basicBlock) Root() *Instruction { + return bb.rootInstr +} + +// Tail implements BasicBlock.Tail. +func (bb *basicBlock) Tail() *Instruction { + return bb.currentInstr +} + +// reset resets the basicBlock to its initial state so that it can be reused for another function. +func resetBasicBlock(bb *basicBlock) { + bb.params = bb.params[:0] + bb.rootInstr, bb.currentInstr = nil, nil + bb.preds = bb.preds[:0] + bb.success = bb.success[:0] + bb.invalid, bb.sealed = false, false + bb.singlePred = nil + bb.unknownValues = bb.unknownValues[:0] + bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions) + bb.reversePostOrder = -1 + bb.loopNestingForestChildren = bb.loopNestingForestChildren[:0] + bb.loopHeader = false + bb.sibling = nil + bb.child = nil +} + +// addPred adds a predecessor to this block specified by the branch instruction. +func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) { + if bb.sealed { + panic("BUG: trying to add predecessor to a sealed block: " + bb.Name()) + } + + pred := blk.(*basicBlock) + for i := range bb.preds { + existingPred := &bb.preds[i] + if existingPred.blk == pred && existingPred.branch != branch { + // If the target is already added, then this must come from the same BrTable, + // otherwise such redundant branch should be eliminated by the frontend. (which should be simpler). 
+ panic(fmt.Sprintf("BUG: redundant non BrTable jumps in %s whose targes are the same", bb.Name())) + } + } + + bb.preds = append(bb.preds, basicBlockPredecessorInfo{ + blk: pred, + branch: branch, + }) + + pred.success = append(pred.success, bb) +} + +// FormatHeader implements BasicBlock.FormatHeader. +func (bb *basicBlock) FormatHeader(b Builder) string { + ps := make([]string, len(bb.params)) + for i, p := range bb.params { + ps[i] = p.value.formatWithType(b) + } + + if len(bb.preds) > 0 { + preds := make([]string, 0, len(bb.preds)) + for _, pred := range bb.preds { + if pred.blk.invalid { + continue + } + preds = append(preds, fmt.Sprintf("blk%d", pred.blk.id)) + + } + return fmt.Sprintf("blk%d: (%s) <-- (%s)", + bb.id, strings.Join(ps, ","), strings.Join(preds, ",")) + } else { + return fmt.Sprintf("blk%d: (%s)", bb.id, strings.Join(ps, ", ")) + } +} + +// validates validates the basicBlock for debugging purpose. +func (bb *basicBlock) validate(b *builder) { + if bb.invalid { + panic("BUG: trying to validate an invalid block: " + bb.Name()) + } + if len(bb.preds) > 0 { + for _, pred := range bb.preds { + if pred.branch.opcode != OpcodeBrTable { + if target := pred.branch.blk; target != bb { + panic(fmt.Sprintf("BUG: '%s' is not branch to %s, but to %s", + pred.branch.Format(b), bb.Name(), target.Name())) + } + } + + var exp int + if bb.ReturnBlock() { + exp = len(b.currentSignature.Results) + } else { + exp = len(bb.params) + } + + if len(pred.branch.vs.View()) != exp { + panic(fmt.Sprintf( + "BUG: len(argument at %s) != len(params at %s): %d != %d: %s", + pred.blk.Name(), bb.Name(), + len(pred.branch.vs.View()), len(bb.params), pred.branch.Format(b), + )) + } + + } + } +} + +// String implements fmt.Stringer for debugging purpose only. +func (bb *basicBlock) String() string { + return strconv.Itoa(int(bb.id)) +} + +// LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren. 
+func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock { + return bb.loopNestingForestChildren +} + +// LoopHeader implements BasicBlock.LoopHeader. +func (bb *basicBlock) LoopHeader() bool { + return bb.loopHeader +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go new file mode 100644 index 000000000..e1471edc3 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go @@ -0,0 +1,34 @@ +//go:build go1.21 + +package ssa + +import ( + "slices" +) + +func sortBlocks(blocks []*basicBlock) { + slices.SortFunc(blocks, func(i, j *basicBlock) int { + jIsReturn := j.ReturnBlock() + iIsReturn := i.ReturnBlock() + if iIsReturn && jIsReturn { + return 0 + } + if jIsReturn { + return 1 + } + if iIsReturn { + return -1 + } + iRoot, jRoot := i.rootInstr, j.rootInstr + if iRoot == nil && jRoot == nil { // For testing. + return 0 + } + if jRoot == nil { + return 1 + } + if iRoot == nil { + return -1 + } + return i.rootInstr.id - j.rootInstr.id + }) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go new file mode 100644 index 000000000..9dc881dae --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go @@ -0,0 +1,24 @@ +//go:build !go1.21 + +// TODO: delete after the floor Go version is 1.21 + +package ssa + +import "sort" + +func sortBlocks(blocks []*basicBlock) { + sort.SliceStable(blocks, func(i, j int) bool { + iBlk, jBlk := blocks[i], blocks[j] + if jBlk.ReturnBlock() { + return true + } + if iBlk.ReturnBlock() { + return false + } + iRoot, jRoot := iBlk.rootInstr, jBlk.rootInstr + if iRoot == nil || jRoot == nil { // For testing. 
+ return true + } + return iBlk.rootInstr.id < jBlk.rootInstr.id + }) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go new file mode 100644 index 000000000..1fc84d2ea --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go @@ -0,0 +1,731 @@ +package ssa + +import ( + "fmt" + "sort" + "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// Builder is used to builds SSA consisting of Basic Blocks per function. +type Builder interface { + // Init must be called to reuse this builder for the next function. + Init(typ *Signature) + + // Signature returns the Signature of the currently-compiled function. + Signature() *Signature + + // BlockIDMax returns the maximum value of BasicBlocksID existing in the currently-compiled function. + BlockIDMax() BasicBlockID + + // AllocateBasicBlock creates a basic block in SSA function. + AllocateBasicBlock() BasicBlock + + // CurrentBlock returns the currently handled BasicBlock which is set by the latest call to SetCurrentBlock. + CurrentBlock() BasicBlock + + // EntryBlock returns the entry BasicBlock of the currently-compiled function. + EntryBlock() BasicBlock + + // SetCurrentBlock sets the instruction insertion target to the BasicBlock `b`. + SetCurrentBlock(b BasicBlock) + + // DeclareVariable declares a Variable of the given Type. + DeclareVariable(Type) Variable + + // DefineVariable defines a variable in the `block` with value. + // The defining instruction will be inserted into the `block`. + DefineVariable(variable Variable, value Value, block BasicBlock) + + // DefineVariableInCurrentBB is the same as DefineVariable except the definition is + // inserted into the current BasicBlock. Alias to DefineVariable(x, y, CurrentBlock()). 
+ DefineVariableInCurrentBB(variable Variable, value Value) + + // AllocateInstruction returns a new Instruction. + AllocateInstruction() *Instruction + + // InsertInstruction executes BasicBlock.InsertInstruction for the currently handled basic block. + InsertInstruction(raw *Instruction) + + // allocateValue allocates an unused Value. + allocateValue(typ Type) Value + + // MustFindValue searches the latest definition of the given Variable and returns the result. + MustFindValue(variable Variable) Value + + // MustFindValueInBlk is the same as MustFindValue except it searches the latest definition from the given BasicBlock. + MustFindValueInBlk(variable Variable, blk BasicBlock) Value + + // FindValueInLinearPath tries to find the latest definition of the given Variable in the linear path to the current BasicBlock. + // If it cannot find the definition, or it's not sealed yet, it returns ValueInvalid. + FindValueInLinearPath(variable Variable) Value + + // Seal declares that we've known all the predecessors to this block and were added via AddPred. + // After calling this, AddPred will be forbidden. + Seal(blk BasicBlock) + + // AnnotateValue is for debugging purpose. + AnnotateValue(value Value, annotation string) + + // DeclareSignature appends the *Signature to be referenced by various instructions (e.g. OpcodeCall). + DeclareSignature(signature *Signature) + + // Signatures returns the slice of declared Signatures. + Signatures() []*Signature + + // ResolveSignature returns the Signature which corresponds to SignatureID. + ResolveSignature(id SignatureID) *Signature + + // RunPasses runs various passes on the constructed SSA function. + RunPasses() + + // Format returns the debugging string of the SSA function. + Format() string + + // BlockIteratorBegin initializes the state to iterate over all the valid BasicBlock(s) compiled. 
+ // Combined with BlockIteratorNext, we can use this like: + // + // for blk := builder.BlockIteratorBegin(); blk != nil; blk = builder.BlockIteratorNext() { + // // ... + // } + // + // The returned blocks are ordered in the order of AllocateBasicBlock being called. + BlockIteratorBegin() BasicBlock + + // BlockIteratorNext advances the state for iteration initialized by BlockIteratorBegin. + // Returns nil if there's no unseen BasicBlock. + BlockIteratorNext() BasicBlock + + // ValueRefCounts returns the map of ValueID to its reference count. + // The returned slice must not be modified. + ValueRefCounts() []int + + // BlockIteratorReversePostOrderBegin is almost the same as BlockIteratorBegin except it returns the BasicBlock in the reverse post-order. + // This is available after RunPasses is run. + BlockIteratorReversePostOrderBegin() BasicBlock + + // BlockIteratorReversePostOrderNext is almost the same as BlockIteratorPostOrderNext except it returns the BasicBlock in the reverse post-order. + // This is available after RunPasses is run. + BlockIteratorReversePostOrderNext() BasicBlock + + // ReturnBlock returns the BasicBlock which is used to return from the function. + ReturnBlock() BasicBlock + + // InsertUndefined inserts an undefined instruction at the current position. + InsertUndefined() + + // SetCurrentSourceOffset sets the current source offset. The incoming instruction will be annotated with this offset. + SetCurrentSourceOffset(line SourceOffset) + + // LoopNestingForestRoots returns the roots of the loop nesting forest. + LoopNestingForestRoots() []BasicBlock + + // LowestCommonAncestor returns the lowest common ancestor in the dominator tree of the given BasicBlock(s). + LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock + + // Idom returns the immediate dominator of the given BasicBlock. + Idom(blk BasicBlock) BasicBlock + + VarLengthPool() *wazevoapi.VarLengthPool[Value] +} + +// NewBuilder returns a new Builder implementation. 
+func NewBuilder() Builder { + return &builder{ + instructionsPool: wazevoapi.NewPool[Instruction](resetInstruction), + basicBlocksPool: wazevoapi.NewPool[basicBlock](resetBasicBlock), + varLengthPool: wazevoapi.NewVarLengthPool[Value](), + valueAnnotations: make(map[ValueID]string), + signatures: make(map[SignatureID]*Signature), + blkVisited: make(map[*basicBlock]int), + valueIDAliases: make(map[ValueID]Value), + redundantParameterIndexToValue: make(map[int]Value), + returnBlk: &basicBlock{id: basicBlockIDReturnBlock}, + } +} + +// builder implements Builder interface. +type builder struct { + basicBlocksPool wazevoapi.Pool[basicBlock] + instructionsPool wazevoapi.Pool[Instruction] + varLengthPool wazevoapi.VarLengthPool[Value] + signatures map[SignatureID]*Signature + currentSignature *Signature + + // reversePostOrderedBasicBlocks are the BasicBlock(s) ordered in the reverse post-order after passCalculateImmediateDominators. + reversePostOrderedBasicBlocks []*basicBlock + currentBB *basicBlock + returnBlk *basicBlock + + // variables track the types for Variable with the index regarded Variable. + variables []Type + // nextValueID is used by builder.AllocateValue. + nextValueID ValueID + // nextVariable is used by builder.AllocateVariable. + nextVariable Variable + + valueIDAliases map[ValueID]Value + valueAnnotations map[ValueID]string + + // valueRefCounts is used to lower the SSA in backend, and will be calculated + // by the last SSA-level optimization pass. + valueRefCounts []int + + // dominators stores the immediate dominator of each BasicBlock. + // The index is blockID of the BasicBlock. + dominators []*basicBlock + sparseTree dominatorSparseTree + + // loopNestingForestRoots are the roots of the loop nesting forest. + loopNestingForestRoots []BasicBlock + + // The followings are used for optimization passes/deterministic compilation. 
+ instStack []*Instruction + blkVisited map[*basicBlock]int + valueIDToInstruction []*Instruction + blkStack []*basicBlock + blkStack2 []*basicBlock + ints []int + redundantParameterIndexToValue map[int]Value + + // blockIterCur is used to implement blockIteratorBegin and blockIteratorNext. + blockIterCur int + + // donePreBlockLayoutPasses is true if all the passes before LayoutBlocks are called. + donePreBlockLayoutPasses bool + // doneBlockLayout is true if LayoutBlocks is called. + doneBlockLayout bool + // donePostBlockLayoutPasses is true if all the passes after LayoutBlocks are called. + donePostBlockLayoutPasses bool + + currentSourceOffset SourceOffset +} + +func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] { + return &b.varLengthPool +} + +// ReturnBlock implements Builder.ReturnBlock. +func (b *builder) ReturnBlock() BasicBlock { + return b.returnBlk +} + +// Init implements Builder.Reset. +func (b *builder) Init(s *Signature) { + b.nextVariable = 0 + b.currentSignature = s + resetBasicBlock(b.returnBlk) + b.instructionsPool.Reset() + b.basicBlocksPool.Reset() + b.varLengthPool.Reset() + b.donePreBlockLayoutPasses = false + b.doneBlockLayout = false + b.donePostBlockLayoutPasses = false + for _, sig := range b.signatures { + sig.used = false + } + + b.ints = b.ints[:0] + b.blkStack = b.blkStack[:0] + b.blkStack2 = b.blkStack2[:0] + b.dominators = b.dominators[:0] + b.loopNestingForestRoots = b.loopNestingForestRoots[:0] + + for i := 0; i < b.basicBlocksPool.Allocated(); i++ { + blk := b.basicBlocksPool.View(i) + delete(b.blkVisited, blk) + } + b.basicBlocksPool.Reset() + + for v := ValueID(0); v < b.nextValueID; v++ { + delete(b.valueAnnotations, v) + delete(b.valueIDAliases, v) + b.valueRefCounts[v] = 0 + b.valueIDToInstruction[v] = nil + } + b.nextValueID = 0 + b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0] + b.doneBlockLayout = false + for i := range b.valueRefCounts { + b.valueRefCounts[i] = 0 + } + + 
b.currentSourceOffset = sourceOffsetUnknown +} + +// Signature implements Builder.Signature. +func (b *builder) Signature() *Signature { + return b.currentSignature +} + +// AnnotateValue implements Builder.AnnotateValue. +func (b *builder) AnnotateValue(value Value, a string) { + b.valueAnnotations[value.ID()] = a +} + +// AllocateInstruction implements Builder.AllocateInstruction. +func (b *builder) AllocateInstruction() *Instruction { + instr := b.instructionsPool.Allocate() + instr.id = b.instructionsPool.Allocated() + return instr +} + +// DeclareSignature implements Builder.AnnotateValue. +func (b *builder) DeclareSignature(s *Signature) { + b.signatures[s.ID] = s + s.used = false +} + +// Signatures implements Builder.Signatures. +func (b *builder) Signatures() (ret []*Signature) { + for _, sig := range b.signatures { + ret = append(ret, sig) + } + sort.Slice(ret, func(i, j int) bool { + return ret[i].ID < ret[j].ID + }) + return +} + +// SetCurrentSourceOffset implements Builder.SetCurrentSourceOffset. +func (b *builder) SetCurrentSourceOffset(l SourceOffset) { + b.currentSourceOffset = l +} + +func (b *builder) usedSignatures() (ret []*Signature) { + for _, sig := range b.signatures { + if sig.used { + ret = append(ret, sig) + } + } + sort.Slice(ret, func(i, j int) bool { + return ret[i].ID < ret[j].ID + }) + return +} + +// ResolveSignature implements Builder.ResolveSignature. +func (b *builder) ResolveSignature(id SignatureID) *Signature { + return b.signatures[id] +} + +// AllocateBasicBlock implements Builder.AllocateBasicBlock. +func (b *builder) AllocateBasicBlock() BasicBlock { + return b.allocateBasicBlock() +} + +// allocateBasicBlock allocates a new basicBlock. +func (b *builder) allocateBasicBlock() *basicBlock { + id := BasicBlockID(b.basicBlocksPool.Allocated()) + blk := b.basicBlocksPool.Allocate() + blk.id = id + return blk +} + +// Idom implements Builder.Idom. 
+func (b *builder) Idom(blk BasicBlock) BasicBlock { + return b.dominators[blk.ID()] +} + +// InsertInstruction implements Builder.InsertInstruction. +func (b *builder) InsertInstruction(instr *Instruction) { + b.currentBB.InsertInstruction(instr) + + if l := b.currentSourceOffset; l.Valid() { + // Emit the source offset info only when the instruction has side effect because + // these are the only instructions that are accessed by stack unwinding. + // This reduces the significant amount of the offset info in the binary. + if instr.sideEffect() != sideEffectNone { + instr.annotateSourceOffset(l) + } + } + + resultTypesFn := instructionReturnTypes[instr.opcode] + if resultTypesFn == nil { + panic("TODO: " + instr.Format(b)) + } + + t1, ts := resultTypesFn(b, instr) + if t1.invalid() { + return + } + + r1 := b.allocateValue(t1) + instr.rValue = r1 + + tsl := len(ts) + if tsl == 0 { + return + } + + rValues := b.varLengthPool.Allocate(tsl) + for i := 0; i < tsl; i++ { + rValues = rValues.Append(&b.varLengthPool, b.allocateValue(ts[i])) + } + instr.rValues = rValues +} + +// DefineVariable implements Builder.DefineVariable. +func (b *builder) DefineVariable(variable Variable, value Value, block BasicBlock) { + if b.variables[variable].invalid() { + panic("BUG: trying to define variable " + variable.String() + " but is not declared yet") + } + + if b.variables[variable] != value.Type() { + panic(fmt.Sprintf("BUG: inconsistent type for variable %d: expected %s but got %s", variable, b.variables[variable], value.Type())) + } + bb := block.(*basicBlock) + bb.lastDefinitions[variable] = value +} + +// DefineVariableInCurrentBB implements Builder.DefineVariableInCurrentBB. +func (b *builder) DefineVariableInCurrentBB(variable Variable, value Value) { + b.DefineVariable(variable, value, b.currentBB) +} + +// SetCurrentBlock implements Builder.SetCurrentBlock. 
+func (b *builder) SetCurrentBlock(bb BasicBlock) { + b.currentBB = bb.(*basicBlock) +} + +// CurrentBlock implements Builder.CurrentBlock. +func (b *builder) CurrentBlock() BasicBlock { + return b.currentBB +} + +// EntryBlock implements Builder.EntryBlock. +func (b *builder) EntryBlock() BasicBlock { + return b.entryBlk() +} + +// DeclareVariable implements Builder.DeclareVariable. +func (b *builder) DeclareVariable(typ Type) Variable { + v := b.allocateVariable() + iv := int(v) + if l := len(b.variables); l <= iv { + b.variables = append(b.variables, make([]Type, 2*(l+1))...) + } + b.variables[v] = typ + return v +} + +// allocateVariable allocates a new variable. +func (b *builder) allocateVariable() (ret Variable) { + ret = b.nextVariable + b.nextVariable++ + return +} + +// allocateValue implements Builder.AllocateValue. +func (b *builder) allocateValue(typ Type) (v Value) { + v = Value(b.nextValueID) + v = v.setType(typ) + b.nextValueID++ + return +} + +// FindValueInLinearPath implements Builder.FindValueInLinearPath. +func (b *builder) FindValueInLinearPath(variable Variable) Value { + return b.findValueInLinearPath(variable, b.currentBB) +} + +func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Value { + if val, ok := blk.lastDefinitions[variable]; ok { + return val + } else if !blk.sealed { + return ValueInvalid + } + + if pred := blk.singlePred; pred != nil { + // If this block is sealed and have only one predecessor, + // we can use the value in that block without ambiguity on definition. + return b.findValueInLinearPath(variable, pred) + } + if len(blk.preds) == 1 { + panic("BUG") + } + return ValueInvalid +} + +func (b *builder) MustFindValueInBlk(variable Variable, blk BasicBlock) Value { + typ := b.definedVariableType(variable) + return b.findValue(typ, variable, blk.(*basicBlock)) +} + +// MustFindValue implements Builder.MustFindValue. 
+func (b *builder) MustFindValue(variable Variable) Value { + typ := b.definedVariableType(variable) + return b.findValue(typ, variable, b.currentBB) +} + +// findValue recursively tries to find the latest definition of a `variable`. The algorithm is described in +// the section 2 of the paper https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf. +// +// TODO: reimplement this in iterative, not recursive, to avoid stack overflow. +func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value { + if val, ok := blk.lastDefinitions[variable]; ok { + // The value is already defined in this block! + return val + } else if !blk.sealed { // Incomplete CFG as in the paper. + // If this is not sealed, that means it might have additional unknown predecessor later on. + // So we temporarily define the placeholder value here (not add as a parameter yet!), + // and record it as unknown. + // The unknown values are resolved when we call seal this block via BasicBlock.Seal(). + value := b.allocateValue(typ) + if wazevoapi.SSALoggingEnabled { + fmt.Printf("adding unknown value placeholder for %s at %d\n", variable, blk.id) + } + blk.lastDefinitions[variable] = value + blk.unknownValues = append(blk.unknownValues, unknownValue{ + variable: variable, + value: value, + }) + return value + } + + if pred := blk.singlePred; pred != nil { + // If this block is sealed and have only one predecessor, + // we can use the value in that block without ambiguity on definition. + return b.findValue(typ, variable, pred) + } else if len(blk.preds) == 0 { + panic("BUG: value is not defined for " + variable.String()) + } + + // If this block has multiple predecessors, we have to gather the definitions, + // and treat them as an argument to this block. + // + // The first thing is to define a new parameter to this block which may or may not be redundant, but + // later we eliminate trivial params in an optimization pass. 
This must be done before finding the + // definitions in the predecessors so that we can break the cycle. + paramValue := blk.AddParam(b, typ) + b.DefineVariable(variable, paramValue, blk) + + // After the new param is added, we have to manipulate the original branching instructions + // in predecessors so that they would pass the definition of `variable` as the argument to + // the newly added PHI. + for i := range blk.preds { + pred := &blk.preds[i] + value := b.findValue(typ, variable, pred.blk) + pred.branch.addArgumentBranchInst(b, value) + } + return paramValue +} + +// Seal implements Builder.Seal. +func (b *builder) Seal(raw BasicBlock) { + blk := raw.(*basicBlock) + if len(blk.preds) == 1 { + blk.singlePred = blk.preds[0].blk + } + blk.sealed = true + + for _, v := range blk.unknownValues { + variable, phiValue := v.variable, v.value + typ := b.definedVariableType(variable) + blk.addParamOn(typ, phiValue) + for i := range blk.preds { + pred := &blk.preds[i] + predValue := b.findValue(typ, variable, pred.blk) + if !predValue.Valid() { + panic("BUG: value is not defined anywhere in the predecessors in the CFG") + } + pred.branch.addArgumentBranchInst(b, predValue) + } + } +} + +// definedVariableType returns the type of the given variable. If the variable is not defined yet, it panics. +func (b *builder) definedVariableType(variable Variable) Type { + typ := b.variables[variable] + if typ.invalid() { + panic(fmt.Sprintf("%s is not defined yet", variable)) + } + return typ +} + +// Format implements Builder.Format. 
+func (b *builder) Format() string { + str := strings.Builder{} + usedSigs := b.usedSignatures() + if len(usedSigs) > 0 { + str.WriteByte('\n') + str.WriteString("signatures:\n") + for _, sig := range usedSigs { + str.WriteByte('\t') + str.WriteString(sig.String()) + str.WriteByte('\n') + } + } + + var iterBegin, iterNext func() *basicBlock + if b.doneBlockLayout { + iterBegin, iterNext = b.blockIteratorReversePostOrderBegin, b.blockIteratorReversePostOrderNext + } else { + iterBegin, iterNext = b.blockIteratorBegin, b.blockIteratorNext + } + for bb := iterBegin(); bb != nil; bb = iterNext() { + str.WriteByte('\n') + str.WriteString(bb.FormatHeader(b)) + str.WriteByte('\n') + + for cur := bb.Root(); cur != nil; cur = cur.Next() { + str.WriteByte('\t') + str.WriteString(cur.Format(b)) + str.WriteByte('\n') + } + } + return str.String() +} + +// BlockIteratorNext implements Builder.BlockIteratorNext. +func (b *builder) BlockIteratorNext() BasicBlock { + if blk := b.blockIteratorNext(); blk == nil { + return nil // BasicBlock((*basicBlock)(nil)) != BasicBlock(nil) + } else { + return blk + } +} + +// BlockIteratorNext implements Builder.BlockIteratorNext. +func (b *builder) blockIteratorNext() *basicBlock { + index := b.blockIterCur + for { + if index == b.basicBlocksPool.Allocated() { + return nil + } + ret := b.basicBlocksPool.View(index) + index++ + if !ret.invalid { + b.blockIterCur = index + return ret + } + } +} + +// BlockIteratorBegin implements Builder.BlockIteratorBegin. +func (b *builder) BlockIteratorBegin() BasicBlock { + return b.blockIteratorBegin() +} + +// BlockIteratorBegin implements Builder.BlockIteratorBegin. +func (b *builder) blockIteratorBegin() *basicBlock { + b.blockIterCur = 0 + return b.blockIteratorNext() +} + +// BlockIteratorReversePostOrderBegin implements Builder.BlockIteratorReversePostOrderBegin. 
+func (b *builder) BlockIteratorReversePostOrderBegin() BasicBlock { + return b.blockIteratorReversePostOrderBegin() +} + +// BlockIteratorBegin implements Builder.BlockIteratorBegin. +func (b *builder) blockIteratorReversePostOrderBegin() *basicBlock { + b.blockIterCur = 0 + return b.blockIteratorReversePostOrderNext() +} + +// BlockIteratorReversePostOrderNext implements Builder.BlockIteratorReversePostOrderNext. +func (b *builder) BlockIteratorReversePostOrderNext() BasicBlock { + if blk := b.blockIteratorReversePostOrderNext(); blk == nil { + return nil // BasicBlock((*basicBlock)(nil)) != BasicBlock(nil) + } else { + return blk + } +} + +// BlockIteratorNext implements Builder.BlockIteratorNext. +func (b *builder) blockIteratorReversePostOrderNext() *basicBlock { + if b.blockIterCur >= len(b.reversePostOrderedBasicBlocks) { + return nil + } else { + ret := b.reversePostOrderedBasicBlocks[b.blockIterCur] + b.blockIterCur++ + return ret + } +} + +// ValueRefCounts implements Builder.ValueRefCounts. +func (b *builder) ValueRefCounts() []int { + return b.valueRefCounts +} + +// alias records the alias of the given values. The alias(es) will be +// eliminated in the optimization pass via resolveArgumentAlias. +func (b *builder) alias(dst, src Value) { + b.valueIDAliases[dst.ID()] = src +} + +// resolveArgumentAlias resolves the alias of the arguments of the given instruction. +func (b *builder) resolveArgumentAlias(instr *Instruction) { + if instr.v.Valid() { + instr.v = b.resolveAlias(instr.v) + } + + if instr.v2.Valid() { + instr.v2 = b.resolveAlias(instr.v2) + } + + if instr.v3.Valid() { + instr.v3 = b.resolveAlias(instr.v3) + } + + view := instr.vs.View() + for i, v := range view { + view[i] = b.resolveAlias(v) + } +} + +// resolveAlias resolves the alias of the given value. +func (b *builder) resolveAlias(v Value) Value { + // Some aliases are chained, so we need to resolve them recursively. 
+ for { + if src, ok := b.valueIDAliases[v.ID()]; ok { + v = src + } else { + break + } + } + return v +} + +// entryBlk returns the entry block of the function. +func (b *builder) entryBlk() *basicBlock { + return b.basicBlocksPool.View(0) +} + +// isDominatedBy returns true if the given block `n` is dominated by the given block `d`. +// Before calling this, the builder must pass by passCalculateImmediateDominators. +func (b *builder) isDominatedBy(n *basicBlock, d *basicBlock) bool { + if len(b.dominators) == 0 { + panic("BUG: passCalculateImmediateDominators must be called before calling isDominatedBy") + } + ent := b.entryBlk() + doms := b.dominators + for n != d && n != ent { + n = doms[n.id] + } + return n == d +} + +// BlockIDMax implements Builder.BlockIDMax. +func (b *builder) BlockIDMax() BasicBlockID { + return BasicBlockID(b.basicBlocksPool.Allocated()) +} + +// InsertUndefined implements Builder.InsertUndefined. +func (b *builder) InsertUndefined() { + instr := b.AllocateInstruction() + instr.opcode = OpcodeUndefined + b.InsertInstruction(instr) +} + +// LoopNestingForestRoots implements Builder.LoopNestingForestRoots. +func (b *builder) LoopNestingForestRoots() []BasicBlock { + return b.loopNestingForestRoots +} + +// LowestCommonAncestor implements Builder.LowestCommonAncestor. +func (b *builder) LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock { + return b.sparseTree.findLCA(blk1.ID(), blk2.ID()) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/cmp.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/cmp.go new file mode 100644 index 000000000..15b62ca8e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/cmp.go @@ -0,0 +1,107 @@ +package ssa + +// IntegerCmpCond represents a condition for integer comparison. +type IntegerCmpCond byte + +const ( + // IntegerCmpCondInvalid represents an invalid condition. 
+ IntegerCmpCondInvalid IntegerCmpCond = iota + // IntegerCmpCondEqual represents "==". + IntegerCmpCondEqual + // IntegerCmpCondNotEqual represents "!=". + IntegerCmpCondNotEqual + // IntegerCmpCondSignedLessThan represents Signed "<". + IntegerCmpCondSignedLessThan + // IntegerCmpCondSignedGreaterThanOrEqual represents Signed ">=". + IntegerCmpCondSignedGreaterThanOrEqual + // IntegerCmpCondSignedGreaterThan represents Signed ">". + IntegerCmpCondSignedGreaterThan + // IntegerCmpCondSignedLessThanOrEqual represents Signed "<=". + IntegerCmpCondSignedLessThanOrEqual + // IntegerCmpCondUnsignedLessThan represents Unsigned "<". + IntegerCmpCondUnsignedLessThan + // IntegerCmpCondUnsignedGreaterThanOrEqual represents Unsigned ">=". + IntegerCmpCondUnsignedGreaterThanOrEqual + // IntegerCmpCondUnsignedGreaterThan represents Unsigned ">". + IntegerCmpCondUnsignedGreaterThan + // IntegerCmpCondUnsignedLessThanOrEqual represents Unsigned "<=". + IntegerCmpCondUnsignedLessThanOrEqual +) + +// String implements fmt.Stringer. +func (i IntegerCmpCond) String() string { + switch i { + case IntegerCmpCondEqual: + return "eq" + case IntegerCmpCondNotEqual: + return "neq" + case IntegerCmpCondSignedLessThan: + return "lt_s" + case IntegerCmpCondSignedGreaterThanOrEqual: + return "ge_s" + case IntegerCmpCondSignedGreaterThan: + return "gt_s" + case IntegerCmpCondSignedLessThanOrEqual: + return "le_s" + case IntegerCmpCondUnsignedLessThan: + return "lt_u" + case IntegerCmpCondUnsignedGreaterThanOrEqual: + return "ge_u" + case IntegerCmpCondUnsignedGreaterThan: + return "gt_u" + case IntegerCmpCondUnsignedLessThanOrEqual: + return "le_u" + default: + panic("invalid integer comparison condition") + } +} + +// Signed returns true if the condition is signed integer comparison. 
+func (i IntegerCmpCond) Signed() bool { + switch i { + case IntegerCmpCondSignedLessThan, IntegerCmpCondSignedGreaterThanOrEqual, + IntegerCmpCondSignedGreaterThan, IntegerCmpCondSignedLessThanOrEqual: + return true + default: + return false + } +} + +type FloatCmpCond byte + +const ( + // FloatCmpCondInvalid represents an invalid condition. + FloatCmpCondInvalid FloatCmpCond = iota + // FloatCmpCondEqual represents "==". + FloatCmpCondEqual + // FloatCmpCondNotEqual represents "!=". + FloatCmpCondNotEqual + // FloatCmpCondLessThan represents "<". + FloatCmpCondLessThan + // FloatCmpCondLessThanOrEqual represents "<=". + FloatCmpCondLessThanOrEqual + // FloatCmpCondGreaterThan represents ">". + FloatCmpCondGreaterThan + // FloatCmpCondGreaterThanOrEqual represents ">=". + FloatCmpCondGreaterThanOrEqual +) + +// String implements fmt.Stringer. +func (f FloatCmpCond) String() string { + switch f { + case FloatCmpCondEqual: + return "eq" + case FloatCmpCondNotEqual: + return "neq" + case FloatCmpCondLessThan: + return "lt" + case FloatCmpCondLessThanOrEqual: + return "le" + case FloatCmpCondGreaterThan: + return "gt" + case FloatCmpCondGreaterThanOrEqual: + return "ge" + default: + panic("invalid float comparison condition") + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/funcref.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/funcref.go new file mode 100644 index 000000000..d9620762a --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/funcref.go @@ -0,0 +1,12 @@ +package ssa + +import "fmt" + +// FuncRef is a unique identifier for a function of the frontend, +// and is used to reference the function in function call. +type FuncRef uint32 + +// String implements fmt.Stringer. 
+func (r FuncRef) String() string { + return fmt.Sprintf("f%d", r) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go new file mode 100644 index 000000000..3e3482efc --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go @@ -0,0 +1,2967 @@ +package ssa + +import ( + "fmt" + "math" + "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// Opcode represents a SSA instruction. +type Opcode uint32 + +// Instruction represents an instruction whose opcode is specified by +// Opcode. Since Go doesn't have union type, we use this flattened type +// for all instructions, and therefore each field has different meaning +// depending on Opcode. +type Instruction struct { + // id is the unique ID of this instruction which ascends from 0 following the order of program. + id int + opcode Opcode + u1, u2 uint64 + v Value + v2 Value + v3 Value + vs Values + typ Type + blk BasicBlock + targets []BasicBlock + prev, next *Instruction + + rValue Value + rValues Values + gid InstructionGroupID + sourceOffset SourceOffset + live bool + alreadyLowered bool +} + +// SourceOffset represents the offset of the source of an instruction. +type SourceOffset int64 + +const sourceOffsetUnknown = -1 + +// Valid returns true if this source offset is valid. +func (l SourceOffset) Valid() bool { + return l != sourceOffsetUnknown +} + +func (i *Instruction) annotateSourceOffset(line SourceOffset) { + i.sourceOffset = line +} + +// SourceOffset returns the source offset of this instruction. +func (i *Instruction) SourceOffset() SourceOffset { + return i.sourceOffset +} + +// Opcode returns the opcode of this instruction. +func (i *Instruction) Opcode() Opcode { + return i.opcode +} + +// GroupID returns the InstructionGroupID of this instruction. 
+func (i *Instruction) GroupID() InstructionGroupID { + return i.gid +} + +// MarkLowered marks this instruction as already lowered. +func (i *Instruction) MarkLowered() { + i.alreadyLowered = true +} + +// Lowered returns true if this instruction is already lowered. +func (i *Instruction) Lowered() bool { + return i.alreadyLowered +} + +// resetInstruction resets this instruction to the initial state. +func resetInstruction(i *Instruction) { + *i = Instruction{} + i.v = ValueInvalid + i.v2 = ValueInvalid + i.v3 = ValueInvalid + i.rValue = ValueInvalid + i.typ = typeInvalid + i.vs = ValuesNil + i.sourceOffset = sourceOffsetUnknown +} + +// InstructionGroupID is assigned to each instruction and represents a group of instructions +// where each instruction is interchangeable with others except for the last instruction +// in the group which has side effects. In short, InstructionGroupID is determined by the side effects of instructions. +// That means, if there's an instruction with side effect between two instructions, then these two instructions +// will have different instructionGroupID. Note that each block always ends with branching, which is with side effects, +// therefore, instructions in different blocks always have different InstructionGroupID(s). +// +// The notable application of this is used in lowering SSA-level instruction to a ISA specific instruction, +// where we eagerly try to merge multiple instructions into single operation etc. Such merging cannot be done +// if these instruction have different InstructionGroupID since it will change the semantics of a program. +// +// See passDeadCodeElimination. +type InstructionGroupID uint32 + +// Returns Value(s) produced by this instruction if any. +// The `first` is the first return value, and `rest` is the rest of the values. +func (i *Instruction) Returns() (first Value, rest []Value) { + return i.rValue, i.rValues.View() +} + +// Return returns a Value(s) produced by this instruction if any. 
+// If there's multiple return values, only the first one is returned. +func (i *Instruction) Return() (first Value) { + return i.rValue +} + +// Args returns the arguments to this instruction. +func (i *Instruction) Args() (v1, v2, v3 Value, vs []Value) { + return i.v, i.v2, i.v3, i.vs.View() +} + +// Arg returns the first argument to this instruction. +func (i *Instruction) Arg() Value { + return i.v +} + +// Arg2 returns the first two arguments to this instruction. +func (i *Instruction) Arg2() (Value, Value) { + return i.v, i.v2 +} + +// ArgWithLane returns the first argument to this instruction, and the lane type. +func (i *Instruction) ArgWithLane() (Value, VecLane) { + return i.v, VecLane(i.u1) +} + +// Arg2WithLane returns the first two arguments to this instruction, and the lane type. +func (i *Instruction) Arg2WithLane() (Value, Value, VecLane) { + return i.v, i.v2, VecLane(i.u1) +} + +// ShuffleData returns the first two arguments to this instruction and 2 uint64s `lo`, `hi`. +// +// Note: Each uint64 encodes a sequence of 8 bytes where each byte encodes a VecLane, +// so that the 128bit integer `hi<<64|lo` packs a slice `[16]VecLane`, +// where `lane[0]` is the least significant byte, and `lane[n]` is shifted to offset `n*8`. +func (i *Instruction) ShuffleData() (v Value, v2 Value, lo uint64, hi uint64) { + return i.v, i.v2, i.u1, i.u2 +} + +// Arg3 returns the first three arguments to this instruction. +func (i *Instruction) Arg3() (Value, Value, Value) { + return i.v, i.v2, i.v3 +} + +// Next returns the next instruction laid out next to itself. +func (i *Instruction) Next() *Instruction { + return i.next +} + +// Prev returns the previous instruction laid out prior to itself. +func (i *Instruction) Prev() *Instruction { + return i.prev +} + +// IsBranching returns true if this instruction is a branching instruction. 
+func (i *Instruction) IsBranching() bool { + switch i.opcode { + case OpcodeJump, OpcodeBrz, OpcodeBrnz, OpcodeBrTable: + return true + default: + return false + } +} + +// TODO: complete opcode comments. +const ( + OpcodeInvalid Opcode = iota + + // OpcodeUndefined is a placeholder for undefined opcode. This can be used for debugging to intentionally + // cause a crash at certain point. + OpcodeUndefined + + // OpcodeJump takes the list of args to the `block` and unconditionally jumps to it. + OpcodeJump + + // OpcodeBrz branches into `blk` with `args` if the value `c` equals zero: `Brz c, blk, args`. + OpcodeBrz + + // OpcodeBrnz branches into `blk` with `args` if the value `c` is not zero: `Brnz c, blk, args`. + OpcodeBrnz + + // OpcodeBrTable takes the index value `index`, and branches into `labelX`. If the `index` is out of range, + // it branches into the last labelN: `BrTable index, [label1, label2, ... labelN]`. + OpcodeBrTable + + // OpcodeExitWithCode exit the execution immediately. + OpcodeExitWithCode + + // OpcodeExitIfTrueWithCode exits the execution immediately if the value `c` is not zero. + OpcodeExitIfTrueWithCode + + // OpcodeReturn returns from the function: `return rvalues`. + OpcodeReturn + + // OpcodeCall calls a function specified by the symbol FN with arguments `args`: `returnvals = Call FN, args...` + // This is a "near" call, which means the call target is known at compile time, and the target is relatively close + // to this function. If the target cannot be reached by near call, the backend fails to compile. + OpcodeCall + + // OpcodeCallIndirect calls a function specified by `callee` which is a function address: `returnvals = call_indirect SIG, callee, args`. + // Note that this is different from call_indirect in Wasm, which also does type checking, etc. + OpcodeCallIndirect + + // OpcodeSplat performs a vector splat operation: `v = Splat.lane x`. 
+	OpcodeSplat
+
+	// OpcodeSwizzle performs a vector swizzle operation: `v = Swizzle.lane x, y`.
+	OpcodeSwizzle
+
+	// OpcodeInsertlane inserts a lane value into a vector: `v = InsertLane x, y, Idx`.
+	OpcodeInsertlane
+
+	// OpcodeExtractlane extracts a lane value from a vector: `v = ExtractLane x, Idx`.
+	OpcodeExtractlane
+
+	// OpcodeLoad loads a Type value from the [base + offset] address: `v = Load base, offset`.
+	OpcodeLoad
+
+	// OpcodeStore stores a Type value to the [base + offset] address: `Store v, base, offset`.
+	OpcodeStore
+
+	// OpcodeUload8 loads the 8-bit value from the [base + offset] address, zero-extended to 64 bits: `v = Uload8 base, offset`.
+	OpcodeUload8
+
+	// OpcodeSload8 loads the 8-bit value from the [base + offset] address, sign-extended to 64 bits: `v = Sload8 base, offset`.
+	OpcodeSload8
+
+	// OpcodeIstore8 stores the 8-bit value to the [base + offset] address, sign-extended to 64 bits: `Istore8 v, base, offset`.
+	OpcodeIstore8
+
+	// OpcodeUload16 loads the 16-bit value from the [base + offset] address, zero-extended to 64 bits: `v = Uload16 base, offset`.
+	OpcodeUload16
+
+	// OpcodeSload16 loads the 16-bit value from the [base + offset] address, sign-extended to 64 bits: `v = Sload16 base, offset`.
+	OpcodeSload16
+
+	// OpcodeIstore16 stores the 16-bit value to the [base + offset] address, zero-extended to 64 bits: `Istore16 v, base, offset`.
+	OpcodeIstore16
+
+	// OpcodeUload32 loads the 32-bit value from the [base + offset] address, zero-extended to 64 bits: `v = Uload32 base, offset`.
+	OpcodeUload32
+
+	// OpcodeSload32 loads the 32-bit value from the [base + offset] address, sign-extended to 64 bits: `v = Sload32 base, offset`.
+	OpcodeSload32
+
+	// OpcodeIstore32 stores the 32-bit value to the [base + offset] address, zero-extended to 64 bits: `Istore32 v, base, offset`.
+	OpcodeIstore32
+
+	// OpcodeLoadSplat represents a load that replicates the loaded value to all lanes `v = LoadSplat.lane p, Offset`.
+ OpcodeLoadSplat + + // OpcodeVZeroExtLoad loads a scalar single/double precision floating point value from the [p + Offset] address, + // and zero-extend it to the V128 value: `v = VExtLoad p, Offset`. + OpcodeVZeroExtLoad + + // OpcodeIconst represents the integer const. + OpcodeIconst + + // OpcodeF32const represents the single-precision const. + OpcodeF32const + + // OpcodeF64const represents the double-precision const. + OpcodeF64const + + // OpcodeVconst represents the 128bit vector const. + OpcodeVconst + + // OpcodeVbor computes binary or between two 128bit vectors: `v = bor x, y`. + OpcodeVbor + + // OpcodeVbxor computes binary xor between two 128bit vectors: `v = bxor x, y`. + OpcodeVbxor + + // OpcodeVband computes binary and between two 128bit vectors: `v = band x, y`. + OpcodeVband + + // OpcodeVbandnot computes binary and-not between two 128bit vectors: `v = bandnot x, y`. + OpcodeVbandnot + + // OpcodeVbnot negates a 128bit vector: `v = bnot x`. + OpcodeVbnot + + // OpcodeVbitselect uses the bits in the control mask c to select the corresponding bit from x when 1 + // and y when 0: `v = bitselect c, x, y`. + OpcodeVbitselect + + // OpcodeShuffle shuffles two vectors using the given 128-bit immediate: `v = shuffle imm, x, y`. + // For each byte in the immediate, a value i in [0, 15] selects the i-th byte in vector x; + // i in [16, 31] selects the (i-16)-th byte in vector y. + OpcodeShuffle + + // OpcodeSelect chooses between two values based on a condition `c`: `v = Select c, x, y`. + OpcodeSelect + + // OpcodeVanyTrue performs a any true operation: `s = VanyTrue a`. + OpcodeVanyTrue + + // OpcodeVallTrue performs a lane-wise all true operation: `s = VallTrue.lane a`. + OpcodeVallTrue + + // OpcodeVhighBits performs a lane-wise extract of the high bits: `v = VhighBits.lane a`. + OpcodeVhighBits + + // OpcodeIcmp compares two integer values with the given condition: `v = icmp Cond, x, y`. 
+ OpcodeIcmp + + // OpcodeVIcmp compares two integer values with the given condition: `v = vicmp Cond, x, y` on vector. + OpcodeVIcmp + + // OpcodeIcmpImm compares an integer value with the immediate value on the given condition: `v = icmp_imm Cond, x, Y`. + OpcodeIcmpImm + + // OpcodeIadd performs an integer addition: `v = Iadd x, y`. + OpcodeIadd + + // OpcodeVIadd performs an integer addition: `v = VIadd.lane x, y` on vector. + OpcodeVIadd + + // OpcodeVSaddSat performs a signed saturating vector addition: `v = VSaddSat.lane x, y` on vector. + OpcodeVSaddSat + + // OpcodeVUaddSat performs an unsigned saturating vector addition: `v = VUaddSat.lane x, y` on vector. + OpcodeVUaddSat + + // OpcodeIsub performs an integer subtraction: `v = Isub x, y`. + OpcodeIsub + + // OpcodeVIsub performs an integer subtraction: `v = VIsub.lane x, y` on vector. + OpcodeVIsub + + // OpcodeVSsubSat performs a signed saturating vector subtraction: `v = VSsubSat.lane x, y` on vector. + OpcodeVSsubSat + + // OpcodeVUsubSat performs an unsigned saturating vector subtraction: `v = VUsubSat.lane x, y` on vector. + OpcodeVUsubSat + + // OpcodeVImin performs a signed integer min: `v = VImin.lane x, y` on vector. + OpcodeVImin + + // OpcodeVUmin performs an unsigned integer min: `v = VUmin.lane x, y` on vector. + OpcodeVUmin + + // OpcodeVImax performs a signed integer max: `v = VImax.lane x, y` on vector. + OpcodeVImax + + // OpcodeVUmax performs an unsigned integer max: `v = VUmax.lane x, y` on vector. + OpcodeVUmax + + // OpcodeVAvgRound performs an unsigned integer avg, truncating to zero: `v = VAvgRound.lane x, y` on vector. + OpcodeVAvgRound + + // OpcodeVImul performs an integer multiplication: `v = VImul.lane x, y` on vector. + OpcodeVImul + + // OpcodeVIneg negates the given integer vector value: `v = VIneg x`. + OpcodeVIneg + + // OpcodeVIpopcnt counts the number of 1-bits in the given vector: `v = VIpopcnt x`. 
+ OpcodeVIpopcnt + + // OpcodeVIabs returns the absolute value for the given vector value: `v = VIabs.lane x`. + OpcodeVIabs + + // OpcodeVIshl shifts x left by (y mod lane-width): `v = VIshl.lane x, y` on vector. + OpcodeVIshl + + // OpcodeVUshr shifts x right by (y mod lane-width), unsigned: `v = VUshr.lane x, y` on vector. + OpcodeVUshr + + // OpcodeVSshr shifts x right by (y mod lane-width), signed: `v = VSshr.lane x, y` on vector. + OpcodeVSshr + + // OpcodeVFabs takes the absolute value of a floating point value: `v = VFabs.lane x on vector. + OpcodeVFabs + + // OpcodeVFmax takes the maximum of two floating point values: `v = VFmax.lane x, y on vector. + OpcodeVFmax + + // OpcodeVFmin takes the minimum of two floating point values: `v = VFmin.lane x, y on vector. + OpcodeVFmin + + // OpcodeVFneg negates the given floating point vector value: `v = VFneg x`. + OpcodeVFneg + + // OpcodeVFadd performs a floating point addition: `v = VFadd.lane x, y` on vector. + OpcodeVFadd + + // OpcodeVFsub performs a floating point subtraction: `v = VFsub.lane x, y` on vector. + OpcodeVFsub + + // OpcodeVFmul performs a floating point multiplication: `v = VFmul.lane x, y` on vector. + OpcodeVFmul + + // OpcodeVFdiv performs a floating point division: `v = VFdiv.lane x, y` on vector. + OpcodeVFdiv + + // OpcodeVFcmp compares two float values with the given condition: `v = VFcmp.lane Cond, x, y` on float. + OpcodeVFcmp + + // OpcodeVCeil takes the ceiling of the given floating point value: `v = ceil.lane x` on vector. + OpcodeVCeil + + // OpcodeVFloor takes the floor of the given floating point value: `v = floor.lane x` on vector. + OpcodeVFloor + + // OpcodeVTrunc takes the truncation of the given floating point value: `v = trunc.lane x` on vector. + OpcodeVTrunc + + // OpcodeVNearest takes the nearest integer of the given floating point value: `v = nearest.lane x` on vector. 
+	OpcodeVNearest
+
+	// OpcodeVMaxPseudo computes the lane-wise maximum value `v = VMaxPseudo.lane x, y` on vector defined as `x < y ? x : y`.
+	OpcodeVMaxPseudo
+
+	// OpcodeVMinPseudo computes the lane-wise minimum value `v = VMinPseudo.lane x, y` on vector defined as `y < x ? x : y`.
+	OpcodeVMinPseudo
+
+	// OpcodeVSqrt takes the square root of the given floating point value: `v = VSqrt.lane x` on vector.
+	OpcodeVSqrt
+
+	// OpcodeVFcvtToUintSat converts a floating point value to an unsigned integer: `v = FcvtToUintSat.lane x` on vector.
+	OpcodeVFcvtToUintSat
+
+	// OpcodeVFcvtToSintSat converts a floating point value to a signed integer: `v = VFcvtToSintSat.lane x` on vector.
+	OpcodeVFcvtToSintSat
+
+	// OpcodeVFcvtFromUint converts a floating point value from an unsigned integer: `v = FcvtFromUint.lane x` on vector.
+	// x is always a 32-bit integer lane, and the result is either a 32-bit or 64-bit floating point-sized vector.
+	OpcodeVFcvtFromUint
+
+	// OpcodeVFcvtFromSint converts a floating point value from a signed integer: `v = VFcvtFromSint.lane x` on vector.
+	// x is always a 32-bit integer lane, and the result is either a 32-bit or 64-bit floating point-sized vector.
+	OpcodeVFcvtFromSint
+
+	// OpcodeImul performs an integer multiplication: `v = Imul x, y`.
+	OpcodeImul
+
+	// OpcodeUdiv performs the unsigned integer division `v = Udiv x, y`.
+	OpcodeUdiv
+
+	// OpcodeSdiv performs the signed integer division `v = Sdiv x, y`.
+	OpcodeSdiv
+
+	// OpcodeUrem computes the remainder of the unsigned integer division `v = Urem x, y`.
+	OpcodeUrem
+
+	// OpcodeSrem computes the remainder of the signed integer division `v = Srem x, y`.
+	OpcodeSrem
+
+	// OpcodeBand performs a binary and: `v = Band x, y`.
+	OpcodeBand
+
+	// OpcodeBor performs a binary or: `v = Bor x, y`.
+	OpcodeBor
+
+	// OpcodeBxor performs a binary xor: `v = Bxor x, y`.
+	OpcodeBxor
+
+	// OpcodeBnot performs a binary not: `v = Bnot x`.
+	OpcodeBnot
+
+	// OpcodeRotl rotates the given integer value to the left: `v = Rotl x, y`.
+	OpcodeRotl
+
+	// OpcodeRotr rotates the given integer value to the right: `v = Rotr x, y`.
+	OpcodeRotr
+
+	// OpcodeIshl does logical shift left: `v = Ishl x, y`.
+	OpcodeIshl
+
+	// OpcodeUshr does logical shift right: `v = Ushr x, y`.
+	OpcodeUshr
+
+	// OpcodeSshr does arithmetic shift right: `v = Sshr x, y`.
+	OpcodeSshr
+
+	// OpcodeClz counts the number of leading zeros: `v = clz x`.
+	OpcodeClz
+
+	// OpcodeCtz counts the number of trailing zeros: `v = ctz x`.
+	OpcodeCtz
+
+	// OpcodePopcnt counts the number of 1-bits: `v = popcnt x`.
+	OpcodePopcnt
+
+	// OpcodeFcmp compares two floating point values: `v = fcmp Cond, x, y`.
+	OpcodeFcmp
+
+	// OpcodeFadd performs a floating point addition: `v = Fadd x, y`.
+	OpcodeFadd
+
+	// OpcodeFsub performs a floating point subtraction: `v = Fsub x, y`.
+	OpcodeFsub
+
+	// OpcodeFmul performs a floating point multiplication: `v = Fmul x, y`.
+	OpcodeFmul
+
+	// OpcodeSqmulRoundSat performs a lane-wise saturating rounding multiplication
+	// in Q15 format: `v = SqmulRoundSat.lane x,y` on vector.
+	OpcodeSqmulRoundSat
+
+	// OpcodeFdiv performs a floating point division: `v = Fdiv x, y`.
+	OpcodeFdiv
+
+	// OpcodeSqrt takes the square root of the given floating point value: `v = sqrt x`.
+	OpcodeSqrt
+
+	// OpcodeFneg negates the given floating point value: `v = Fneg x`.
+	OpcodeFneg
+
+	// OpcodeFabs takes the absolute value of the given floating point value: `v = fabs x`.
+	OpcodeFabs
+
+	// OpcodeFcopysign copies the sign of the second floating point value to the first floating point value:
+	// `v = Fcopysign x, y`.
+	OpcodeFcopysign
+
+	// OpcodeFmin takes the minimum of two floating point values: `v = fmin x, y`.
+	OpcodeFmin
+
+	// OpcodeFmax takes the maximum of two floating point values: `v = fmax x, y`.
+	OpcodeFmax
+
+	// OpcodeCeil takes the ceiling of the given floating point value: `v = ceil x`.
+ OpcodeCeil + + // OpcodeFloor takes the floor of the given floating point value: `v = floor x`. + OpcodeFloor + + // OpcodeTrunc takes the truncation of the given floating point value: `v = trunc x`. + OpcodeTrunc + + // OpcodeNearest takes the nearest integer of the given floating point value: `v = nearest x`. + OpcodeNearest + + // OpcodeBitcast is a bitcast operation: `v = bitcast x`. + OpcodeBitcast + + // OpcodeIreduce narrow the given integer: `v = Ireduce x`. + OpcodeIreduce + + // OpcodeSnarrow converts two input vectors x, y into a smaller lane vector by narrowing each lane, signed `v = Snarrow.lane x, y`. + OpcodeSnarrow + + // OpcodeUnarrow converts two input vectors x, y into a smaller lane vector by narrowing each lane, unsigned `v = Unarrow.lane x, y`. + OpcodeUnarrow + + // OpcodeSwidenLow converts low half of the smaller lane vector to a larger lane vector, sign extended: `v = SwidenLow.lane x`. + OpcodeSwidenLow + + // OpcodeSwidenHigh converts high half of the smaller lane vector to a larger lane vector, sign extended: `v = SwidenHigh.lane x`. + OpcodeSwidenHigh + + // OpcodeUwidenLow converts low half of the smaller lane vector to a larger lane vector, zero (unsigned) extended: `v = UwidenLow.lane x`. + OpcodeUwidenLow + + // OpcodeUwidenHigh converts high half of the smaller lane vector to a larger lane vector, zero (unsigned) extended: `v = UwidenHigh.lane x`. + OpcodeUwidenHigh + + // OpcodeExtIaddPairwise is a lane-wise integer extended pairwise addition producing extended results (twice wider results than the inputs): `v = extiadd_pairwise x, y` on vector. + OpcodeExtIaddPairwise + + // OpcodeWideningPairwiseDotProductS is a lane-wise widening pairwise dot product with signed saturation: `v = WideningPairwiseDotProductS x, y` on vector. + // Currently, the only lane is i16, and the result is i32. + OpcodeWideningPairwiseDotProductS + + // OpcodeUExtend zero-extends the given integer: `v = UExtend x, from->to`. 
+ OpcodeUExtend + + // OpcodeSExtend sign-extends the given integer: `v = SExtend x, from->to`. + OpcodeSExtend + + // OpcodeFpromote promotes the given floating point value: `v = Fpromote x`. + OpcodeFpromote + + // OpcodeFvpromoteLow converts the two lower single-precision floating point lanes + // to the two double-precision lanes of the result: `v = FvpromoteLow.lane x` on vector. + OpcodeFvpromoteLow + + // OpcodeFdemote demotes the given float point value: `v = Fdemote x`. + OpcodeFdemote + + // OpcodeFvdemote converts the two double-precision floating point lanes + // to two lower single-precision lanes of the result `v = Fvdemote.lane x`. + OpcodeFvdemote + + // OpcodeFcvtToUint converts a floating point value to an unsigned integer: `v = FcvtToUint x`. + OpcodeFcvtToUint + + // OpcodeFcvtToSint converts a floating point value to a signed integer: `v = FcvtToSint x`. + OpcodeFcvtToSint + + // OpcodeFcvtToUintSat converts a floating point value to an unsigned integer: `v = FcvtToUintSat x` which saturates on overflow. + OpcodeFcvtToUintSat + + // OpcodeFcvtToSintSat converts a floating point value to a signed integer: `v = FcvtToSintSat x` which saturates on overflow. + OpcodeFcvtToSintSat + + // OpcodeFcvtFromUint converts an unsigned integer to a floating point value: `v = FcvtFromUint x`. + OpcodeFcvtFromUint + + // OpcodeFcvtFromSint converts a signed integer to a floating point value: `v = FcvtFromSint x`. + OpcodeFcvtFromSint + + // OpcodeAtomicRmw is atomic read-modify-write operation: `v = atomic_rmw op, p, offset, value`. + OpcodeAtomicRmw + + // OpcodeAtomicCas is atomic compare-and-swap operation. + OpcodeAtomicCas + + // OpcodeAtomicLoad is atomic load operation. + OpcodeAtomicLoad + + // OpcodeAtomicStore is atomic store operation. + OpcodeAtomicStore + + // OpcodeFence is a memory fence operation. + OpcodeFence + + // opcodeEnd marks the end of the opcode list. + opcodeEnd +) + +// AtomicRmwOp represents the atomic read-modify-write operation. 
+type AtomicRmwOp byte + +const ( + // AtomicRmwOpAdd is an atomic add operation. + AtomicRmwOpAdd AtomicRmwOp = iota + // AtomicRmwOpSub is an atomic sub operation. + AtomicRmwOpSub + // AtomicRmwOpAnd is an atomic and operation. + AtomicRmwOpAnd + // AtomicRmwOpOr is an atomic or operation. + AtomicRmwOpOr + // AtomicRmwOpXor is an atomic xor operation. + AtomicRmwOpXor + // AtomicRmwOpXchg is an atomic swap operation. + AtomicRmwOpXchg +) + +// String implements the fmt.Stringer. +func (op AtomicRmwOp) String() string { + switch op { + case AtomicRmwOpAdd: + return "add" + case AtomicRmwOpSub: + return "sub" + case AtomicRmwOpAnd: + return "and" + case AtomicRmwOpOr: + return "or" + case AtomicRmwOpXor: + return "xor" + case AtomicRmwOpXchg: + return "xchg" + } + panic(fmt.Sprintf("unknown AtomicRmwOp: %d", op)) +} + +// returnTypesFn provides the info to determine the type of instruction. +// t1 is the type of the first result, ts are the types of the remaining results. +type returnTypesFn func(b *builder, instr *Instruction) (t1 Type, ts []Type) + +var ( + returnTypesFnNoReturns returnTypesFn = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return typeInvalid, nil } + returnTypesFnSingle = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return instr.typ, nil } + returnTypesFnI32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeI32, nil } + returnTypesFnF32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF32, nil } + returnTypesFnF64 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF64, nil } + returnTypesFnV128 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeV128, nil } +) + +// sideEffect provides the info to determine if an instruction has side effects which +// is used to determine if it can be optimized out, interchanged with others, etc. 
// sideEffect classifies how an instruction constrains dead-code elimination and reordering.
type sideEffect byte

const (
	sideEffectUnknown sideEffect = iota
	// sideEffectStrict represents an instruction with side effects, and should be always alive plus cannot be reordered.
	sideEffectStrict
	// sideEffectTraps represents an instruction that can trap, and should be always alive but can be reordered within the group.
	sideEffectTraps
	// sideEffectNone represents an instruction without side effects, and can be eliminated if the result is not used, plus can be reordered within the group.
	sideEffectNone
)

// instructionSideEffects provides the info to determine if an instruction has side effects.
// Instructions with side effects must not be eliminated regardless whether the result is used or not.
var instructionSideEffects = [opcodeEnd]sideEffect{
	OpcodeUndefined:                   sideEffectStrict,
	OpcodeJump:                        sideEffectStrict,
	OpcodeIconst:                      sideEffectNone,
	OpcodeCall:                        sideEffectStrict,
	OpcodeCallIndirect:                sideEffectStrict,
	OpcodeIadd:                        sideEffectNone,
	OpcodeImul:                        sideEffectNone,
	OpcodeIsub:                        sideEffectNone,
	OpcodeIcmp:                        sideEffectNone,
	OpcodeExtractlane:                 sideEffectNone,
	OpcodeInsertlane:                  sideEffectNone,
	OpcodeBand:                        sideEffectNone,
	OpcodeBor:                         sideEffectNone,
	OpcodeBxor:                        sideEffectNone,
	OpcodeRotl:                        sideEffectNone,
	OpcodeRotr:                        sideEffectNone,
	OpcodeFcmp:                        sideEffectNone,
	OpcodeFadd:                        sideEffectNone,
	OpcodeClz:                         sideEffectNone,
	OpcodeCtz:                         sideEffectNone,
	OpcodePopcnt:                      sideEffectNone,
	OpcodeLoad:                        sideEffectNone,
	OpcodeLoadSplat:                   sideEffectNone,
	OpcodeUload8:                      sideEffectNone,
	OpcodeUload16:                     sideEffectNone,
	OpcodeUload32:                     sideEffectNone,
	OpcodeSload8:                      sideEffectNone,
	OpcodeSload16:                     sideEffectNone,
	OpcodeSload32:                     sideEffectNone,
	OpcodeSExtend:                     sideEffectNone,
	OpcodeUExtend:                     sideEffectNone,
	OpcodeSwidenLow:                   sideEffectNone,
	OpcodeUwidenLow:                   sideEffectNone,
	OpcodeSwidenHigh:                  sideEffectNone,
	OpcodeUwidenHigh:                  sideEffectNone,
	OpcodeSnarrow:                     sideEffectNone,
	OpcodeUnarrow:                     sideEffectNone,
	OpcodeSwizzle:                     sideEffectNone,
	OpcodeShuffle:                     sideEffectNone,
	OpcodeSplat:                       sideEffectNone,
	OpcodeFsub:                        sideEffectNone,
	OpcodeF32const:                    sideEffectNone,
	OpcodeF64const:                    sideEffectNone,
	OpcodeIshl:                        sideEffectNone,
	OpcodeSshr:                        sideEffectNone,
	OpcodeUshr:                        sideEffectNone,
	OpcodeStore:                       sideEffectStrict,
	OpcodeIstore8:                     sideEffectStrict,
	OpcodeIstore16:                    sideEffectStrict,
	OpcodeIstore32:                    sideEffectStrict,
	OpcodeExitWithCode:                sideEffectStrict,
	OpcodeExitIfTrueWithCode:          sideEffectStrict,
	OpcodeReturn:                      sideEffectStrict,
	OpcodeBrz:                         sideEffectStrict,
	OpcodeBrnz:                        sideEffectStrict,
	OpcodeBrTable:                     sideEffectStrict,
	OpcodeFdiv:                        sideEffectNone,
	OpcodeFmul:                        sideEffectNone,
	OpcodeFmax:                        sideEffectNone,
	OpcodeSqmulRoundSat:               sideEffectNone,
	OpcodeSelect:                      sideEffectNone,
	OpcodeFmin:                        sideEffectNone,
	OpcodeFneg:                        sideEffectNone,
	OpcodeFcvtToSint:                  sideEffectTraps,
	OpcodeFcvtToUint:                  sideEffectTraps,
	OpcodeFcvtFromSint:                sideEffectNone,
	OpcodeFcvtFromUint:                sideEffectNone,
	OpcodeFcvtToSintSat:               sideEffectNone,
	OpcodeFcvtToUintSat:               sideEffectNone,
	OpcodeVFcvtFromUint:               sideEffectNone,
	OpcodeVFcvtFromSint:               sideEffectNone,
	OpcodeFdemote:                     sideEffectNone,
	OpcodeFvpromoteLow:                sideEffectNone,
	OpcodeFvdemote:                    sideEffectNone,
	OpcodeFpromote:                    sideEffectNone,
	OpcodeBitcast:                     sideEffectNone,
	OpcodeIreduce:                     sideEffectNone,
	OpcodeSqrt:                        sideEffectNone,
	OpcodeCeil:                        sideEffectNone,
	OpcodeFloor:                       sideEffectNone,
	OpcodeTrunc:                       sideEffectNone,
	OpcodeNearest:                     sideEffectNone,
	OpcodeSdiv:                        sideEffectTraps,
	OpcodeSrem:                        sideEffectTraps,
	OpcodeUdiv:                        sideEffectTraps,
	OpcodeUrem:                        sideEffectTraps,
	OpcodeFabs:                        sideEffectNone,
	OpcodeFcopysign:                   sideEffectNone,
	OpcodeExtIaddPairwise:             sideEffectNone,
	OpcodeVconst:                      sideEffectNone,
	OpcodeVbor:                        sideEffectNone,
	OpcodeVbxor:                       sideEffectNone,
	OpcodeVband:                       sideEffectNone,
	OpcodeVbandnot:                    sideEffectNone,
	OpcodeVbnot:                       sideEffectNone,
	OpcodeVbitselect:                  sideEffectNone,
	OpcodeVanyTrue:                    sideEffectNone,
	OpcodeVallTrue:                    sideEffectNone,
	OpcodeVhighBits:                   sideEffectNone,
	OpcodeVIadd:                       sideEffectNone,
	OpcodeVSaddSat:                    sideEffectNone,
	OpcodeVUaddSat:                    sideEffectNone,
	OpcodeVIsub:                       sideEffectNone,
	OpcodeVSsubSat:                    sideEffectNone,
	OpcodeVUsubSat:                    sideEffectNone,
	OpcodeVIcmp:                       sideEffectNone,
	OpcodeVImin:                       sideEffectNone,
	OpcodeVUmin:                       sideEffectNone,
	OpcodeVImax:                       sideEffectNone,
	OpcodeVUmax:                       sideEffectNone,
	OpcodeVAvgRound:                   sideEffectNone,
	OpcodeVImul:                       sideEffectNone,
	OpcodeVIabs:                       sideEffectNone,
	OpcodeVIneg:                       sideEffectNone,
	OpcodeVIpopcnt:                    sideEffectNone,
	OpcodeVIshl:                       sideEffectNone,
	OpcodeVSshr:                       sideEffectNone,
	OpcodeVUshr:                       sideEffectNone,
	OpcodeVSqrt:                       sideEffectNone,
	OpcodeVFabs:                       sideEffectNone,
	OpcodeVFmin:                       sideEffectNone,
	OpcodeVFmax:                       sideEffectNone,
	OpcodeVFneg:                       sideEffectNone,
	OpcodeVFadd:                       sideEffectNone,
	OpcodeVFsub:                       sideEffectNone,
	OpcodeVFmul:                       sideEffectNone,
	OpcodeVFdiv:                       sideEffectNone,
	OpcodeVFcmp:                       sideEffectNone,
	OpcodeVCeil:                       sideEffectNone,
	OpcodeVFloor:                      sideEffectNone,
	OpcodeVTrunc:                      sideEffectNone,
	OpcodeVNearest:                    sideEffectNone,
	OpcodeVMaxPseudo:                  sideEffectNone,
	OpcodeVMinPseudo:                  sideEffectNone,
	OpcodeVFcvtToUintSat:              sideEffectNone,
	OpcodeVFcvtToSintSat:              sideEffectNone,
	OpcodeVZeroExtLoad:                sideEffectNone,
	OpcodeAtomicRmw:                   sideEffectStrict,
	OpcodeAtomicLoad:                  sideEffectStrict,
	OpcodeAtomicStore:                 sideEffectStrict,
	OpcodeAtomicCas:                   sideEffectStrict,
	OpcodeFence:                       sideEffectStrict,
	OpcodeWideningPairwiseDotProductS: sideEffectNone,
}

// sideEffect returns the sideEffect classification of this instruction.
// It panics when no classification is registered for the opcode, which
// indicates a bug in the table above.
func (i *Instruction) sideEffect() sideEffect {
	if e := instructionSideEffects[i.opcode]; e == sideEffectUnknown {
		panic("BUG: side effect info not registered for " + i.opcode.String())
	} else {
		return e
	}
}

// instructionReturnTypes provides the function to determine the return types of an instruction.
// Indexed by opcode; each entry computes (first result type, additional result types)
// for an instruction. Most entries are fixed-type helpers; calls look up their signature.
var instructionReturnTypes = [opcodeEnd]returnTypesFn{
	OpcodeExtIaddPairwise: returnTypesFnV128,
	OpcodeVbor:            returnTypesFnV128,
	OpcodeVbxor:           returnTypesFnV128,
	OpcodeVband:           returnTypesFnV128,
	OpcodeVbnot:           returnTypesFnV128,
	OpcodeVbandnot:        returnTypesFnV128,
	OpcodeVbitselect:      returnTypesFnV128,
	OpcodeVanyTrue:        returnTypesFnI32,
	OpcodeVallTrue:        returnTypesFnI32,
	OpcodeVhighBits:       returnTypesFnI32,
	OpcodeVIadd:           returnTypesFnV128,
	OpcodeVSaddSat:        returnTypesFnV128,
	OpcodeVUaddSat:        returnTypesFnV128,
	OpcodeVIsub:           returnTypesFnV128,
	OpcodeVSsubSat:        returnTypesFnV128,
	OpcodeVUsubSat:        returnTypesFnV128,
	OpcodeVIcmp:           returnTypesFnV128,
	OpcodeVImin:           returnTypesFnV128,
	OpcodeVUmin:           returnTypesFnV128,
	OpcodeVImax:           returnTypesFnV128,
	OpcodeVUmax:           returnTypesFnV128,
	OpcodeVImul:           returnTypesFnV128,
	OpcodeVAvgRound:       returnTypesFnV128,
	OpcodeVIabs:           returnTypesFnV128,
	OpcodeVIneg:           returnTypesFnV128,
	OpcodeVIpopcnt:        returnTypesFnV128,
	OpcodeVIshl:           returnTypesFnV128,
	OpcodeVSshr:           returnTypesFnV128,
	OpcodeVUshr:           returnTypesFnV128,
	OpcodeExtractlane:     returnTypesFnSingle,
	OpcodeInsertlane:      returnTypesFnV128,
	OpcodeBand:            returnTypesFnSingle,
	OpcodeFcopysign:       returnTypesFnSingle,
	OpcodeBitcast:         returnTypesFnSingle,
	OpcodeBor:             returnTypesFnSingle,
	OpcodeBxor:            returnTypesFnSingle,
	OpcodeRotl:            returnTypesFnSingle,
	OpcodeRotr:            returnTypesFnSingle,
	OpcodeIshl:            returnTypesFnSingle,
	OpcodeSshr:            returnTypesFnSingle,
	OpcodeSdiv:            returnTypesFnSingle,
	OpcodeSrem:            returnTypesFnSingle,
	OpcodeUdiv:            returnTypesFnSingle,
	OpcodeUrem:            returnTypesFnSingle,
	OpcodeUshr:            returnTypesFnSingle,
	OpcodeJump:            returnTypesFnNoReturns,
	OpcodeUndefined:       returnTypesFnNoReturns,
	OpcodeIconst:          returnTypesFnSingle,
	OpcodeSelect:          returnTypesFnSingle,
	OpcodeSExtend:         returnTypesFnSingle,
	OpcodeUExtend:         returnTypesFnSingle,
	OpcodeSwidenLow:       returnTypesFnV128,
	OpcodeUwidenLow:       returnTypesFnV128,
	OpcodeSwidenHigh:      returnTypesFnV128,
	OpcodeUwidenHigh:      returnTypesFnV128,
	OpcodeSnarrow:         returnTypesFnV128,
	OpcodeUnarrow:         returnTypesFnV128,
	OpcodeSwizzle:         returnTypesFnSingle,
	OpcodeShuffle:         returnTypesFnV128,
	OpcodeSplat:           returnTypesFnV128,
	OpcodeIreduce:         returnTypesFnSingle,
	OpcodeFabs:            returnTypesFnSingle,
	OpcodeSqrt:            returnTypesFnSingle,
	OpcodeCeil:            returnTypesFnSingle,
	OpcodeFloor:           returnTypesFnSingle,
	OpcodeTrunc:           returnTypesFnSingle,
	OpcodeNearest:         returnTypesFnSingle,
	// Indirect call: the signature ID is stored in u1 (see the direct-call case below, which uses u2).
	OpcodeCallIndirect: func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
		sigID := SignatureID(instr.u1)
		sig, ok := b.signatures[sigID]
		if !ok {
			panic("BUG")
		}
		switch len(sig.Results) {
		case 0:
			t1 = typeInvalid
		case 1:
			t1 = sig.Results[0]
		default:
			t1, ts = sig.Results[0], sig.Results[1:]
		}
		return
	},
	// Direct call: the signature ID is stored in u2.
	OpcodeCall: func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
		sigID := SignatureID(instr.u2)
		sig, ok := b.signatures[sigID]
		if !ok {
			panic("BUG")
		}
		switch len(sig.Results) {
		case 0:
			t1 = typeInvalid
		case 1:
			t1 = sig.Results[0]
		default:
			t1, ts = sig.Results[0], sig.Results[1:]
		}
		return
	},
	OpcodeLoad:                        returnTypesFnSingle,
	OpcodeVZeroExtLoad:                returnTypesFnV128,
	OpcodeLoadSplat:                   returnTypesFnV128,
	OpcodeIadd:                        returnTypesFnSingle,
	OpcodeIsub:                        returnTypesFnSingle,
	OpcodeImul:                        returnTypesFnSingle,
	OpcodeIcmp:                        returnTypesFnI32,
	OpcodeFcmp:                        returnTypesFnI32,
	OpcodeFadd:                        returnTypesFnSingle,
	OpcodeFsub:                        returnTypesFnSingle,
	OpcodeFdiv:                        returnTypesFnSingle,
	OpcodeFmul:                        returnTypesFnSingle,
	OpcodeFmax:                        returnTypesFnSingle,
	OpcodeFmin:                        returnTypesFnSingle,
	OpcodeSqmulRoundSat:               returnTypesFnV128,
	OpcodeF32const:                    returnTypesFnF32,
	OpcodeF64const:                    returnTypesFnF64,
	OpcodeClz:                         returnTypesFnSingle,
	OpcodeCtz:                         returnTypesFnSingle,
	OpcodePopcnt:                      returnTypesFnSingle,
	OpcodeStore:                       returnTypesFnNoReturns,
	OpcodeIstore8:                     returnTypesFnNoReturns,
	OpcodeIstore16:                    returnTypesFnNoReturns,
	OpcodeIstore32:                    returnTypesFnNoReturns,
	OpcodeExitWithCode:                returnTypesFnNoReturns,
	OpcodeExitIfTrueWithCode:          returnTypesFnNoReturns,
	OpcodeReturn:                      returnTypesFnNoReturns,
	OpcodeBrz:                         returnTypesFnNoReturns,
	OpcodeBrnz:                        returnTypesFnNoReturns,
	OpcodeBrTable:                     returnTypesFnNoReturns,
	OpcodeUload8:                      returnTypesFnSingle,
	OpcodeUload16:                     returnTypesFnSingle,
	OpcodeUload32:                     returnTypesFnSingle,
	OpcodeSload8:                      returnTypesFnSingle,
	OpcodeSload16:                     returnTypesFnSingle,
	OpcodeSload32:                     returnTypesFnSingle,
	OpcodeFcvtToSint:                  returnTypesFnSingle,
	OpcodeFcvtToUint:                  returnTypesFnSingle,
	OpcodeFcvtFromSint:                returnTypesFnSingle,
	OpcodeFcvtFromUint:                returnTypesFnSingle,
	OpcodeFcvtToSintSat:               returnTypesFnSingle,
	OpcodeFcvtToUintSat:               returnTypesFnSingle,
	OpcodeVFcvtFromUint:               returnTypesFnV128,
	OpcodeVFcvtFromSint:               returnTypesFnV128,
	OpcodeFneg:                        returnTypesFnSingle,
	OpcodeFdemote:                     returnTypesFnF32,
	OpcodeFvdemote:                    returnTypesFnV128,
	OpcodeFvpromoteLow:                returnTypesFnV128,
	OpcodeFpromote:                    returnTypesFnF64,
	OpcodeVconst:                      returnTypesFnV128,
	OpcodeVFabs:                       returnTypesFnV128,
	OpcodeVSqrt:                       returnTypesFnV128,
	OpcodeVFmax:                       returnTypesFnV128,
	OpcodeVFmin:                       returnTypesFnV128,
	OpcodeVFneg:                       returnTypesFnV128,
	OpcodeVFadd:                       returnTypesFnV128,
	OpcodeVFsub:                       returnTypesFnV128,
	OpcodeVFmul:                       returnTypesFnV128,
	OpcodeVFdiv:                       returnTypesFnV128,
	OpcodeVFcmp:                       returnTypesFnV128,
	OpcodeVCeil:                       returnTypesFnV128,
	OpcodeVFloor:                      returnTypesFnV128,
	OpcodeVTrunc:                      returnTypesFnV128,
	OpcodeVNearest:                    returnTypesFnV128,
	OpcodeVMaxPseudo:                  returnTypesFnV128,
	OpcodeVMinPseudo:                  returnTypesFnV128,
	OpcodeVFcvtToUintSat:              returnTypesFnV128,
	OpcodeVFcvtToSintSat:              returnTypesFnV128,
	OpcodeAtomicRmw:                   returnTypesFnSingle,
	OpcodeAtomicLoad:                  returnTypesFnSingle,
	OpcodeAtomicStore:                 returnTypesFnNoReturns,
	OpcodeAtomicCas:                   returnTypesFnSingle,
	OpcodeFence:                       returnTypesFnNoReturns,
	OpcodeWideningPairwiseDotProductS: returnTypesFnV128,
}
// AsLoad initializes this instruction as a load instruction with OpcodeLoad.
// The memory offset is stored in u1.
func (i *Instruction) AsLoad(ptr Value, offset uint32, typ Type) *Instruction {
	i.opcode = OpcodeLoad
	i.v = ptr
	i.u1 = uint64(offset)
	i.typ = typ
	return i
}

// AsExtLoad initializes this instruction as an extending load instruction using the given
// (sign- or zero-extending) load opcode. dst64bit selects an i64 vs. i32 destination type.
func (i *Instruction) AsExtLoad(op Opcode, ptr Value, offset uint32, dst64bit bool) *Instruction {
	i.opcode = op
	i.v = ptr
	i.u1 = uint64(offset)
	if dst64bit {
		i.typ = TypeI64
	} else {
		i.typ = TypeI32
	}
	return i
}

// AsVZeroExtLoad initializes this instruction as a zero-extending vector load with
// OpcodeVZeroExtLoad. The scalar type of the loaded element is stored in u2.
func (i *Instruction) AsVZeroExtLoad(ptr Value, offset uint32, scalarType Type) *Instruction {
	i.opcode = OpcodeVZeroExtLoad
	i.v = ptr
	i.u1 = uint64(offset)
	i.u2 = uint64(scalarType)
	i.typ = TypeV128
	return i
}

// VZeroExtLoadData returns the operands for a load instruction. The returned `typ` is the scalar type of the load target.
func (i *Instruction) VZeroExtLoadData() (ptr Value, offset uint32, typ Type) {
	return i.v, uint32(i.u1), Type(i.u2)
}

// AsLoadSplat initializes this instruction as a load-and-splat instruction with OpcodeLoadSplat.
func (i *Instruction) AsLoadSplat(ptr Value, offset uint32, lane VecLane) *Instruction {
	i.opcode = OpcodeLoadSplat
	i.v = ptr
	i.u1 = uint64(offset)
	i.u2 = uint64(lane)
	i.typ = TypeV128
	return i
}

// LoadData returns the operands for a load instruction.
func (i *Instruction) LoadData() (ptr Value, offset uint32, typ Type) {
	return i.v, uint32(i.u1), i.typ
}

// LoadSplatData returns the operands for a load splat instruction.
func (i *Instruction) LoadSplatData() (ptr Value, offset uint32, lane VecLane) {
	return i.v, uint32(i.u1), VecLane(i.u2)
}

// AsStore initializes this instruction as a store instruction with OpcodeStore.
// storeOp selects the store width (OpcodeStore uses the full width of the value's
// type; OpcodeIstore8/16/32 truncate). The offset is packed into the low 32 bits
// of u1 and the destination size in bits into the high 32 bits.
func (i *Instruction) AsStore(storeOp Opcode, value, ptr Value, offset uint32) *Instruction {
	i.opcode = storeOp
	i.v = value
	i.v2 = ptr

	var dstSize uint64
	switch storeOp {
	case OpcodeStore:
		dstSize = uint64(value.Type().Bits())
	case OpcodeIstore8:
		dstSize = 8
	case OpcodeIstore16:
		dstSize = 16
	case OpcodeIstore32:
		dstSize = 32
	default:
		panic("invalid store opcode" + storeOp.String())
	}
	i.u1 = uint64(offset) | dstSize<<32
	return i
}

// StoreData returns the operands for a store instruction.
// storeSizeInBits is unpacked from the high 32 bits of u1 (see AsStore).
func (i *Instruction) StoreData() (value, ptr Value, offset uint32, storeSizeInBits byte) {
	return i.v, i.v2, uint32(i.u1), byte(i.u1 >> 32)
}

// AsIconst64 initializes this instruction as a 64-bit integer constant instruction with OpcodeIconst.
func (i *Instruction) AsIconst64(v uint64) *Instruction {
	i.opcode = OpcodeIconst
	i.typ = TypeI64
	i.u1 = v
	return i
}

// AsIconst32 initializes this instruction as a 32-bit integer constant instruction with OpcodeIconst.
func (i *Instruction) AsIconst32(v uint32) *Instruction {
	i.opcode = OpcodeIconst
	i.typ = TypeI32
	i.u1 = uint64(v)
	return i
}

// AsIadd initializes this instruction as an integer addition instruction with OpcodeIadd.
func (i *Instruction) AsIadd(x, y Value) *Instruction {
	i.opcode = OpcodeIadd
	i.v = x
	i.v2 = y
	i.typ = x.Type()
	return i
}

// AsVIadd initializes this instruction as an integer addition instruction with OpcodeVIadd on a vector.
func (i *Instruction) AsVIadd(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVIadd
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsWideningPairwiseDotProductS initializes this instruction as a widening pairwise dot product
// instruction with OpcodeWideningPairwiseDotProductS on a vector.
func (i *Instruction) AsWideningPairwiseDotProductS(x, y Value) *Instruction {
	i.opcode = OpcodeWideningPairwiseDotProductS
	i.v = x
	i.v2 = y
	i.typ = TypeV128
	return i
}

// AsExtIaddPairwise initializes this instruction as a lane-wise integer extended pairwise addition instruction
// with OpcodeExtIaddPairwise on a vector. The signedness flag is encoded in u2.
func (i *Instruction) AsExtIaddPairwise(x Value, srcLane VecLane, signed bool) *Instruction {
	i.opcode = OpcodeExtIaddPairwise
	i.v = x
	i.u1 = uint64(srcLane)
	if signed {
		i.u2 = 1
	}
	i.typ = TypeV128
	return i
}

// ExtIaddPairwiseData returns the operands for a lane-wise integer extended pairwise addition instruction.
func (i *Instruction) ExtIaddPairwiseData() (x Value, srcLane VecLane, signed bool) {
	return i.v, VecLane(i.u1), i.u2 != 0
}

// AsVSaddSat initializes this instruction as a vector addition with saturation instruction with OpcodeVSaddSat on a vector.
func (i *Instruction) AsVSaddSat(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVSaddSat
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVUaddSat initializes this instruction as a vector addition with saturation instruction with OpcodeVUaddSat on a vector.
func (i *Instruction) AsVUaddSat(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVUaddSat
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVIsub initializes this instruction as an integer subtraction instruction with OpcodeVIsub on a vector.
func (i *Instruction) AsVIsub(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVIsub
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVSsubSat initializes this instruction as a vector subtraction with saturation instruction with OpcodeVSsubSat on a vector.
func (i *Instruction) AsVSsubSat(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVSsubSat
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVUsubSat initializes this instruction as a vector subtraction with saturation instruction with OpcodeVUsubSat on a vector.
func (i *Instruction) AsVUsubSat(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVUsubSat
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVImin initializes this instruction as a signed integer min instruction with OpcodeVImin on a vector.
func (i *Instruction) AsVImin(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVImin
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVUmin initializes this instruction as an unsigned integer min instruction with OpcodeVUmin on a vector.
func (i *Instruction) AsVUmin(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVUmin
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVImax initializes this instruction as a signed integer max instruction with OpcodeVImax on a vector.
func (i *Instruction) AsVImax(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVImax
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVUmax initializes this instruction as an unsigned integer max instruction with OpcodeVUmax on a vector.
func (i *Instruction) AsVUmax(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVUmax
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVAvgRound initializes this instruction as an unsigned integer rounding average instruction with OpcodeVAvgRound on a vector.
func (i *Instruction) AsVAvgRound(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVAvgRound
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVImul initializes this instruction as an integer multiplication with OpcodeVImul on a vector.
func (i *Instruction) AsVImul(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVImul
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsSqmulRoundSat initializes this instruction as a lane-wise saturating rounding multiplication
// in Q15 format with OpcodeSqmulRoundSat on a vector.
func (i *Instruction) AsSqmulRoundSat(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeSqmulRoundSat
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVIabs initializes this instruction as a vector absolute value with OpcodeVIabs.
func (i *Instruction) AsVIabs(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVIabs
	i.v = x
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVIneg initializes this instruction as a vector negation with OpcodeVIneg.
func (i *Instruction) AsVIneg(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVIneg
	i.v = x
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVIpopcnt initializes this instruction as a Population Count instruction with OpcodeVIpopcnt on a vector.
// Only the i8x16 lane shape is supported; any other lane panics.
func (i *Instruction) AsVIpopcnt(x Value, lane VecLane) *Instruction {
	if lane != VecLaneI8x16 {
		panic("Unsupported lane type " + lane.String())
	}
	i.opcode = OpcodeVIpopcnt
	i.v = x
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVSqrt initializes this instruction as a sqrt instruction with OpcodeVSqrt on a vector.
func (i *Instruction) AsVSqrt(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVSqrt
	i.v = x
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVFabs initializes this instruction as a float abs instruction with OpcodeVFabs on a vector.
func (i *Instruction) AsVFabs(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVFabs
	i.v = x
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVFneg initializes this instruction as a float neg instruction with OpcodeVFneg on a vector.
func (i *Instruction) AsVFneg(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVFneg
	i.v = x
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVFmax initializes this instruction as a float max instruction with OpcodeVFmax on a vector.
func (i *Instruction) AsVFmax(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVFmax
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVFmin initializes this instruction as a float min instruction with OpcodeVFmin on a vector.
func (i *Instruction) AsVFmin(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVFmin
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVFadd initializes this instruction as a floating point add instruction with OpcodeVFadd on a vector.
func (i *Instruction) AsVFadd(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVFadd
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVFsub initializes this instruction as a floating point subtraction instruction with OpcodeVFsub on a vector.
func (i *Instruction) AsVFsub(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVFsub
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVFmul initializes this instruction as a floating point multiplication instruction with OpcodeVFmul on a vector.
func (i *Instruction) AsVFmul(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVFmul
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVFdiv initializes this instruction as a floating point division instruction with OpcodeVFdiv on a vector.
func (i *Instruction) AsVFdiv(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVFdiv
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsImul initializes this instruction as an integer multiplication instruction with OpcodeImul.
func (i *Instruction) AsImul(x, y Value) *Instruction {
	i.opcode = OpcodeImul
	i.v = x
	i.v2 = y
	i.typ = x.Type()
	return i
}

// Insert inserts this instruction into the given builder and returns it for chaining.
func (i *Instruction) Insert(b Builder) *Instruction {
	b.InsertInstruction(i)
	return i
}

// AsIsub initializes this instruction as an integer subtraction instruction with OpcodeIsub.
func (i *Instruction) AsIsub(x, y Value) *Instruction {
	i.opcode = OpcodeIsub
	i.v = x
	i.v2 = y
	i.typ = x.Type()
	return i
}

// AsIcmp initializes this instruction as an integer comparison instruction with OpcodeIcmp.
// The condition is stored in u1; the result type is always i32.
func (i *Instruction) AsIcmp(x, y Value, c IntegerCmpCond) *Instruction {
	i.opcode = OpcodeIcmp
	i.v = x
	i.v2 = y
	i.u1 = uint64(c)
	i.typ = TypeI32
	return i
}

// AsFcmp initializes this instruction as a floating-point comparison instruction with OpcodeFcmp.
func (i *Instruction) AsFcmp(x, y Value, c FloatCmpCond) {
	i.opcode = OpcodeFcmp
	i.v = x
	i.v2 = y
	i.u1 = uint64(c)
	i.typ = TypeI32
}

// AsVIcmp initializes this instruction as an integer vector comparison instruction with OpcodeVIcmp.
// The condition is stored in u1 and the lane shape in u2.
func (i *Instruction) AsVIcmp(x, y Value, c IntegerCmpCond, lane VecLane) *Instruction {
	i.opcode = OpcodeVIcmp
	i.v = x
	i.v2 = y
	i.u1 = uint64(c)
	i.u2 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsVFcmp initializes this instruction as a float comparison instruction with OpcodeVFcmp on Vector.
func (i *Instruction) AsVFcmp(x, y Value, c FloatCmpCond, lane VecLane) *Instruction {
	i.opcode = OpcodeVFcmp
	i.v = x
	i.v2 = y
	i.u1 = uint64(c)
	i.typ = TypeV128
	i.u2 = uint64(lane)
	return i
}

// AsVCeil initializes this instruction as a vector ceil instruction with OpcodeVCeil.
func (i *Instruction) AsVCeil(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVCeil
	i.v = x
	i.typ = x.Type()
	i.u1 = uint64(lane)
	return i
}

// AsVFloor initializes this instruction as a vector floor instruction with OpcodeVFloor.
func (i *Instruction) AsVFloor(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVFloor
	i.v = x
	i.typ = x.Type()
	i.u1 = uint64(lane)
	return i
}

// AsVTrunc initializes this instruction as a vector truncation instruction with OpcodeVTrunc.
func (i *Instruction) AsVTrunc(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVTrunc
	i.v = x
	i.typ = x.Type()
	i.u1 = uint64(lane)
	return i
}

// AsVNearest initializes this instruction as a vector round-to-nearest instruction with OpcodeVNearest.
func (i *Instruction) AsVNearest(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVNearest
	i.v = x
	i.typ = x.Type()
	i.u1 = uint64(lane)
	return i
}

// AsVMaxPseudo initializes this instruction as an instruction with OpcodeVMaxPseudo.
func (i *Instruction) AsVMaxPseudo(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVMaxPseudo
	i.typ = x.Type()
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	return i
}

// AsVMinPseudo initializes this instruction as an instruction with OpcodeVMinPseudo.
func (i *Instruction) AsVMinPseudo(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVMinPseudo
	i.typ = x.Type()
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	return i
}

// AsSDiv initializes this instruction as a signed integer division instruction with OpcodeSdiv.
// NOTE(review): ctx appears to carry an execution-context value used by the lowering
// for the trapping division/remainder opcodes — confirm against the callers.
func (i *Instruction) AsSDiv(x, y, ctx Value) *Instruction {
	i.opcode = OpcodeSdiv
	i.v = x
	i.v2 = y
	i.v3 = ctx
	i.typ = x.Type()
	return i
}

// AsUDiv initializes this instruction as an unsigned integer division instruction with OpcodeUdiv.
func (i *Instruction) AsUDiv(x, y, ctx Value) *Instruction {
	i.opcode = OpcodeUdiv
	i.v = x
	i.v2 = y
	i.v3 = ctx
	i.typ = x.Type()
	return i
}

// AsSRem initializes this instruction as a signed integer remainder instruction with OpcodeSrem.
func (i *Instruction) AsSRem(x, y, ctx Value) *Instruction {
	i.opcode = OpcodeSrem
	i.v = x
	i.v2 = y
	i.v3 = ctx
	i.typ = x.Type()
	return i
}

// AsURem initializes this instruction as an unsigned integer remainder instruction with OpcodeUrem.
func (i *Instruction) AsURem(x, y, ctx Value) *Instruction {
	i.opcode = OpcodeUrem
	i.v = x
	i.v2 = y
	i.v3 = ctx
	i.typ = x.Type()
	return i
}

// AsBand initializes this instruction as an integer bitwise and instruction with OpcodeBand.
func (i *Instruction) AsBand(x, amount Value) *Instruction {
	i.opcode = OpcodeBand
	i.v = x
	i.v2 = amount
	i.typ = x.Type()
	return i
}

// AsBor initializes this instruction as an integer bitwise or instruction with OpcodeBor.
func (i *Instruction) AsBor(x, amount Value) {
	i.opcode = OpcodeBor
	i.v = x
	i.v2 = amount
	i.typ = x.Type()
}

// AsBxor initializes this instruction as an integer bitwise xor instruction with OpcodeBxor.
func (i *Instruction) AsBxor(x, amount Value) {
	i.opcode = OpcodeBxor
	i.v = x
	i.v2 = amount
	i.typ = x.Type()
}

// AsIshl initializes this instruction as an integer shift left instruction with OpcodeIshl.
func (i *Instruction) AsIshl(x, amount Value) *Instruction {
	i.opcode = OpcodeIshl
	i.v = x
	i.v2 = amount
	i.typ = x.Type()
	return i
}

// AsVIshl initializes this instruction as an integer shift left instruction with OpcodeVIshl on vector.
func (i *Instruction) AsVIshl(x, amount Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVIshl
	i.v = x
	i.v2 = amount
	i.u1 = uint64(lane)
	i.typ = x.Type()
	return i
}

// AsUshr initializes this instruction as an integer unsigned shift right (logical shift right) instruction with OpcodeUshr.
func (i *Instruction) AsUshr(x, amount Value) *Instruction {
	i.opcode = OpcodeUshr
	i.v = x
	i.v2 = amount
	i.typ = x.Type()
	return i
}

// AsVUshr initializes this instruction as an integer unsigned shift right (logical shift right) instruction with OpcodeVUshr on vector.
func (i *Instruction) AsVUshr(x, amount Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVUshr
	i.v = x
	i.v2 = amount
	i.u1 = uint64(lane)
	i.typ = x.Type()
	return i
}

// AsSshr initializes this instruction as an integer signed shift right (arithmetic shift right) instruction with OpcodeSshr.
func (i *Instruction) AsSshr(x, amount Value) *Instruction {
	i.opcode = OpcodeSshr
	i.v = x
	i.v2 = amount
	i.typ = x.Type()
	return i
}

// AsVSshr initializes this instruction as an integer signed shift right (arithmetic shift right) instruction with OpcodeVSshr on vector.
func (i *Instruction) AsVSshr(x, amount Value, lane VecLane) *Instruction {
	i.opcode = OpcodeVSshr
	i.v = x
	i.v2 = amount
	i.u1 = uint64(lane)
	i.typ = x.Type()
	return i
}

// AsExtractlane initializes this instruction as an extract lane instruction with OpcodeExtractlane on vector.
// The result type is the scalar type of the selected lane shape.
func (i *Instruction) AsExtractlane(x Value, index byte, lane VecLane, signed bool) *Instruction {
	i.opcode = OpcodeExtractlane
	i.v = x
	// We do not have a field for signedness, but `index` is a byte,
	// so we just encode the flag in the high bits of `u1`.
	i.u1 = uint64(index)
	if signed {
		i.u1 = i.u1 | 1<<32
	}
	i.u2 = uint64(lane)
	switch lane {
	case VecLaneI8x16, VecLaneI16x8, VecLaneI32x4:
		i.typ = TypeI32
	case VecLaneI64x2:
		i.typ = TypeI64
	case VecLaneF32x4:
		i.typ = TypeF32
	case VecLaneF64x2:
		i.typ = TypeF64
	}
	return i
}

// AsInsertlane initializes this instruction as an insert lane instruction with OpcodeInsertlane on vector.
func (i *Instruction) AsInsertlane(x, y Value, index byte, lane VecLane) *Instruction {
	i.opcode = OpcodeInsertlane
	i.v = x
	i.v2 = y
	i.u1 = uint64(index)
	i.u2 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsShuffle initializes this instruction as a shuffle instruction with OpcodeShuffle on vector.
func (i *Instruction) AsShuffle(x, y Value, lane []byte) *Instruction {
	i.opcode = OpcodeShuffle
	i.v = x
	i.v2 = y
	// Encode the 16 bytes as 8 bytes in u1, and 8 bytes in u2.
	i.u1 = uint64(lane[7])<<56 | uint64(lane[6])<<48 | uint64(lane[5])<<40 | uint64(lane[4])<<32 | uint64(lane[3])<<24 | uint64(lane[2])<<16 | uint64(lane[1])<<8 | uint64(lane[0])
	i.u2 = uint64(lane[15])<<56 | uint64(lane[14])<<48 | uint64(lane[13])<<40 | uint64(lane[12])<<32 | uint64(lane[11])<<24 | uint64(lane[10])<<16 | uint64(lane[9])<<8 | uint64(lane[8])
	i.typ = TypeV128
	return i
}

// AsSwizzle initializes this instruction as a swizzle instruction with OpcodeSwizzle on vector.
func (i *Instruction) AsSwizzle(x, y Value, lane VecLane) *Instruction {
	i.opcode = OpcodeSwizzle
	i.v = x
	i.v2 = y
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsSplat initializes this instruction as a splat instruction with OpcodeSplat on vector.
func (i *Instruction) AsSplat(x Value, lane VecLane) *Instruction {
	i.opcode = OpcodeSplat
	i.v = x
	i.u1 = uint64(lane)
	i.typ = TypeV128
	return i
}

// AsRotl initializes this instruction as a word rotate left instruction with OpcodeRotl.
func (i *Instruction) AsRotl(x, amount Value) {
	i.opcode = OpcodeRotl
	i.v = x
	i.v2 = amount
	i.typ = x.Type()
}

// AsRotr initializes this instruction as a word rotate right instruction with OpcodeRotr.
func (i *Instruction) AsRotr(x, amount Value) {
	i.opcode = OpcodeRotr
	i.v = x
	i.v2 = amount
	i.typ = x.Type()
}

// IcmpData returns the operands and comparison condition of this integer comparison instruction.
func (i *Instruction) IcmpData() (x, y Value, c IntegerCmpCond) {
	return i.v, i.v2, IntegerCmpCond(i.u1)
}

// FcmpData returns the operands and comparison condition of this floating-point comparison instruction.
func (i *Instruction) FcmpData() (x, y Value, c FloatCmpCond) {
	return i.v, i.v2, FloatCmpCond(i.u1)
}

// VIcmpData returns the operands and comparison condition of this integer comparison instruction on vector.
func (i *Instruction) VIcmpData() (x, y Value, c IntegerCmpCond, l VecLane) {
	return i.v, i.v2, IntegerCmpCond(i.u1), VecLane(i.u2)
}

// VFcmpData returns the operands and comparison condition of this float comparison instruction on vector.
func (i *Instruction) VFcmpData() (x, y Value, c FloatCmpCond, l VecLane) {
	return i.v, i.v2, FloatCmpCond(i.u1), VecLane(i.u2)
}

// ExtractlaneData returns the operands and sign flag of Extractlane on vector.
// The index lives in the low bits of u1 and the signedness flag above bit 32 (see AsExtractlane).
func (i *Instruction) ExtractlaneData() (x Value, index byte, signed bool, l VecLane) {
	x = i.v
	index = byte(0b00001111 & i.u1)
	signed = i.u1>>32 != 0
	l = VecLane(i.u2)
	return
}

// InsertlaneData returns the operands and sign flag of Insertlane on vector.
func (i *Instruction) InsertlaneData() (x, y Value, index byte, l VecLane) {
	x = i.v
	y = i.v2
	index = byte(i.u1)
	l = VecLane(i.u2)
	return
}

// AsFadd initializes this instruction as a floating-point addition instruction with OpcodeFadd.
+func (i *Instruction) AsFadd(x, y Value) { + i.opcode = OpcodeFadd + i.v = x + i.v2 = y + i.typ = x.Type() +} + +// AsFsub initializes this instruction as a floating-point subtraction instruction with OpcodeFsub. +func (i *Instruction) AsFsub(x, y Value) { + i.opcode = OpcodeFsub + i.v = x + i.v2 = y + i.typ = x.Type() +} + +// AsFmul initializes this instruction as a floating-point multiplication instruction with OpcodeFmul. +func (i *Instruction) AsFmul(x, y Value) { + i.opcode = OpcodeFmul + i.v = x + i.v2 = y + i.typ = x.Type() +} + +// AsFdiv initializes this instruction as a floating-point division instruction with OpcodeFdiv. +func (i *Instruction) AsFdiv(x, y Value) { + i.opcode = OpcodeFdiv + i.v = x + i.v2 = y + i.typ = x.Type() +} + +// AsFmin initializes this instruction to take the minimum of two floating-points with OpcodeFmin. +func (i *Instruction) AsFmin(x, y Value) { + i.opcode = OpcodeFmin + i.v = x + i.v2 = y + i.typ = x.Type() +} + +// AsFmax initializes this instruction to take the maximum of two floating-points with OpcodeFmax. +func (i *Instruction) AsFmax(x, y Value) { + i.opcode = OpcodeFmax + i.v = x + i.v2 = y + i.typ = x.Type() +} + +// AsF32const initializes this instruction as a 32-bit floating-point constant instruction with OpcodeF32const. +func (i *Instruction) AsF32const(f float32) *Instruction { + i.opcode = OpcodeF32const + i.typ = TypeF64 + i.u1 = uint64(math.Float32bits(f)) + return i +} + +// AsF64const initializes this instruction as a 64-bit floating-point constant instruction with OpcodeF64const. +func (i *Instruction) AsF64const(f float64) *Instruction { + i.opcode = OpcodeF64const + i.typ = TypeF64 + i.u1 = math.Float64bits(f) + return i +} + +// AsVconst initializes this instruction as a vector constant instruction with OpcodeVconst. 
+func (i *Instruction) AsVconst(lo, hi uint64) *Instruction { + i.opcode = OpcodeVconst + i.typ = TypeV128 + i.u1 = lo + i.u2 = hi + return i +} + +// AsVbnot initializes this instruction as a vector negation instruction with OpcodeVbnot. +func (i *Instruction) AsVbnot(v Value) *Instruction { + i.opcode = OpcodeVbnot + i.typ = TypeV128 + i.v = v + return i +} + +// AsVband initializes this instruction as an and vector instruction with OpcodeVband. +func (i *Instruction) AsVband(x, y Value) *Instruction { + i.opcode = OpcodeVband + i.typ = TypeV128 + i.v = x + i.v2 = y + return i +} + +// AsVbor initializes this instruction as an or vector instruction with OpcodeVbor. +func (i *Instruction) AsVbor(x, y Value) *Instruction { + i.opcode = OpcodeVbor + i.typ = TypeV128 + i.v = x + i.v2 = y + return i +} + +// AsVbxor initializes this instruction as a xor vector instruction with OpcodeVbxor. +func (i *Instruction) AsVbxor(x, y Value) *Instruction { + i.opcode = OpcodeVbxor + i.typ = TypeV128 + i.v = x + i.v2 = y + return i +} + +// AsVbandnot initializes this instruction as an and-not vector instruction with OpcodeVbandnot. +func (i *Instruction) AsVbandnot(x, y Value) *Instruction { + i.opcode = OpcodeVbandnot + i.typ = TypeV128 + i.v = x + i.v2 = y + return i +} + +// AsVbitselect initializes this instruction as a bit select vector instruction with OpcodeVbitselect. +func (i *Instruction) AsVbitselect(c, x, y Value) *Instruction { + i.opcode = OpcodeVbitselect + i.typ = TypeV128 + i.v = c + i.v2 = x + i.v3 = y + return i +} + +// AsVanyTrue initializes this instruction as an anyTrue vector instruction with OpcodeVanyTrue. +func (i *Instruction) AsVanyTrue(x Value) *Instruction { + i.opcode = OpcodeVanyTrue + i.typ = TypeI32 + i.v = x + return i +} + +// AsVallTrue initializes this instruction as an allTrue vector instruction with OpcodeVallTrue. 
+func (i *Instruction) AsVallTrue(x Value, lane VecLane) *Instruction { + i.opcode = OpcodeVallTrue + i.typ = TypeI32 + i.v = x + i.u1 = uint64(lane) + return i +} + +// AsVhighBits initializes this instruction as a highBits vector instruction with OpcodeVhighBits. +func (i *Instruction) AsVhighBits(x Value, lane VecLane) *Instruction { + i.opcode = OpcodeVhighBits + i.typ = TypeI32 + i.v = x + i.u1 = uint64(lane) + return i +} + +// VconstData returns the operands of this vector constant instruction. +func (i *Instruction) VconstData() (lo, hi uint64) { + return i.u1, i.u2 +} + +// AsReturn initializes this instruction as a return instruction with OpcodeReturn. +func (i *Instruction) AsReturn(vs wazevoapi.VarLength[Value]) *Instruction { + i.opcode = OpcodeReturn + i.vs = vs + return i +} + +// AsIreduce initializes this instruction as a reduction instruction with OpcodeIreduce. +func (i *Instruction) AsIreduce(v Value, dstType Type) *Instruction { + i.opcode = OpcodeIreduce + i.v = v + i.typ = dstType + return i +} + +// AsWiden initializes this instruction as a signed or unsigned widen instruction +// on low half or high half of the given vector with OpcodeSwidenLow, OpcodeUwidenLow, OpcodeSwidenHigh, OpcodeUwidenHigh. +func (i *Instruction) AsWiden(v Value, lane VecLane, signed, low bool) *Instruction { + switch { + case signed && low: + i.opcode = OpcodeSwidenLow + case !signed && low: + i.opcode = OpcodeUwidenLow + case signed && !low: + i.opcode = OpcodeSwidenHigh + case !signed && !low: + i.opcode = OpcodeUwidenHigh + } + i.v = v + i.u1 = uint64(lane) + return i +} + +// AsAtomicLoad initializes this instruction as an atomic load. +// The size is in bytes and must be 1, 2, 4, or 8. +func (i *Instruction) AsAtomicLoad(addr Value, size uint64, typ Type) *Instruction { + i.opcode = OpcodeAtomicLoad + i.u1 = size + i.v = addr + i.typ = typ + return i +} + +// AsAtomicLoad initializes this instruction as an atomic store. 
+// The size is in bytes and must be 1, 2, 4, or 8. +func (i *Instruction) AsAtomicStore(addr, val Value, size uint64) *Instruction { + i.opcode = OpcodeAtomicStore + i.u1 = size + i.v = addr + i.v2 = val + i.typ = val.Type() + return i +} + +// AsAtomicRmw initializes this instruction as an atomic read-modify-write. +// The size is in bytes and must be 1, 2, 4, or 8. +func (i *Instruction) AsAtomicRmw(op AtomicRmwOp, addr, val Value, size uint64) *Instruction { + i.opcode = OpcodeAtomicRmw + i.u1 = uint64(op) + i.u2 = size + i.v = addr + i.v2 = val + i.typ = val.Type() + return i +} + +// AsAtomicCas initializes this instruction as an atomic compare-and-swap. +// The size is in bytes and must be 1, 2, 4, or 8. +func (i *Instruction) AsAtomicCas(addr, exp, repl Value, size uint64) *Instruction { + i.opcode = OpcodeAtomicCas + i.u1 = size + i.v = addr + i.v2 = exp + i.v3 = repl + i.typ = repl.Type() + return i +} + +// AsFence initializes this instruction as a memory fence. +// A single byte immediate may be used to indicate fence ordering in the future +// but is currently always 0 and ignored. +func (i *Instruction) AsFence(order byte) *Instruction { + i.opcode = OpcodeFence + i.u1 = uint64(order) + return i +} + +// AtomicRmwData returns the data for this atomic read-modify-write instruction. +func (i *Instruction) AtomicRmwData() (op AtomicRmwOp, size uint64) { + return AtomicRmwOp(i.u1), i.u2 +} + +// AtomicTargetSize returns the target memory size of the atomic instruction. +func (i *Instruction) AtomicTargetSize() (size uint64) { + return i.u1 +} + +// ReturnVals returns the return values of OpcodeReturn. +func (i *Instruction) ReturnVals() []Value { + return i.vs.View() +} + +// AsExitWithCode initializes this instruction as a trap instruction with OpcodeExitWithCode. 
+func (i *Instruction) AsExitWithCode(ctx Value, code wazevoapi.ExitCode) { + i.opcode = OpcodeExitWithCode + i.v = ctx + i.u1 = uint64(code) +} + +// AsExitIfTrueWithCode initializes this instruction as a trap instruction with OpcodeExitIfTrueWithCode. +func (i *Instruction) AsExitIfTrueWithCode(ctx, c Value, code wazevoapi.ExitCode) *Instruction { + i.opcode = OpcodeExitIfTrueWithCode + i.v = ctx + i.v2 = c + i.u1 = uint64(code) + return i +} + +// ExitWithCodeData returns the context and exit code of OpcodeExitWithCode. +func (i *Instruction) ExitWithCodeData() (ctx Value, code wazevoapi.ExitCode) { + return i.v, wazevoapi.ExitCode(i.u1) +} + +// ExitIfTrueWithCodeData returns the context and exit code of OpcodeExitWithCode. +func (i *Instruction) ExitIfTrueWithCodeData() (ctx, c Value, code wazevoapi.ExitCode) { + return i.v, i.v2, wazevoapi.ExitCode(i.u1) +} + +// InvertBrx inverts either OpcodeBrz or OpcodeBrnz to the other. +func (i *Instruction) InvertBrx() { + switch i.opcode { + case OpcodeBrz: + i.opcode = OpcodeBrnz + case OpcodeBrnz: + i.opcode = OpcodeBrz + default: + panic("BUG") + } +} + +// BranchData returns the branch data for this instruction necessary for backends. +func (i *Instruction) BranchData() (condVal Value, blockArgs []Value, target BasicBlock) { + switch i.opcode { + case OpcodeJump: + condVal = ValueInvalid + case OpcodeBrz, OpcodeBrnz: + condVal = i.v + default: + panic("BUG") + } + blockArgs = i.vs.View() + target = i.blk + return +} + +// BrTableData returns the branch table data for this instruction necessary for backends. +func (i *Instruction) BrTableData() (index Value, targets []BasicBlock) { + if i.opcode != OpcodeBrTable { + panic("BUG: BrTableData only available for OpcodeBrTable") + } + index = i.v + targets = i.targets + return +} + +// AsJump initializes this instruction as a jump instruction with OpcodeJump. 
+func (i *Instruction) AsJump(vs Values, target BasicBlock) *Instruction { + i.opcode = OpcodeJump + i.vs = vs + i.blk = target + return i +} + +// IsFallthroughJump returns true if this instruction is a fallthrough jump. +func (i *Instruction) IsFallthroughJump() bool { + if i.opcode != OpcodeJump { + panic("BUG: IsFallthrough only available for OpcodeJump") + } + return i.opcode == OpcodeJump && i.u1 != 0 +} + +// AsFallthroughJump marks this instruction as a fallthrough jump. +func (i *Instruction) AsFallthroughJump() { + if i.opcode != OpcodeJump { + panic("BUG: AsFallthroughJump only available for OpcodeJump") + } + i.u1 = 1 +} + +// AsBrz initializes this instruction as a branch-if-zero instruction with OpcodeBrz. +func (i *Instruction) AsBrz(v Value, args Values, target BasicBlock) { + i.opcode = OpcodeBrz + i.v = v + i.vs = args + i.blk = target +} + +// AsBrnz initializes this instruction as a branch-if-not-zero instruction with OpcodeBrnz. +func (i *Instruction) AsBrnz(v Value, args Values, target BasicBlock) *Instruction { + i.opcode = OpcodeBrnz + i.v = v + i.vs = args + i.blk = target + return i +} + +// AsBrTable initializes this instruction as a branch-table instruction with OpcodeBrTable. +func (i *Instruction) AsBrTable(index Value, targets []BasicBlock) { + i.opcode = OpcodeBrTable + i.v = index + i.targets = targets +} + +// AsCall initializes this instruction as a call instruction with OpcodeCall. +func (i *Instruction) AsCall(ref FuncRef, sig *Signature, args Values) { + i.opcode = OpcodeCall + i.u1 = uint64(ref) + i.vs = args + i.u2 = uint64(sig.ID) + sig.used = true +} + +// CallData returns the call data for this instruction necessary for backends. 
+func (i *Instruction) CallData() (ref FuncRef, sigID SignatureID, args []Value) { + if i.opcode != OpcodeCall { + panic("BUG: CallData only available for OpcodeCall") + } + ref = FuncRef(i.u1) + sigID = SignatureID(i.u2) + args = i.vs.View() + return +} + +// AsCallIndirect initializes this instruction as a call-indirect instruction with OpcodeCallIndirect. +func (i *Instruction) AsCallIndirect(funcPtr Value, sig *Signature, args Values) *Instruction { + i.opcode = OpcodeCallIndirect + i.typ = TypeF64 + i.vs = args + i.v = funcPtr + i.u1 = uint64(sig.ID) + sig.used = true + return i +} + +// AsCallGoRuntimeMemmove is the same as AsCallIndirect, but with a special flag set to indicate that it is a call to the Go runtime memmove function. +func (i *Instruction) AsCallGoRuntimeMemmove(funcPtr Value, sig *Signature, args Values) *Instruction { + i.AsCallIndirect(funcPtr, sig, args) + i.u2 = 1 + return i +} + +// CallIndirectData returns the call indirect data for this instruction necessary for backends. +func (i *Instruction) CallIndirectData() (funcPtr Value, sigID SignatureID, args []Value, isGoMemmove bool) { + if i.opcode != OpcodeCallIndirect { + panic("BUG: CallIndirectData only available for OpcodeCallIndirect") + } + funcPtr = i.v + sigID = SignatureID(i.u1) + args = i.vs.View() + isGoMemmove = i.u2 == 1 + return +} + +// AsClz initializes this instruction as a Count Leading Zeroes instruction with OpcodeClz. +func (i *Instruction) AsClz(x Value) { + i.opcode = OpcodeClz + i.v = x + i.typ = x.Type() +} + +// AsCtz initializes this instruction as a Count Trailing Zeroes instruction with OpcodeCtz. +func (i *Instruction) AsCtz(x Value) { + i.opcode = OpcodeCtz + i.v = x + i.typ = x.Type() +} + +// AsPopcnt initializes this instruction as a Population Count instruction with OpcodePopcnt. +func (i *Instruction) AsPopcnt(x Value) { + i.opcode = OpcodePopcnt + i.v = x + i.typ = x.Type() +} + +// AsFneg initializes this instruction as an instruction with OpcodeFneg. 
+func (i *Instruction) AsFneg(x Value) *Instruction { + i.opcode = OpcodeFneg + i.v = x + i.typ = x.Type() + return i +} + +// AsSqrt initializes this instruction as an instruction with OpcodeSqrt. +func (i *Instruction) AsSqrt(x Value) *Instruction { + i.opcode = OpcodeSqrt + i.v = x + i.typ = x.Type() + return i +} + +// AsFabs initializes this instruction as an instruction with OpcodeFabs. +func (i *Instruction) AsFabs(x Value) *Instruction { + i.opcode = OpcodeFabs + i.v = x + i.typ = x.Type() + return i +} + +// AsFcopysign initializes this instruction as an instruction with OpcodeFcopysign. +func (i *Instruction) AsFcopysign(x, y Value) *Instruction { + i.opcode = OpcodeFcopysign + i.v = x + i.v2 = y + i.typ = x.Type() + return i +} + +// AsCeil initializes this instruction as an instruction with OpcodeCeil. +func (i *Instruction) AsCeil(x Value) *Instruction { + i.opcode = OpcodeCeil + i.v = x + i.typ = x.Type() + return i +} + +// AsFloor initializes this instruction as an instruction with OpcodeFloor. +func (i *Instruction) AsFloor(x Value) *Instruction { + i.opcode = OpcodeFloor + i.v = x + i.typ = x.Type() + return i +} + +// AsTrunc initializes this instruction as an instruction with OpcodeTrunc. +func (i *Instruction) AsTrunc(x Value) *Instruction { + i.opcode = OpcodeTrunc + i.v = x + i.typ = x.Type() + return i +} + +// AsNearest initializes this instruction as an instruction with OpcodeNearest. +func (i *Instruction) AsNearest(x Value) *Instruction { + i.opcode = OpcodeNearest + i.v = x + i.typ = x.Type() + return i +} + +// AsBitcast initializes this instruction as an instruction with OpcodeBitcast. +func (i *Instruction) AsBitcast(x Value, dstType Type) *Instruction { + i.opcode = OpcodeBitcast + i.v = x + i.typ = dstType + return i +} + +// BitcastData returns the operands for a bitcast instruction. 
+func (i *Instruction) BitcastData() (x Value, dstType Type) { + return i.v, i.typ +} + +// AsFdemote initializes this instruction as an instruction with OpcodeFdemote. +func (i *Instruction) AsFdemote(x Value) { + i.opcode = OpcodeFdemote + i.v = x + i.typ = TypeF32 +} + +// AsFpromote initializes this instruction as an instruction with OpcodeFpromote. +func (i *Instruction) AsFpromote(x Value) { + i.opcode = OpcodeFpromote + i.v = x + i.typ = TypeF64 +} + +// AsFcvtFromInt initializes this instruction as an instruction with either OpcodeFcvtFromUint or OpcodeFcvtFromSint +func (i *Instruction) AsFcvtFromInt(x Value, signed bool, dst64bit bool) *Instruction { + if signed { + i.opcode = OpcodeFcvtFromSint + } else { + i.opcode = OpcodeFcvtFromUint + } + i.v = x + if dst64bit { + i.typ = TypeF64 + } else { + i.typ = TypeF32 + } + return i +} + +// AsFcvtToInt initializes this instruction as an instruction with either OpcodeFcvtToUint or OpcodeFcvtToSint +func (i *Instruction) AsFcvtToInt(x, ctx Value, signed bool, dst64bit bool, sat bool) *Instruction { + switch { + case signed && !sat: + i.opcode = OpcodeFcvtToSint + case !signed && !sat: + i.opcode = OpcodeFcvtToUint + case signed && sat: + i.opcode = OpcodeFcvtToSintSat + case !signed && sat: + i.opcode = OpcodeFcvtToUintSat + } + i.v = x + i.v2 = ctx + if dst64bit { + i.typ = TypeI64 + } else { + i.typ = TypeI32 + } + return i +} + +// AsVFcvtToIntSat initializes this instruction as an instruction with either OpcodeVFcvtToSintSat or OpcodeVFcvtToUintSat +func (i *Instruction) AsVFcvtToIntSat(x Value, lane VecLane, signed bool) *Instruction { + if signed { + i.opcode = OpcodeVFcvtToSintSat + } else { + i.opcode = OpcodeVFcvtToUintSat + } + i.v = x + i.u1 = uint64(lane) + return i +} + +// AsVFcvtFromInt initializes this instruction as an instruction with either OpcodeVFcvtToSintSat or OpcodeVFcvtToUintSat +func (i *Instruction) AsVFcvtFromInt(x Value, lane VecLane, signed bool) *Instruction { + if signed { + 
i.opcode = OpcodeVFcvtFromSint + } else { + i.opcode = OpcodeVFcvtFromUint + } + i.v = x + i.u1 = uint64(lane) + return i +} + +// AsNarrow initializes this instruction as an instruction with either OpcodeSnarrow or OpcodeUnarrow +func (i *Instruction) AsNarrow(x, y Value, lane VecLane, signed bool) *Instruction { + if signed { + i.opcode = OpcodeSnarrow + } else { + i.opcode = OpcodeUnarrow + } + i.v = x + i.v2 = y + i.u1 = uint64(lane) + return i +} + +// AsFvpromoteLow initializes this instruction as an instruction with OpcodeFvpromoteLow +func (i *Instruction) AsFvpromoteLow(x Value, lane VecLane) *Instruction { + i.opcode = OpcodeFvpromoteLow + i.v = x + i.u1 = uint64(lane) + return i +} + +// AsFvdemote initializes this instruction as an instruction with OpcodeFvdemote +func (i *Instruction) AsFvdemote(x Value, lane VecLane) *Instruction { + i.opcode = OpcodeFvdemote + i.v = x + i.u1 = uint64(lane) + return i +} + +// AsSExtend initializes this instruction as a sign extension instruction with OpcodeSExtend. +func (i *Instruction) AsSExtend(v Value, from, to byte) *Instruction { + i.opcode = OpcodeSExtend + i.v = v + i.u1 = uint64(from)<<8 | uint64(to) + if to == 64 { + i.typ = TypeI64 + } else { + i.typ = TypeI32 + } + return i +} + +// AsUExtend initializes this instruction as an unsigned extension instruction with OpcodeUExtend. +func (i *Instruction) AsUExtend(v Value, from, to byte) *Instruction { + i.opcode = OpcodeUExtend + i.v = v + i.u1 = uint64(from)<<8 | uint64(to) + if to == 64 { + i.typ = TypeI64 + } else { + i.typ = TypeI32 + } + return i +} + +func (i *Instruction) ExtendData() (from, to byte, signed bool) { + if i.opcode != OpcodeSExtend && i.opcode != OpcodeUExtend { + panic("BUG: ExtendData only available for OpcodeSExtend and OpcodeUExtend") + } + from = byte(i.u1 >> 8) + to = byte(i.u1) + signed = i.opcode == OpcodeSExtend + return +} + +// AsSelect initializes this instruction as an unsigned extension instruction with OpcodeSelect. 
+func (i *Instruction) AsSelect(c, x, y Value) *Instruction { + i.opcode = OpcodeSelect + i.v = c + i.v2 = x + i.v3 = y + i.typ = x.Type() + return i +} + +// SelectData returns the select data for this instruction necessary for backends. +func (i *Instruction) SelectData() (c, x, y Value) { + c = i.v + x = i.v2 + y = i.v3 + return +} + +// ExtendFromToBits returns the from and to bit size for the extension instruction. +func (i *Instruction) ExtendFromToBits() (from, to byte) { + from = byte(i.u1 >> 8) + to = byte(i.u1) + return +} + +// Format returns a string representation of this instruction with the given builder. +// For debugging purposes only. +func (i *Instruction) Format(b Builder) string { + var instSuffix string + switch i.opcode { + case OpcodeExitWithCode: + instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), wazevoapi.ExitCode(i.u1)) + case OpcodeExitIfTrueWithCode: + instSuffix = fmt.Sprintf(" %s, %s, %s", i.v2.Format(b), i.v.Format(b), wazevoapi.ExitCode(i.u1)) + case OpcodeIadd, OpcodeIsub, OpcodeImul, OpcodeFadd, OpcodeFsub, OpcodeFmin, OpcodeFmax, OpcodeFdiv, OpcodeFmul: + instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) + case OpcodeIcmp: + instSuffix = fmt.Sprintf(" %s, %s, %s", IntegerCmpCond(i.u1), i.v.Format(b), i.v2.Format(b)) + case OpcodeFcmp: + instSuffix = fmt.Sprintf(" %s, %s, %s", FloatCmpCond(i.u1), i.v.Format(b), i.v2.Format(b)) + case OpcodeSExtend, OpcodeUExtend: + instSuffix = fmt.Sprintf(" %s, %d->%d", i.v.Format(b), i.u1>>8, i.u1&0xff) + case OpcodeCall, OpcodeCallIndirect: + view := i.vs.View() + vs := make([]string, len(view)) + for idx := range vs { + vs[idx] = view[idx].Format(b) + } + if i.opcode == OpcodeCallIndirect { + instSuffix = fmt.Sprintf(" %s:%s, %s", i.v.Format(b), SignatureID(i.u1), strings.Join(vs, ", ")) + } else { + instSuffix = fmt.Sprintf(" %s:%s, %s", FuncRef(i.u1), SignatureID(i.u2), strings.Join(vs, ", ")) + } + case OpcodeStore, OpcodeIstore8, OpcodeIstore16, OpcodeIstore32: + 
instSuffix = fmt.Sprintf(" %s, %s, %#x", i.v.Format(b), i.v2.Format(b), uint32(i.u1)) + case OpcodeLoad, OpcodeVZeroExtLoad: + instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u1)) + case OpcodeLoadSplat: + instSuffix = fmt.Sprintf(".%s %s, %#x", VecLane(i.u2), i.v.Format(b), int32(i.u1)) + case OpcodeUload8, OpcodeUload16, OpcodeUload32, OpcodeSload8, OpcodeSload16, OpcodeSload32: + instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u1)) + case OpcodeSelect, OpcodeVbitselect: + instSuffix = fmt.Sprintf(" %s, %s, %s", i.v.Format(b), i.v2.Format(b), i.v3.Format(b)) + case OpcodeIconst: + switch i.typ { + case TypeI32: + instSuffix = fmt.Sprintf("_32 %#x", uint32(i.u1)) + case TypeI64: + instSuffix = fmt.Sprintf("_64 %#x", i.u1) + } + case OpcodeVconst: + instSuffix = fmt.Sprintf(" %016x %016x", i.u1, i.u2) + case OpcodeF32const: + instSuffix = fmt.Sprintf(" %f", math.Float32frombits(uint32(i.u1))) + case OpcodeF64const: + instSuffix = fmt.Sprintf(" %f", math.Float64frombits(i.u1)) + case OpcodeReturn: + view := i.vs.View() + if len(view) == 0 { + break + } + vs := make([]string, len(view)) + for idx := range vs { + vs[idx] = view[idx].Format(b) + } + instSuffix = fmt.Sprintf(" %s", strings.Join(vs, ", ")) + case OpcodeJump: + view := i.vs.View() + vs := make([]string, len(view)+1) + if i.IsFallthroughJump() { + vs[0] = " fallthrough" + } else { + vs[0] = " " + i.blk.(*basicBlock).Name() + } + for idx := range view { + vs[idx+1] = view[idx].Format(b) + } + + instSuffix = strings.Join(vs, ", ") + case OpcodeBrz, OpcodeBrnz: + view := i.vs.View() + vs := make([]string, len(view)+2) + vs[0] = " " + i.v.Format(b) + vs[1] = i.blk.(*basicBlock).Name() + for idx := range view { + vs[idx+2] = view[idx].Format(b) + } + instSuffix = strings.Join(vs, ", ") + case OpcodeBrTable: + // `BrTable index, [label1, label2, ... 
labelN]` + instSuffix = fmt.Sprintf(" %s", i.v.Format(b)) + instSuffix += ", [" + for i, target := range i.targets { + blk := target.(*basicBlock) + if i == 0 { + instSuffix += blk.Name() + } else { + instSuffix += ", " + blk.Name() + } + } + instSuffix += "]" + case OpcodeBand, OpcodeBor, OpcodeBxor, OpcodeRotr, OpcodeRotl, OpcodeIshl, OpcodeSshr, OpcodeUshr, + OpcodeSdiv, OpcodeUdiv, OpcodeFcopysign, OpcodeSrem, OpcodeUrem, + OpcodeVbnot, OpcodeVbxor, OpcodeVbor, OpcodeVband, OpcodeVbandnot, OpcodeVIcmp, OpcodeVFcmp: + instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) + case OpcodeUndefined: + case OpcodeClz, OpcodeCtz, OpcodePopcnt, OpcodeFneg, OpcodeFcvtToSint, OpcodeFcvtToUint, OpcodeFcvtFromSint, + OpcodeFcvtFromUint, OpcodeFcvtToSintSat, OpcodeFcvtToUintSat, OpcodeFdemote, OpcodeFpromote, OpcodeIreduce, OpcodeBitcast, OpcodeSqrt, OpcodeFabs, + OpcodeCeil, OpcodeFloor, OpcodeTrunc, OpcodeNearest: + instSuffix = " " + i.v.Format(b) + case OpcodeVIadd, OpcodeExtIaddPairwise, OpcodeVSaddSat, OpcodeVUaddSat, OpcodeVIsub, OpcodeVSsubSat, OpcodeVUsubSat, + OpcodeVImin, OpcodeVUmin, OpcodeVImax, OpcodeVUmax, OpcodeVImul, OpcodeVAvgRound, + OpcodeVFadd, OpcodeVFsub, OpcodeVFmul, OpcodeVFdiv, + OpcodeVIshl, OpcodeVSshr, OpcodeVUshr, + OpcodeVFmin, OpcodeVFmax, OpcodeVMinPseudo, OpcodeVMaxPseudo, + OpcodeSnarrow, OpcodeUnarrow, OpcodeSwizzle, OpcodeSqmulRoundSat: + instSuffix = fmt.Sprintf(".%s %s, %s", VecLane(i.u1), i.v.Format(b), i.v2.Format(b)) + case OpcodeVIabs, OpcodeVIneg, OpcodeVIpopcnt, OpcodeVhighBits, OpcodeVallTrue, OpcodeVanyTrue, + OpcodeVFabs, OpcodeVFneg, OpcodeVSqrt, OpcodeVCeil, OpcodeVFloor, OpcodeVTrunc, OpcodeVNearest, + OpcodeVFcvtToUintSat, OpcodeVFcvtToSintSat, OpcodeVFcvtFromUint, OpcodeVFcvtFromSint, + OpcodeFvpromoteLow, OpcodeFvdemote, OpcodeSwidenLow, OpcodeUwidenLow, OpcodeSwidenHigh, OpcodeUwidenHigh, + OpcodeSplat: + instSuffix = fmt.Sprintf(".%s %s", VecLane(i.u1), i.v.Format(b)) + case OpcodeExtractlane: + var 
signedness string + if i.u1 != 0 { + signedness = "signed" + } else { + signedness = "unsigned" + } + instSuffix = fmt.Sprintf(".%s %d, %s (%s)", VecLane(i.u2), 0x0000FFFF&i.u1, i.v.Format(b), signedness) + case OpcodeInsertlane: + instSuffix = fmt.Sprintf(".%s %d, %s, %s", VecLane(i.u2), i.u1, i.v.Format(b), i.v2.Format(b)) + case OpcodeShuffle: + lanes := make([]byte, 16) + for idx := 0; idx < 8; idx++ { + lanes[idx] = byte(i.u1 >> (8 * idx)) + } + for idx := 0; idx < 8; idx++ { + lanes[idx+8] = byte(i.u2 >> (8 * idx)) + } + // Prints Shuffle.[0 1 2 3 4 5 6 7 ...] v2, v3 + instSuffix = fmt.Sprintf(".%v %s, %s", lanes, i.v.Format(b), i.v2.Format(b)) + case OpcodeAtomicRmw: + instSuffix = fmt.Sprintf(" %s_%d, %s, %s", AtomicRmwOp(i.u1), 8*i.u2, i.v.Format(b), i.v2.Format(b)) + case OpcodeAtomicLoad: + instSuffix = fmt.Sprintf("_%d, %s", 8*i.u1, i.v.Format(b)) + case OpcodeAtomicStore: + instSuffix = fmt.Sprintf("_%d, %s, %s", 8*i.u1, i.v.Format(b), i.v2.Format(b)) + case OpcodeAtomicCas: + instSuffix = fmt.Sprintf("_%d, %s, %s, %s", 8*i.u1, i.v.Format(b), i.v2.Format(b), i.v3.Format(b)) + case OpcodeFence: + instSuffix = fmt.Sprintf(" %d", i.u1) + case OpcodeWideningPairwiseDotProductS: + instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) + default: + panic(fmt.Sprintf("TODO: format for %s", i.opcode)) + } + + instr := i.opcode.String() + instSuffix + + var rvs []string + if rv := i.rValue; rv.Valid() { + rvs = append(rvs, rv.formatWithType(b)) + } + + for _, v := range i.rValues.View() { + rvs = append(rvs, v.formatWithType(b)) + } + + if len(rvs) > 0 { + return fmt.Sprintf("%s = %s", strings.Join(rvs, ", "), instr) + } else { + return instr + } +} + +// addArgumentBranchInst adds an argument to this instruction. 
+func (i *Instruction) addArgumentBranchInst(b *builder, v Value) { + switch i.opcode { + case OpcodeJump, OpcodeBrz, OpcodeBrnz: + i.vs = i.vs.Append(&b.varLengthPool, v) + default: + panic("BUG: " + i.opcode.String()) + } +} + +// Constant returns true if this instruction is a constant instruction. +func (i *Instruction) Constant() bool { + switch i.opcode { + case OpcodeIconst, OpcodeF32const, OpcodeF64const: + return true + } + return false +} + +// ConstantVal returns the constant value of this instruction. +// How to interpret the return value depends on the opcode. +func (i *Instruction) ConstantVal() (ret uint64) { + switch i.opcode { + case OpcodeIconst, OpcodeF32const, OpcodeF64const: + ret = i.u1 + default: + panic("TODO") + } + return +} + +// String implements fmt.Stringer. +func (o Opcode) String() (ret string) { + switch o { + case OpcodeInvalid: + return "invalid" + case OpcodeUndefined: + return "Undefined" + case OpcodeJump: + return "Jump" + case OpcodeBrz: + return "Brz" + case OpcodeBrnz: + return "Brnz" + case OpcodeBrTable: + return "BrTable" + case OpcodeExitWithCode: + return "Exit" + case OpcodeExitIfTrueWithCode: + return "ExitIfTrue" + case OpcodeReturn: + return "Return" + case OpcodeCall: + return "Call" + case OpcodeCallIndirect: + return "CallIndirect" + case OpcodeSplat: + return "Splat" + case OpcodeSwizzle: + return "Swizzle" + case OpcodeInsertlane: + return "Insertlane" + case OpcodeExtractlane: + return "Extractlane" + case OpcodeLoad: + return "Load" + case OpcodeLoadSplat: + return "LoadSplat" + case OpcodeStore: + return "Store" + case OpcodeUload8: + return "Uload8" + case OpcodeSload8: + return "Sload8" + case OpcodeIstore8: + return "Istore8" + case OpcodeUload16: + return "Uload16" + case OpcodeSload16: + return "Sload16" + case OpcodeIstore16: + return "Istore16" + case OpcodeUload32: + return "Uload32" + case OpcodeSload32: + return "Sload32" + case OpcodeIstore32: + return "Istore32" + case OpcodeIconst: + return 
"Iconst" + case OpcodeF32const: + return "F32const" + case OpcodeF64const: + return "F64const" + case OpcodeVconst: + return "Vconst" + case OpcodeShuffle: + return "Shuffle" + case OpcodeSelect: + return "Select" + case OpcodeVanyTrue: + return "VanyTrue" + case OpcodeVallTrue: + return "VallTrue" + case OpcodeVhighBits: + return "VhighBits" + case OpcodeIcmp: + return "Icmp" + case OpcodeIcmpImm: + return "IcmpImm" + case OpcodeVIcmp: + return "VIcmp" + case OpcodeIadd: + return "Iadd" + case OpcodeIsub: + return "Isub" + case OpcodeImul: + return "Imul" + case OpcodeUdiv: + return "Udiv" + case OpcodeSdiv: + return "Sdiv" + case OpcodeUrem: + return "Urem" + case OpcodeSrem: + return "Srem" + case OpcodeBand: + return "Band" + case OpcodeBor: + return "Bor" + case OpcodeBxor: + return "Bxor" + case OpcodeBnot: + return "Bnot" + case OpcodeRotl: + return "Rotl" + case OpcodeRotr: + return "Rotr" + case OpcodeIshl: + return "Ishl" + case OpcodeUshr: + return "Ushr" + case OpcodeSshr: + return "Sshr" + case OpcodeClz: + return "Clz" + case OpcodeCtz: + return "Ctz" + case OpcodePopcnt: + return "Popcnt" + case OpcodeFcmp: + return "Fcmp" + case OpcodeFadd: + return "Fadd" + case OpcodeFsub: + return "Fsub" + case OpcodeFmul: + return "Fmul" + case OpcodeFdiv: + return "Fdiv" + case OpcodeSqmulRoundSat: + return "SqmulRoundSat" + case OpcodeSqrt: + return "Sqrt" + case OpcodeFneg: + return "Fneg" + case OpcodeFabs: + return "Fabs" + case OpcodeFcopysign: + return "Fcopysign" + case OpcodeFmin: + return "Fmin" + case OpcodeFmax: + return "Fmax" + case OpcodeCeil: + return "Ceil" + case OpcodeFloor: + return "Floor" + case OpcodeTrunc: + return "Trunc" + case OpcodeNearest: + return "Nearest" + case OpcodeBitcast: + return "Bitcast" + case OpcodeIreduce: + return "Ireduce" + case OpcodeSnarrow: + return "Snarrow" + case OpcodeUnarrow: + return "Unarrow" + case OpcodeSwidenLow: + return "SwidenLow" + case OpcodeSwidenHigh: + return "SwidenHigh" + case OpcodeUwidenLow: 
+ return "UwidenLow" + case OpcodeUwidenHigh: + return "UwidenHigh" + case OpcodeExtIaddPairwise: + return "IaddPairwise" + case OpcodeWideningPairwiseDotProductS: + return "WideningPairwiseDotProductS" + case OpcodeUExtend: + return "UExtend" + case OpcodeSExtend: + return "SExtend" + case OpcodeFpromote: + return "Fpromote" + case OpcodeFdemote: + return "Fdemote" + case OpcodeFvdemote: + return "Fvdemote" + case OpcodeFcvtToUint: + return "FcvtToUint" + case OpcodeFcvtToSint: + return "FcvtToSint" + case OpcodeFcvtToUintSat: + return "FcvtToUintSat" + case OpcodeFcvtToSintSat: + return "FcvtToSintSat" + case OpcodeFcvtFromUint: + return "FcvtFromUint" + case OpcodeFcvtFromSint: + return "FcvtFromSint" + case OpcodeAtomicRmw: + return "AtomicRmw" + case OpcodeAtomicCas: + return "AtomicCas" + case OpcodeAtomicLoad: + return "AtomicLoad" + case OpcodeAtomicStore: + return "AtomicStore" + case OpcodeFence: + return "Fence" + case OpcodeVbor: + return "Vbor" + case OpcodeVbxor: + return "Vbxor" + case OpcodeVband: + return "Vband" + case OpcodeVbandnot: + return "Vbandnot" + case OpcodeVbnot: + return "Vbnot" + case OpcodeVbitselect: + return "Vbitselect" + case OpcodeVIadd: + return "VIadd" + case OpcodeVSaddSat: + return "VSaddSat" + case OpcodeVUaddSat: + return "VUaddSat" + case OpcodeVSsubSat: + return "VSsubSat" + case OpcodeVUsubSat: + return "VUsubSat" + case OpcodeVAvgRound: + return "OpcodeVAvgRound" + case OpcodeVIsub: + return "VIsub" + case OpcodeVImin: + return "VImin" + case OpcodeVUmin: + return "VUmin" + case OpcodeVImax: + return "VImax" + case OpcodeVUmax: + return "VUmax" + case OpcodeVImul: + return "VImul" + case OpcodeVIabs: + return "VIabs" + case OpcodeVIneg: + return "VIneg" + case OpcodeVIpopcnt: + return "VIpopcnt" + case OpcodeVIshl: + return "VIshl" + case OpcodeVUshr: + return "VUshr" + case OpcodeVSshr: + return "VSshr" + case OpcodeVFabs: + return "VFabs" + case OpcodeVFmax: + return "VFmax" + case OpcodeVFmin: + return "VFmin" + 
case OpcodeVFneg: + return "VFneg" + case OpcodeVFadd: + return "VFadd" + case OpcodeVFsub: + return "VFsub" + case OpcodeVFmul: + return "VFmul" + case OpcodeVFdiv: + return "VFdiv" + case OpcodeVFcmp: + return "VFcmp" + case OpcodeVCeil: + return "VCeil" + case OpcodeVFloor: + return "VFloor" + case OpcodeVTrunc: + return "VTrunc" + case OpcodeVNearest: + return "VNearest" + case OpcodeVMaxPseudo: + return "VMaxPseudo" + case OpcodeVMinPseudo: + return "VMinPseudo" + case OpcodeVSqrt: + return "VSqrt" + case OpcodeVFcvtToUintSat: + return "VFcvtToUintSat" + case OpcodeVFcvtToSintSat: + return "VFcvtToSintSat" + case OpcodeVFcvtFromUint: + return "VFcvtFromUint" + case OpcodeVFcvtFromSint: + return "VFcvtFromSint" + case OpcodeFvpromoteLow: + return "FvpromoteLow" + case OpcodeVZeroExtLoad: + return "VZeroExtLoad" + } + panic(fmt.Sprintf("unknown opcode %d", o)) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go new file mode 100644 index 000000000..a2e986cd1 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go @@ -0,0 +1,417 @@ +package ssa + +import ( + "fmt" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// RunPasses implements Builder.RunPasses. +// +// The order here matters; some pass depends on the previous ones. +// +// Note that passes suffixed with "Opt" are the optimization passes, meaning that they edit the instructions and blocks +// while the other passes are not, like passEstimateBranchProbabilities does not edit them, but only calculates the additional information. 
+func (b *builder) RunPasses() { + b.runPreBlockLayoutPasses() + b.runBlockLayoutPass() + b.runPostBlockLayoutPasses() + b.runFinalizingPasses() +} + +func (b *builder) runPreBlockLayoutPasses() { + passSortSuccessors(b) + passDeadBlockEliminationOpt(b) + passRedundantPhiEliminationOpt(b) + // The result of passCalculateImmediateDominators will be used by various passes below. + passCalculateImmediateDominators(b) + passNopInstElimination(b) + + // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. + // WebAssembly program shouldn't result in irreducible CFG, but we should handle it properly in just in case. + // See FixIrreducible pass in LLVM: https://llvm.org/doxygen/FixIrreducible_8cpp_source.html + + // TODO: implement more optimization passes like: + // block coalescing. + // Copy-propagation. + // Constant folding. + // Common subexpression elimination. + // Arithmetic simplifications. + // and more! + + // passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations. + passDeadCodeEliminationOpt(b) + b.donePreBlockLayoutPasses = true +} + +func (b *builder) runBlockLayoutPass() { + if !b.donePreBlockLayoutPasses { + panic("runBlockLayoutPass must be called after all pre passes are done") + } + passLayoutBlocks(b) + b.doneBlockLayout = true +} + +// runPostBlockLayoutPasses runs the post block layout passes. After this point, CFG is somewhat stable, +// but still can be modified before finalizing passes. At this point, critical edges are split by passLayoutBlocks. +func (b *builder) runPostBlockLayoutPasses() { + if !b.doneBlockLayout { + panic("runPostBlockLayoutPasses must be called after block layout pass is done") + } + // TODO: Do more. e.g. tail duplication, loop unrolling, etc. + + b.donePostBlockLayoutPasses = true +} + +// runFinalizingPasses runs the finalizing passes. After this point, CFG should not be modified. 
+func (b *builder) runFinalizingPasses() { + if !b.donePostBlockLayoutPasses { + panic("runFinalizingPasses must be called after post block layout passes are done") + } + // Critical edges are split, so we fix the loop nesting forest. + passBuildLoopNestingForest(b) + passBuildDominatorTree(b) + // Now that we know the final placement of the blocks, we can explicitly mark the fallthrough jumps. + b.markFallthroughJumps() +} + +// passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so. +func passDeadBlockEliminationOpt(b *builder) { + entryBlk := b.entryBlk() + b.clearBlkVisited() + b.blkStack = append(b.blkStack, entryBlk) + for len(b.blkStack) > 0 { + reachableBlk := b.blkStack[len(b.blkStack)-1] + b.blkStack = b.blkStack[:len(b.blkStack)-1] + b.blkVisited[reachableBlk] = 0 // the value won't be used in this pass. + + if !reachableBlk.sealed && !reachableBlk.ReturnBlock() { + panic(fmt.Sprintf("%s is not sealed", reachableBlk)) + } + + if wazevoapi.SSAValidationEnabled { + reachableBlk.validate(b) + } + + for _, succ := range reachableBlk.success { + if _, ok := b.blkVisited[succ]; ok { + continue + } + b.blkStack = append(b.blkStack, succ) + } + } + + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + if _, ok := b.blkVisited[blk]; !ok { + blk.invalid = true + } + } +} + +// passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block). +func passRedundantPhiEliminationOpt(b *builder) { + redundantParameterIndexes := b.ints[:0] // reuse the slice from previous iterations. + + // TODO: this might be costly for large programs, but at least, as far as I did the experiment, it's almost the + // same as the single iteration version in terms of the overall compilation time. 
That *might be* mostly thanks to the fact + // that removing many PHIs results in the reduction of the total instructions, not because of this indefinite iteration is + // relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs, + // the maximum number of iteration was 22, which seems to be acceptable but not that small either since the + // complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands. + for { + changed := false + _ = b.blockIteratorBegin() // skip entry block! + // Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops! + for blk := b.blockIteratorNext(); blk != nil; blk = b.blockIteratorNext() { + paramNum := len(blk.params) + + for paramIndex := 0; paramIndex < paramNum; paramIndex++ { + phiValue := blk.params[paramIndex].value + redundant := true + + nonSelfReferencingValue := ValueInvalid + for predIndex := range blk.preds { + br := blk.preds[predIndex].branch + // Resolve the alias in the arguments so that we could use the previous iteration's result. + b.resolveArgumentAlias(br) + pred := br.vs.View()[paramIndex] + if pred == phiValue { + // This is self-referencing: PHI from the same PHI. + continue + } + + if !nonSelfReferencingValue.Valid() { + nonSelfReferencingValue = pred + continue + } + + if nonSelfReferencingValue != pred { + redundant = false + break + } + } + + if !nonSelfReferencingValue.Valid() { + // This shouldn't happen, and must be a bug in builder.go. + panic("BUG: params added but only self-referencing") + } + + if redundant { + b.redundantParameterIndexToValue[paramIndex] = nonSelfReferencingValue + redundantParameterIndexes = append(redundantParameterIndexes, paramIndex) + } + } + + if len(b.redundantParameterIndexToValue) == 0 { + continue + } + changed = true + + // Remove the redundant PHIs from the argument list of branching instructions. 
+ for predIndex := range blk.preds { + var cur int + predBlk := blk.preds[predIndex] + branchInst := predBlk.branch + view := branchInst.vs.View() + for argIndex, value := range view { + if _, ok := b.redundantParameterIndexToValue[argIndex]; !ok { + view[cur] = value + cur++ + } + } + branchInst.vs.Cut(cur) + } + + // Still need to have the definition of the value of the PHI (previously as the parameter). + for _, redundantParamIndex := range redundantParameterIndexes { + phiValue := blk.params[redundantParamIndex].value + onlyValue := b.redundantParameterIndexToValue[redundantParamIndex] + // Create an alias in this block from the only phi argument to the phi value. + b.alias(phiValue, onlyValue) + } + + // Finally, Remove the param from the blk. + var cur int + for paramIndex := 0; paramIndex < paramNum; paramIndex++ { + param := blk.params[paramIndex] + if _, ok := b.redundantParameterIndexToValue[paramIndex]; !ok { + blk.params[cur] = param + cur++ + } + } + blk.params = blk.params[:cur] + + // Clears the map for the next iteration. + for _, paramIndex := range redundantParameterIndexes { + delete(b.redundantParameterIndexToValue, paramIndex) + } + redundantParameterIndexes = redundantParameterIndexes[:0] + } + + if !changed { + break + } + } + + // Reuse the slice for the future passes. + b.ints = redundantParameterIndexes +} + +// passDeadCodeEliminationOpt traverses all the instructions, and calculates the reference count of each Value, and +// eliminates all the unnecessary instructions whose ref count is zero. +// The results are stored at builder.valueRefCounts. This also assigns a InstructionGroupID to each Instruction +// during the process. This is the last SSA-level optimization pass and after this, +// the SSA function is ready to be used by backends. +// +// TODO: the algorithm here might not be efficient. Get back to this later. 
+func passDeadCodeEliminationOpt(b *builder) { + nvid := int(b.nextValueID) + if nvid >= len(b.valueRefCounts) { + b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...) + } + if nvid >= len(b.valueIDToInstruction) { + b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) + } + + // First, we gather all the instructions with side effects. + liveInstructions := b.instStack[:0] + // During the process, we will assign InstructionGroupID to each instruction, which is not + // relevant to dead code elimination, but we need in the backend. + var gid InstructionGroupID + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + cur.gid = gid + switch cur.sideEffect() { + case sideEffectTraps: + // The trappable should always be alive. + liveInstructions = append(liveInstructions, cur) + case sideEffectStrict: + liveInstructions = append(liveInstructions, cur) + // The strict side effect should create different instruction groups. + gid++ + } + + r1, rs := cur.Returns() + if r1.Valid() { + b.valueIDToInstruction[r1.ID()] = cur + } + for _, r := range rs { + b.valueIDToInstruction[r.ID()] = cur + } + } + } + + // Find all the instructions referenced by live instructions transitively. + for len(liveInstructions) > 0 { + tail := len(liveInstructions) - 1 + live := liveInstructions[tail] + liveInstructions = liveInstructions[:tail] + if live.live { + // If it's already marked alive, this is referenced multiple times, + // so we can skip it. + continue + } + live.live = true + + // Before we walk, we need to resolve the alias first. 
+ b.resolveArgumentAlias(live) + + v1, v2, v3, vs := live.Args() + if v1.Valid() { + producingInst := b.valueIDToInstruction[v1.ID()] + if producingInst != nil { + liveInstructions = append(liveInstructions, producingInst) + } + } + + if v2.Valid() { + producingInst := b.valueIDToInstruction[v2.ID()] + if producingInst != nil { + liveInstructions = append(liveInstructions, producingInst) + } + } + + if v3.Valid() { + producingInst := b.valueIDToInstruction[v3.ID()] + if producingInst != nil { + liveInstructions = append(liveInstructions, producingInst) + } + } + + for _, v := range vs { + producingInst := b.valueIDToInstruction[v.ID()] + if producingInst != nil { + liveInstructions = append(liveInstructions, producingInst) + } + } + } + + // Now that all the live instructions are flagged as live=true, we eliminate all dead instructions. + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + if !cur.live { + // Remove the instruction from the list. + if prev := cur.prev; prev != nil { + prev.next = cur.next + } else { + blk.rootInstr = cur.next + } + if next := cur.next; next != nil { + next.prev = cur.prev + } + continue + } + + // If the value alive, we can be sure that arguments are used definitely. + // Hence, we can increment the value reference counts. + v1, v2, v3, vs := cur.Args() + if v1.Valid() { + b.incRefCount(v1.ID(), cur) + } + if v2.Valid() { + b.incRefCount(v2.ID(), cur) + } + if v3.Valid() { + b.incRefCount(v3.ID(), cur) + } + for _, v := range vs { + b.incRefCount(v.ID(), cur) + } + } + } + + b.instStack = liveInstructions // we reuse the stack for the next iteration. +} + +func (b *builder) incRefCount(id ValueID, from *Instruction) { + if wazevoapi.SSALoggingEnabled { + fmt.Printf("v%d referenced from %v\n", id, from.Format(b)) + } + b.valueRefCounts[id]++ +} + +// clearBlkVisited clears the b.blkVisited map so that we can reuse it for multiple places. 
+func (b *builder) clearBlkVisited() { + b.blkStack2 = b.blkStack2[:0] + for key := range b.blkVisited { + b.blkStack2 = append(b.blkStack2, key) + } + for _, blk := range b.blkStack2 { + delete(b.blkVisited, blk) + } + b.blkStack2 = b.blkStack2[:0] +} + +// passNopInstElimination eliminates the instructions which is essentially a no-op. +func passNopInstElimination(b *builder) { + if int(b.nextValueID) >= len(b.valueIDToInstruction) { + b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) + } + + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + r1, rs := cur.Returns() + if r1.Valid() { + b.valueIDToInstruction[r1.ID()] = cur + } + for _, r := range rs { + b.valueIDToInstruction[r.ID()] = cur + } + } + } + + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + switch cur.Opcode() { + // TODO: add more logics here. + case OpcodeIshl, OpcodeSshr, OpcodeUshr: + x, amount := cur.Arg2() + definingInst := b.valueIDToInstruction[amount.ID()] + if definingInst == nil { + // If there's no defining instruction, that means the amount is coming from the parameter. + continue + } + if definingInst.Constant() { + v := definingInst.ConstantVal() + + if x.Type().Bits() == 64 { + v = v % 64 + } else { + v = v % 32 + } + if v == 0 { + b.alias(cur.Return(), x) + } + } + } + } + } +} + +// passSortSuccessors sorts the successors of each block in the natural program order. 
+func passSortSuccessors(b *builder) { + for i := 0; i < b.basicBlocksPool.Allocated(); i++ { + blk := b.basicBlocksPool.View(i) + sortBlocks(blk.success) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go new file mode 100644 index 000000000..9068180a0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go @@ -0,0 +1,335 @@ +package ssa + +import ( + "fmt" + "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// passLayoutBlocks implements Builder.LayoutBlocks. This re-organizes builder.reversePostOrderedBasicBlocks. +// +// TODO: there are tons of room for improvement here. e.g. LLVM has BlockPlacementPass using BlockFrequencyInfo, +// BranchProbabilityInfo, and LoopInfo to do a much better job. Also, if we have the profiling instrumentation +// like ball-larus algorithm, then we could do profile-guided optimization. Basically all of them are trying +// to maximize the fall-through opportunities which is most efficient. +// +// Here, fallthrough happens when a block ends with jump instruction whose target is the right next block in the +// builder.reversePostOrderedBasicBlocks. +// +// Currently, we just place blocks using the DFS reverse post-order of the dominator tree with the heuristics: +// 1. a split edge trampoline towards a loop header will be placed as a fallthrough. +// 2. we invert the brz and brnz if it makes the fallthrough more likely. +// +// This heuristic is done in maybeInvertBranches function. +func passLayoutBlocks(b *builder) { + b.clearBlkVisited() + + // We might end up splitting critical edges which adds more basic blocks, + // so we store the currently existing basic blocks in nonSplitBlocks temporarily. 
+ // That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks. + nonSplitBlocks := b.blkStack[:0] + for i, blk := range b.reversePostOrderedBasicBlocks { + if !blk.Valid() { + continue + } + nonSplitBlocks = append(nonSplitBlocks, blk) + if i != len(b.reversePostOrderedBasicBlocks)-1 { + _ = maybeInvertBranches(blk, b.reversePostOrderedBasicBlocks[i+1]) + } + } + + var trampolines []*basicBlock + + // Reset the order slice since we update on the fly by splitting critical edges. + b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0] + uninsertedTrampolines := b.blkStack2[:0] + for _, blk := range nonSplitBlocks { + for i := range blk.preds { + pred := blk.preds[i].blk + if _, ok := b.blkVisited[pred]; ok || !pred.Valid() { + continue + } else if pred.reversePostOrder < blk.reversePostOrder { + // This means the edge is critical, and this pred is the trampoline and yet to be inserted. + // Split edge trampolines must come before the destination in reverse post-order. + b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred) + b.blkVisited[pred] = 0 // mark as inserted, the value is not used. + } + } + + // Now that we've already added all the potential trampoline blocks incoming to this block, + // we can add this block itself. + b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk) + b.blkVisited[blk] = 0 // mark as inserted, the value is not used. + + if len(blk.success) < 2 { + // There won't be critical edge originating from this block. + continue + } else if blk.currentInstr.opcode == OpcodeBrTable { + // We don't split critical edges here, because at the construction site of BrTable, we already split the edges. + continue + } + + for sidx, succ := range blk.success { + if !succ.ReturnBlock() && // If the successor is a return block, we need to split the edge any way because we need "epilogue" to be inserted. 
+ // Plus if there's no multiple incoming edges to this successor, (pred, succ) is not critical. + len(succ.preds) < 2 { + continue + } + + // Otherwise, we are sure this is a critical edge. To modify the CFG, we need to find the predecessor info + // from the successor. + var predInfo *basicBlockPredecessorInfo + for i := range succ.preds { // This linear search should not be a problem since the number of predecessors should almost always small. + pred := &succ.preds[i] + if pred.blk == blk { + predInfo = pred + break + } + } + + if predInfo == nil { + // This must be a bug in somewhere around branch manipulation. + panic("BUG: predecessor info not found while the successor exists in successors list") + } + + if wazevoapi.SSALoggingEnabled { + fmt.Printf("trying to split edge from %d->%d at %s\n", + blk.ID(), succ.ID(), predInfo.branch.Format(b)) + } + + trampoline := b.splitCriticalEdge(blk, succ, predInfo) + // Update the successors slice because the target is no longer the original `succ`. + blk.success[sidx] = trampoline + + if wazevoapi.SSAValidationEnabled { + trampolines = append(trampolines, trampoline) + } + + if wazevoapi.SSALoggingEnabled { + fmt.Printf("edge split from %d->%d at %s as %d->%d->%d \n", + blk.ID(), succ.ID(), predInfo.branch.Format(b), + blk.ID(), trampoline.ID(), succ.ID()) + } + + fallthroughBranch := blk.currentInstr + if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline { + // This can be lowered as fallthrough at the end of the block. + b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) + b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. + } else { + uninsertedTrampolines = append(uninsertedTrampolines, trampoline) + } + } + + for _, trampoline := range uninsertedTrampolines { + if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself. 
+ // This means the critical edge was backward, so we insert after the current block immediately. + b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) + b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. + } // If the target is forward, we can wait to insert until the target is inserted. + } + uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block. + } + + if wazevoapi.SSALoggingEnabled { + var bs []string + for _, blk := range b.reversePostOrderedBasicBlocks { + bs = append(bs, blk.Name()) + } + fmt.Println("ordered blocks: ", strings.Join(bs, ", ")) + } + + if wazevoapi.SSAValidationEnabled { + for _, trampoline := range trampolines { + if _, ok := b.blkVisited[trampoline]; !ok { + panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b)) + } + trampoline.validate(b) + } + } + + // Reuse the stack for the next iteration. + b.blkStack2 = uninsertedTrampolines[:0] +} + +// markFallthroughJumps finds the fallthrough jumps and marks them as such. +func (b *builder) markFallthroughJumps() { + l := len(b.reversePostOrderedBasicBlocks) - 1 + for i, blk := range b.reversePostOrderedBasicBlocks { + if i < l { + cur := blk.currentInstr + if cur.opcode == OpcodeJump && cur.blk == b.reversePostOrderedBasicBlocks[i+1] { + cur.AsFallthroughJump() + } + } + } +} + +// maybeInvertBranches inverts the branch instructions if it is likely possible to the fallthrough more likely with simple heuristics. +// nextInRPO is the next block in the reverse post-order. +// +// Returns true if the branch is inverted for testing purpose. 
+func maybeInvertBranches(now *basicBlock, nextInRPO *basicBlock) bool { + fallthroughBranch := now.currentInstr + if fallthroughBranch.opcode == OpcodeBrTable { + return false + } + + condBranch := fallthroughBranch.prev + if condBranch == nil || (condBranch.opcode != OpcodeBrnz && condBranch.opcode != OpcodeBrz) { + return false + } + + if len(fallthroughBranch.vs.View()) != 0 || len(condBranch.vs.View()) != 0 { + // If either one of them has arguments, we don't invert the branches. + return false + } + + // So this block has two branches (a conditional branch followed by an unconditional branch) at the end. + // We can invert the condition of the branch if it makes the fallthrough more likely. + + fallthroughTarget, condTarget := fallthroughBranch.blk.(*basicBlock), condBranch.blk.(*basicBlock) + + if fallthroughTarget.loopHeader { + // First, if the tail's target is loopHeader, we don't need to do anything here, + // because the edge is likely to be critical edge for complex loops (e.g. loop with branches inside it). + // That means, we will split the edge in the end of LayoutBlocks function, and insert the trampoline block + // right after this block, which will be fallthrough in any way. + return false + } else if condTarget.loopHeader { + // On the other hand, if the condBranch's target is loopHeader, we invert the condition of the branch + // so that we could get the fallthrough to the trampoline block. + goto invert + } + + if fallthroughTarget == nextInRPO { + // Also, if the tail's target is the next block in the reverse post-order, we don't need to do anything here, + // because if this is not critical edge, we would end up placing these two blocks adjacent to each other. + // Even if it is the critical edge, we place the trampoline block right after this block, which will be fallthrough in any way. 
+ return false + } else if condTarget == nextInRPO { + // If the condBranch's target is the next block in the reverse post-order, we invert the condition of the branch + // so that we could get the fallthrough to the block. + goto invert + } else { + return false + } + +invert: + for i := range fallthroughTarget.preds { + pred := &fallthroughTarget.preds[i] + if pred.branch == fallthroughBranch { + pred.branch = condBranch + break + } + } + for i := range condTarget.preds { + pred := &condTarget.preds[i] + if pred.branch == condBranch { + pred.branch = fallthroughBranch + break + } + } + + condBranch.InvertBrx() + condBranch.blk = fallthroughTarget + fallthroughBranch.blk = condTarget + if wazevoapi.SSALoggingEnabled { + fmt.Printf("inverting branches at %d->%d and %d->%d\n", + now.ID(), fallthroughTarget.ID(), now.ID(), condTarget.ID()) + } + + return true +} + +// splitCriticalEdge splits the critical edge between the given predecessor (`pred`) and successor (owning `predInfo`). +// +// - `pred` is the source of the critical edge, +// - `succ` is the destination of the critical edge, +// - `predInfo` is the predecessor info in the succ.preds slice which represents the critical edge. +// +// Why splitting critical edges is important? See following links: +// +// - https://en.wikipedia.org/wiki/Control-flow_graph +// - https://nickdesaulniers.github.io/blog/2023/01/27/critical-edge-splitting/ +// +// The returned basic block is the trampoline block which is inserted to split the critical edge. +func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlockPredecessorInfo) *basicBlock { + // In the following, we convert the following CFG: + // + // pred --(originalBranch)--> succ + // + // to the following CFG: + // + // pred --(newBranch)--> trampoline --(originalBranch)-> succ + // + // where trampoline is a new basic block which is created to split the critical edge. 
+ + trampoline := b.allocateBasicBlock() + if int(trampoline.id) >= len(b.dominators) { + b.dominators = append(b.dominators, make([]*basicBlock, trampoline.id+1)...) + } + b.dominators[trampoline.id] = pred + + originalBranch := predInfo.branch + + // Replace originalBranch with the newBranch. + newBranch := b.AllocateInstruction() + newBranch.opcode = originalBranch.opcode + newBranch.blk = trampoline + switch originalBranch.opcode { + case OpcodeJump: + case OpcodeBrz, OpcodeBrnz: + originalBranch.opcode = OpcodeJump // Trampoline consists of one unconditional branch. + newBranch.v = originalBranch.v + originalBranch.v = ValueInvalid + default: + panic("BUG: critical edge shouldn't be originated from br_table") + } + swapInstruction(pred, originalBranch, newBranch) + + // Replace the original branch with the new branch. + trampoline.rootInstr = originalBranch + trampoline.currentInstr = originalBranch + trampoline.success = append(trampoline.success, succ) // Do not use []*basicBlock{pred} because we might have already allocated the slice. + trampoline.preds = append(trampoline.preds, // same as ^. + basicBlockPredecessorInfo{blk: pred, branch: newBranch}) + b.Seal(trampoline) + + // Update the original branch to point to the trampoline. + predInfo.blk = trampoline + predInfo.branch = originalBranch + + if wazevoapi.SSAValidationEnabled { + trampoline.validate(b) + } + + if len(trampoline.params) > 0 { + panic("trampoline should not have params") + } + + // Assign the same order as the original block so that this will be placed before the actual destination. + trampoline.reversePostOrder = pred.reversePostOrder + return trampoline +} + +// swapInstruction replaces `old` in the block `blk` with `New`. 
+func swapInstruction(blk *basicBlock, old, New *Instruction) { + if blk.rootInstr == old { + blk.rootInstr = New + next := old.next + New.next = next + next.prev = New + } else { + if blk.currentInstr == old { + blk.currentInstr = New + } + prev := old.prev + prev.next, New.prev = New, prev + if next := old.next; next != nil { + New.next, next.prev = next, New + } + } + old.prev, old.next = nil, nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go new file mode 100644 index 000000000..50cb9c475 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go @@ -0,0 +1,312 @@ +package ssa + +import ( + "fmt" + "math" + "strings" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// passCalculateImmediateDominators calculates immediate dominators for each basic block. +// The result is stored in b.dominators. This make it possible for the following passes to +// use builder.isDominatedBy to check if a block is dominated by another block. +// +// At the last of pass, this function also does the loop detection and sets the basicBlock.loop flag. +func passCalculateImmediateDominators(b *builder) { + reversePostOrder := b.reversePostOrderedBasicBlocks[:0] + exploreStack := b.blkStack[:0] + b.clearBlkVisited() + + entryBlk := b.entryBlk() + + // Store the reverse postorder from the entrypoint into reversePostOrder slice. + // This calculation of reverse postorder is not described in the paper, + // so we use heuristic to calculate it so that we could potentially handle arbitrary + // complex CFGs under the assumption that success is sorted in program's natural order. + // That means blk.success[i] always appears before blk.success[i+1] in the source program, + // which is a reasonable assumption as long as SSA Builder is properly used. 
+ // + // First we push blocks in postorder iteratively visit successors of the entry block. + exploreStack = append(exploreStack, entryBlk) + const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2 + b.blkVisited[entryBlk] = visitStateSeen + for len(exploreStack) > 0 { + tail := len(exploreStack) - 1 + blk := exploreStack[tail] + exploreStack = exploreStack[:tail] + switch b.blkVisited[blk] { + case visitStateUnseen: + // This is likely a bug in the frontend. + panic("BUG: unsupported CFG") + case visitStateSeen: + // This is the first time to pop this block, and we have to see the successors first. + // So push this block again to the stack. + exploreStack = append(exploreStack, blk) + // And push the successors to the stack if necessary. + for _, succ := range blk.success { + if succ.ReturnBlock() || succ.invalid { + continue + } + if b.blkVisited[succ] == visitStateUnseen { + b.blkVisited[succ] = visitStateSeen + exploreStack = append(exploreStack, succ) + } + } + // Finally, we could pop this block once we pop all of its successors. + b.blkVisited[blk] = visitStateDone + case visitStateDone: + // Note: at this point we push blk in postorder despite its name. + reversePostOrder = append(reversePostOrder, blk) + } + } + // At this point, reversePostOrder has postorder actually, so we reverse it. + for i := len(reversePostOrder)/2 - 1; i >= 0; i-- { + j := len(reversePostOrder) - 1 - i + reversePostOrder[i], reversePostOrder[j] = reversePostOrder[j], reversePostOrder[i] + } + + for i, blk := range reversePostOrder { + blk.reversePostOrder = i + } + + // Reuse the dominators slice if possible from the previous computation of function. + b.dominators = b.dominators[:cap(b.dominators)] + if len(b.dominators) < b.basicBlocksPool.Allocated() { + // Generously reserve space in the slice because the slice will be reused future allocation. + b.dominators = append(b.dominators, make([]*basicBlock, b.basicBlocksPool.Allocated())...) 
+ } + calculateDominators(reversePostOrder, b.dominators) + + // Reuse the slices for the future use. + b.blkStack = exploreStack + + // For the following passes. + b.reversePostOrderedBasicBlocks = reversePostOrder + + // Ready to detect loops! + subPassLoopDetection(b) +} + +// calculateDominators calculates the immediate dominator of each node in the CFG, and store the result in `doms`. +// The algorithm is based on the one described in the paper "A Simple, Fast Dominance Algorithm" +// https://www.cs.rice.edu/~keith/EMBED/dom.pdf which is a faster/simple alternative to the well known Lengauer-Tarjan algorithm. +// +// The following code almost matches the pseudocode in the paper with one exception (see the code comment below). +// +// The result slice `doms` must be pre-allocated with the size larger than the size of dfsBlocks. +func calculateDominators(reversePostOrderedBlks []*basicBlock, doms []*basicBlock) { + entry, reversePostOrderedBlks := reversePostOrderedBlks[0], reversePostOrderedBlks[1: /* skips entry point */] + for _, blk := range reversePostOrderedBlks { + doms[blk.id] = nil + } + doms[entry.id] = entry + + changed := true + for changed { + changed = false + for _, blk := range reversePostOrderedBlks { + var u *basicBlock + for i := range blk.preds { + pred := blk.preds[i].blk + // Skip if this pred is not reachable yet. Note that this is not described in the paper, + // but it is necessary to handle nested loops etc. + if doms[pred.id] == nil { + continue + } + + if u == nil { + u = pred + continue + } else { + u = intersect(doms, u, pred) + } + } + if doms[blk.id] != u { + doms[blk.id] = u + changed = true + } + } + } +} + +// intersect returns the common dominator of blk1 and blk2. +// +// This is the `intersect` function in the paper. +func intersect(doms []*basicBlock, blk1 *basicBlock, blk2 *basicBlock) *basicBlock { + finger1, finger2 := blk1, blk2 + for finger1 != finger2 { + // Move the 'finger1' upwards to its immediate dominator. 
+ for finger1.reversePostOrder > finger2.reversePostOrder { + finger1 = doms[finger1.id] + } + // Move the 'finger2' upwards to its immediate dominator. + for finger2.reversePostOrder > finger1.reversePostOrder { + finger2 = doms[finger2.id] + } + } + return finger1 +} + +// subPassLoopDetection detects loops in the function using the immediate dominators. +// +// This is run at the last of passCalculateImmediateDominators. +func subPassLoopDetection(b *builder) { + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for i := range blk.preds { + pred := blk.preds[i].blk + if pred.invalid { + continue + } + if b.isDominatedBy(pred, blk) { + blk.loopHeader = true + } + } + } +} + +// buildLoopNestingForest builds the loop nesting forest for the function. +// This must be called after branch splitting since it relies on the CFG. +func passBuildLoopNestingForest(b *builder) { + ent := b.entryBlk() + doms := b.dominators + for _, blk := range b.reversePostOrderedBasicBlocks { + n := doms[blk.id] + for !n.loopHeader && n != ent { + n = doms[n.id] + } + + if n == ent && blk.loopHeader { + b.loopNestingForestRoots = append(b.loopNestingForestRoots, blk) + } else if n == ent { + } else if n.loopHeader { + n.loopNestingForestChildren = append(n.loopNestingForestChildren, blk) + } + } + + if wazevoapi.SSALoggingEnabled { + for _, root := range b.loopNestingForestRoots { + printLoopNestingForest(root.(*basicBlock), 0) + } + } +} + +func printLoopNestingForest(root *basicBlock, depth int) { + fmt.Println(strings.Repeat("\t", depth), "loop nesting forest root:", root.ID()) + for _, child := range root.loopNestingForestChildren { + fmt.Println(strings.Repeat("\t", depth+1), "child:", child.ID()) + if child.LoopHeader() { + printLoopNestingForest(child.(*basicBlock), depth+2) + } + } +} + +type dominatorSparseTree struct { + time int + euler []*basicBlock + first, depth []int + table [][]int +} + +// passBuildDominatorTree builds the dominator tree for 
the function, and constructs builder.sparseTree. +func passBuildDominatorTree(b *builder) { + // First we materialize the children of each node in the dominator tree. + idoms := b.dominators + for _, blk := range b.reversePostOrderedBasicBlocks { + parent := idoms[blk.id] + if parent == nil { + panic("BUG") + } else if parent == blk { + // This is the entry block. + continue + } + if prev := parent.child; prev == nil { + parent.child = blk + } else { + parent.child = blk + blk.sibling = prev + } + } + + // Reset the state from the previous computation. + n := b.basicBlocksPool.Allocated() + st := &b.sparseTree + st.euler = append(st.euler[:0], make([]*basicBlock, 2*n-1)...) + st.first = append(st.first[:0], make([]int, n)...) + for i := range st.first { + st.first[i] = -1 + } + st.depth = append(st.depth[:0], make([]int, 2*n-1)...) + st.time = 0 + + // Start building the sparse tree. + st.eulerTour(b.entryBlk(), 0) + st.buildSparseTable() +} + +func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int) { + if wazevoapi.SSALoggingEnabled { + fmt.Println(strings.Repeat("\t", height), "euler tour:", node.ID()) + } + dt.euler[dt.time] = node + dt.depth[dt.time] = height + if dt.first[node.id] == -1 { + dt.first[node.id] = dt.time + } + dt.time++ + + for child := node.child; child != nil; child = child.sibling { + dt.eulerTour(child, height+1) + dt.euler[dt.time] = node // add the current node again after visiting a child + dt.depth[dt.time] = height + dt.time++ + } +} + +// buildSparseTable builds a sparse table for RMQ queries. +func (dt *dominatorSparseTree) buildSparseTable() { + n := len(dt.depth) + k := int(math.Log2(float64(n))) + 1 + table := dt.table + + if n >= len(table) { + table = append(table, make([][]int, n+1)...) + } + for i := range table { + if len(table[i]) < k { + table[i] = append(table[i], make([]int, k)...) 
+		}
+		table[i][0] = i
+	}
+
+	for j := 1; 1<<j <= n; j++ {
+		for i := 0; i+(1<<j)-1 < n; i++ {
+			if dt.depth[table[i][j-1]] < dt.depth[table[i+(1<<(j-1))][j-1]] {
+				table[i][j] = table[i][j-1]
+			} else {
+				table[i][j] = table[i+(1<<(j-1))][j-1]
+			}
+		}
+	}
+	dt.table = table
+}
+
+// rmq performs a range minimum query on the sparse table.
+func (dt *dominatorSparseTree) rmq(l, r int) int {
+	table := dt.table
+	depth := dt.depth
+	j := int(math.Log2(float64(r - l + 1)))
+	if depth[table[l][j]] <= depth[table[r-(1<<j)+1][j]] {
+		return table[l][j]
+	}
+	return table[r-(1<<j)+1][j]
+}
+
+// findLCA finds the LCA of the two nodes in the dominator tree via the Euler tour + RMQ.
+func (dt *dominatorSparseTree) findLCA(u, v basicBlockID) *basicBlock {
+	first := dt.first
+	if first[u] > first[v] {
+		u, v = v, u
+	}
+	return dt.euler[dt.rmq(first[u], first[v])]
+}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/signature.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/signature.go
new file mode 100644
index 000000000..43483395a
--- /dev/null
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/signature.go
@@ -0,0 +1,49 @@
+package ssa
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Signature is a function prototype.
+type Signature struct {
+	// ID is a unique identifier for this signature used to lookup.
+	ID SignatureID
+	// Params and Results are the types of the parameters and results of the function.
+	Params, Results []Type
+
+	// used is true if this is used by the currently-compiled function.
+	// Debugging only.
+	used bool
+}
+
+// String implements fmt.Stringer.
+func (s *Signature) String() string {
+	str := strings.Builder{}
+	str.WriteString(s.ID.String())
+	str.WriteString(": ")
+	if len(s.Params) > 0 {
+		for _, typ := range s.Params {
+			str.WriteString(typ.String())
+		}
+	} else {
+		str.WriteByte('v')
+	}
+	str.WriteByte('_')
+	if len(s.Results) > 0 {
+		for _, typ := range s.Results {
+			str.WriteString(typ.String())
+		}
+	} else {
+		str.WriteByte('v')
+	}
+	return str.String()
+}
+
+// SignatureID is an unique identifier used to lookup.
+type SignatureID int
+
+// String implements fmt.Stringer.
+func (s SignatureID) String() string {
+	return fmt.Sprintf("sig%d", s)
+}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/ssa.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/ssa.go
new file mode 100644
index 000000000..b477e58bd
--- /dev/null
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/ssa.go
@@ -0,0 +1,14 @@
+// Package ssa is used to construct SSA function. By nature this is free of Wasm specific thing
+// and ISA.
+// +// We use the "block argument" variant of SSA: https://en.wikipedia.org/wiki/Static_single-assignment_form#Block_arguments +// which is equivalent to the traditional PHI function based one, but more convenient during optimizations. +// However, in this package's source code comment, we might use PHI whenever it seems necessary in order to be aligned with +// existing literatures, e.g. SSA level optimization algorithms are often described using PHI nodes. +// +// The rationale doc for the choice of "block argument" by MLIR of LLVM is worth a read: +// https://mlir.llvm.org/docs/Rationale/Rationale/#block-arguments-vs-phi-nodes +// +// The algorithm to resolve variable definitions used here is based on the paper +// "Simple and Efficient Construction of Static Single Assignment Form": https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf. +package ssa diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go new file mode 100644 index 000000000..e8c8cd9de --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go @@ -0,0 +1,112 @@ +package ssa + +type Type byte + +const ( + typeInvalid Type = iota + + // TODO: add 8, 16 bit types when it's needed for optimizations. + + // TypeI32 represents an integer type with 32 bits. + TypeI32 + + // TypeI64 represents an integer type with 64 bits. + TypeI64 + + // TypeF32 represents 32-bit floats in the IEEE 754. + TypeF32 + + // TypeF64 represents 64-bit floats in the IEEE 754. + TypeF64 + + // TypeV128 represents 128-bit SIMD vectors. + TypeV128 +) + +// String implements fmt.Stringer. 
+func (t Type) String() (ret string) { + switch t { + case typeInvalid: + return "invalid" + case TypeI32: + return "i32" + case TypeI64: + return "i64" + case TypeF32: + return "f32" + case TypeF64: + return "f64" + case TypeV128: + return "v128" + default: + panic(int(t)) + } +} + +// IsInt returns true if the type is an integer type. +func (t Type) IsInt() bool { + return t == TypeI32 || t == TypeI64 +} + +// IsFloat returns true if the type is a floating point type. +func (t Type) IsFloat() bool { + return t == TypeF32 || t == TypeF64 +} + +// Bits returns the number of bits required to represent the type. +func (t Type) Bits() byte { + switch t { + case TypeI32, TypeF32: + return 32 + case TypeI64, TypeF64: + return 64 + case TypeV128: + return 128 + default: + panic(int(t)) + } +} + +// Size returns the number of bytes required to represent the type. +func (t Type) Size() byte { + return t.Bits() / 8 +} + +func (t Type) invalid() bool { + return t == typeInvalid +} + +// VecLane represents a lane in a SIMD vector. +type VecLane byte + +const ( + VecLaneInvalid VecLane = 1 + iota + VecLaneI8x16 + VecLaneI16x8 + VecLaneI32x4 + VecLaneI64x2 + VecLaneF32x4 + VecLaneF64x2 +) + +// String implements fmt.Stringer. 
+func (vl VecLane) String() (ret string) { + switch vl { + case VecLaneInvalid: + return "invalid" + case VecLaneI8x16: + return "i8x16" + case VecLaneI16x8: + return "i16x8" + case VecLaneI32x4: + return "i32x4" + case VecLaneI64x2: + return "i64x2" + case VecLaneF32x4: + return "f32x4" + case VecLaneF64x2: + return "f64x2" + default: + panic(int(vl)) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go new file mode 100644 index 000000000..bcf83cbf8 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go @@ -0,0 +1,87 @@ +package ssa + +import ( + "fmt" + "math" + + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" +) + +// Variable is a unique identifier for a source program's variable and will correspond to +// multiple ssa Value(s). +// +// For example, `Local 1` is a Variable in WebAssembly, and Value(s) will be created for it +// whenever it executes `local.set 1`. +// +// Variable is useful to track the SSA Values of a variable in the source program, and +// can be used to find the corresponding latest SSA Value via Builder.FindValue. +type Variable uint32 + +// String implements fmt.Stringer. +func (v Variable) String() string { + return fmt.Sprintf("var%d", v) +} + +// Value represents an SSA value with a type information. The relationship with Variable is 1: N (including 0), +// that means there might be multiple Variable(s) for a Value. +// +// Higher 32-bit is used to store Type for this value. +type Value uint64 + +// ValueID is the lower 32bit of Value, which is the pure identifier of Value without type info. +type ValueID uint32 + +const ( + valueIDInvalid ValueID = math.MaxUint32 + ValueInvalid Value = Value(valueIDInvalid) +) + +// Format creates a debug string for this Value using the data stored in Builder. 
+func (v Value) Format(b Builder) string { + if annotation, ok := b.(*builder).valueAnnotations[v.ID()]; ok { + return annotation + } + return fmt.Sprintf("v%d", v.ID()) +} + +func (v Value) formatWithType(b Builder) (ret string) { + if annotation, ok := b.(*builder).valueAnnotations[v.ID()]; ok { + ret = annotation + ":" + v.Type().String() + } else { + ret = fmt.Sprintf("v%d:%s", v.ID(), v.Type()) + } + + if wazevoapi.SSALoggingEnabled { // This is useful to check live value analysis bugs. + if bd := b.(*builder); bd.donePostBlockLayoutPasses { + id := v.ID() + ret += fmt.Sprintf("(ref=%d)", bd.valueRefCounts[id]) + } + } + return ret +} + +// Valid returns true if this value is valid. +func (v Value) Valid() bool { + return v.ID() != valueIDInvalid +} + +// Type returns the Type of this value. +func (v Value) Type() Type { + return Type(v >> 32) +} + +// ID returns the valueID of this value. +func (v Value) ID() ValueID { + return ValueID(v) +} + +// setType sets a type to this Value and returns the updated Value. +func (v Value) setType(typ Type) Value { + return v | Value(typ)<<32 +} + +// Values is a slice of Value. Use this instead of []Value to reuse the underlying memory. +type Values = wazevoapi.VarLength[Value] + +// ValuesNil is a nil Values. +var ValuesNil = wazevoapi.NewNilVarLength[Value]() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go new file mode 100644 index 000000000..2db61e219 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go @@ -0,0 +1,196 @@ +package wazevoapi + +import ( + "context" + "encoding/hex" + "fmt" + "math/rand" + "os" + "time" +) + +// These consts are used various places in the wazevo implementations. 
+// Instead of defining them in each file, we define them here so that we can quickly iterate on +// debugging without spending "where do we have debug logging?" time. + +// ----- Debug logging ----- +// These consts must be disabled by default. Enable them only when debugging. + +const ( + FrontEndLoggingEnabled = false + SSALoggingEnabled = false + RegAllocLoggingEnabled = false +) + +// ----- Output prints ----- +// These consts must be disabled by default. Enable them only when debugging. + +const ( + PrintSSA = false + PrintOptimizedSSA = false + PrintSSAToBackendIRLowering = false + PrintRegisterAllocated = false + PrintFinalizedMachineCode = false + PrintMachineCodeHexPerFunction = printMachineCodeHexPerFunctionUnmodified || PrintMachineCodeHexPerFunctionDisassemblable //nolint + printMachineCodeHexPerFunctionUnmodified = false + // PrintMachineCodeHexPerFunctionDisassemblable prints the machine code while modifying the actual result + // to make it disassemblable. This is useful when debugging the final machine code. See the places where this is used for detail. + // When this is enabled, functions must not be called. + PrintMachineCodeHexPerFunctionDisassemblable = false +) + +// printTarget is the function index to print the machine code. This is used for debugging to print the machine code +// of a specific function. +const printTarget = -1 + +// PrintEnabledIndex returns true if the current function index is the print target. +func PrintEnabledIndex(ctx context.Context) bool { + if printTarget == -1 { + return true + } + return GetCurrentFunctionIndex(ctx) == printTarget +} + +// ----- Validations ----- +const ( + // SSAValidationEnabled enables the SSA validation. This is disabled by default since the operation is expensive. + SSAValidationEnabled = false +) + +// ----- Stack Guard Check ----- +const ( + // StackGuardCheckEnabled enables the stack guard check to ensure that our stack bounds check works correctly. 
+ StackGuardCheckEnabled = false + StackGuardCheckGuardPageSize = 8096 +) + +// CheckStackGuardPage checks the given stack guard page is not corrupted. +func CheckStackGuardPage(s []byte) { + for i := 0; i < StackGuardCheckGuardPageSize; i++ { + if s[i] != 0 { + panic( + fmt.Sprintf("BUG: stack guard page is corrupted:\n\tguard_page=%s\n\tstack=%s", + hex.EncodeToString(s[:StackGuardCheckGuardPageSize]), + hex.EncodeToString(s[StackGuardCheckGuardPageSize:]), + )) + } + } +} + +// ----- Deterministic compilation verifier ----- + +const ( + // DeterministicCompilationVerifierEnabled enables the deterministic compilation verifier. This is disabled by default + // since the operation is expensive. But when in doubt, enable this to make sure the compilation is deterministic. + DeterministicCompilationVerifierEnabled = false + DeterministicCompilationVerifyingIter = 5 +) + +type ( + verifierState struct { + initialCompilationDone bool + maybeRandomizedIndexes []int + r *rand.Rand + values map[string]string + } + verifierStateContextKey struct{} + currentFunctionNameKey struct{} + currentFunctionIndexKey struct{} +) + +// NewDeterministicCompilationVerifierContext creates a new context with the deterministic compilation verifier used per wasm.Module. +func NewDeterministicCompilationVerifierContext(ctx context.Context, localFunctions int) context.Context { + maybeRandomizedIndexes := make([]int, localFunctions) + for i := range maybeRandomizedIndexes { + maybeRandomizedIndexes[i] = i + } + r := rand.New(rand.NewSource(time.Now().UnixNano())) + return context.WithValue(ctx, verifierStateContextKey{}, &verifierState{ + r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: map[string]string{}, + }) +} + +// DeterministicCompilationVerifierRandomizeIndexes randomizes the indexes for the deterministic compilation verifier. +// To get the randomized index, use DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex. 
+func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) { + state := ctx.Value(verifierStateContextKey{}).(*verifierState) + if !state.initialCompilationDone { + // If this is the first attempt, we use the index as-is order. + state.initialCompilationDone = true + return + } + r := state.r + r.Shuffle(len(state.maybeRandomizedIndexes), func(i, j int) { + state.maybeRandomizedIndexes[i], state.maybeRandomizedIndexes[j] = state.maybeRandomizedIndexes[j], state.maybeRandomizedIndexes[i] + }) +} + +// DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex returns the randomized index for the given `index` +// which is assigned by DeterministicCompilationVerifierRandomizeIndexes. +func DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx context.Context, index int) int { + state := ctx.Value(verifierStateContextKey{}).(*verifierState) + ret := state.maybeRandomizedIndexes[index] + return ret +} + +// VerifyOrSetDeterministicCompilationContextValue verifies that the `newValue` is the same as the previous value for the given `scope` +// and the current function name. If the previous value doesn't exist, it sets the value to the given `newValue`. +// +// If the verification fails, this prints the diff and exits the process. +func VerifyOrSetDeterministicCompilationContextValue(ctx context.Context, scope string, newValue string) { + fn := ctx.Value(currentFunctionNameKey{}).(string) + key := fn + ": " + scope + verifierCtx := ctx.Value(verifierStateContextKey{}).(*verifierState) + oldValue, ok := verifierCtx.values[key] + if !ok { + verifierCtx.values[key] = newValue + return + } + if oldValue != newValue { + fmt.Printf( + `BUG: Deterministic compilation failed for function%s at scope="%s". + +This is mostly due to (but might not be limited to): + * Resetting ssa.Builder, backend.Compiler or frontend.Compiler, etc doens't work as expected, and the compilation has been affected by the previous iterations. 
+ * Using a map with non-deterministic iteration order. + +---------- [old] ---------- +%s + +---------- [new] ---------- +%s +`, + fn, scope, oldValue, newValue, + ) + os.Exit(1) + } +} + +// nolint +const NeedFunctionNameInContext = PrintSSA || + PrintOptimizedSSA || + PrintSSAToBackendIRLowering || + PrintRegisterAllocated || + PrintFinalizedMachineCode || + PrintMachineCodeHexPerFunction || + DeterministicCompilationVerifierEnabled || + PerfMapEnabled + +// SetCurrentFunctionName sets the current function name to the given `functionName`. +func SetCurrentFunctionName(ctx context.Context, index int, functionName string) context.Context { + ctx = context.WithValue(ctx, currentFunctionNameKey{}, functionName) + ctx = context.WithValue(ctx, currentFunctionIndexKey{}, index) + return ctx +} + +// GetCurrentFunctionName returns the current function name. +func GetCurrentFunctionName(ctx context.Context) string { + ret, _ := ctx.Value(currentFunctionNameKey{}).(string) + return ret +} + +// GetCurrentFunctionIndex returns the current function index. +func GetCurrentFunctionIndex(ctx context.Context) int { + ret, _ := ctx.Value(currentFunctionIndexKey{}).(int) + return ret +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/exitcode.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/exitcode.go new file mode 100644 index 000000000..5ad594982 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/exitcode.go @@ -0,0 +1,109 @@ +package wazevoapi + +// ExitCode is an exit code of an execution of a function. +type ExitCode uint32 + +const ( + ExitCodeOK ExitCode = iota + ExitCodeGrowStack + ExitCodeGrowMemory + ExitCodeUnreachable + ExitCodeMemoryOutOfBounds + // ExitCodeCallGoModuleFunction is an exit code for a call to an api.GoModuleFunction. + ExitCodeCallGoModuleFunction + // ExitCodeCallGoFunction is an exit code for a call to an api.GoFunction. 
+ ExitCodeCallGoFunction + ExitCodeTableOutOfBounds + ExitCodeIndirectCallNullPointer + ExitCodeIndirectCallTypeMismatch + ExitCodeIntegerDivisionByZero + ExitCodeIntegerOverflow + ExitCodeInvalidConversionToInteger + ExitCodeCheckModuleExitCode + ExitCodeCallListenerBefore + ExitCodeCallListenerAfter + ExitCodeCallGoModuleFunctionWithListener + ExitCodeCallGoFunctionWithListener + ExitCodeTableGrow + ExitCodeRefFunc + ExitCodeMemoryWait32 + ExitCodeMemoryWait64 + ExitCodeMemoryNotify + ExitCodeUnalignedAtomic + exitCodeMax +) + +const ExitCodeMask = 0xff + +// String implements fmt.Stringer. +func (e ExitCode) String() string { + switch e { + case ExitCodeOK: + return "ok" + case ExitCodeGrowStack: + return "grow_stack" + case ExitCodeCallGoModuleFunction: + return "call_go_module_function" + case ExitCodeCallGoFunction: + return "call_go_function" + case ExitCodeUnreachable: + return "unreachable" + case ExitCodeMemoryOutOfBounds: + return "memory_out_of_bounds" + case ExitCodeUnalignedAtomic: + return "unaligned_atomic" + case ExitCodeTableOutOfBounds: + return "table_out_of_bounds" + case ExitCodeIndirectCallNullPointer: + return "indirect_call_null_pointer" + case ExitCodeIndirectCallTypeMismatch: + return "indirect_call_type_mismatch" + case ExitCodeIntegerDivisionByZero: + return "integer_division_by_zero" + case ExitCodeIntegerOverflow: + return "integer_overflow" + case ExitCodeInvalidConversionToInteger: + return "invalid_conversion_to_integer" + case ExitCodeCheckModuleExitCode: + return "check_module_exit_code" + case ExitCodeCallListenerBefore: + return "call_listener_before" + case ExitCodeCallListenerAfter: + return "call_listener_after" + case ExitCodeCallGoModuleFunctionWithListener: + return "call_go_module_function_with_listener" + case ExitCodeCallGoFunctionWithListener: + return "call_go_function_with_listener" + case ExitCodeGrowMemory: + return "grow_memory" + case ExitCodeTableGrow: + return "table_grow" + case ExitCodeRefFunc: + return 
"ref_func" + case ExitCodeMemoryWait32: + return "memory_wait32" + case ExitCodeMemoryWait64: + return "memory_wait64" + case ExitCodeMemoryNotify: + return "memory_notify" + } + panic("TODO") +} + +func ExitCodeCallGoModuleFunctionWithIndex(index int, withListener bool) ExitCode { + if withListener { + return ExitCodeCallGoModuleFunctionWithListener | ExitCode(index<<8) + } + return ExitCodeCallGoModuleFunction | ExitCode(index<<8) +} + +func ExitCodeCallGoFunctionWithIndex(index int, withListener bool) ExitCode { + if withListener { + return ExitCodeCallGoFunctionWithListener | ExitCode(index<<8) + } + return ExitCodeCallGoFunction | ExitCode(index<<8) +} + +func GoFunctionIndexFromExitCode(exitCode ExitCode) int { + return int(exitCode >> 8) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/offsetdata.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/offsetdata.go new file mode 100644 index 000000000..fe6161b04 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/offsetdata.go @@ -0,0 +1,216 @@ +package wazevoapi + +import ( + "github.com/tetratelabs/wazero/internal/wasm" +) + +const ( + // FunctionInstanceSize is the size of wazevo.functionInstance. 
+ FunctionInstanceSize = 24 + // FunctionInstanceExecutableOffset is an offset of `executable` field in wazevo.functionInstance + FunctionInstanceExecutableOffset = 0 + // FunctionInstanceModuleContextOpaquePtrOffset is an offset of `moduleContextOpaquePtr` field in wazevo.functionInstance + FunctionInstanceModuleContextOpaquePtrOffset = 8 + // FunctionInstanceTypeIDOffset is an offset of `typeID` field in wazevo.functionInstance + FunctionInstanceTypeIDOffset = 16 +) + +const ( + // ExecutionContextOffsetExitCodeOffset is an offset of `exitCode` field in wazevo.executionContext + ExecutionContextOffsetExitCodeOffset Offset = 0 + // ExecutionContextOffsetCallerModuleContextPtr is an offset of `callerModuleContextPtr` field in wazevo.executionContext + ExecutionContextOffsetCallerModuleContextPtr Offset = 8 + // ExecutionContextOffsetOriginalFramePointer is an offset of `originalFramePointer` field in wazevo.executionContext + ExecutionContextOffsetOriginalFramePointer Offset = 16 + // ExecutionContextOffsetOriginalStackPointer is an offset of `originalStackPointer` field in wazevo.executionContext + ExecutionContextOffsetOriginalStackPointer Offset = 24 + // ExecutionContextOffsetGoReturnAddress is an offset of `goReturnAddress` field in wazevo.executionContext + ExecutionContextOffsetGoReturnAddress Offset = 32 + // ExecutionContextOffsetStackBottomPtr is an offset of `stackBottomPtr` field in wazevo.executionContext + ExecutionContextOffsetStackBottomPtr Offset = 40 + // ExecutionContextOffsetGoCallReturnAddress is an offset of `goCallReturnAddress` field in wazevo.executionContext + ExecutionContextOffsetGoCallReturnAddress Offset = 48 + // ExecutionContextOffsetStackPointerBeforeGoCall is an offset of `StackPointerBeforeGoCall` field in wazevo.executionContext + ExecutionContextOffsetStackPointerBeforeGoCall Offset = 56 + // ExecutionContextOffsetStackGrowRequiredSize is an offset of `stackGrowRequiredSize` field in wazevo.executionContext + 
ExecutionContextOffsetStackGrowRequiredSize Offset = 64 + // ExecutionContextOffsetMemoryGrowTrampolineAddress is an offset of `memoryGrowTrampolineAddress` field in wazevo.executionContext + ExecutionContextOffsetMemoryGrowTrampolineAddress Offset = 72 + // ExecutionContextOffsetStackGrowCallTrampolineAddress is an offset of `stackGrowCallTrampolineAddress` field in wazevo.executionContext. + ExecutionContextOffsetStackGrowCallTrampolineAddress Offset = 80 + // ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress is an offset of `checkModuleExitCodeTrampolineAddress` field in wazevo.executionContext. + ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress Offset = 88 + // ExecutionContextOffsetSavedRegistersBegin is an offset of the first element of `savedRegisters` field in wazevo.executionContext + ExecutionContextOffsetSavedRegistersBegin Offset = 96 + // ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque is an offset of `goFunctionCallCalleeModuleContextOpaque` field in wazevo.executionContext + ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque Offset = 1120 + // ExecutionContextOffsetTableGrowTrampolineAddress is an offset of `tableGrowTrampolineAddress` field in wazevo.executionContext + ExecutionContextOffsetTableGrowTrampolineAddress Offset = 1128 + // ExecutionContextOffsetRefFuncTrampolineAddress is an offset of `refFuncTrampolineAddress` field in wazevo.executionContext + ExecutionContextOffsetRefFuncTrampolineAddress Offset = 1136 + ExecutionContextOffsetMemmoveAddress Offset = 1144 + ExecutionContextOffsetFramePointerBeforeGoCall Offset = 1152 + ExecutionContextOffsetMemoryWait32TrampolineAddress Offset = 1160 + ExecutionContextOffsetMemoryWait64TrampolineAddress Offset = 1168 + ExecutionContextOffsetMemoryNotifyTrampolineAddress Offset = 1176 +) + +// ModuleContextOffsetData allows the compilers to get the information about offsets to the fields of wazevo.moduleContextOpaque, +// This is unique per module. 
+type ModuleContextOffsetData struct { + TotalSize int + ModuleInstanceOffset, + LocalMemoryBegin, + ImportedMemoryBegin, + ImportedFunctionsBegin, + GlobalsBegin, + TypeIDs1stElement, + TablesBegin, + BeforeListenerTrampolines1stElement, + AfterListenerTrampolines1stElement, + DataInstances1stElement, + ElementInstances1stElement Offset +} + +// ImportedFunctionOffset returns an offset of the i-th imported function. +// Each item is stored as wazevo.functionInstance whose size matches FunctionInstanceSize. +func (m *ModuleContextOffsetData) ImportedFunctionOffset(i wasm.Index) ( + executableOffset, moduleCtxOffset, typeIDOffset Offset, +) { + base := m.ImportedFunctionsBegin + Offset(i)*FunctionInstanceSize + return base, base + 8, base + 16 +} + +// GlobalInstanceOffset returns an offset of the i-th global instance. +func (m *ModuleContextOffsetData) GlobalInstanceOffset(i wasm.Index) Offset { + return m.GlobalsBegin + Offset(i)*16 +} + +// Offset represents an offset of a field of a struct. +type Offset int32 + +// U32 encodes an Offset as uint32 for convenience. +func (o Offset) U32() uint32 { + return uint32(o) +} + +// I64 encodes an Offset as int64 for convenience. +func (o Offset) I64() int64 { + return int64(o) +} + +// U64 encodes an Offset as int64 for convenience. +func (o Offset) U64() uint64 { + return uint64(o) +} + +// LocalMemoryBase returns an offset of the first byte of the local memory. +func (m *ModuleContextOffsetData) LocalMemoryBase() Offset { + return m.LocalMemoryBegin +} + +// LocalMemoryLen returns an offset of the length of the local memory buffer. +func (m *ModuleContextOffsetData) LocalMemoryLen() Offset { + if l := m.LocalMemoryBegin; l >= 0 { + return l + 8 + } + return -1 +} + +// TableOffset returns an offset of the i-th table instance. 
+func (m *ModuleContextOffsetData) TableOffset(tableIndex int) Offset { + return m.TablesBegin + Offset(tableIndex)*8 +} + +// NewModuleContextOffsetData creates a ModuleContextOffsetData determining the structure of moduleContextOpaque for the given Module. +// The structure is described in the comment of wazevo.moduleContextOpaque. +func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContextOffsetData { + ret := ModuleContextOffsetData{} + var offset Offset + + ret.ModuleInstanceOffset = 0 + offset += 8 + + if m.MemorySection != nil { + ret.LocalMemoryBegin = offset + // buffer base + memory size. + const localMemorySizeInOpaqueModuleContext = 16 + offset += localMemorySizeInOpaqueModuleContext + } else { + // Indicates that there's no local memory + ret.LocalMemoryBegin = -1 + } + + if m.ImportMemoryCount > 0 { + offset = align8(offset) + // *wasm.MemoryInstance + imported memory's owner (moduleContextOpaque) + const importedMemorySizeInOpaqueModuleContext = 16 + ret.ImportedMemoryBegin = offset + offset += importedMemorySizeInOpaqueModuleContext + } else { + // Indicates that there's no imported memory + ret.ImportedMemoryBegin = -1 + } + + if m.ImportFunctionCount > 0 { + offset = align8(offset) + ret.ImportedFunctionsBegin = offset + // Each function is stored wazevo.functionInstance. + size := int(m.ImportFunctionCount) * FunctionInstanceSize + offset += Offset(size) + } else { + ret.ImportedFunctionsBegin = -1 + } + + if globals := int(m.ImportGlobalCount) + len(m.GlobalSection); globals > 0 { + // Align to 16 bytes for globals, as f32/f64/v128 might be loaded via SIMD instructions. + offset = align16(offset) + ret.GlobalsBegin = offset + // Pointers to *wasm.GlobalInstance. + offset += Offset(globals) * 16 + } else { + ret.GlobalsBegin = -1 + } + + if tables := len(m.TableSection) + int(m.ImportTableCount); tables > 0 { + offset = align8(offset) + ret.TypeIDs1stElement = offset + offset += 8 // First element of TypeIDs. 
+ + ret.TablesBegin = offset + // Pointers to *wasm.TableInstance. + offset += Offset(tables) * 8 + } else { + ret.TypeIDs1stElement = -1 + ret.TablesBegin = -1 + } + + if withListener { + offset = align8(offset) + ret.BeforeListenerTrampolines1stElement = offset + offset += 8 // First element of BeforeListenerTrampolines. + + ret.AfterListenerTrampolines1stElement = offset + offset += 8 // First element of AfterListenerTrampolines. + } else { + ret.BeforeListenerTrampolines1stElement = -1 + ret.AfterListenerTrampolines1stElement = -1 + } + + ret.DataInstances1stElement = offset + offset += 8 // First element of DataInstances. + + ret.ElementInstances1stElement = offset + offset += 8 // First element of ElementInstances. + + ret.TotalSize = int(align16(offset)) + return ret +} + +func align16(o Offset) Offset { + return (o + 15) &^ 15 +} + +func align8(o Offset) Offset { + return (o + 7) &^ 7 +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap.go new file mode 100644 index 000000000..642c7f75d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap.go @@ -0,0 +1,96 @@ +package wazevoapi + +import ( + "fmt" + "os" + "strconv" + "sync" +) + +var PerfMap *Perfmap + +func init() { + if PerfMapEnabled { + pid := os.Getpid() + filename := "/tmp/perf-" + strconv.Itoa(pid) + ".map" + + fh, err := os.OpenFile(filename, os.O_APPEND|os.O_RDWR|os.O_CREATE, 0o644) + if err != nil { + panic(err) + } + + PerfMap = &Perfmap{fh: fh} + } +} + +// Perfmap holds perfmap entries to be flushed into a perfmap file. 
+type Perfmap struct { + entries []entry + mux sync.Mutex + fh *os.File +} + +type entry struct { + index int + offset int64 + size uint64 + name string +} + +func (f *Perfmap) Lock() { + f.mux.Lock() +} + +func (f *Perfmap) Unlock() { + f.mux.Unlock() +} + +// AddModuleEntry adds a perfmap entry into the perfmap file. +// index is the index of the function in the module, offset is the offset of the function in the module, +// size is the size of the function, and name is the name of the function. +// +// Note that the entries are not flushed into the perfmap file until Flush is called, +// and the entries are module-scoped; Perfmap must be locked until Flush is called. +func (f *Perfmap) AddModuleEntry(index int, offset int64, size uint64, name string) { + e := entry{index: index, offset: offset, size: size, name: name} + if f.entries == nil { + f.entries = []entry{e} + return + } + f.entries = append(f.entries, e) +} + +// Flush writes the perfmap entries into the perfmap file where the entries are adjusted by the given `addr` and `functionOffsets`. +func (f *Perfmap) Flush(addr uintptr, functionOffsets []int) { + defer func() { + _ = f.fh.Sync() + }() + + for _, e := range f.entries { + if _, err := f.fh.WriteString(fmt.Sprintf("%x %s %s\n", + uintptr(e.offset)+addr+uintptr(functionOffsets[e.index]), + strconv.FormatUint(e.size, 16), + e.name, + )); err != nil { + panic(err) + } + } + f.entries = f.entries[:0] +} + +// Clear clears the perfmap entries not yet flushed. +func (f *Perfmap) Clear() { + f.entries = f.entries[:0] +} + +// AddEntry writes a perfmap entry directly into the perfmap file, not using the entries. 
+func (f *Perfmap) AddEntry(addr uintptr, size uint64, name string) { + _, err := f.fh.WriteString(fmt.Sprintf("%x %s %s\n", + addr, + strconv.FormatUint(size, 16), + name, + )) + if err != nil { + panic(err) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_disabled.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_disabled.go new file mode 100644 index 000000000..bcc4e545c --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_disabled.go @@ -0,0 +1,5 @@ +//go:build !perfmap + +package wazevoapi + +const PerfMapEnabled = false diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_enabled.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_enabled.go new file mode 100644 index 000000000..2a39879ec --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_enabled.go @@ -0,0 +1,5 @@ +//go:build perfmap + +package wazevoapi + +const PerfMapEnabled = true diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go new file mode 100644 index 000000000..3149fdc9e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go @@ -0,0 +1,215 @@ +package wazevoapi + +const poolPageSize = 128 + +// Pool is a pool of T that can be allocated and reset. +// This is useful to avoid unnecessary allocations. +type Pool[T any] struct { + pages []*[poolPageSize]T + resetFn func(*T) + allocated, index int +} + +// NewPool returns a new Pool. +// resetFn is called when a new T is allocated in Pool.Allocate. +func NewPool[T any](resetFn func(*T)) Pool[T] { + var ret Pool[T] + ret.resetFn = resetFn + ret.Reset() + return ret +} + +// Allocated returns the number of allocated T currently in the pool. 
+func (p *Pool[T]) Allocated() int { + return p.allocated +} + +// Allocate allocates a new T from the pool. +func (p *Pool[T]) Allocate() *T { + if p.index == poolPageSize { + if len(p.pages) == cap(p.pages) { + p.pages = append(p.pages, new([poolPageSize]T)) + } else { + i := len(p.pages) + p.pages = p.pages[:i+1] + if p.pages[i] == nil { + p.pages[i] = new([poolPageSize]T) + } + } + p.index = 0 + } + ret := &p.pages[len(p.pages)-1][p.index] + if p.resetFn != nil { + p.resetFn(ret) + } + p.index++ + p.allocated++ + return ret +} + +// View returns the pointer to i-th item from the pool. +func (p *Pool[T]) View(i int) *T { + page, index := i/poolPageSize, i%poolPageSize + return &p.pages[page][index] +} + +// Reset resets the pool. +func (p *Pool[T]) Reset() { + p.pages = p.pages[:0] + p.index = poolPageSize + p.allocated = 0 +} + +// IDedPool is a pool of T that can be allocated and reset, with a way to get T by an ID. +type IDedPool[T any] struct { + pool Pool[T] + idToItems []*T + maxIDEncountered int +} + +// NewIDedPool returns a new IDedPool. +func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] { + return IDedPool[T]{pool: NewPool[T](resetFn)} +} + +// GetOrAllocate returns the T with the given id. +func (p *IDedPool[T]) GetOrAllocate(id int) *T { + if p.maxIDEncountered < id { + p.maxIDEncountered = id + } + if id >= len(p.idToItems) { + p.idToItems = append(p.idToItems, make([]*T, id-len(p.idToItems)+1)...) + } + if p.idToItems[id] == nil { + p.idToItems[id] = p.pool.Allocate() + } + return p.idToItems[id] +} + +// Get returns the T with the given id, or nil if it's not allocated. +func (p *IDedPool[T]) Get(id int) *T { + if id >= len(p.idToItems) { + return nil + } + return p.idToItems[id] +} + +// Reset resets the pool. +func (p *IDedPool[T]) Reset() { + p.pool.Reset() + for i := range p.idToItems { + p.idToItems[i] = nil + } + p.maxIDEncountered = -1 +} + +// MaxIDEncountered returns the maximum id encountered so far. 
+func (p *IDedPool[T]) MaxIDEncountered() int { + return p.maxIDEncountered +} + +// arraySize is the size of the array used in VarLengthPool's arrayPool. +// This is chosen to be 8, which is empirically a good number among 8, 12, 16 and 20. +const arraySize = 8 + +// VarLengthPool is a pool of VarLength[T] that can be allocated and reset. +type ( + VarLengthPool[T any] struct { + arrayPool Pool[varLengthPoolArray[T]] + slicePool Pool[[]T] + } + // varLengthPoolArray wraps an array and keeps track of the next index to be used to avoid the heap allocation. + varLengthPoolArray[T any] struct { + arr [arraySize]T + next int + } +) + +// VarLength is a variable length array that can be reused via a pool. +type VarLength[T any] struct { + arr *varLengthPoolArray[T] + slc *[]T +} + +// NewVarLengthPool returns a new VarLengthPool. +func NewVarLengthPool[T any]() VarLengthPool[T] { + return VarLengthPool[T]{ + arrayPool: NewPool[varLengthPoolArray[T]](func(v *varLengthPoolArray[T]) { + v.next = 0 + }), + slicePool: NewPool[[]T](func(i *[]T) { + *i = (*i)[:0] + }), + } +} + +// NewNilVarLength returns a new VarLength[T] with a nil backing. +func NewNilVarLength[T any]() VarLength[T] { + return VarLength[T]{} +} + +// Allocate allocates a new VarLength[T] from the pool. +func (p *VarLengthPool[T]) Allocate(knownMin int) VarLength[T] { + if knownMin <= arraySize { + arr := p.arrayPool.Allocate() + return VarLength[T]{arr: arr} + } + slc := p.slicePool.Allocate() + return VarLength[T]{slc: slc} +} + +// Reset resets the pool. +func (p *VarLengthPool[T]) Reset() { + p.arrayPool.Reset() + p.slicePool.Reset() +} + +// Append appends items to the backing slice just like the `append` builtin function in Go. +func (i VarLength[T]) Append(p *VarLengthPool[T], items ...T) VarLength[T] { + if i.slc != nil { + *i.slc = append(*i.slc, items...) 
+ return i + } + + if i.arr == nil { + i.arr = p.arrayPool.Allocate() + } + + arr := i.arr + if arr.next+len(items) <= arraySize { + for _, item := range items { + arr.arr[arr.next] = item + arr.next++ + } + } else { + slc := p.slicePool.Allocate() + // Copy the array to the slice. + for ptr := 0; ptr < arr.next; ptr++ { + *slc = append(*slc, arr.arr[ptr]) + } + i.slc = slc + *i.slc = append(*i.slc, items...) + } + return i +} + +// View returns the backing slice. +func (i VarLength[T]) View() []T { + if i.slc != nil { + return *i.slc + } else if i.arr != nil { + arr := i.arr + return arr.arr[:arr.next] + } + return nil +} + +// Cut cuts the backing slice to the given length. +// Precondition: n <= len(i.backing). +func (i VarLength[T]) Cut(n int) { + if i.slc != nil { + *i.slc = (*i.slc)[:n] + } else if i.arr != nil { + i.arr.next = n + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/ptr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/ptr.go new file mode 100644 index 000000000..f21e1a5d8 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/ptr.go @@ -0,0 +1,15 @@ +package wazevoapi + +import "unsafe" + +// PtrFromUintptr resurrects the original *T from the given uintptr. +// The caller of this function MUST be sure that ptr is valid. +func PtrFromUintptr[T any](ptr uintptr) *T { + // Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector. 
+ // + // For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr" + // subroutine wanrs as "checkptr: pointer arithmetic result points to invalid allocation" + // https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69 + var wrapped *uintptr = &ptr + return *(**T)(unsafe.Pointer(wrapped)) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/queue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/queue.go new file mode 100644 index 000000000..e3118fa69 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/queue.go @@ -0,0 +1,26 @@ +package wazevoapi + +// Queue is the resettable queue where the underlying slice is reused. +type Queue[T any] struct { + index int + Data []T +} + +func (q *Queue[T]) Enqueue(v T) { + q.Data = append(q.Data, v) +} + +func (q *Queue[T]) Dequeue() (ret T) { + ret = q.Data[q.index] + q.index++ + return +} + +func (q *Queue[T]) Empty() bool { + return q.index >= len(q.Data) +} + +func (q *Queue[T]) Reset() { + q.index = 0 + q.Data = q.Data[:0] +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go new file mode 100644 index 000000000..7177fbb4b --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go @@ -0,0 +1,13 @@ +package wazevoapi + +// ResetMap resets the map to an empty state, or creates a new map if it is nil. 
+func ResetMap[K comparable, V any](m map[K]V) map[K]V { + if m == nil { + m = make(map[K]V) + } else { + for v := range m { + delete(m, v) + } + } + return m +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/checkpoint.go b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/checkpoint.go new file mode 100644 index 000000000..fc62e83f3 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/checkpoint.go @@ -0,0 +1,10 @@ +package expctxkeys + +// EnableSnapshotterKey is a context key to indicate that snapshotting should be enabled. +// The context.Context passed to a exported function invocation should have this key set +// to a non-nil value, and host functions will be able to retrieve it using SnapshotterKey. +type EnableSnapshotterKey struct{} + +// SnapshotterKey is a context key to access a Snapshotter from a host function. +// It is only present if EnableSnapshotter was set in the function invocation context. +type SnapshotterKey struct{} diff --git a/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/close.go b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/close.go new file mode 100644 index 000000000..75e5134e5 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/close.go @@ -0,0 +1,5 @@ +package expctxkeys + +// CloseNotifierKey is a context.Context Value key. Its associated value should be a +// Notifier. +type CloseNotifierKey struct{} diff --git a/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/expctxkeys.go b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/expctxkeys.go new file mode 100644 index 000000000..6800005b9 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/expctxkeys.go @@ -0,0 +1,2 @@ +// Package expctxkeys provides keys for the context used to store the experimental APIs. 
+package expctxkeys diff --git a/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/listener.go b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/listener.go new file mode 100644 index 000000000..9565db8e9 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/listener.go @@ -0,0 +1,7 @@ +package expctxkeys + +// FunctionListenerFactoryKey is a context.Context Value key. +// Its associated value should be a FunctionListenerFactory. +// +// See https://github.com/tetratelabs/wazero/issues/451 +type FunctionListenerFactoryKey struct{} diff --git a/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/memory.go b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/memory.go new file mode 100644 index 000000000..d41c01914 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/expctxkeys/memory.go @@ -0,0 +1,4 @@ +package expctxkeys + +// MemoryAllocatorKey is a context.Context key for the experimental memory allocator. +type MemoryAllocatorKey struct{} diff --git a/vendor/github.com/tetratelabs/wazero/internal/filecache/compilationcache.go b/vendor/github.com/tetratelabs/wazero/internal/filecache/compilationcache.go new file mode 100644 index 000000000..b2dbd4650 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/filecache/compilationcache.go @@ -0,0 +1,42 @@ +package filecache + +import ( + "crypto/sha256" + "io" +) + +// Cache allows the compiler engine to skip compilation of wasm to machine code +// where doing so is redundant for the same wasm binary and version of wazero. +// +// This augments the default in-memory cache of compiled functions, by +// decoupling it from a wazero.Runtime instance. Concretely, a runtime loses +// its cache once closed. This cache allows the runtime to rebuild its +// in-memory cache quicker, significantly reducing first-hit penalty on a hit. +// +// See New for the example implementation. 
+type Cache interface { + // Get is called when the runtime is trying to get the cached compiled functions. + // Implementations are supposed to return compiled function in io.Reader with ok=true + // if the key exists on the cache. In the case of not-found, this should return + // ok=false with err=nil. content.Close() is automatically called by + // the caller of this Get. + // + // Note: the returned content won't go through the validation pass of Wasm binary + // which is applied when the binary is compiled from scratch without cache hit. + Get(key Key) (content io.ReadCloser, ok bool, err error) + // + // Add is called when the runtime is trying to add the new cache entry. + // The given `content` must be un-modified, and returned as-is in Get method. + // + // Note: the `content` is ensured to be safe through the validation phase applied on the Wasm binary. + Add(key Key, content io.Reader) (err error) + // + // Delete is called when the cache on the `key` returned by Get is no longer usable, and + // must be purged. Specifically, this is called happens when the wazero's version has been changed. + // For example, that is when there's a difference between the version of compiling wazero and the + // version of the currently used wazero. + Delete(key Key) (err error) +} + +// Key represents the 256-bit unique identifier assigned to each cache entry. +type Key = [sha256.Size]byte diff --git a/vendor/github.com/tetratelabs/wazero/internal/filecache/file_cache.go b/vendor/github.com/tetratelabs/wazero/internal/filecache/file_cache.go new file mode 100644 index 000000000..940a79a8d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/filecache/file_cache.go @@ -0,0 +1,76 @@ +package filecache + +import ( + "encoding/hex" + "errors" + "io" + "os" + "path" + "path/filepath" +) + +// New returns a new Cache implemented by fileCache. 
+func New(dir string) Cache { + return newFileCache(dir) +} + +func newFileCache(dir string) *fileCache { + return &fileCache{dirPath: dir} +} + +// fileCache persists compiled functions into dirPath. +// +// Note: this can be expanded to do binary signing/verification, set TTL on each entry, etc. +type fileCache struct { + dirPath string +} + +func (fc *fileCache) path(key Key) string { + return path.Join(fc.dirPath, hex.EncodeToString(key[:])) +} + +func (fc *fileCache) Get(key Key) (content io.ReadCloser, ok bool, err error) { + f, err := os.Open(fc.path(key)) + if errors.Is(err, os.ErrNotExist) { + return nil, false, nil + } else if err != nil { + return nil, false, err + } else { + return f, true, nil + } +} + +func (fc *fileCache) Add(key Key, content io.Reader) (err error) { + path := fc.path(key) + dirPath, fileName := filepath.Split(path) + + file, err := os.CreateTemp(dirPath, fileName+".*.tmp") + if err != nil { + return + } + defer func() { + file.Close() + if err != nil { + _ = os.Remove(file.Name()) + } + }() + if _, err = io.Copy(file, content); err != nil { + return + } + if err = file.Sync(); err != nil { + return + } + if err = file.Close(); err != nil { + return + } + err = os.Rename(file.Name(), path) + return +} + +func (fc *fileCache) Delete(key Key) (err error) { + err = os.Remove(fc.path(key)) + if errors.Is(err, os.ErrNotExist) { + err = nil + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/fsapi/file.go b/vendor/github.com/tetratelabs/wazero/internal/fsapi/file.go new file mode 100644 index 000000000..0640b2271 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/fsapi/file.go @@ -0,0 +1,69 @@ +package fsapi + +import experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + +// File includes methods not yet ready to document for end users, notably +// non-blocking functionality. +// +// Particularly, Poll is subject to debate. 
For example, whether a user should +// be able to choose how to implement timeout or not. Currently, this interface +// allows the user to choose to sleep or use native polling, and which choice +// they make impacts thread behavior as summarized here: +// https://github.com/tetratelabs/wazero/pull/1606#issuecomment-1665475516 +type File interface { + experimentalsys.File + + // IsNonblock returns true if the file was opened with O_NONBLOCK, or + // SetNonblock was successfully enabled on this file. + // + // # Notes + // + // - This might not match the underlying state of the file descriptor if + // the file was not opened via OpenFile. + IsNonblock() bool + + // SetNonblock toggles the non-blocking mode (O_NONBLOCK) of this file. + // + // # Errors + // + // A zero Errno is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - EBADF: the file or directory was closed. + // + // # Notes + // + // - This is like syscall.SetNonblock and `fcntl` with O_NONBLOCK in + // POSIX. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html + SetNonblock(enable bool) experimentalsys.Errno + + // Poll returns if the file has data ready to be read or written. + // + // # Parameters + // + // The `flag` parameter determines which event to await, such as POLLIN, + // POLLOUT, or a combination like `POLLIN|POLLOUT`. + // + // The `timeoutMillis` parameter is how long to block for an event, or + // interrupted, in milliseconds. There are two special values: + // - zero returns immediately + // - any negative value blocks any amount of time + // + // # Results + // + // `ready` means there was data ready to read or written. False can mean no + // event was ready or `errno` is not zero. + // + // A zero `errno` is success. The below are expected otherwise: + // - ENOSYS: the implementation does not support this function. + // - ENOTSUP: the implementation does not the flag combination. 
+ // - EINTR: the call was interrupted prior to an event. + // + // # Notes + // + // - This is like `poll` in POSIX, for a single file. + // See https://pubs.opengroup.org/onlinepubs/9699919799/functions/poll.html + // - No-op files, such as those which read from /dev/null, should return + // immediately true, as data will never become available. + // - See /RATIONALE.md for detailed notes including impact of blocking. + Poll(flag Pflag, timeoutMillis int32) (ready bool, errno experimentalsys.Errno) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/fsapi/poll.go b/vendor/github.com/tetratelabs/wazero/internal/fsapi/poll.go new file mode 100644 index 000000000..25f7c5711 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/fsapi/poll.go @@ -0,0 +1,20 @@ +package fsapi + +// Pflag are bit flags used for File.Poll. Values, including zero, should not +// be interpreted numerically. Instead, use by constants prefixed with 'POLL'. +// +// # Notes +// +// - This is like `pollfd.events` flags for `poll` in POSIX. See +// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/poll.h.html +type Pflag uint32 + +// Only define bitflags we support and are needed by `poll_oneoff` in wasip1 +// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#eventrwflags +const ( + // POLLIN is a read event. + POLLIN Pflag = 1 << iota + + // POLLOUT is a write event. 
+ POLLOUT +) diff --git a/vendor/github.com/tetratelabs/wazero/internal/fsapi/unimplemented.go b/vendor/github.com/tetratelabs/wazero/internal/fsapi/unimplemented.go new file mode 100644 index 000000000..99d9c2db3 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/fsapi/unimplemented.go @@ -0,0 +1,27 @@ +package fsapi + +import experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + +func Adapt(f experimentalsys.File) File { + if f, ok := f.(File); ok { + return f + } + return unimplementedFile{f} +} + +type unimplementedFile struct{ experimentalsys.File } + +// IsNonblock implements File.IsNonblock +func (unimplementedFile) IsNonblock() bool { + return false +} + +// SetNonblock implements File.SetNonblock +func (unimplementedFile) SetNonblock(bool) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Poll implements File.Poll +func (unimplementedFile) Poll(Pflag, int32) (ready bool, errno experimentalsys.Errno) { + return false, experimentalsys.ENOSYS +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/ieee754/ieee754.go b/vendor/github.com/tetratelabs/wazero/internal/ieee754/ieee754.go new file mode 100644 index 000000000..0c9298957 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/ieee754/ieee754.go @@ -0,0 +1,29 @@ +package ieee754 + +import ( + "encoding/binary" + "io" + "math" +) + +// DecodeFloat32 decodes a float32 in IEEE 754 binary representation. +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#floating-point%E2%91%A2 +func DecodeFloat32(buf []byte) (float32, error) { + if len(buf) < 4 { + return 0, io.ErrUnexpectedEOF + } + + raw := binary.LittleEndian.Uint32(buf[:4]) + return math.Float32frombits(raw), nil +} + +// DecodeFloat64 decodes a float64 in IEEE 754 binary representation. 
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#floating-point%E2%91%A2 +func DecodeFloat64(buf []byte) (float64, error) { + if len(buf) < 8 { + return 0, io.ErrUnexpectedEOF + } + + raw := binary.LittleEndian.Uint64(buf) + return math.Float64frombits(raw), nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/internalapi/internal.go b/vendor/github.com/tetratelabs/wazero/internal/internalapi/internal.go new file mode 100644 index 000000000..a4f354355 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/internalapi/internal.go @@ -0,0 +1,9 @@ +package internalapi + +type WazeroOnly interface { + wazeroOnly() +} + +type WazeroOnlyType struct{} + +func (WazeroOnlyType) wazeroOnly() {} diff --git a/vendor/github.com/tetratelabs/wazero/internal/leb128/leb128.go b/vendor/github.com/tetratelabs/wazero/internal/leb128/leb128.go new file mode 100644 index 000000000..a31051724 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/leb128/leb128.go @@ -0,0 +1,285 @@ +package leb128 + +import ( + "errors" + "fmt" + "io" +) + +const ( + maxVarintLen32 = 5 + maxVarintLen33 = maxVarintLen32 + maxVarintLen64 = 10 + + int33Mask int64 = 1 << 7 + int33Mask2 = ^int33Mask + int33Mask3 = 1 << 6 + int33Mask4 = 8589934591 // 2^33-1 + int33Mask5 = 1 << 32 + int33Mask6 = int33Mask4 + 1 // 2^33 + + int64Mask3 = 1 << 6 + int64Mask4 = ^0 +) + +var ( + errOverflow32 = errors.New("overflows a 32-bit integer") + errOverflow33 = errors.New("overflows a 33-bit integer") + errOverflow64 = errors.New("overflows a 64-bit integer") +) + +// EncodeInt32 encodes the signed value into a buffer in LEB128 format +// +// See https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer +func EncodeInt32(value int32) []byte { + return EncodeInt64(int64(value)) +} + +// EncodeInt64 encodes the signed value into a buffer in LEB128 format +// +// See https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer +func EncodeInt64(value int64) (buf []byte) { + for { + // 
Take 7 remaining low-order bits from the value into b. + b := uint8(value & 0x7f) + // Extract the sign bit. + s := uint8(value & 0x40) + value >>= 7 + + // The encoding unsigned numbers is simpler as it only needs to check if the value is non-zero to tell if there + // are more bits to encode. Signed is a little more complicated as you have to double-check the sign bit. + // If either case, set the high-order bit to tell the reader there are more bytes in this int. + if (value != -1 || s == 0) && (value != 0 || s != 0) { + b |= 0x80 + } + + // Append b into the buffer + buf = append(buf, b) + if b&0x80 == 0 { + break + } + } + return buf +} + +// EncodeUint32 encodes the value into a buffer in LEB128 format +// +// See https://en.wikipedia.org/wiki/LEB128#Encode_unsigned_integer +func EncodeUint32(value uint32) []byte { + return EncodeUint64(uint64(value)) +} + +// EncodeUint64 encodes the value into a buffer in LEB128 format +// +// See https://en.wikipedia.org/wiki/LEB128#Encode_unsigned_integer +func EncodeUint64(value uint64) (buf []byte) { + // This is effectively a do/while loop where we take 7 bits of the value and encode them until it is zero. + for { + // Take 7 remaining low-order bits from the value into b. + b := uint8(value & 0x7f) + value = value >> 7 + + // If there are remaining bits, the value won't be zero: Set the high- + // order bit to tell the reader there are more bytes in this uint. 
+ if value != 0 { + b |= 0x80 + } + + // Append b into the buffer + buf = append(buf, b) + if b&0x80 == 0 { + return buf + } + } +} + +type nextByte func(i int) (byte, error) + +func DecodeUint32(r io.ByteReader) (ret uint32, bytesRead uint64, err error) { + return decodeUint32(func(_ int) (byte, error) { return r.ReadByte() }) +} + +func LoadUint32(buf []byte) (ret uint32, bytesRead uint64, err error) { + return decodeUint32(func(i int) (byte, error) { + if i >= len(buf) { + return 0, io.EOF + } + return buf[i], nil + }) +} + +func decodeUint32(next nextByte) (ret uint32, bytesRead uint64, err error) { + // Derived from https://github.com/golang/go/blob/go1.20/src/encoding/binary/varint.go + // with the modification on the overflow handling tailored for 32-bits. + var s uint32 + for i := 0; i < maxVarintLen32; i++ { + b, err := next(i) + if err != nil { + return 0, 0, err + } + if b < 0x80 { + // Unused bits must be all zero. + if i == maxVarintLen32-1 && (b&0xf0) > 0 { + return 0, 0, errOverflow32 + } + return ret | uint32(b)<= bufLen { + return 0, 0, io.EOF + } + b := buf[i] + if b < 0x80 { + // Unused bits (non first bit) must all be zero. + if i == maxVarintLen64-1 && b > 1 { + return 0, 0, errOverflow64 + } + return ret | uint64(b)<= len(buf) { + return 0, io.EOF + } + return buf[i], nil + }) +} + +func decodeInt32(next nextByte) (ret int32, bytesRead uint64, err error) { + var shift int + var b byte + for { + b, err = next(int(bytesRead)) + if err != nil { + return 0, 0, fmt.Errorf("readByte failed: %w", err) + } + ret |= (int32(b) & 0x7f) << shift + shift += 7 + bytesRead++ + if b&0x80 == 0 { + if shift < 32 && (b&0x40) != 0 { + ret |= ^0 << shift + } + // Over flow checks. + // fixme: can be optimized. 
+ if bytesRead > maxVarintLen32 { + return 0, 0, errOverflow32 + } else if unused := b & 0b00110000; bytesRead == maxVarintLen32 && ret < 0 && unused != 0b00110000 { + return 0, 0, errOverflow32 + } else if bytesRead == maxVarintLen32 && ret >= 0 && unused != 0x00 { + return 0, 0, errOverflow32 + } + return + } + } +} + +// DecodeInt33AsInt64 is a special cased decoder for wasm.BlockType which is encoded as a positive signed integer, yet +// still needs to fit the 32-bit range of allowed indices. Hence, this is 33, not 32-bit! +// +// See https://webassembly.github.io/spec/core/binary/instructions.html#control-instructions +func DecodeInt33AsInt64(r io.ByteReader) (ret int64, bytesRead uint64, err error) { + var shift int + var b int64 + var rb byte + for shift < 35 { + rb, err = r.ReadByte() + if err != nil { + return 0, 0, fmt.Errorf("readByte failed: %w", err) + } + b = int64(rb) + ret |= (b & int33Mask2) << shift + shift += 7 + bytesRead++ + if b&int33Mask == 0 { + break + } + } + + // fixme: can be optimized + if shift < 33 && (b&int33Mask3) == int33Mask3 { + ret |= int33Mask4 << shift + } + ret = ret & int33Mask4 + + // if 33rd bit == 1, we translate it as a corresponding signed-33bit minus value + if ret&int33Mask5 > 0 { + ret = ret - int33Mask6 + } + // Over flow checks. + // fixme: can be optimized. 
+ if bytesRead > maxVarintLen33 { + return 0, 0, errOverflow33 + } else if unused := b & 0b00100000; bytesRead == maxVarintLen33 && ret < 0 && unused != 0b00100000 { + return 0, 0, errOverflow33 + } else if bytesRead == maxVarintLen33 && ret >= 0 && unused != 0x00 { + return 0, 0, errOverflow33 + } + return ret, bytesRead, nil +} + +func DecodeInt64(r io.ByteReader) (ret int64, bytesRead uint64, err error) { + return decodeInt64(func(_ int) (byte, error) { return r.ReadByte() }) +} + +func LoadInt64(buf []byte) (ret int64, bytesRead uint64, err error) { + return decodeInt64(func(i int) (byte, error) { + if i >= len(buf) { + return 0, io.EOF + } + return buf[i], nil + }) +} + +func decodeInt64(next nextByte) (ret int64, bytesRead uint64, err error) { + var shift int + var b byte + for { + b, err = next(int(bytesRead)) + if err != nil { + return 0, 0, fmt.Errorf("readByte failed: %w", err) + } + ret |= (int64(b) & 0x7f) << shift + shift += 7 + bytesRead++ + if b&0x80 == 0 { + if shift < 64 && (b&int64Mask3) == int64Mask3 { + ret |= int64Mask4 << shift + } + // Over flow checks. + // fixme: can be optimized. + if bytesRead > maxVarintLen64 { + return 0, 0, errOverflow64 + } else if unused := b & 0b00111110; bytesRead == maxVarintLen64 && ret < 0 && unused != 0b00111110 { + return 0, 0, errOverflow64 + } else if bytesRead == maxVarintLen64 && ret >= 0 && unused != 0x00 { + return 0, 0, errOverflow64 + } + return + } + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/moremath/moremath.go b/vendor/github.com/tetratelabs/wazero/internal/moremath/moremath.go new file mode 100644 index 000000000..4741f07bb --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/moremath/moremath.go @@ -0,0 +1,271 @@ +package moremath + +import ( + "math" +) + +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/syntax/values.html#floating-point +const ( + // F32CanonicalNaNBits is the 32-bit float where payload's MSB equals 1 and others are all zero. 
+ F32CanonicalNaNBits = uint32(0x7fc0_0000) + // F32CanonicalNaNBitsMask can be used to judge the value `v` is canonical nan as "v&F32CanonicalNaNBitsMask == F32CanonicalNaNBits" + F32CanonicalNaNBitsMask = uint32(0x7fff_ffff) + // F64CanonicalNaNBits is the 64-bit float where payload's MSB equals 1 and others are all zero. + F64CanonicalNaNBits = uint64(0x7ff8_0000_0000_0000) + // F64CanonicalNaNBitsMask can be used to judge the value `v` is canonical nan as "v&F64CanonicalNaNBitsMask == F64CanonicalNaNBits" + F64CanonicalNaNBitsMask = uint64(0x7fff_ffff_ffff_ffff) + // F32ArithmeticNaNPayloadMSB is used to extract the most significant bit of payload of 32-bit arithmetic NaN values + F32ArithmeticNaNPayloadMSB = uint32(0x0040_0000) + // F32ExponentMask is used to extract the exponent of 32-bit floating point. + F32ExponentMask = uint32(0x7f80_0000) + // F32ArithmeticNaNBits is an example 32-bit arithmetic NaN. + F32ArithmeticNaNBits = F32CanonicalNaNBits | 0b1 // Set first bit to make this different from the canonical NaN. + // F64ArithmeticNaNPayloadMSB is used to extract the most significant bit of payload of 64-bit arithmetic NaN values + F64ArithmeticNaNPayloadMSB = uint64(0x0008_0000_0000_0000) + // F64ExponentMask is used to extract the exponent of 64-bit floating point. + F64ExponentMask = uint64(0x7ff0_0000_0000_0000) + // F64ArithmeticNaNBits is an example 64-bit arithmetic NaN. + F64ArithmeticNaNBits = F64CanonicalNaNBits | 0b1 // Set first bit to make this different from the canonical NaN. +) + +// WasmCompatMin64 is the Wasm spec compatible variant of math.Min for 64-bit floating points. 
+func WasmCompatMin64(x, y float64) float64 { + switch { + case math.IsNaN(x) || math.IsNaN(y): + return returnF64NaNBinOp(x, y) + case math.IsInf(x, -1) || math.IsInf(y, -1): + return math.Inf(-1) + case x == 0 && x == y: + if math.Signbit(x) { + return x + } + return y + } + if x < y { + return x + } + return y +} + +// WasmCompatMin32 is the Wasm spec compatible variant of math.Min for 32-bit floating points. +func WasmCompatMin32(x, y float32) float32 { + x64, y64 := float64(x), float64(y) + switch { + case math.IsNaN(x64) || math.IsNaN(y64): + return returnF32NaNBinOp(x, y) + case math.IsInf(x64, -1) || math.IsInf(y64, -1): + return float32(math.Inf(-1)) + case x == 0 && x == y: + if math.Signbit(x64) { + return x + } + return y + } + if x < y { + return x + } + return y +} + +// WasmCompatMax64 is the Wasm spec compatible variant of math.Max for 64-bit floating points. +func WasmCompatMax64(x, y float64) float64 { + switch { + case math.IsNaN(x) || math.IsNaN(y): + return returnF64NaNBinOp(x, y) + case math.IsInf(x, 1) || math.IsInf(y, 1): + return math.Inf(1) + case x == 0 && x == y: + if math.Signbit(x) { + return y + } + return x + } + if x > y { + return x + } + return y +} + +// WasmCompatMax32 is the Wasm spec compatible variant of math.Max for 32-bit floating points. +func WasmCompatMax32(x, y float32) float32 { + x64, y64 := float64(x), float64(y) + switch { + case math.IsNaN(x64) || math.IsNaN(y64): + return returnF32NaNBinOp(x, y) + case math.IsInf(x64, 1) || math.IsInf(y64, 1): + return float32(math.Inf(1)) + case x == 0 && x == y: + if math.Signbit(x64) { + return y + } + return x + } + if x > y { + return x + } + return y +} + +// WasmCompatNearestF32 is the Wasm spec compatible variant of math.Round, used for Nearest instruction. +// For example, this converts 1.9 to 2.0, and this has the semantics of LLVM's rint intrinsic. +// +// e.g. math.Round(-4.5) results in -5 while this results in -4. 
+// +// See https://llvm.org/docs/LangRef.html#llvm-rint-intrinsic. +func WasmCompatNearestF32(f float32) float32 { + var res float32 + // TODO: look at https://github.com/bytecodealliance/wasmtime/pull/2171 and reconsider this algorithm + if f != 0 { + ceil := float32(math.Ceil(float64(f))) + floor := float32(math.Floor(float64(f))) + distToCeil := math.Abs(float64(f - ceil)) + distToFloor := math.Abs(float64(f - floor)) + h := ceil / 2.0 + if distToCeil < distToFloor { + res = ceil + } else if distToCeil == distToFloor && float32(math.Floor(float64(h))) == h { + res = ceil + } else { + res = floor + } + } else { + res = f + } + return returnF32UniOp(f, res) +} + +// WasmCompatNearestF64 is the Wasm spec compatible variant of math.Round, used for Nearest instruction. +// For example, this converts 1.9 to 2.0, and this has the semantics of LLVM's rint intrinsic. +// +// e.g. math.Round(-4.5) results in -5 while this results in -4. +// +// See https://llvm.org/docs/LangRef.html#llvm-rint-intrinsic. +func WasmCompatNearestF64(f float64) float64 { + // TODO: look at https://github.com/bytecodealliance/wasmtime/pull/2171 and reconsider this algorithm + var res float64 + if f != 0 { + ceil := math.Ceil(f) + floor := math.Floor(f) + distToCeil := math.Abs(f - ceil) + distToFloor := math.Abs(f - floor) + h := ceil / 2.0 + if distToCeil < distToFloor { + res = ceil + } else if distToCeil == distToFloor && math.Floor(h) == h { + res = ceil + } else { + res = floor + } + } else { + res = f + } + return returnF64UniOp(f, res) +} + +// WasmCompatCeilF32 is the same as math.Ceil on 32-bit except that +// the returned NaN value follows the Wasm specification on NaN +// propagation. 
+// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#nan-propagation +func WasmCompatCeilF32(f float32) float32 { + return returnF32UniOp(f, float32(math.Ceil(float64(f)))) +} + +// WasmCompatCeilF64 is the same as math.Ceil on 64-bit except that +// the returned NaN value follows the Wasm specification on NaN +// propagation. +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#nan-propagation +func WasmCompatCeilF64(f float64) float64 { + return returnF64UniOp(f, math.Ceil(f)) +} + +// WasmCompatFloorF32 is the same as math.Floor on 32-bit except that +// the returned NaN value follows the Wasm specification on NaN +// propagation. +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#nan-propagation +func WasmCompatFloorF32(f float32) float32 { + return returnF32UniOp(f, float32(math.Floor(float64(f)))) +} + +// WasmCompatFloorF64 is the same as math.Floor on 64-bit except that +// the returned NaN value follows the Wasm specification on NaN +// propagation. +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#nan-propagation +func WasmCompatFloorF64(f float64) float64 { + return returnF64UniOp(f, math.Floor(f)) +} + +// WasmCompatTruncF32 is the same as math.Trunc on 32-bit except that +// the returned NaN value follows the Wasm specification on NaN +// propagation. +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#nan-propagation +func WasmCompatTruncF32(f float32) float32 { + return returnF32UniOp(f, float32(math.Trunc(float64(f)))) +} + +// WasmCompatTruncF64 is the same as math.Trunc on 64-bit except that +// the returned NaN value follows the Wasm specification on NaN +// propagation. +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#nan-propagation +func WasmCompatTruncF64(f float64) float64 { + return returnF64UniOp(f, math.Trunc(f)) +} + +func f32IsNaN(v float32) bool { + return v != v // this is how NaN is defined. 
+} + +func f64IsNaN(v float64) bool { + return v != v // this is how NaN is defined. +} + +// returnF32UniOp returns the result of 32-bit unary operation. This accepts `original` which is the operand, +// and `result` which is its result. This returns the `result` as-is if the result is not NaN. Otherwise, this follows +// the same logic as in the reference interpreter as well as the amd64 and arm64 floating point handling. +func returnF32UniOp(original, result float32) float32 { + // Following the same logic as in the reference interpreter: + // https://github.com/WebAssembly/spec/blob/d48af683f5e6d00c13f775ab07d29a15daf92203/interpreter/exec/fxx.ml#L115-L122 + if !f32IsNaN(result) { + return result + } + if !f32IsNaN(original) { + return math.Float32frombits(F32CanonicalNaNBits) + } + return math.Float32frombits(math.Float32bits(original) | F32CanonicalNaNBits) +} + +// returnF64UniOp returns the result of 64-bit unary operation. This accepts `original` which is the operand, +// and `result` which is its result. This returns the `result` as-is if the result is not NaN. Otherwise, this follows +// the same logic as in the reference interpreter as well as the amd64 and arm64 floating point handling. +func returnF64UniOp(original, result float64) float64 { + // Following the same logic as in the reference interpreter (== amd64 and arm64's behavior): + // https://github.com/WebAssembly/spec/blob/d48af683f5e6d00c13f775ab07d29a15daf92203/interpreter/exec/fxx.ml#L115-L122 + if !f64IsNaN(result) { + return result + } + if !f64IsNaN(original) { + return math.Float64frombits(F64CanonicalNaNBits) + } + return math.Float64frombits(math.Float64bits(original) | F64CanonicalNaNBits) +} + +// returnF64NaNBinOp returns a NaN for 64-bit binary operations. `x` and `y` are original floats +// and at least one of them is NaN.
The returned NaN is guaranteed to comply with the NaN propagation +// procedure: https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#nan-propagation +func returnF64NaNBinOp(x, y float64) float64 { + if f64IsNaN(x) { + return math.Float64frombits(math.Float64bits(x) | F64CanonicalNaNBits) + } else { + return math.Float64frombits(math.Float64bits(y) | F64CanonicalNaNBits) + } +} + +// returnF32NaNBinOp returns a NaN for 32-bit binary operations. `x` and `y` are original floats +// and at least one of them is NaN. The returned NaN is guaranteed to comply with the NaN propagation +// procedure: https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#nan-propagation +func returnF32NaNBinOp(x, y float32) float32 { + if f32IsNaN(x) { + return math.Float32frombits(math.Float32bits(x) | F32CanonicalNaNBits) + } else { + return math.Float32frombits(math.Float32bits(y) | F32CanonicalNaNBits) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go new file mode 100644 index 000000000..25d7d3fdc --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go @@ -0,0 +1,25 @@ +package platform + +// CpuFeatureFlags exposes methods for querying CPU capabilities +type CpuFeatureFlags interface { + // Has returns true when the specified flag (represented as uint64) is supported + Has(cpuFeature CpuFeature) bool + // HasExtra returns true when the specified extraFlag (represented as uint64) is supported + HasExtra(cpuFeature CpuFeature) bool +} + +type CpuFeature uint64 + +const ( + // CpuFeatureAmd64SSE3 is the flag to query CpuFeatureFlags.Has for SSEv3 capabilities on amd64 + CpuFeatureAmd64SSE3 CpuFeature = 1 + // CpuFeatureAmd64SSE4_1 is the flag to query CpuFeatureFlags.Has for SSEv4.1 capabilities on amd64 + CpuFeatureAmd64SSE4_1 CpuFeature = 1 << 19 + // CpuFeatureAmd64SSE4_2 is the flag to query CpuFeatureFlags.Has for SSEv4.2
 capabilities on amd64 + CpuFeatureAmd64SSE4_2 CpuFeature = 1 << 20 +) + +const ( + // CpuExtraFeatureAmd64ABM is the flag to query CpuFeatureFlags.HasExtra for Advanced Bit Manipulation capabilities (e.g. LZCNT) on amd64 + CpuExtraFeatureAmd64ABM CpuFeature = 1 << 5 +) diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go new file mode 100644 index 000000000..8c9f1a9f3 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go @@ -0,0 +1,59 @@ +//go:build amd64 && !tinygo + +package platform + +// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods +var CpuFeatures CpuFeatureFlags = loadCpuFeatureFlags() + +// cpuFeatureFlags implements CpuFeatureFlags interface +type cpuFeatureFlags struct { + flags uint64 + extraFlags uint64 +} + +// cpuid exposes the CPUID instruction to the Go layer (https://www.amd.com/system/files/TechDocs/25481.pdf) +// implemented in cpuid_amd64.s +func cpuid(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32) + +// cpuidAsBitmap combines the result of invoking cpuid to uint64 bitmap +func cpuidAsBitmap(arg1, arg2 uint32) uint64 { + _ /* eax */, _ /* ebx */, ecx, edx := cpuid(arg1, arg2) + return (uint64(edx) << 32) | uint64(ecx) +} + +// loadStandardRange loads flags from the standard range, panics otherwise +func loadStandardRange(id uint32) uint64 { + // ensure that the id is in the valid range, returned by cpuid(0,0) + maxRange, _, _, _ := cpuid(0, 0) + if id > maxRange { + panic("cannot query standard CPU flags") + } + return cpuidAsBitmap(id, 0) +} + +// loadExtendedRange loads flags from the extended range, panics otherwise +func loadExtendedRange(id uint32) uint64 { + // ensure that the id is in the valid range, returned by cpuid(0x80000000,0) + maxRange, _, _, _ := cpuid(0x80000000, 0) + if id > maxRange { + panic("cannot query extended CPU flags") + } + return
cpuidAsBitmap(id, 0) +} + +func loadCpuFeatureFlags() CpuFeatureFlags { + return &cpuFeatureFlags{ + flags: loadStandardRange(1), + extraFlags: loadExtendedRange(0x80000001), + } +} + +// Has implements the same method on the CpuFeatureFlags interface +func (f *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool { + return (f.flags & uint64(cpuFeature)) != 0 +} + +// HasExtra implements the same method on the CpuFeatureFlags interface +func (f *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool { + return (f.extraFlags & uint64(cpuFeature)) != 0 +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.s b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.s new file mode 100644 index 000000000..8d483f3a6 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.s @@ -0,0 +1,14 @@ +#include "textflag.h" + +// lifted from github.com/intel-go/cpuid and src/internal/cpu/cpu_x86.s +// func cpuid(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuid(SB), NOSPLIT, $0-24 + MOVL arg1+0(FP), AX + MOVL arg2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go new file mode 100644 index 000000000..8ae826d36 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go @@ -0,0 +1,14 @@ +//go:build !amd64 || tinygo + +package platform + +var CpuFeatures CpuFeatureFlags = &cpuFeatureFlags{} + +// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms +type cpuFeatureFlags struct{} + +// Has implements the same method on the CpuFeatureFlags interface +func (c *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool { return false } + +// HasExtra implements the same method on the CpuFeatureFlags interface +func (c *cpuFeatureFlags) 
HasExtra(cpuFeature CpuFeature) bool { return false } diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/crypto.go b/vendor/github.com/tetratelabs/wazero/internal/platform/crypto.go new file mode 100644 index 000000000..c141f00f0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/crypto.go @@ -0,0 +1,17 @@ +package platform + +import ( + "io" + "math/rand" +) + +// seed is a fixed seed value for NewFakeRandSource. +// +// Trivia: While arbitrary, 42 was chosen as it is the "Ultimate Answer" in +// the Douglas Adams novel "The Hitchhiker's Guide to the Galaxy." +const seed = int64(42) + +// NewFakeRandSource returns a deterministic source of random values. +func NewFakeRandSource() io.Reader { + return rand.New(rand.NewSource(seed)) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_linux.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_linux.go new file mode 100644 index 000000000..55906e827 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_linux.go @@ -0,0 +1,76 @@ +package platform + +import ( + "math/bits" + "os" + "sort" + "strconv" + "strings" + "syscall" +) + +const ( + // https://man7.org/linux/man-pages/man2/mmap.2.html + __MAP_HUGE_SHIFT = 26 + __MAP_HUGETLB = 0x40000 +) + +var hugePagesConfigs []hugePagesConfig + +type hugePagesConfig struct { + size int + flag int +} + +func (hpc *hugePagesConfig) match(size int) bool { + return (size & (hpc.size - 1)) == 0 +} + +func init() { + dirents, err := os.ReadDir("/sys/kernel/mm/hugepages/") + if err != nil { + return + } + + for _, dirent := range dirents { + name := dirent.Name() + if !strings.HasPrefix(name, "hugepages-") { + continue + } + if !strings.HasSuffix(name, "kB") { + continue + } + n, err := strconv.ParseUint(name[10:len(name)-2], 10, 64) + if err != nil { + continue + } + if bits.OnesCount64(n) != 1 { + continue + } + n *= 1024 + hugePagesConfigs = append(hugePagesConfigs, 
hugePagesConfig{ + size: int(n), + flag: int(bits.TrailingZeros64(n)<<__MAP_HUGE_SHIFT) | __MAP_HUGETLB, + }) + } + + sort.Slice(hugePagesConfigs, func(i, j int) bool { + return hugePagesConfigs[i].size > hugePagesConfigs[j].size + }) +} + +func mmapCodeSegment(size, prot int) ([]byte, error) { + flags := syscall.MAP_ANON | syscall.MAP_PRIVATE + + for _, hugePagesConfig := range hugePagesConfigs { + if hugePagesConfig.match(size) { + b, err := syscall.Mmap(-1, 0, size, prot, flags|hugePagesConfig.flag) + if err != nil { + continue + } + return b, nil + } + } + + return syscall.Mmap(-1, 0, size, prot, flags) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_other.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_other.go new file mode 100644 index 000000000..ed5c40a4d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_other.go @@ -0,0 +1,18 @@ +// Separated from linux which has support for huge pages. +//go:build darwin || freebsd + +package platform + +import "syscall" + +func mmapCodeSegment(size, prot int) ([]byte, error) { + return syscall.Mmap( + -1, + 0, + size, + prot, + // Anonymous as this is not an actual file, but a memory, + // Private as this is in-process memory region. 
+ syscall.MAP_ANON|syscall.MAP_PRIVATE, + ) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go new file mode 100644 index 000000000..a61996d58 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go @@ -0,0 +1,49 @@ +//go:build (darwin || linux || freebsd) && !tinygo + +package platform + +import ( + "syscall" + "unsafe" +) + +const ( + mmapProtAMD64 = syscall.PROT_READ | syscall.PROT_WRITE | syscall.PROT_EXEC + mmapProtARM64 = syscall.PROT_READ | syscall.PROT_WRITE +) + +const MmapSupported = true + +func munmapCodeSegment(code []byte) error { + return syscall.Munmap(code) +} + +// mmapCodeSegmentAMD64 gives all read-write-exec permission to the mmap region +// to enter the function. Otherwise, segmentation fault exception is raised. +func mmapCodeSegmentAMD64(size int) ([]byte, error) { + // The region must be RWX: RW for writing native codes, X for executing the region. + return mmapCodeSegment(size, mmapProtAMD64) +} + +// mmapCodeSegmentARM64 cannot give all read-write-exec permission to the mmap region. +// Otherwise, the mmap systemcall would raise an error. Here we give read-write +// to the region so that we can write contents at call-sites. Callers are responsible to +// execute MprotectRX on the returned buffer. +func mmapCodeSegmentARM64(size int) ([]byte, error) { + // The region must be RW: RW for writing native codes. + return mmapCodeSegment(size, mmapProtARM64) +} + +// MprotectRX is like syscall.Mprotect with RX permission, defined locally so that freebsd compiles. 
+func MprotectRX(b []byte) (err error) { + var _p0 unsafe.Pointer + if len(b) > 0 { + _p0 = unsafe.Pointer(&b[0]) + } + const prot = syscall.PROT_READ | syscall.PROT_EXEC + _, _, e1 := syscall.Syscall(syscall.SYS_MPROTECT, uintptr(_p0), uintptr(len(b)), uintptr(prot)) + if e1 != 0 { + err = syscall.Errno(e1) + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go new file mode 100644 index 000000000..27833db37 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go @@ -0,0 +1,28 @@ +//go:build !(darwin || linux || freebsd || windows) || tinygo + +package platform + +import ( + "fmt" + "runtime" +) + +var errUnsupported = fmt.Errorf("mmap unsupported on GOOS=%s. Use interpreter instead.", runtime.GOOS) + +const MmapSupported = false + +func munmapCodeSegment(code []byte) error { + panic(errUnsupported) +} + +func mmapCodeSegmentAMD64(size int) ([]byte, error) { + panic(errUnsupported) +} + +func mmapCodeSegmentARM64(size int) ([]byte, error) { + panic(errUnsupported) +} + +func MprotectRX(b []byte) (err error) { + panic(errUnsupported) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go new file mode 100644 index 000000000..69fcb6d6b --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go @@ -0,0 +1,97 @@ +package platform + +import ( + "fmt" + "syscall" + "unsafe" +) + +var ( + kernel32 = syscall.NewLazyDLL("kernel32.dll") + procVirtualAlloc = kernel32.NewProc("VirtualAlloc") + procVirtualProtect = kernel32.NewProc("VirtualProtect") + procVirtualFree = kernel32.NewProc("VirtualFree") +) + +const ( + windows_MEM_COMMIT uintptr = 0x00001000 + windows_MEM_RELEASE uintptr = 0x00008000 + windows_PAGE_READWRITE uintptr = 0x00000004 + windows_PAGE_EXECUTE_READ uintptr = 
0x00000020 + windows_PAGE_EXECUTE_READWRITE uintptr = 0x00000040 +) + +const MmapSupported = true + +func munmapCodeSegment(code []byte) error { + return freeMemory(code) +} + +// allocateMemory commits the memory region via the "VirtualAlloc" function. +// See https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualalloc +func allocateMemory(size uintptr, protect uintptr) (uintptr, error) { + address := uintptr(0) // system determines where to allocate the region. + alloctype := windows_MEM_COMMIT + if r, _, err := procVirtualAlloc.Call(address, size, alloctype, protect); r == 0 { + return 0, fmt.Errorf("compiler: VirtualAlloc error: %w", ensureErr(err)) + } else { + return r, nil + } +} + +// freeMemory releases the memory region via the "VirtualFree" function. +// See https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualfree +func freeMemory(code []byte) error { + address := unsafe.Pointer(&code[0]) + size := uintptr(0) // size must be 0 because we're using MEM_RELEASE. 
+ freetype := windows_MEM_RELEASE + if r, _, err := procVirtualFree.Call(uintptr(address), size, freetype); r == 0 { + return fmt.Errorf("compiler: VirtualFree error: %w", ensureErr(err)) + } + return nil +} + +func virtualProtect(address, size, newprotect uintptr, oldprotect *uint32) error { + if r, _, err := procVirtualProtect.Call(address, size, newprotect, uintptr(unsafe.Pointer(oldprotect))); r == 0 { + return fmt.Errorf("compiler: VirtualProtect error: %w", ensureErr(err)) + } + return nil +} + +func mmapCodeSegmentAMD64(size int) ([]byte, error) { + p, err := allocateMemory(uintptr(size), windows_PAGE_EXECUTE_READWRITE) + if err != nil { + return nil, err + } + + return unsafe.Slice((*byte)(unsafe.Pointer(p)), size), nil +} + +func mmapCodeSegmentARM64(size int) ([]byte, error) { + p, err := allocateMemory(uintptr(size), windows_PAGE_READWRITE) + if err != nil { + return nil, err + } + + return unsafe.Slice((*byte)(unsafe.Pointer(p)), size), nil +} + +var old = uint32(windows_PAGE_READWRITE) + +func MprotectRX(b []byte) (err error) { + err = virtualProtect(uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), windows_PAGE_EXECUTE_READ, &old) + return +} + +// ensureErr returns syscall.EINVAL when the input error is nil. +// +// We are supposed to use "GetLastError" which is more precise, but it is not safe to execute in goroutines. While +// "GetLastError" is thread-local, goroutines are not pinned to threads. 
+// +// See https://docs.microsoft.com/en-us/windows/win32/api/errhandlingapi/nf-errhandlingapi-getlasterror +func ensureErr(err error) error { + if err != nil { + return err + } + return syscall.EINVAL +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_other.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_other.go new file mode 100644 index 000000000..5cba99fb2 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_other.go @@ -0,0 +1,23 @@ +//go:build !(darwin || linux || freebsd) || tinygo + +package platform + +func remapCodeSegmentAMD64(code []byte, size int) ([]byte, error) { + b, err := mmapCodeSegmentAMD64(size) + if err != nil { + return nil, err + } + copy(b, code) + mustMunmapCodeSegment(code) + return b, nil +} + +func remapCodeSegmentARM64(code []byte, size int) ([]byte, error) { + b, err := mmapCodeSegmentARM64(size) + if err != nil { + return nil, err + } + copy(b, code) + mustMunmapCodeSegment(code) + return b, nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_unix.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_unix.go new file mode 100644 index 000000000..8f42d44fd --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mremap_unix.go @@ -0,0 +1,21 @@ +//go:build (darwin || linux || freebsd) && !tinygo + +package platform + +func remapCodeSegmentAMD64(code []byte, size int) ([]byte, error) { + return remapCodeSegment(code, size, mmapProtAMD64) +} + +func remapCodeSegmentARM64(code []byte, size int) ([]byte, error) { + return remapCodeSegment(code, size, mmapProtARM64) +} + +func remapCodeSegment(code []byte, size, prot int) ([]byte, error) { + b, err := mmapCodeSegment(size, prot) + if err != nil { + return nil, err + } + copy(b, code) + mustMunmapCodeSegment(code) + return b, nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/path.go 
b/vendor/github.com/tetratelabs/wazero/internal/platform/path.go new file mode 100644 index 000000000..361049ae2 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/path.go @@ -0,0 +1,6 @@ +//go:build !windows + +package platform + +// ToPosixPath returns the input, as only windows might return backslashes. +func ToPosixPath(in string) string { return in } diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/path_windows.go b/vendor/github.com/tetratelabs/wazero/internal/platform/path_windows.go new file mode 100644 index 000000000..77c4187d9 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/path_windows.go @@ -0,0 +1,17 @@ +package platform + +import "strings" + +// ToPosixPath returns the input, converting any backslashes to forward ones. +func ToPosixPath(in string) string { + // strings.Map only allocates on change, which is good enough especially as + // path.Join uses forward slash even on windows. + return strings.Map(windowsToPosixSeparator, in) +} + +func windowsToPosixSeparator(r rune) rune { + if r == '\\' { + return '/' + } + return r +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/platform.go b/vendor/github.com/tetratelabs/wazero/internal/platform/platform.go new file mode 100644 index 000000000..c6dc0f857 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/platform.go @@ -0,0 +1,81 @@ +// Package platform includes runtime-specific code needed for the compiler or otherwise. +// +// Note: This is a dependency-free alternative to depending on parts of Go's x/sys. +// See /RATIONALE.md for more context. +package platform + +import ( + "runtime" +) + +// archRequirementsVerified is set by platform-specific init to true if the platform is supported +var archRequirementsVerified bool + +// CompilerSupported is exported for tests and includes constraints here and also the assembler. 
+func CompilerSupported() bool { + switch runtime.GOOS { + case "darwin", "windows", "linux", "freebsd": + default: + return false + } + + return archRequirementsVerified +} + +// MmapCodeSegment copies the code into the executable region and returns the byte slice of the region. +// +// See https://man7.org/linux/man-pages/man2/mmap.2.html for mmap API and flags. +func MmapCodeSegment(size int) ([]byte, error) { + if size == 0 { + panic("BUG: MmapCodeSegment with zero length") + } + if runtime.GOARCH == "amd64" { + return mmapCodeSegmentAMD64(size) + } else { + return mmapCodeSegmentARM64(size) + } +} + +// RemapCodeSegment reallocates the memory mapping of an existing code segment +// to increase its size. The previous code mapping is unmapped and must not be +// reused after the function returns. +// +// This is similar to mremap(2) on linux, and emulated on platforms which do not +// have this syscall. +// +// See https://man7.org/linux/man-pages/man2/mremap.2.html +func RemapCodeSegment(code []byte, size int) ([]byte, error) { + if size < len(code) { + panic("BUG: RemapCodeSegment with size less than code") + } + if code == nil { + return MmapCodeSegment(size) + } + if runtime.GOARCH == "amd64" { + return remapCodeSegmentAMD64(code, size) + } else { + return remapCodeSegmentARM64(code, size) + } +} + +// MunmapCodeSegment unmaps the given memory region. +func MunmapCodeSegment(code []byte) error { + if len(code) == 0 { + panic("BUG: MunmapCodeSegment with zero length") + } + return munmapCodeSegment(code) +} + +// mustMunmapCodeSegment panics instead of returning an error to the +// application. +// +// # Why panic? +// +// It is less disruptive to the application to leak the previous block if it +// could be unmapped than to leak the new block and return an error. +// Realistically, either scenarios are pretty hard to debug, so we panic. 
+func mustMunmapCodeSegment(code []byte) { + if err := munmapCodeSegment(code); err != nil { + panic(err) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/platform_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/platform/platform_amd64.go new file mode 100644 index 000000000..59aaf5eae --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/platform_amd64.go @@ -0,0 +1,7 @@ +package platform + +// init verifies that the current CPU supports the required AMD64 instructions +func init() { + // Ensure SSE4.1 is supported. + archRequirementsVerified = CpuFeatures.Has(CpuFeatureAmd64SSE4_1) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/platform_arm64.go b/vendor/github.com/tetratelabs/wazero/internal/platform/platform_arm64.go new file mode 100644 index 000000000..caac58a3d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/platform_arm64.go @@ -0,0 +1,7 @@ +package platform + +// init verifies that the current CPU supports the required ARM64 features +func init() { + // No further checks currently needed. + archRequirementsVerified = true +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/time.go b/vendor/github.com/tetratelabs/wazero/internal/platform/time.go new file mode 100644 index 000000000..fa9da1acb --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/time.go @@ -0,0 +1,76 @@ +package platform + +import ( + "sync/atomic" + "time" + + "github.com/tetratelabs/wazero/sys" +) + +const ( + ms = int64(time.Millisecond) + // FakeEpochNanos is midnight UTC 2022-01-01 and exposed for testing + FakeEpochNanos = 1640995200000 * ms +) + +// NewFakeWalltime implements sys.Walltime with FakeEpochNanos that increases by 1ms each reading. +// See /RATIONALE.md +func NewFakeWalltime() sys.Walltime { + // AddInt64 returns the new value. 
 Adjust so the first reading will be FakeEpochNanos + t := FakeEpochNanos - ms + return func() (sec int64, nsec int32) { + wt := atomic.AddInt64(&t, ms) + return wt / 1e9, int32(wt % 1e9) + } +} + +// NewFakeNanotime implements sys.Nanotime that increases by 1ms each reading. +// See /RATIONALE.md +func NewFakeNanotime() sys.Nanotime { + // AddInt64 returns the new value. Adjust so the first reading will be zero. + t := int64(0) - ms + return func() int64 { + return atomic.AddInt64(&t, ms) + } +} + +// FakeNanosleep implements sys.Nanosleep by returning without sleeping. +var FakeNanosleep = sys.Nanosleep(func(int64) {}) + +// FakeOsyield implements sys.Osyield by returning without yielding. +var FakeOsyield = sys.Osyield(func() {}) + +// Walltime implements sys.Walltime with time.Now. +// +// Note: The only way this could be notably more efficient is by reading +// runtime.walltime() directly. time.Now defensively reads nanotime also, just in case +// time.Since is used. This doubles the performance impact. However, wall time +// is likely to be read less frequently than Nanotime. Also, doubling the cost +// matters less on fast platforms that can return both in <=100ns. +func Walltime() (sec int64, nsec int32) { + t := time.Now() + return t.Unix(), int32(t.Nanosecond()) +} + +// nanoBase uses time.Now to ensure a monotonic clock reading on all platforms +// via time.Since. +var nanoBase = time.Now() + +// nanotimePortable implements sys.Nanotime with time.Since. +// +// Note: This is less efficient than reading runtime.nanotime() directly, +// but doing that requires CGO. +func nanotimePortable() int64 { + return time.Since(nanoBase).Nanoseconds() +} + +// Nanotime implements sys.Nanotime with runtime.nanotime() if CGO is available +// and time.Since if not. +func Nanotime() int64 { + return nanotime() +} + +// Nanosleep implements sys.Nanosleep with time.Sleep.
+func Nanosleep(ns int64) { + time.Sleep(time.Duration(ns)) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/time_cgo.go b/vendor/github.com/tetratelabs/wazero/internal/platform/time_cgo.go new file mode 100644 index 000000000..ff01d90ce --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/time_cgo.go @@ -0,0 +1,11 @@ +//go:build cgo && !windows + +package platform + +import _ "unsafe" // for go:linkname + +// nanotime uses runtime.nanotime as it is available on all platforms and +// benchmarks faster than using time.Since. +// +//go:linkname nanotime runtime.nanotime +func nanotime() int64 diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/time_notcgo.go b/vendor/github.com/tetratelabs/wazero/internal/platform/time_notcgo.go new file mode 100644 index 000000000..0697b7c70 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/time_notcgo.go @@ -0,0 +1,7 @@ +//go:build !cgo && !windows + +package platform + +func nanotime() int64 { + return nanotimePortable() +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/time_windows.go b/vendor/github.com/tetratelabs/wazero/internal/platform/time_windows.go new file mode 100644 index 000000000..58731fc8e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/time_windows.go @@ -0,0 +1,40 @@ +//go:build windows + +package platform + +import ( + "math/bits" + "time" + "unsafe" +) + +var ( + _QueryPerformanceCounter = kernel32.NewProc("QueryPerformanceCounter") + _QueryPerformanceFrequency = kernel32.NewProc("QueryPerformanceFrequency") +) + +var qpcfreq uint64 + +func init() { + _, _, _ = _QueryPerformanceFrequency.Call(uintptr(unsafe.Pointer(&qpcfreq))) +} + +// On Windows, time.Time handled in time package cannot have the nanosecond precision. 
+// The reason is that by default, it doesn't use QueryPerformanceCounter[1], but instead, use "interrupt time" +// which doesn't support nanoseconds precision (though it is a monotonic) [2, 3, 4, 5]. +// +// [1] https://learn.microsoft.com/en-us/windows/win32/api/profileapi/nf-profileapi-queryperformancecounter +// [2] https://github.com/golang/go/blob/0cd309e12818f988693bf8e4d9f1453331dcf9f2/src/runtime/sys_windows_amd64.s#L297-L298 +// [3] https://github.com/golang/go/blob/0cd309e12818f988693bf8e4d9f1453331dcf9f2/src/runtime/os_windows.go#L549-L551 +// [4] https://github.com/golang/go/blob/master/src/runtime/time_windows.h#L7-L13 +// [5] http://web.archive.org/web/20210411000829/https://wrkhpi.wordpress.com/2007/08/09/getting-os-information-the-kuser_shared_data-structure/ +// +// Therefore, on Windows, we directly invoke the syscall for QPC instead of time.Now or runtime.nanotime. +// See https://github.com/golang/go/issues/31160 for example. +func nanotime() int64 { + var counter uint64 + _, _, _ = _QueryPerformanceCounter.Call(uintptr(unsafe.Pointer(&counter))) + hi, lo := bits.Mul64(counter, uint64(time.Second)) + nanos, _ := bits.Div64(hi, lo, qpcfreq) + return int64(nanos) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sock/sock.go b/vendor/github.com/tetratelabs/wazero/internal/sock/sock.go new file mode 100644 index 000000000..ca17aa39e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sock/sock.go @@ -0,0 +1,89 @@ +package sock + +import ( + "fmt" + "net" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +// TCPSock is a pseudo-file representing a TCP socket. +type TCPSock interface { + sys.File + + Accept() (TCPConn, sys.Errno) +} + +// TCPConn is a pseudo-file representing a TCP connection. 
+type TCPConn interface { + sys.File + + // Recvfrom only supports the flag sysfs.MSG_PEEK + // TODO: document this like sys.File with known sys.Errno + Recvfrom(p []byte, flags int) (n int, errno sys.Errno) + + // TODO: document this like sys.File with known sys.Errno + Shutdown(how int) sys.Errno +} + +// ConfigKey is a context.Context Value key. Its associated value should be a Config. +type ConfigKey struct{} + +// Config is an internal struct meant to implement +// the interface in experimental/sock/Config. +type Config struct { + // TCPAddresses is a slice of the configured host:port pairs. + TCPAddresses []TCPAddress +} + +// TCPAddress is a host:port pair to pre-open. +type TCPAddress struct { + // Host is the host name for this listener. + Host string + // Port is the port number for this listener. + Port int +} + +// WithTCPListener implements the method of the same name in experimental/sock/Config. +// +// However, to avoid cyclic dependencies, this is returning the *Config in this scope. +// The interface is implemented in experimental/sock/Config via delegation. +func (c *Config) WithTCPListener(host string, port int) *Config { + ret := c.clone() + ret.TCPAddresses = append(ret.TCPAddresses, TCPAddress{host, port}) + return &ret +} + +// Makes a deep copy of this sockConfig. +func (c *Config) clone() Config { + ret := *c + ret.TCPAddresses = make([]TCPAddress, 0, len(c.TCPAddresses)) + ret.TCPAddresses = append(ret.TCPAddresses, c.TCPAddresses...) + return ret +} + +// BuildTCPListeners build listeners from the current configuration. +func (c *Config) BuildTCPListeners() (tcpListeners []*net.TCPListener, err error) { + for _, tcpAddr := range c.TCPAddresses { + var ln net.Listener + ln, err = net.Listen("tcp", tcpAddr.String()) + if err != nil { + break + } + if tcpln, ok := ln.(*net.TCPListener); ok { + tcpListeners = append(tcpListeners, tcpln) + } + } + if err != nil { + // An error occurred, cleanup. 
+ for _, l := range tcpListeners { + _ = l.Close() // Ignore errors, we are already cleaning. + } + tcpListeners = nil + } + return +} + +func (t TCPAddress) String() string { + return fmt.Sprintf("%s:%d", t.Host, t.Port) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sock/sock_supported.go b/vendor/github.com/tetratelabs/wazero/internal/sock/sock_supported.go new file mode 100644 index 000000000..e317be832 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sock/sock_supported.go @@ -0,0 +1,11 @@ +//go:build !plan9 && !js && !tinygo + +package sock + +import "syscall" + +const ( + SHUT_RD = syscall.SHUT_RD + SHUT_RDWR = syscall.SHUT_RDWR + SHUT_WR = syscall.SHUT_WR +) diff --git a/vendor/github.com/tetratelabs/wazero/internal/sock/sock_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/sock/sock_unsupported.go new file mode 100644 index 000000000..77026754f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sock/sock_unsupported.go @@ -0,0 +1,10 @@ +//go:build plan9 || js || tinygo + +package sock + +// plan9/js doesn't declare these constants +const ( + SHUT_RD = 1 << iota + SHUT_WR + SHUT_RDWR = SHUT_RD | SHUT_WR +) diff --git a/vendor/github.com/tetratelabs/wazero/internal/sys/fs.go b/vendor/github.com/tetratelabs/wazero/internal/sys/fs.go new file mode 100644 index 000000000..157de788f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sys/fs.go @@ -0,0 +1,457 @@ +package sys + +import ( + "io" + "io/fs" + "net" + + "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/descriptor" + "github.com/tetratelabs/wazero/internal/fsapi" + socketapi "github.com/tetratelabs/wazero/internal/sock" + "github.com/tetratelabs/wazero/internal/sysfs" +) + +const ( + FdStdin int32 = iota + FdStdout + FdStderr + // FdPreopen is the file descriptor of the first pre-opened directory. + // + // # Why file descriptor 3? 
+ // + // While not specified, the most common WASI implementation, wasi-libc, + // expects POSIX style file descriptor allocation, where the lowest + // available number is used to open the next file. Since 1 and 2 are taken + // by stdout and stderr, the next is 3. + // - https://github.com/WebAssembly/WASI/issues/122 + // - https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_14 + // - https://github.com/WebAssembly/wasi-libc/blob/wasi-sdk-16/libc-bottom-half/sources/preopens.c#L215 + FdPreopen +) + +const modeDevice = fs.ModeDevice | 0o640 + +// FileEntry maps a path to an open file in a file system. +type FileEntry struct { + // Name is the name of the directory up to its pre-open, or the pre-open + // name itself when IsPreopen. + // + // # Notes + // + // - This can drift on rename. + // - This relates to the guest path, which is not the real file path + // except if the entire host filesystem was made available. + Name string + + // IsPreopen is a directory that is lazily opened. + IsPreopen bool + + // FS is the filesystem associated with the pre-open. + FS sys.FS + + // File is always non-nil. + File fsapi.File + + // direntCache is nil until DirentCache was called. + direntCache *DirentCache +} + +// DirentCache gets or creates a DirentCache for this file or returns an error. +// +// # Errors +// +// A zero sys.Errno is success. The below are expected otherwise: +// - sys.ENOSYS: the implementation does not support this function. +// - sys.EBADF: the dir was closed or not readable. +// - sys.ENOTDIR: the file was not a directory. +// +// # Notes +// +// - See /RATIONALE.md for design notes. +func (f *FileEntry) DirentCache() (*DirentCache, sys.Errno) { + if dir := f.direntCache; dir != nil { + return dir, 0 + } + + // Require the file to be a directory vs a late error on the same. 
+ if isDir, errno := f.File.IsDir(); errno != 0 { + return nil, errno + } else if !isDir { + return nil, sys.ENOTDIR + } + + // Generate the dotEntries only once. + if dotEntries, errno := synthesizeDotEntries(f); errno != 0 { + return nil, errno + } else { + f.direntCache = &DirentCache{f: f.File, dotEntries: dotEntries} + } + + return f.direntCache, 0 +} + +// DirentCache is a caching abstraction of sys.File Readdir. +// +// This is special-cased for "wasi_snapshot_preview1.fd_readdir", and may be +// unneeded, or require changes, to support preview1 or preview2. +// - The position of the dirents are serialized as `d_next`. For reasons +// described below, any may need to be re-read. This accepts any positions +// in the cache, rather than track the position of the last dirent. +// - dot entries ("." and "..") must be returned. See /RATIONALE.md for why. +// - An sys.Dirent Name is variable length, it could exceed memory size and +// need to be re-read. +// - Multiple dirents may be returned. It is more efficient to read from the +// underlying file in bulk vs one-at-a-time. +// +// The last results returned by Read are cached, but entries before that +// position are not. This support re-reading entries that couldn't fit into +// memory without accidentally caching all entries in a large directory. This +// approach is sometimes called a sliding window. +type DirentCache struct { + // f is the underlying file + f sys.File + + // dotEntries are the "." and ".." entries added when the directory is + // initialized. + dotEntries []sys.Dirent + + // dirents are the potentially unread directory entries. + // + // Internal detail: nil is different from zero length. Zero length is an + // exhausted directory (eof). nil means the re-read. + dirents []sys.Dirent + + // countRead is the total count of dirents read since last rewind. + countRead uint64 + + // eof is true when the underlying file is at EOF. This avoids re-reading + // the directory when it is exhausted. 
Entires in an exhausted directory + // are not visible until it is rewound via calling Read with `pos==0`. + eof bool +} + +// synthesizeDotEntries generates a slice of the two elements "." and "..". +func synthesizeDotEntries(f *FileEntry) ([]sys.Dirent, sys.Errno) { + dotIno, errno := f.File.Ino() + if errno != 0 { + return nil, errno + } + result := [2]sys.Dirent{} + result[0] = sys.Dirent{Name: ".", Ino: dotIno, Type: fs.ModeDir} + // See /RATIONALE.md for why we don't attempt to get an inode for ".." and + // why in wasi-libc this won't fan-out either. + result[1] = sys.Dirent{Name: "..", Ino: 0, Type: fs.ModeDir} + return result[:], 0 +} + +// exhaustedDirents avoids allocating empty slices. +var exhaustedDirents = [0]sys.Dirent{} + +// Read is similar to and returns the same errors as `Readdir` on sys.File. +// The main difference is this caches entries returned, resulting in multiple +// valid positions to read from. +// +// When zero, `pos` means rewind to the beginning of this directory. This +// implies a rewind (Seek to zero on the underlying sys.File), unless the +// initial entries are still cached. +// +// When non-zero, `pos` is the zero based index of all dirents returned since +// last rewind. Only entries beginning at `pos` are cached for subsequent +// calls. A non-zero `pos` before the cache returns sys.ENOENT for reasons +// described on DirentCache documentation. +// +// Up to `n` entries are cached and returned. When `n` exceeds the cache, the +// difference are read from the underlying sys.File via `Readdir`. EOF is +// when `len(dirents)` returned are less than `n`. +func (d *DirentCache) Read(pos uint64, n uint32) (dirents []sys.Dirent, errno sys.Errno) { + switch { + case pos > d.countRead: // farther than read or negative coerced to uint64. + return nil, sys.ENOENT + case pos == 0 && d.dirents != nil: + // Rewind if we have already read entries. This allows us to see new + // entries added after the directory was opened. 
+ if _, errno = d.f.Seek(0, io.SeekStart); errno != 0 { + return + } + d.dirents = nil // dump cache + d.countRead = 0 + } + + if n == 0 { + return // special case no entries. + } + + if d.dirents == nil { + // Always populate dot entries, which makes min len(dirents) == 2. + d.dirents = d.dotEntries + d.countRead = 2 + d.eof = false + + if countToRead := int(n - 2); countToRead <= 0 { + return + } else if dirents, errno = d.f.Readdir(countToRead); errno != 0 { + return + } else if countRead := len(dirents); countRead > 0 { + d.eof = countRead < countToRead + d.dirents = append(d.dotEntries, dirents...) + d.countRead += uint64(countRead) + } + + return d.cachedDirents(n), 0 + } + + // Reset our cache to the first entry being read. + cacheStart := d.countRead - uint64(len(d.dirents)) + if pos < cacheStart { + // We don't currently allow reads before our cache because Seek(0) is + // the only portable way. Doing otherwise requires skipping, which we + // won't do unless wasi-testsuite starts requiring it. Implementing + // this would allow re-reading a large directory, so care would be + // needed to not buffer the entire directory in memory while skipping. + errno = sys.ENOENT + return + } else if posInCache := pos - cacheStart; posInCache != 0 { + if uint64(len(d.dirents)) == posInCache { + // Avoid allocation re-slicing to zero length. + d.dirents = exhaustedDirents[:] + } else { + d.dirents = d.dirents[posInCache:] + } + } + + // See if we need more entries. + if countToRead := int(n) - len(d.dirents); countToRead > 0 && !d.eof { + // Try to read more, which could fail. + if dirents, errno = d.f.Readdir(countToRead); errno != 0 { + return + } + + // Append the next read entries if we weren't at EOF. + if countRead := len(dirents); countRead > 0 { + d.eof = countRead < countToRead + d.dirents = append(d.dirents, dirents...) + d.countRead += uint64(countRead) + } + } + + return d.cachedDirents(n), 0 +} + +// cachedDirents returns up to `n` dirents from the cache. 
+func (d *DirentCache) cachedDirents(n uint32) []sys.Dirent { + direntCount := uint32(len(d.dirents)) + switch { + case direntCount == 0: + return nil + case direntCount > n: + return d.dirents[:n] + } + return d.dirents +} + +type FSContext struct { + // openedFiles is a map of file descriptor numbers (>=FdPreopen) to open files + // (or directories) and defaults to empty. + // TODO: This is unguarded, so not goroutine-safe! + openedFiles FileTable +} + +// FileTable is a specialization of the descriptor.Table type used to map file +// descriptors to file entries. +type FileTable = descriptor.Table[int32, *FileEntry] + +// LookupFile returns a file if it is in the table. +func (c *FSContext) LookupFile(fd int32) (*FileEntry, bool) { + return c.openedFiles.Lookup(fd) +} + +// OpenFile opens the file into the table and returns its file descriptor. +// The result must be closed by CloseFile or Close. +func (c *FSContext) OpenFile(fs sys.FS, path string, flag sys.Oflag, perm fs.FileMode) (int32, sys.Errno) { + if f, errno := fs.OpenFile(path, flag, perm); errno != 0 { + return 0, errno + } else { + fe := &FileEntry{FS: fs, File: fsapi.Adapt(f)} + if path == "/" || path == "." { + fe.Name = "" + } else { + fe.Name = path + } + if newFD, ok := c.openedFiles.Insert(fe); !ok { + return 0, sys.EBADF + } else { + return newFD, 0 + } + } +} + +// Renumber assigns the file pointed by the descriptor `from` to `to`. +func (c *FSContext) Renumber(from, to int32) sys.Errno { + fromFile, ok := c.openedFiles.Lookup(from) + if !ok || to < 0 { + return sys.EBADF + } else if fromFile.IsPreopen { + return sys.ENOTSUP + } + + // If toFile is already open, we close it to prevent windows lock issues. + // + // The doc is unclear and other implementations do nothing for already-opened To FDs. 
+ // https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-fd_renumberfd-fd-to-fd---errno + // https://github.com/bytecodealliance/wasmtime/blob/main/crates/wasi-common/src/snapshots/preview_1.rs#L531-L546 + if toFile, ok := c.openedFiles.Lookup(to); ok { + if toFile.IsPreopen { + return sys.ENOTSUP + } + _ = toFile.File.Close() + } + + c.openedFiles.Delete(from) + if !c.openedFiles.InsertAt(fromFile, to) { + return sys.EBADF + } + return 0 +} + +// SockAccept accepts a sock.TCPConn into the file table and returns its file +// descriptor. +func (c *FSContext) SockAccept(sockFD int32, nonblock bool) (int32, sys.Errno) { + var sock socketapi.TCPSock + if e, ok := c.LookupFile(sockFD); !ok || !e.IsPreopen { + return 0, sys.EBADF // Not a preopen + } else if sock, ok = e.File.(socketapi.TCPSock); !ok { + return 0, sys.EBADF // Not a sock + } + + conn, errno := sock.Accept() + if errno != 0 { + return 0, errno + } + + fe := &FileEntry{File: fsapi.Adapt(conn)} + + if nonblock { + if errno = fe.File.SetNonblock(true); errno != 0 { + _ = conn.Close() + return 0, errno + } + } + + if newFD, ok := c.openedFiles.Insert(fe); !ok { + return 0, sys.EBADF + } else { + return newFD, 0 + } +} + +// CloseFile returns any error closing the existing file. +func (c *FSContext) CloseFile(fd int32) (errno sys.Errno) { + f, ok := c.openedFiles.Lookup(fd) + if !ok { + return sys.EBADF + } + if errno = f.File.Close(); errno != 0 { + return errno + } + c.openedFiles.Delete(fd) + return errno +} + +// Close implements io.Closer +func (c *FSContext) Close() (err error) { + // Close any files opened in this context + c.openedFiles.Range(func(fd int32, entry *FileEntry) bool { + if errno := entry.File.Close(); errno != 0 { + err = errno // This means err returned == the last non-nil error. + } + return true + }) + // A closed FSContext cannot be reused so clear the state. 
+ c.openedFiles = FileTable{} + return +} + +// InitFSContext initializes a FSContext with stdio streams and optional +// pre-opened filesystems and TCP listeners. +func (c *Context) InitFSContext( + stdin io.Reader, + stdout, stderr io.Writer, + fs []sys.FS, guestPaths []string, + tcpListeners []*net.TCPListener, +) (err error) { + inFile, err := stdinFileEntry(stdin) + if err != nil { + return err + } + c.fsc.openedFiles.Insert(inFile) + outWriter, err := stdioWriterFileEntry("stdout", stdout) + if err != nil { + return err + } + c.fsc.openedFiles.Insert(outWriter) + errWriter, err := stdioWriterFileEntry("stderr", stderr) + if err != nil { + return err + } + c.fsc.openedFiles.Insert(errWriter) + + for i, f := range fs { + guestPath := guestPaths[i] + + if StripPrefixesAndTrailingSlash(guestPath) == "" { + // Default to bind to '/' when guestPath is effectively empty. + guestPath = "/" + } + c.fsc.openedFiles.Insert(&FileEntry{ + FS: f, + Name: guestPath, + IsPreopen: true, + File: &lazyDir{fs: f}, + }) + } + + for _, tl := range tcpListeners { + c.fsc.openedFiles.Insert(&FileEntry{IsPreopen: true, File: fsapi.Adapt(sysfs.NewTCPListenerFile(tl))}) + } + return nil +} + +// StripPrefixesAndTrailingSlash skips any leading "./" or "/" such that the +// result index begins with another string. A result of "." coerces to the +// empty string "" because the current directory is handled by the guest. +// +// Results are the offset/len pair which is an optimization to avoid re-slicing +// overhead, as this function is called for every path operation. +// +// Note: Relative paths should be handled by the guest, as that's what knows +// what the current directory is. However, paths that escape the current +// directory e.g. "../.." have been found in `tinygo test` and this +// implementation takes care to avoid it. 
+func StripPrefixesAndTrailingSlash(path string) string { + // strip trailing slashes + pathLen := len(path) + for ; pathLen > 0 && path[pathLen-1] == '/'; pathLen-- { + } + + pathI := 0 +loop: + for pathI < pathLen { + switch path[pathI] { + case '/': + pathI++ + case '.': + nextI := pathI + 1 + if nextI < pathLen && path[nextI] == '/' { + pathI = nextI + 1 + } else if nextI == pathLen { + pathI = nextI + } else { + break loop + } + default: + break loop + } + } + return path[pathI:pathLen] +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sys/lazy.go b/vendor/github.com/tetratelabs/wazero/internal/sys/lazy.go new file mode 100644 index 000000000..fe233d29e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sys/lazy.go @@ -0,0 +1,151 @@ +package sys + +import ( + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" + "github.com/tetratelabs/wazero/sys" +) + +// compile-time check to ensure lazyDir implements sys.File. +var _ experimentalsys.File = (*lazyDir)(nil) + +type lazyDir struct { + experimentalsys.DirFile + + fs experimentalsys.FS + f experimentalsys.File +} + +// Dev implements the same method as documented on sys.File +func (d *lazyDir) Dev() (uint64, experimentalsys.Errno) { + if f, ok := d.file(); !ok { + return 0, experimentalsys.EBADF + } else { + return f.Dev() + } +} + +// Ino implements the same method as documented on sys.File +func (d *lazyDir) Ino() (sys.Inode, experimentalsys.Errno) { + if f, ok := d.file(); !ok { + return 0, experimentalsys.EBADF + } else { + return f.Ino() + } +} + +// IsDir implements the same method as documented on sys.File +func (d *lazyDir) IsDir() (bool, experimentalsys.Errno) { + // Note: we don't return a constant because we don't know if this is really + // backed by a dir, until the first call. 
+ if f, ok := d.file(); !ok { + return false, experimentalsys.EBADF + } else { + return f.IsDir() + } +} + +// IsAppend implements the same method as documented on sys.File +func (d *lazyDir) IsAppend() bool { + return false +} + +// SetAppend implements the same method as documented on sys.File +func (d *lazyDir) SetAppend(bool) experimentalsys.Errno { + return experimentalsys.EISDIR +} + +// Seek implements the same method as documented on sys.File +func (d *lazyDir) Seek(offset int64, whence int) (newOffset int64, errno experimentalsys.Errno) { + if f, ok := d.file(); !ok { + return 0, experimentalsys.EBADF + } else { + return f.Seek(offset, whence) + } +} + +// Stat implements the same method as documented on sys.File +func (d *lazyDir) Stat() (sys.Stat_t, experimentalsys.Errno) { + if f, ok := d.file(); !ok { + return sys.Stat_t{}, experimentalsys.EBADF + } else { + return f.Stat() + } +} + +// Readdir implements the same method as documented on sys.File +func (d *lazyDir) Readdir(n int) (dirents []experimentalsys.Dirent, errno experimentalsys.Errno) { + if f, ok := d.file(); !ok { + return nil, experimentalsys.EBADF + } else { + return f.Readdir(n) + } +} + +// Sync implements the same method as documented on sys.File +func (d *lazyDir) Sync() experimentalsys.Errno { + if f, ok := d.file(); !ok { + return experimentalsys.EBADF + } else { + return f.Sync() + } +} + +// Datasync implements the same method as documented on sys.File +func (d *lazyDir) Datasync() experimentalsys.Errno { + if f, ok := d.file(); !ok { + return experimentalsys.EBADF + } else { + return f.Datasync() + } +} + +// Utimens implements the same method as documented on sys.File +func (d *lazyDir) Utimens(atim, mtim int64) experimentalsys.Errno { + if f, ok := d.file(); !ok { + return experimentalsys.EBADF + } else { + return f.Utimens(atim, mtim) + } +} + +// file returns the underlying file or false if it doesn't exist. 
+func (d *lazyDir) file() (experimentalsys.File, bool) { + if f := d.f; d.f != nil { + return f, true + } + var errno experimentalsys.Errno + d.f, errno = d.fs.OpenFile(".", experimentalsys.O_RDONLY, 0) + switch errno { + case 0: + return d.f, true + case experimentalsys.ENOENT: + return nil, false + default: + panic(errno) // unexpected + } +} + +// Close implements fs.File +func (d *lazyDir) Close() experimentalsys.Errno { + f := d.f + if f == nil { + return 0 // never opened + } + return f.Close() +} + +// IsNonblock implements the same method as documented on fsapi.File +func (d *lazyDir) IsNonblock() bool { + return false +} + +// SetNonblock implements the same method as documented on fsapi.File +func (d *lazyDir) SetNonblock(bool) experimentalsys.Errno { + return experimentalsys.EISDIR +} + +// Poll implements the same method as documented on fsapi.File +func (d *lazyDir) Poll(fsapi.Pflag, int32) (ready bool, errno experimentalsys.Errno) { + return false, experimentalsys.ENOSYS +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sys/stdio.go b/vendor/github.com/tetratelabs/wazero/internal/sys/stdio.go new file mode 100644 index 000000000..32c33661e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sys/stdio.go @@ -0,0 +1,128 @@ +package sys + +import ( + "io" + "os" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" + "github.com/tetratelabs/wazero/internal/sysfs" + "github.com/tetratelabs/wazero/sys" +) + +// StdinFile is a fs.ModeDevice file for use implementing FdStdin. +// This is safer than reading from os.DevNull as it can never overrun +// operating system file descriptors. 
+type StdinFile struct { + noopStdinFile + io.Reader +} + +// Read implements the same method as documented on sys.File +func (f *StdinFile) Read(buf []byte) (int, experimentalsys.Errno) { + n, err := f.Reader.Read(buf) + return n, experimentalsys.UnwrapOSError(err) +} + +type writerFile struct { + noopStdoutFile + + w io.Writer +} + +// Write implements the same method as documented on sys.File +func (f *writerFile) Write(buf []byte) (int, experimentalsys.Errno) { + n, err := f.w.Write(buf) + return n, experimentalsys.UnwrapOSError(err) +} + +// noopStdinFile is a fs.ModeDevice file for use implementing FdStdin. This is +// safer than reading from os.DevNull as it can never overrun operating system +// file descriptors. +type noopStdinFile struct { + noopStdioFile +} + +// Read implements the same method as documented on sys.File +func (noopStdinFile) Read([]byte) (int, experimentalsys.Errno) { + return 0, 0 // Always EOF +} + +// Poll implements the same method as documented on fsapi.File +func (noopStdinFile) Poll(flag fsapi.Pflag, timeoutMillis int32) (ready bool, errno experimentalsys.Errno) { + if flag != fsapi.POLLIN { + return false, experimentalsys.ENOTSUP + } + return true, 0 // always ready to read nothing +} + +// noopStdoutFile is a fs.ModeDevice file for use implementing FdStdout and +// FdStderr. 
+type noopStdoutFile struct { + noopStdioFile +} + +// Write implements the same method as documented on sys.File +func (noopStdoutFile) Write(buf []byte) (int, experimentalsys.Errno) { + return len(buf), 0 // same as io.Discard +} + +type noopStdioFile struct { + experimentalsys.UnimplementedFile +} + +// Stat implements the same method as documented on sys.File +func (noopStdioFile) Stat() (sys.Stat_t, experimentalsys.Errno) { + return sys.Stat_t{Mode: modeDevice, Nlink: 1}, 0 +} + +// IsDir implements the same method as documented on sys.File +func (noopStdioFile) IsDir() (bool, experimentalsys.Errno) { + return false, 0 +} + +// Close implements the same method as documented on sys.File +func (noopStdioFile) Close() (errno experimentalsys.Errno) { return } + +// IsNonblock implements the same method as documented on fsapi.File +func (noopStdioFile) IsNonblock() bool { + return false +} + +// SetNonblock implements the same method as documented on fsapi.File +func (noopStdioFile) SetNonblock(bool) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Poll implements the same method as documented on fsapi.File +func (noopStdioFile) Poll(fsapi.Pflag, int32) (ready bool, errno experimentalsys.Errno) { + return false, experimentalsys.ENOSYS +} + +func stdinFileEntry(r io.Reader) (*FileEntry, error) { + if r == nil { + return &FileEntry{Name: "stdin", IsPreopen: true, File: &noopStdinFile{}}, nil + } else if f, ok := r.(*os.File); ok { + if f, err := sysfs.NewStdioFile(true, f); err != nil { + return nil, err + } else { + return &FileEntry{Name: "stdin", IsPreopen: true, File: f}, nil + } + } else { + return &FileEntry{Name: "stdin", IsPreopen: true, File: &StdinFile{Reader: r}}, nil + } +} + +func stdioWriterFileEntry(name string, w io.Writer) (*FileEntry, error) { + if w == nil { + return &FileEntry{Name: name, IsPreopen: true, File: &noopStdoutFile{}}, nil + } else if f, ok := w.(*os.File); ok { + if f, err := sysfs.NewStdioFile(false, f); err != nil { 
+ return nil, err + } else { + return &FileEntry{Name: name, IsPreopen: true, File: f}, nil + } + } else { + return &FileEntry{Name: name, IsPreopen: true, File: &writerFile{w: w}}, nil + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sys/sys.go b/vendor/github.com/tetratelabs/wazero/internal/sys/sys.go new file mode 100644 index 000000000..12279ee49 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sys/sys.go @@ -0,0 +1,228 @@ +package sys + +import ( + "errors" + "fmt" + "io" + "net" + "time" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/platform" + "github.com/tetratelabs/wazero/sys" +) + +// Context holds module-scoped system resources currently only supported by +// built-in host functions. +type Context struct { + args, environ [][]byte + argsSize, environSize uint32 + + walltime sys.Walltime + walltimeResolution sys.ClockResolution + nanotime sys.Nanotime + nanotimeResolution sys.ClockResolution + nanosleep sys.Nanosleep + osyield sys.Osyield + randSource io.Reader + fsc FSContext +} + +// Args is like os.Args and defaults to nil. +// +// Note: The count will never be more than math.MaxUint32. +// See wazero.ModuleConfig WithArgs +func (c *Context) Args() [][]byte { + return c.args +} + +// ArgsSize is the size to encode Args as Null-terminated strings. +// +// Note: To get the size without null-terminators, subtract the length of Args from this value. +// See wazero.ModuleConfig WithArgs +// See https://en.wikipedia.org/wiki/Null-terminated_string +func (c *Context) ArgsSize() uint32 { + return c.argsSize +} + +// Environ are "key=value" entries like os.Environ and default to nil. +// +// Note: The count will never be more than math.MaxUint32. +// See wazero.ModuleConfig WithEnv +func (c *Context) Environ() [][]byte { + return c.environ +} + +// EnvironSize is the size to encode Environ as Null-terminated strings. 
+// +// Note: To get the size without null-terminators, subtract the length of Environ from this value. +// See wazero.ModuleConfig WithEnv +// See https://en.wikipedia.org/wiki/Null-terminated_string +func (c *Context) EnvironSize() uint32 { + return c.environSize +} + +// Walltime implements platform.Walltime. +func (c *Context) Walltime() (sec int64, nsec int32) { + return c.walltime() +} + +// WalltimeNanos returns platform.Walltime as epoch nanoseconds. +func (c *Context) WalltimeNanos() int64 { + sec, nsec := c.Walltime() + return (sec * time.Second.Nanoseconds()) + int64(nsec) +} + +// WalltimeResolution returns resolution of Walltime. +func (c *Context) WalltimeResolution() sys.ClockResolution { + return c.walltimeResolution +} + +// Nanotime implements sys.Nanotime. +func (c *Context) Nanotime() int64 { + return c.nanotime() +} + +// NanotimeResolution returns resolution of Nanotime. +func (c *Context) NanotimeResolution() sys.ClockResolution { + return c.nanotimeResolution +} + +// Nanosleep implements sys.Nanosleep. +func (c *Context) Nanosleep(ns int64) { + c.nanosleep(ns) +} + +// Osyield implements sys.Osyield. +func (c *Context) Osyield() { + c.osyield() +} + +// FS returns the possibly empty (UnimplementedFS) file system context. +func (c *Context) FS() *FSContext { + return &c.fsc +} + +// RandSource is a source of random bytes and defaults to a deterministic source. +// see wazero.ModuleConfig WithRandSource +func (c *Context) RandSource() io.Reader { + return c.randSource +} + +// DefaultContext returns Context with no values set except a possible nil +// sys.FS. +// +// Note: This is only used for testing. 
+func DefaultContext(fs experimentalsys.FS) *Context { + if sysCtx, err := NewContext(0, nil, nil, nil, nil, nil, nil, nil, 0, nil, 0, nil, nil, []experimentalsys.FS{fs}, []string{""}, nil); err != nil { + panic(fmt.Errorf("BUG: DefaultContext should never error: %w", err)) + } else { + return sysCtx + } +} + +// NewContext is a factory function which helps avoid needing to know defaults or exporting all fields. +// Note: max is exposed for testing. max is only used for env/args validation. +func NewContext( + max uint32, + args, environ [][]byte, + stdin io.Reader, + stdout, stderr io.Writer, + randSource io.Reader, + walltime sys.Walltime, + walltimeResolution sys.ClockResolution, + nanotime sys.Nanotime, + nanotimeResolution sys.ClockResolution, + nanosleep sys.Nanosleep, + osyield sys.Osyield, + fs []experimentalsys.FS, guestPaths []string, + tcpListeners []*net.TCPListener, +) (sysCtx *Context, err error) { + sysCtx = &Context{args: args, environ: environ} + + if sysCtx.argsSize, err = nullTerminatedByteCount(max, args); err != nil { + return nil, fmt.Errorf("args invalid: %w", err) + } + + if sysCtx.environSize, err = nullTerminatedByteCount(max, environ); err != nil { + return nil, fmt.Errorf("environ invalid: %w", err) + } + + if randSource == nil { + sysCtx.randSource = platform.NewFakeRandSource() + } else { + sysCtx.randSource = randSource + } + + if walltime != nil { + if clockResolutionInvalid(walltimeResolution) { + return nil, fmt.Errorf("invalid Walltime resolution: %d", walltimeResolution) + } + sysCtx.walltime = walltime + sysCtx.walltimeResolution = walltimeResolution + } else { + sysCtx.walltime = platform.NewFakeWalltime() + sysCtx.walltimeResolution = sys.ClockResolution(time.Microsecond.Nanoseconds()) + } + + if nanotime != nil { + if clockResolutionInvalid(nanotimeResolution) { + return nil, fmt.Errorf("invalid Nanotime resolution: %d", nanotimeResolution) + } + sysCtx.nanotime = nanotime + sysCtx.nanotimeResolution = nanotimeResolution + } 
else { + sysCtx.nanotime = platform.NewFakeNanotime() + sysCtx.nanotimeResolution = sys.ClockResolution(time.Nanosecond) + } + + if nanosleep != nil { + sysCtx.nanosleep = nanosleep + } else { + sysCtx.nanosleep = platform.FakeNanosleep + } + + if osyield != nil { + sysCtx.osyield = osyield + } else { + sysCtx.osyield = platform.FakeOsyield + } + + err = sysCtx.InitFSContext(stdin, stdout, stderr, fs, guestPaths, tcpListeners) + + return +} + +// clockResolutionInvalid returns true if the value stored isn't reasonable. +func clockResolutionInvalid(resolution sys.ClockResolution) bool { + return resolution < 1 || resolution > sys.ClockResolution(time.Hour.Nanoseconds()) +} + +// nullTerminatedByteCount ensures the count or Nul-terminated length of the elements doesn't exceed max, and that no +// element includes the nul character. +func nullTerminatedByteCount(max uint32, elements [][]byte) (uint32, error) { + count := uint32(len(elements)) + if count > max { + return 0, errors.New("exceeds maximum count") + } + + // The buffer size is the total size including null terminators. The null terminator count == value count, sum + // count with each value length. This works because in Go, the length of a string is the same as its byte count. + bufSize, maxSize := uint64(count), uint64(max) // uint64 to allow summing without overflow + for _, e := range elements { + // As this is null-terminated, We have to validate there are no null characters in the string. 
+ for _, c := range e { + if c == 0 { + return 0, errors.New("contains NUL character") + } + } + + nextSize := bufSize + uint64(len(e)) + if nextSize > maxSize { + return 0, errors.New("exceeds maximum size") + } + bufSize = nextSize + + } + return uint32(bufSize), nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/adapter.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/adapter.go new file mode 100644 index 000000000..51a9a5480 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/adapter.go @@ -0,0 +1,105 @@ +package sysfs + +import ( + "fmt" + "io/fs" + "path" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +type AdaptFS struct { + FS fs.FS +} + +// String implements fmt.Stringer +func (a *AdaptFS) String() string { + return fmt.Sprintf("%v", a.FS) +} + +// OpenFile implements the same method as documented on sys.FS +func (a *AdaptFS) OpenFile(path string, flag experimentalsys.Oflag, perm fs.FileMode) (experimentalsys.File, experimentalsys.Errno) { + return OpenFSFile(a.FS, cleanPath(path), flag, perm) +} + +// Lstat implements the same method as documented on sys.FS +func (a *AdaptFS) Lstat(path string) (sys.Stat_t, experimentalsys.Errno) { + // At this time, we make the assumption sys.FS instances do not support + // symbolic links, therefore Lstat is the same as Stat. This is obviously + // not true, but until FS.FS has a solid story for how to handle symlinks, + // we are better off not making a decision that would be difficult to + // revert later on. 
+ // + // For further discussions on the topic, see: + // https://github.com/golang/go/issues/49580 + return a.Stat(path) +} + +// Stat implements the same method as documented on sys.FS +func (a *AdaptFS) Stat(path string) (sys.Stat_t, experimentalsys.Errno) { + f, errno := a.OpenFile(path, experimentalsys.O_RDONLY, 0) + if errno != 0 { + return sys.Stat_t{}, errno + } + defer f.Close() + return f.Stat() +} + +// Readlink implements the same method as documented on sys.FS +func (a *AdaptFS) Readlink(string) (string, experimentalsys.Errno) { + return "", experimentalsys.ENOSYS +} + +// Mkdir implements the same method as documented on sys.FS +func (a *AdaptFS) Mkdir(string, fs.FileMode) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Chmod implements the same method as documented on sys.FS +func (a *AdaptFS) Chmod(string, fs.FileMode) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Rename implements the same method as documented on sys.FS +func (a *AdaptFS) Rename(string, string) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Rmdir implements the same method as documented on sys.FS +func (a *AdaptFS) Rmdir(string) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Link implements the same method as documented on sys.FS +func (a *AdaptFS) Link(string, string) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Symlink implements the same method as documented on sys.FS +func (a *AdaptFS) Symlink(string, string) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Unlink implements the same method as documented on sys.FS +func (a *AdaptFS) Unlink(string) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Utimens implements the same method as documented on sys.FS +func (a *AdaptFS) Utimens(string, int64, int64) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +func cleanPath(name string) string { + if len(name) == 0 { + return name + } + // fs.ValidFile 
cannot be rooted (start with '/') + cleaned := name + if name[0] == '/' { + cleaned = name[1:] + } + cleaned = path.Clean(cleaned) // e.g. "sub/." -> "sub" + return cleaned +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/datasync_linux.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/datasync_linux.go new file mode 100644 index 000000000..5a8a415c5 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/datasync_linux.go @@ -0,0 +1,14 @@ +//go:build linux && !tinygo + +package sysfs + +import ( + "os" + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func datasync(f *os.File) sys.Errno { + return sys.UnwrapOSError(syscall.Fdatasync(int(f.Fd()))) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/datasync_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/datasync_tinygo.go new file mode 100644 index 000000000..e58fc9142 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/datasync_tinygo.go @@ -0,0 +1,13 @@ +//go:build tinygo + +package sysfs + +import ( + "os" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func datasync(f *os.File) sys.Errno { + return sys.ENOSYS +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/datasync_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/datasync_unsupported.go new file mode 100644 index 000000000..aa05719be --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/datasync_unsupported.go @@ -0,0 +1,14 @@ +//go:build !linux + +package sysfs + +import ( + "os" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func datasync(f *os.File) sys.Errno { + // Attempt to sync everything, even if we only need to sync the data. 
+ return fsync(f) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/dir.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/dir.go new file mode 100644 index 000000000..f9823287c --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/dir.go @@ -0,0 +1,24 @@ +package sysfs + +import ( + "io" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func adjustReaddirErr(f sys.File, isClosed bool, err error) sys.Errno { + if err == io.EOF { + return 0 // e.g. Readdir on darwin returns io.EOF, but linux doesn't. + } else if errno := sys.UnwrapOSError(err); errno != 0 { + errno = dirError(f, isClosed, errno) + // Comply with errors allowed on sys.File Readdir + switch errno { + case sys.EINVAL: // os.File Readdir can return this + return sys.EBADF + case sys.ENOTDIR: // dirError can return this + return sys.EBADF + } + return errno + } + return 0 +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/dirfs.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/dirfs.go new file mode 100644 index 000000000..04384038f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/dirfs.go @@ -0,0 +1,99 @@ +package sysfs + +import ( + "io/fs" + "os" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/platform" + "github.com/tetratelabs/wazero/sys" +) + +func DirFS(dir string) experimentalsys.FS { + return &dirFS{ + dir: dir, + cleanedDir: ensureTrailingPathSeparator(dir), + } +} + +func ensureTrailingPathSeparator(dir string) string { + if !os.IsPathSeparator(dir[len(dir)-1]) { + return dir + string(os.PathSeparator) + } + return dir +} + +// dirFS is not exported because the input fields must be maintained together. +// This is likely why os.DirFS doesn't, either! +type dirFS struct { + experimentalsys.UnimplementedFS + + dir string + // cleanedDir is for easier OS-specific concatenation, as it always has + // a trailing path separator. 
+ cleanedDir string +} + +// String implements fmt.Stringer +func (d *dirFS) String() string { + return d.dir +} + +// OpenFile implements the same method as documented on sys.FS +func (d *dirFS) OpenFile(path string, flag experimentalsys.Oflag, perm fs.FileMode) (experimentalsys.File, experimentalsys.Errno) { + return OpenOSFile(d.join(path), flag, perm) +} + +// Lstat implements the same method as documented on sys.FS +func (d *dirFS) Lstat(path string) (sys.Stat_t, experimentalsys.Errno) { + return lstat(d.join(path)) +} + +// Stat implements the same method as documented on sys.FS +func (d *dirFS) Stat(path string) (sys.Stat_t, experimentalsys.Errno) { + return stat(d.join(path)) +} + +// Mkdir implements the same method as documented on sys.FS +func (d *dirFS) Mkdir(path string, perm fs.FileMode) (errno experimentalsys.Errno) { + err := os.Mkdir(d.join(path), perm) + if errno = experimentalsys.UnwrapOSError(err); errno == experimentalsys.ENOTDIR { + errno = experimentalsys.ENOENT + } + return +} + +// Readlink implements the same method as documented on sys.FS +func (d *dirFS) Readlink(path string) (string, experimentalsys.Errno) { + // Note: do not use syscall.Readlink as that causes race on Windows. + // In any case, syscall.Readlink does almost the same logic as os.Readlink. + dst, err := os.Readlink(d.join(path)) + if err != nil { + return "", experimentalsys.UnwrapOSError(err) + } + return platform.ToPosixPath(dst), 0 +} + +// Rmdir implements the same method as documented on sys.FS +func (d *dirFS) Rmdir(path string) experimentalsys.Errno { + return rmdir(d.join(path)) +} + +// Utimens implements the same method as documented on sys.FS +func (d *dirFS) Utimens(path string, atim, mtim int64) experimentalsys.Errno { + return utimens(d.join(path), atim, mtim) +} + +func (d *dirFS) join(path string) string { + switch path { + case "", ".", "/": + if d.cleanedDir == "/" { + return "/" + } + // cleanedDir includes an unnecessary delimiter for the root path. 
+ return d.cleanedDir[:len(d.cleanedDir)-1] + } + // TODO: Enforce similar to safefilepath.FromFS(path), but be careful as + // relative path inputs are allowed. e.g. dir or path == ../ + return d.cleanedDir + path +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/dirfs_supported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/dirfs_supported.go new file mode 100644 index 000000000..ff93415b9 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/dirfs_supported.go @@ -0,0 +1,42 @@ +//go:build !tinygo + +package sysfs + +import ( + "io/fs" + "os" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" +) + +// Link implements the same method as documented on sys.FS +func (d *dirFS) Link(oldName, newName string) experimentalsys.Errno { + err := os.Link(d.join(oldName), d.join(newName)) + return experimentalsys.UnwrapOSError(err) +} + +// Unlink implements the same method as documented on sys.FS +func (d *dirFS) Unlink(path string) (err experimentalsys.Errno) { + return unlink(d.join(path)) +} + +// Rename implements the same method as documented on sys.FS +func (d *dirFS) Rename(from, to string) experimentalsys.Errno { + from, to = d.join(from), d.join(to) + return rename(from, to) +} + +// Chmod implements the same method as documented on sys.FS +func (d *dirFS) Chmod(path string, perm fs.FileMode) experimentalsys.Errno { + err := os.Chmod(d.join(path), perm) + return experimentalsys.UnwrapOSError(err) +} + +// Symlink implements the same method as documented on sys.FS +func (d *dirFS) Symlink(oldName, link string) experimentalsys.Errno { + // Note: do not resolve `oldName` relative to this dirFS. The link result is always resolved + // when dereference the `link` on its usage (e.g. readlink, read, etc). 
+ // https://github.com/bytecodealliance/cap-std/blob/v1.0.4/cap-std/src/fs/dir.rs#L404-L409 + err := os.Symlink(oldName, d.join(link)) + return experimentalsys.UnwrapOSError(err) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/dirfs_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/dirfs_unsupported.go new file mode 100644 index 000000000..98b1a3b84 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/dirfs_unsupported.go @@ -0,0 +1,34 @@ +//go:build tinygo + +package sysfs + +import ( + "io/fs" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" +) + +// Link implements the same method as documented on sys.FS +func (d *dirFS) Link(oldName, newName string) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Unlink implements the same method as documented on sys.FS +func (d *dirFS) Unlink(path string) (err experimentalsys.Errno) { + return experimentalsys.ENOSYS +} + +// Rename implements the same method as documented on sys.FS +func (d *dirFS) Rename(from, to string) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Chmod implements the same method as documented on sys.FS +func (d *dirFS) Chmod(path string, perm fs.FileMode) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Symlink implements the same method as documented on sys.FS +func (d *dirFS) Symlink(oldName, link string) experimentalsys.Errno { + return experimentalsys.ENOSYS +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go new file mode 100644 index 000000000..9a77205bb --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go @@ -0,0 +1,520 @@ +package sysfs + +import ( + "io" + "io/fs" + "os" + "time" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" + "github.com/tetratelabs/wazero/sys" +) + +func 
NewStdioFile(stdin bool, f fs.File) (fsapi.File, error) { + // Return constant stat, which has fake times, but keep the underlying + // file mode. Fake times are needed to pass wasi-testsuite. + // https://github.com/WebAssembly/wasi-testsuite/blob/af57727/tests/rust/src/bin/fd_filestat_get.rs#L1-L19 + var mode fs.FileMode + if st, err := f.Stat(); err != nil { + return nil, err + } else { + mode = st.Mode() + } + var flag experimentalsys.Oflag + if stdin { + flag = experimentalsys.O_RDONLY + } else { + flag = experimentalsys.O_WRONLY + } + var file fsapi.File + if of, ok := f.(*os.File); ok { + // This is ok because functions that need path aren't used by stdioFile + file = newOsFile("", flag, 0, of) + } else { + file = &fsFile{file: f} + } + return &stdioFile{File: file, st: sys.Stat_t{Mode: mode, Nlink: 1}}, nil +} + +func OpenFile(path string, flag experimentalsys.Oflag, perm fs.FileMode) (*os.File, experimentalsys.Errno) { + if flag&experimentalsys.O_DIRECTORY != 0 && flag&(experimentalsys.O_WRONLY|experimentalsys.O_RDWR) != 0 { + return nil, experimentalsys.EISDIR // invalid to open a directory writeable + } + return openFile(path, flag, perm) +} + +func OpenOSFile(path string, flag experimentalsys.Oflag, perm fs.FileMode) (experimentalsys.File, experimentalsys.Errno) { + f, errno := OpenFile(path, flag, perm) + if errno != 0 { + return nil, errno + } + return newOsFile(path, flag, perm, f), 0 +} + +func OpenFSFile(fs fs.FS, path string, flag experimentalsys.Oflag, perm fs.FileMode) (experimentalsys.File, experimentalsys.Errno) { + if flag&experimentalsys.O_DIRECTORY != 0 && flag&(experimentalsys.O_WRONLY|experimentalsys.O_RDWR) != 0 { + return nil, experimentalsys.EISDIR // invalid to open a directory writeable + } + f, err := fs.Open(path) + if errno := experimentalsys.UnwrapOSError(err); errno != 0 { + return nil, errno + } + // Don't return an os.File because the path is not absolute. 
osFile needs + // the path to be real and certain FS.File impls are subrooted. + return &fsFile{fs: fs, name: path, file: f}, 0 +} + +type stdioFile struct { + fsapi.File + st sys.Stat_t +} + +// SetAppend implements File.SetAppend +func (f *stdioFile) SetAppend(bool) experimentalsys.Errno { + // Ignore for stdio. + return 0 +} + +// IsAppend implements File.SetAppend +func (f *stdioFile) IsAppend() bool { + return true +} + +// Stat implements File.Stat +func (f *stdioFile) Stat() (sys.Stat_t, experimentalsys.Errno) { + return f.st, 0 +} + +// Close implements File.Close +func (f *stdioFile) Close() experimentalsys.Errno { + return 0 +} + +// fsFile is used for wrapped fs.File, like os.Stdin or any fs.File +// implementation. Notably, this does not have access to the full file path. +// so certain operations can't be supported, such as inode lookups on Windows. +type fsFile struct { + experimentalsys.UnimplementedFile + + // fs is the file-system that opened the file, or nil when wrapped for + // pre-opens like stdio. + fs fs.FS + + // name is what was used in fs for Open, so it may not be the actual path. + name string + + // file is always set, possibly an os.File like os.Stdin. + file fs.File + + // reopenDir is true if reopen should be called before Readdir. This flag + // is deferred until Readdir to prevent redundant rewinds. This could + // happen if Seek(0) was called twice, or if in Windows, Seek(0) was called + // before Readdir. + reopenDir bool + + // closed is true when closed was called. This ensures proper sys.EBADF + closed bool + + // cachedStat includes fields that won't change while a file is open. + cachedSt *cachedStat +} + +type cachedStat struct { + // dev is the same as sys.Stat_t Dev. + dev uint64 + + // dev is the same as sys.Stat_t Ino. + ino sys.Inode + + // isDir is sys.Stat_t Mode masked with fs.ModeDir + isDir bool +} + +// cachedStat returns the cacheable parts of sys.Stat_t or an error if they +// couldn't be retrieved. 
+func (f *fsFile) cachedStat() (dev uint64, ino sys.Inode, isDir bool, errno experimentalsys.Errno) { + if f.cachedSt == nil { + if _, errno = f.Stat(); errno != 0 { + return + } + } + return f.cachedSt.dev, f.cachedSt.ino, f.cachedSt.isDir, 0 +} + +// Dev implements the same method as documented on sys.File +func (f *fsFile) Dev() (uint64, experimentalsys.Errno) { + dev, _, _, errno := f.cachedStat() + return dev, errno +} + +// Ino implements the same method as documented on sys.File +func (f *fsFile) Ino() (sys.Inode, experimentalsys.Errno) { + _, ino, _, errno := f.cachedStat() + return ino, errno +} + +// IsDir implements the same method as documented on sys.File +func (f *fsFile) IsDir() (bool, experimentalsys.Errno) { + _, _, isDir, errno := f.cachedStat() + return isDir, errno +} + +// IsAppend implements the same method as documented on sys.File +func (f *fsFile) IsAppend() bool { + return false +} + +// SetAppend implements the same method as documented on sys.File +func (f *fsFile) SetAppend(bool) (errno experimentalsys.Errno) { + return fileError(f, f.closed, experimentalsys.ENOSYS) +} + +// Stat implements the same method as documented on sys.File +func (f *fsFile) Stat() (sys.Stat_t, experimentalsys.Errno) { + if f.closed { + return sys.Stat_t{}, experimentalsys.EBADF + } + + st, errno := statFile(f.file) + switch errno { + case 0: + f.cachedSt = &cachedStat{dev: st.Dev, ino: st.Ino, isDir: st.Mode&fs.ModeDir == fs.ModeDir} + case experimentalsys.EIO: + errno = experimentalsys.EBADF + } + return st, errno +} + +// Read implements the same method as documented on sys.File +func (f *fsFile) Read(buf []byte) (n int, errno experimentalsys.Errno) { + if n, errno = read(f.file, buf); errno != 0 { + // Defer validation overhead until we've already had an error. 
+ errno = fileError(f, f.closed, errno) + } + return +} + +// Pread implements the same method as documented on sys.File +func (f *fsFile) Pread(buf []byte, off int64) (n int, errno experimentalsys.Errno) { + if ra, ok := f.file.(io.ReaderAt); ok { + if n, errno = pread(ra, buf, off); errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + return + } + + // See /RATIONALE.md "fd_pread: io.Seeker fallback when io.ReaderAt is not supported" + if rs, ok := f.file.(io.ReadSeeker); ok { + // Determine the current position in the file, as we need to revert it. + currentOffset, err := rs.Seek(0, io.SeekCurrent) + if err != nil { + return 0, fileError(f, f.closed, experimentalsys.UnwrapOSError(err)) + } + + // Put the read position back when complete. + defer func() { _, _ = rs.Seek(currentOffset, io.SeekStart) }() + + // If the current offset isn't in sync with this reader, move it. + if off != currentOffset { + if _, err = rs.Seek(off, io.SeekStart); err != nil { + return 0, fileError(f, f.closed, experimentalsys.UnwrapOSError(err)) + } + } + + n, err = rs.Read(buf) + if errno = experimentalsys.UnwrapOSError(err); errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + } else { + errno = experimentalsys.ENOSYS // unsupported + } + return +} + +// Seek implements the same method as documented on sys.File +func (f *fsFile) Seek(offset int64, whence int) (newOffset int64, errno experimentalsys.Errno) { + // If this is a directory, and we're attempting to seek to position zero, + // we have to re-open the file to ensure the directory state is reset. 
+ var isDir bool + if offset == 0 && whence == io.SeekStart { + if isDir, errno = f.IsDir(); errno == 0 && isDir { + f.reopenDir = true + return + } + } + + if s, ok := f.file.(io.Seeker); ok { + if newOffset, errno = seek(s, offset, whence); errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + } else { + errno = experimentalsys.ENOSYS // unsupported + } + return +} + +// Readdir implements the same method as documented on sys.File +// +// Notably, this uses readdirFile or fs.ReadDirFile if available. This does not +// return inodes on windows. +func (f *fsFile) Readdir(n int) (dirents []experimentalsys.Dirent, errno experimentalsys.Errno) { + // Windows lets you Readdir after close, FS.File also may not implement + // close in a meaningful way. read our closed field to return consistent + // results. + if f.closed { + errno = experimentalsys.EBADF + return + } + + if f.reopenDir { // re-open the directory if needed. + f.reopenDir = false + if errno = adjustReaddirErr(f, f.closed, f.reopen()); errno != 0 { + return + } + } + + if of, ok := f.file.(readdirFile); ok { + // We can't use f.name here because it is the path up to the sys.FS, + // not necessarily the real path. For this reason, Windows may not be + // able to populate inodes. However, Darwin and Linux will. + if dirents, errno = readdir(of, "", n); errno != 0 { + errno = adjustReaddirErr(f, f.closed, errno) + } + return + } + + // Try with FS.ReadDirFile which is available on api.FS implementations + // like embed:FS. 
+ if rdf, ok := f.file.(fs.ReadDirFile); ok { + entries, e := rdf.ReadDir(n) + if errno = adjustReaddirErr(f, f.closed, e); errno != 0 { + return + } + dirents = make([]experimentalsys.Dirent, 0, len(entries)) + for _, e := range entries { + // By default, we don't attempt to read inode data + dirents = append(dirents, experimentalsys.Dirent{Name: e.Name(), Type: e.Type()}) + } + } else { + errno = experimentalsys.EBADF // not a directory + } + return +} + +// Write implements the same method as documented on sys.File. +func (f *fsFile) Write(buf []byte) (n int, errno experimentalsys.Errno) { + if w, ok := f.file.(io.Writer); ok { + if n, errno = write(w, buf); errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + } else { + errno = experimentalsys.ENOSYS // unsupported + } + return +} + +// Pwrite implements the same method as documented on sys.File. +func (f *fsFile) Pwrite(buf []byte, off int64) (n int, errno experimentalsys.Errno) { + if wa, ok := f.file.(io.WriterAt); ok { + if n, errno = pwrite(wa, buf, off); errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + } else { + errno = experimentalsys.ENOSYS // unsupported + } + return +} + +// Close implements the same method as documented on sys.File. 
+func (f *fsFile) Close() experimentalsys.Errno { + if f.closed { + return 0 + } + f.closed = true + return f.close() +} + +func (f *fsFile) close() experimentalsys.Errno { + return experimentalsys.UnwrapOSError(f.file.Close()) +} + +// IsNonblock implements the same method as documented on fsapi.File +func (f *fsFile) IsNonblock() bool { + return false +} + +// SetNonblock implements the same method as documented on fsapi.File +func (f *fsFile) SetNonblock(bool) experimentalsys.Errno { + return experimentalsys.ENOSYS +} + +// Poll implements the same method as documented on fsapi.File +func (f *fsFile) Poll(fsapi.Pflag, int32) (ready bool, errno experimentalsys.Errno) { + return false, experimentalsys.ENOSYS +} + +// dirError is used for commands that work against a directory, but not a file. +func dirError(f experimentalsys.File, isClosed bool, errno experimentalsys.Errno) experimentalsys.Errno { + if vErrno := validate(f, isClosed, false, true); vErrno != 0 { + return vErrno + } + return errno +} + +// fileError is used for commands that work against a file, but not a directory. +func fileError(f experimentalsys.File, isClosed bool, errno experimentalsys.Errno) experimentalsys.Errno { + if vErrno := validate(f, isClosed, true, false); vErrno != 0 { + return vErrno + } + return errno +} + +// validate is used to making syscalls which will fail. +func validate(f experimentalsys.File, isClosed, wantFile, wantDir bool) experimentalsys.Errno { + if isClosed { + return experimentalsys.EBADF + } + + isDir, errno := f.IsDir() + if errno != 0 { + return errno + } + + if wantFile && isDir { + return experimentalsys.EISDIR + } else if wantDir && !isDir { + return experimentalsys.ENOTDIR + } + return 0 +} + +func read(r io.Reader, buf []byte) (n int, errno experimentalsys.Errno) { + if len(buf) == 0 { + return 0, 0 // less overhead on zero-length reads. 
+ } + + n, err := r.Read(buf) + return n, experimentalsys.UnwrapOSError(err) +} + +func pread(ra io.ReaderAt, buf []byte, off int64) (n int, errno experimentalsys.Errno) { + if len(buf) == 0 { + return 0, 0 // less overhead on zero-length reads. + } + + n, err := ra.ReadAt(buf, off) + return n, experimentalsys.UnwrapOSError(err) +} + +func seek(s io.Seeker, offset int64, whence int) (int64, experimentalsys.Errno) { + if uint(whence) > io.SeekEnd { + return 0, experimentalsys.EINVAL // negative or exceeds the largest valid whence + } + + newOffset, err := s.Seek(offset, whence) + return newOffset, experimentalsys.UnwrapOSError(err) +} + +// reopenFile allows re-opening a file for reasons such as applying flags or +// directory iteration. +type reopenFile func() experimentalsys.Errno + +// compile-time check to ensure fsFile.reopen implements reopenFile. +var _ reopenFile = (*fsFile)(nil).reopen + +// reopen implements the same method as documented on reopenFile. +func (f *fsFile) reopen() experimentalsys.Errno { + _ = f.close() + var err error + f.file, err = f.fs.Open(f.name) + return experimentalsys.UnwrapOSError(err) +} + +// readdirFile allows masking the `Readdir` function on os.File. +type readdirFile interface { + Readdir(n int) ([]fs.FileInfo, error) +} + +// readdir uses readdirFile.Readdir, special casing windows when path !="". +func readdir(f readdirFile, path string, n int) (dirents []experimentalsys.Dirent, errno experimentalsys.Errno) { + fis, e := f.Readdir(n) + if errno = experimentalsys.UnwrapOSError(e); errno != 0 { + return + } + + dirents = make([]experimentalsys.Dirent, 0, len(fis)) + + // linux/darwin won't have to fan out to lstat, but windows will. + var ino sys.Inode + for fi := range fis { + t := fis[fi] + // inoFromFileInfo is more efficient than sys.NewStat_t, as it gets the + // inode without allocating an instance and filling other fields. 
+ if ino, errno = inoFromFileInfo(path, t); errno != 0 { + return + } + dirents = append(dirents, experimentalsys.Dirent{Name: t.Name(), Ino: ino, Type: t.Mode().Type()}) + } + return +} + +func write(w io.Writer, buf []byte) (n int, errno experimentalsys.Errno) { + if len(buf) == 0 { + return 0, 0 // less overhead on zero-length writes. + } + + n, err := w.Write(buf) + return n, experimentalsys.UnwrapOSError(err) +} + +func pwrite(w io.WriterAt, buf []byte, off int64) (n int, errno experimentalsys.Errno) { + if len(buf) == 0 { + return 0, 0 // less overhead on zero-length writes. + } + + n, err := w.WriteAt(buf, off) + return n, experimentalsys.UnwrapOSError(err) +} + +func chtimes(path string, atim, mtim int64) (errno experimentalsys.Errno) { //nolint:unused + // When both inputs are omitted, there is nothing to change. + if atim == experimentalsys.UTIME_OMIT && mtim == experimentalsys.UTIME_OMIT { + return + } + + // UTIME_OMIT is expensive until progress is made in Go, as it requires a + // stat to read-back the value to re-apply. + // - https://github.com/golang/go/issues/32558. 
+ // - https://go-review.googlesource.com/c/go/+/219638 (unmerged) + var st sys.Stat_t + if atim == experimentalsys.UTIME_OMIT || mtim == experimentalsys.UTIME_OMIT { + if st, errno = stat(path); errno != 0 { + return + } + } + + var atime, mtime time.Time + if atim == experimentalsys.UTIME_OMIT { + atime = epochNanosToTime(st.Atim) + mtime = epochNanosToTime(mtim) + } else if mtim == experimentalsys.UTIME_OMIT { + atime = epochNanosToTime(atim) + mtime = epochNanosToTime(st.Mtim) + } else { + atime = epochNanosToTime(atim) + mtime = epochNanosToTime(mtim) + } + return experimentalsys.UnwrapOSError(os.Chtimes(path, atime, mtime)) +} + +func epochNanosToTime(epochNanos int64) time.Time { //nolint:unused + seconds := epochNanos / 1e9 + nanos := epochNanos % 1e9 + return time.Unix(seconds, nanos) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file_unix.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file_unix.go new file mode 100644 index 000000000..f201e813d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file_unix.go @@ -0,0 +1,39 @@ +//go:build unix && !tinygo + +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +const ( + nonBlockingFileReadSupported = true + nonBlockingFileWriteSupported = true +) + +func rmdir(path string) sys.Errno { + err := syscall.Rmdir(path) + return sys.UnwrapOSError(err) +} + +// readFd exposes syscall.Read. +func readFd(fd uintptr, buf []byte) (int, sys.Errno) { + if len(buf) == 0 { + return 0, 0 // Short-circuit 0-len reads. + } + n, err := syscall.Read(int(fd), buf) + errno := sys.UnwrapOSError(err) + return n, errno +} + +// writeFd exposes syscall.Write. +func writeFd(fd uintptr, buf []byte) (int, sys.Errno) { + if len(buf) == 0 { + return 0, 0 // Short-circuit 0-len writes. 
+ } + n, err := syscall.Write(int(fd), buf) + errno := sys.UnwrapOSError(err) + return n, errno +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file_unsupported.go new file mode 100644 index 000000000..a028b9479 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file_unsupported.go @@ -0,0 +1,28 @@ +//go:build !(unix || windows) || tinygo + +package sysfs + +import ( + "os" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +const ( + nonBlockingFileReadSupported = false + nonBlockingFileWriteSupported = false +) + +func rmdir(path string) sys.Errno { + return sys.UnwrapOSError(os.Remove(path)) +} + +// readFd returns ENOSYS on unsupported platforms. +func readFd(fd uintptr, buf []byte) (int, sys.Errno) { + return -1, sys.ENOSYS +} + +// writeFd returns ENOSYS on unsupported platforms. +func writeFd(fd uintptr, buf []byte) (int, sys.Errno) { + return -1, sys.ENOSYS +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file_windows.go new file mode 100644 index 000000000..37870ea36 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file_windows.go @@ -0,0 +1,175 @@ +package sysfs + +import ( + "errors" + "syscall" + "unsafe" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +const ( + nonBlockingFileReadSupported = true + nonBlockingFileWriteSupported = false + + _ERROR_IO_INCOMPLETE = syscall.Errno(996) +) + +var kernel32 = syscall.NewLazyDLL("kernel32.dll") + +// procPeekNamedPipe is the syscall.LazyProc in kernel32 for PeekNamedPipe +var ( + // procPeekNamedPipe is the syscall.LazyProc in kernel32 for PeekNamedPipe + procPeekNamedPipe = kernel32.NewProc("PeekNamedPipe") + // procGetOverlappedResult is the syscall.LazyProc in kernel32 for GetOverlappedResult + procGetOverlappedResult = kernel32.NewProc("GetOverlappedResult") + // 
procCreateEventW is the syscall.LazyProc in kernel32 for CreateEventW + procCreateEventW = kernel32.NewProc("CreateEventW") +) + +// readFd returns ENOSYS on unsupported platforms. +// +// PeekNamedPipe: https://learn.microsoft.com/en-us/windows/win32/api/namedpipeapi/nf-namedpipeapi-peeknamedpipe +// "GetFileType can assist in determining what device type the handle refers to. A console handle presents as FILE_TYPE_CHAR." +// https://learn.microsoft.com/en-us/windows/console/console-handles +func readFd(fd uintptr, buf []byte) (int, sys.Errno) { + handle := syscall.Handle(fd) + fileType, err := syscall.GetFileType(handle) + if err != nil { + return 0, sys.UnwrapOSError(err) + } + if fileType&syscall.FILE_TYPE_CHAR == 0 { + return -1, sys.ENOSYS + } + n, errno := peekNamedPipe(handle) + if errno == syscall.ERROR_BROKEN_PIPE { + return 0, 0 + } + if n == 0 { + return -1, sys.EAGAIN + } + un, err := syscall.Read(handle, buf[0:n]) + return un, sys.UnwrapOSError(err) +} + +func writeFd(fd uintptr, buf []byte) (int, sys.Errno) { + return -1, sys.ENOSYS +} + +func readSocket(h uintptr, buf []byte) (int, sys.Errno) { + // Poll the socket to ensure that we never perform a blocking/overlapped Read. + // + // When the socket is closed by the remote peer, wsaPoll will return n=1 and + // errno=0, and syscall.ReadFile will return n=0 and errno=0 -- which indicates + // io.EOF. + if n, errno := wsaPoll( + []pollFd{newPollFd(h, _POLLIN, 0)}, 0); !errors.Is(errno, sys.Errno(0)) { + return 0, sys.UnwrapOSError(errno) + } else if n <= 0 { + return 0, sys.EAGAIN + } + + // Properly use overlapped result. + // + // If hFile was opened with FILE_FLAG_OVERLAPPED, the following conditions are in effect: + // - The lpOverlapped parameter must point to a valid and unique OVERLAPPED structure, + // otherwise the function can incorrectly report that the read operation is complete. + // - The lpNumberOfBytesRead parameter should be set to NULL. 
Use the GetOverlappedResult + // function to get the actual number of bytes read. If the hFile parameter is associated + // with an I/O completion port, you can also get the number of bytes read by calling the + // GetQueuedCompletionStatus function. + // + // We are currently skipping checking if hFile was opened with FILE_FLAG_OVERLAPPED but using + // both lpOverlapped and lpNumberOfBytesRead. + var overlapped syscall.Overlapped + + // Create an event to wait on. + if hEvent, err := createEventW(nil, true, false, nil); err != 0 { + return 0, sys.UnwrapOSError(err) + } else { + overlapped.HEvent = syscall.Handle(hEvent) + } + + var done uint32 + errno := syscall.ReadFile(syscall.Handle(h), buf, &done, &overlapped) + if errors.Is(errno, syscall.ERROR_IO_PENDING) { + errno = syscall.CancelIo(syscall.Handle(h)) + if errno != nil { + return 0, sys.UnwrapOSError(errno) // This is a fatal error. CancelIo failed. + } + + done, errno = getOverlappedResult(syscall.Handle(h), &overlapped, true) // wait for I/O to complete(cancel or finish). Overwrite done and errno. + if errors.Is(errno, syscall.ERROR_OPERATION_ABORTED) { + return int(done), sys.EAGAIN // This is one of the expected behavior, I/O was cancelled(completed) before finished. 
+ } + } + + return int(done), sys.UnwrapOSError(errno) +} + +func writeSocket(fd uintptr, buf []byte) (int, sys.Errno) { + var done uint32 + var overlapped syscall.Overlapped + errno := syscall.WriteFile(syscall.Handle(fd), buf, &done, &overlapped) + if errors.Is(errno, syscall.ERROR_IO_PENDING) { + errno = syscall.EAGAIN + } + return int(done), sys.UnwrapOSError(errno) +} + +// peekNamedPipe partially exposes PeekNamedPipe from the Win32 API +// see https://learn.microsoft.com/en-us/windows/win32/api/namedpipeapi/nf-namedpipeapi-peeknamedpipe +func peekNamedPipe(handle syscall.Handle) (uint32, syscall.Errno) { + var totalBytesAvail uint32 + totalBytesPtr := unsafe.Pointer(&totalBytesAvail) + _, _, errno := syscall.SyscallN( + procPeekNamedPipe.Addr(), + uintptr(handle), // [in] HANDLE hNamedPipe, + 0, // [out, optional] LPVOID lpBuffer, + 0, // [in] DWORD nBufferSize, + 0, // [out, optional] LPDWORD lpBytesRead + uintptr(totalBytesPtr), // [out, optional] LPDWORD lpTotalBytesAvail, + 0) // [out, optional] LPDWORD lpBytesLeftThisMessage + return totalBytesAvail, errno +} + +func rmdir(path string) sys.Errno { + err := syscall.Rmdir(path) + return sys.UnwrapOSError(err) +} + +func getOverlappedResult(handle syscall.Handle, overlapped *syscall.Overlapped, wait bool) (uint32, syscall.Errno) { + var totalBytesAvail uint32 + var bwait uintptr + if wait { + bwait = 0xFFFFFFFF + } + totalBytesPtr := unsafe.Pointer(&totalBytesAvail) + _, _, errno := syscall.SyscallN( + procGetOverlappedResult.Addr(), + uintptr(handle), // [in] HANDLE hFile, + uintptr(unsafe.Pointer(overlapped)), // [in] LPOVERLAPPED lpOverlapped, + uintptr(totalBytesPtr), // [out] LPDWORD lpNumberOfBytesTransferred, + bwait) // [in] BOOL bWait + return totalBytesAvail, errno +} + +func createEventW(lpEventAttributes *syscall.SecurityAttributes, bManualReset bool, bInitialState bool, lpName *uint16) (uintptr, syscall.Errno) { + var manualReset uintptr + var initialState uintptr + if bManualReset { + 
manualReset = 1 + } + if bInitialState { + initialState = 1 + } + handle, _, errno := syscall.SyscallN( + procCreateEventW.Addr(), + uintptr(unsafe.Pointer(lpEventAttributes)), // [in] LPSECURITY_ATTRIBUTES lpEventAttributes, + manualReset, // [in] BOOL bManualReset, + initialState, // [in] BOOL bInitialState, + uintptr(unsafe.Pointer(lpName)), // [in, opt]LPCWSTR lpName, + ) + + return handle, errno +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens.go new file mode 100644 index 000000000..7f6b11094 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens.go @@ -0,0 +1,37 @@ +//go:build (linux || darwin) && !tinygo + +package sysfs + +import ( + "syscall" + "unsafe" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func timesToPtr(times *[2]syscall.Timespec) unsafe.Pointer { //nolint:unused + if times != nil { + return unsafe.Pointer(×[0]) + } + return unsafe.Pointer(nil) +} + +func timesToTimespecs(atim int64, mtim int64) (times *[2]syscall.Timespec) { + // When both inputs are omitted, there is nothing to change. 
+ if atim == sys.UTIME_OMIT && mtim == sys.UTIME_OMIT { + return + } + + times = &[2]syscall.Timespec{} + if atim == sys.UTIME_OMIT { + times[0] = syscall.Timespec{Nsec: _UTIME_OMIT} + times[1] = syscall.NsecToTimespec(mtim) + } else if mtim == sys.UTIME_OMIT { + times[0] = syscall.NsecToTimespec(atim) + times[1] = syscall.Timespec{Nsec: _UTIME_OMIT} + } else { + times[0] = syscall.NsecToTimespec(atim) + times[1] = syscall.NsecToTimespec(mtim) + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_darwin.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_darwin.go new file mode 100644 index 000000000..88e4008f0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_darwin.go @@ -0,0 +1,51 @@ +package sysfs + +import ( + "syscall" + _ "unsafe" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" +) + +const ( + _AT_FDCWD = -0x2 + _AT_SYMLINK_NOFOLLOW = 0x0020 + _UTIME_OMIT = -2 +) + +//go:noescape +//go:linkname utimensat syscall.utimensat +func utimensat(dirfd int, path string, times *[2]syscall.Timespec, flags int) error + +func utimens(path string, atim, mtim int64) experimentalsys.Errno { + times := timesToTimespecs(atim, mtim) + if times == nil { + return 0 + } + var flags int + return experimentalsys.UnwrapOSError(utimensat(_AT_FDCWD, path, times, flags)) +} + +func futimens(fd uintptr, atim, mtim int64) experimentalsys.Errno { + times := timesToTimespecs(atim, mtim) + if times == nil { + return 0 + } + _p0 := timesToPtr(times) + + // Warning: futimens only exists since High Sierra (10.13). + _, _, e1 := syscall_syscall6(libc_futimens_trampoline_addr, fd, uintptr(_p0), 0, 0, 0, 0) + return experimentalsys.UnwrapOSError(e1) +} + +// libc_futimens_trampoline_addr is the address of the +// `libc_futimens_trampoline` symbol, defined in `futimens_darwin.s`. +// +// We use this to invoke the syscall through syscall_syscall6 imported below. 
+var libc_futimens_trampoline_addr uintptr + +// Imports the futimens symbol from libc as `libc_futimens`. +// +// Note: CGO mechanisms are used in darwin regardless of the CGO_ENABLED value +// or the "cgo" build flag. See /RATIONALE.md for why. +//go:cgo_import_dynamic libc_futimens futimens "/usr/lib/libSystem.B.dylib" diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_darwin.s b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_darwin.s new file mode 100644 index 000000000..b86aecdf0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_darwin.s @@ -0,0 +1,8 @@ +// lifted from golang.org/x/sys unix +#include "textflag.h" + +TEXT libc_futimens_trampoline<>(SB), NOSPLIT, $0-0 + JMP libc_futimens(SB) + +GLOBL ·libc_futimens_trampoline_addr(SB), RODATA, $8 +DATA ·libc_futimens_trampoline_addr(SB)/8, $libc_futimens_trampoline<>(SB) diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_linux.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_linux.go new file mode 100644 index 000000000..db3b1b8b6 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_linux.go @@ -0,0 +1,49 @@ +//go:build !tinygo + +package sysfs + +import ( + "syscall" + "unsafe" + _ "unsafe" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" +) + +const ( + _AT_FDCWD = -0x64 + _UTIME_OMIT = (1 << 30) - 2 +) + +func utimens(path string, atim, mtim int64) experimentalsys.Errno { + times := timesToTimespecs(atim, mtim) + if times == nil { + return 0 + } + + var flags int + var _p0 *byte + _p0, err := syscall.BytePtrFromString(path) + if err == nil { + err = utimensat(_AT_FDCWD, uintptr(unsafe.Pointer(_p0)), times, flags) + } + return experimentalsys.UnwrapOSError(err) +} + +// On linux, implement futimens via utimensat with the NUL path. 
+func futimens(fd uintptr, atim, mtim int64) experimentalsys.Errno { + times := timesToTimespecs(atim, mtim) + if times == nil { + return 0 + } + return experimentalsys.UnwrapOSError(utimensat(int(fd), 0 /* NUL */, times, 0)) +} + +// utimensat is like syscall.utimensat special-cased to accept a NUL string for the path value. +func utimensat(dirfd int, strPtr uintptr, times *[2]syscall.Timespec, flags int) (err error) { + _, _, e1 := syscall.Syscall6(syscall.SYS_UTIMENSAT, uintptr(dirfd), strPtr, uintptr(unsafe.Pointer(times)), uintptr(flags), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_unsupported.go new file mode 100644 index 000000000..69d564942 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_unsupported.go @@ -0,0 +1,18 @@ +//go:build (!windows && !linux && !darwin) || tinygo + +package sysfs + +import ( + "github.com/tetratelabs/wazero/experimental/sys" +) + +func utimens(path string, atim, mtim int64) sys.Errno { + return chtimes(path, atim, mtim) +} + +func futimens(fd uintptr, atim, mtim int64) error { + // Go exports syscall.Futimes, which is microsecond granularity, and + // WASI tests expect nanosecond. We don't yet have a way to invoke the + // futimens syscall portably. 
+ return sys.ENOSYS +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_windows.go new file mode 100644 index 000000000..e0c89f303 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/futimens_windows.go @@ -0,0 +1,42 @@ +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func utimens(path string, atim, mtim int64) sys.Errno { + return chtimes(path, atim, mtim) +} + +func futimens(fd uintptr, atim, mtim int64) error { + // Per docs, zero isn't a valid timestamp as it cannot be differentiated + // from nil. In both cases, it is a marker like sys.UTIME_OMIT. + // See https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-setfiletime + a, w := timespecToFiletime(atim, mtim) + + if a == nil && w == nil { + return nil // both omitted, so nothing to change + } + + // Attempt to get the stat by handle, which works for normal files + h := syscall.Handle(fd) + + // Note: This returns ERROR_ACCESS_DENIED when the input is a directory. 
+ return syscall.SetFileTime(h, nil, a, w) +} + +func timespecToFiletime(atim, mtim int64) (a, w *syscall.Filetime) { + a = timespecToFileTime(atim) + w = timespecToFileTime(mtim) + return +} + +func timespecToFileTime(tim int64) *syscall.Filetime { + if tim == sys.UTIME_OMIT { + return nil + } + ft := syscall.NsecToFiletime(tim) + return &ft +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino.go new file mode 100644 index 000000000..8344cd16f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino.go @@ -0,0 +1,22 @@ +//go:build !windows && !plan9 && !tinygo + +package sysfs + +import ( + "io/fs" + "syscall" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +func inoFromFileInfo(_ string, info fs.FileInfo) (sys.Inode, experimentalsys.Errno) { + switch v := info.Sys().(type) { + case *sys.Stat_t: + return v.Ino, 0 + case *syscall.Stat_t: + return v.Ino, 0 + default: + return 0, 0 + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino_plan9.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino_plan9.go new file mode 100644 index 000000000..9c669a475 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino_plan9.go @@ -0,0 +1,15 @@ +package sysfs + +import ( + "io/fs" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +func inoFromFileInfo(_ string, info fs.FileInfo) (sys.Inode, experimentalsys.Errno) { + if v, ok := info.Sys().(*sys.Stat_t); ok { + return v.Ino, 0 + } + return 0, 0 +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino_tinygo.go new file mode 100644 index 000000000..2099231cf --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino_tinygo.go @@ -0,0 +1,14 @@ +//go:build tinygo + 
+package sysfs + +import ( + "io/fs" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +func inoFromFileInfo(_ string, info fs.FileInfo) (sys.Inode, experimentalsys.Errno) { + return 0, experimentalsys.ENOTSUP +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino_windows.go new file mode 100644 index 000000000..d163b3601 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/ino_windows.go @@ -0,0 +1,28 @@ +package sysfs + +import ( + "io/fs" + "path" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +// inoFromFileInfo uses stat to get the inode information of the file. +func inoFromFileInfo(dirPath string, info fs.FileInfo) (ino sys.Inode, errno experimentalsys.Errno) { + if v, ok := info.Sys().(*sys.Stat_t); ok { + return v.Ino, 0 + } + if dirPath == "" { + // This is a FS.File backed implementation which doesn't have access to + // the original file path. 
+ return + } + // Ino is no not in Win32FileAttributeData + inoPath := path.Clean(path.Join(dirPath, info.Name())) + var st sys.Stat_t + if st, errno = lstat(inoPath); errno == 0 { + ino = st.Ino + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/nonblock_unix.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/nonblock_unix.go new file mode 100644 index 000000000..4477ee977 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/nonblock_unix.go @@ -0,0 +1,17 @@ +//go:build !windows && !plan9 && !tinygo + +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func setNonblock(fd uintptr, enable bool) sys.Errno { + return sys.UnwrapOSError(syscall.SetNonblock(int(fd), enable)) +} + +func isNonblock(f *osFile) bool { + return f.flag&sys.O_NONBLOCK == sys.O_NONBLOCK +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/nonblock_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/nonblock_unsupported.go new file mode 100644 index 000000000..3e141a7b5 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/nonblock_unsupported.go @@ -0,0 +1,13 @@ +//go:build plan9 || tinygo + +package sysfs + +import "github.com/tetratelabs/wazero/experimental/sys" + +func setNonblock(fd uintptr, enable bool) sys.Errno { + return sys.ENOSYS +} + +func isNonblock(f *osFile) bool { + return false +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/nonblock_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/nonblock_windows.go new file mode 100644 index 000000000..eb38ea5af --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/nonblock_windows.go @@ -0,0 +1,23 @@ +package sysfs + +import ( + "io/fs" + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func setNonblock(fd uintptr, enable bool) sys.Errno { + // We invoke the syscall, but this is currently no-op. 
+ return sys.UnwrapOSError(syscall.SetNonblock(syscall.Handle(fd), enable)) +} + +func isNonblock(f *osFile) bool { + // On Windows, we support non-blocking reads only on named pipes. + isValid := false + st, errno := f.Stat() + if errno == 0 { + isValid = st.Mode&fs.ModeNamedPipe != 0 + } + return isValid && f.flag&sys.O_NONBLOCK == sys.O_NONBLOCK +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/oflag.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/oflag.go new file mode 100644 index 000000000..be6d2c35f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/oflag.go @@ -0,0 +1,38 @@ +package sysfs + +import ( + "os" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +// toOsOpenFlag converts the input to the flag parameter of os.OpenFile +func toOsOpenFlag(oflag sys.Oflag) (flag int) { + // First flags are exclusive + switch oflag & (sys.O_RDONLY | sys.O_RDWR | sys.O_WRONLY) { + case sys.O_RDONLY: + flag |= os.O_RDONLY + case sys.O_RDWR: + flag |= os.O_RDWR + case sys.O_WRONLY: + flag |= os.O_WRONLY + } + + // Run down the flags defined in the os package + if oflag&sys.O_APPEND != 0 { + flag |= os.O_APPEND + } + if oflag&sys.O_CREAT != 0 { + flag |= os.O_CREATE + } + if oflag&sys.O_EXCL != 0 { + flag |= os.O_EXCL + } + if oflag&sys.O_SYNC != 0 { + flag |= os.O_SYNC + } + if oflag&sys.O_TRUNC != 0 { + flag |= os.O_TRUNC + } + return withSyscallOflag(oflag, flag) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_darwin.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_darwin.go new file mode 100644 index 000000000..a4f54ca2c --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_darwin.go @@ -0,0 +1,26 @@ +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +const supportedSyscallOflag = sys.O_DIRECTORY | sys.O_DSYNC | sys.O_NOFOLLOW | sys.O_NONBLOCK + +func withSyscallOflag(oflag sys.Oflag, flag 
int) int { + if oflag&sys.O_DIRECTORY != 0 { + flag |= syscall.O_DIRECTORY + } + if oflag&sys.O_DSYNC != 0 { + flag |= syscall.O_DSYNC + } + if oflag&sys.O_NOFOLLOW != 0 { + flag |= syscall.O_NOFOLLOW + } + if oflag&sys.O_NONBLOCK != 0 { + flag |= syscall.O_NONBLOCK + } + // syscall.O_RSYNC not defined on darwin + return flag +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_freebsd.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_freebsd.go new file mode 100644 index 000000000..42adaa214 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_freebsd.go @@ -0,0 +1,24 @@ +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +const supportedSyscallOflag = sys.O_DIRECTORY | sys.O_NOFOLLOW | sys.O_NONBLOCK + +func withSyscallOflag(oflag sys.Oflag, flag int) int { + if oflag&sys.O_DIRECTORY != 0 { + flag |= syscall.O_DIRECTORY + } + // syscall.O_DSYNC not defined on darwin + if oflag&sys.O_NOFOLLOW != 0 { + flag |= syscall.O_NOFOLLOW + } + if oflag&sys.O_NONBLOCK != 0 { + flag |= syscall.O_NONBLOCK + } + // syscall.O_RSYNC not defined on darwin + return flag +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_linux.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_linux.go new file mode 100644 index 000000000..3fe2bb6e1 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_linux.go @@ -0,0 +1,30 @@ +//go:build !tinygo + +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +const supportedSyscallOflag = sys.O_DIRECTORY | sys.O_DSYNC | sys.O_NOFOLLOW | sys.O_NONBLOCK | sys.O_RSYNC + +func withSyscallOflag(oflag sys.Oflag, flag int) int { + if oflag&sys.O_DIRECTORY != 0 { + flag |= syscall.O_DIRECTORY + } + if oflag&sys.O_DSYNC != 0 { + flag |= syscall.O_DSYNC + } + if oflag&sys.O_NOFOLLOW != 0 { + flag |= syscall.O_NOFOLLOW + } + if 
oflag&sys.O_NONBLOCK != 0 { + flag |= syscall.O_NONBLOCK + } + if oflag&sys.O_RSYNC != 0 { + flag |= syscall.O_RSYNC + } + return flag +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_notwindows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_notwindows.go new file mode 100644 index 000000000..670e35910 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_notwindows.go @@ -0,0 +1,20 @@ +//go:build !windows && !tinygo + +package sysfs + +import ( + "io/fs" + "os" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +// openFile is like os.OpenFile except it accepts a sys.Oflag and returns +// sys.Errno. A zero sys.Errno is success. +func openFile(path string, oflag sys.Oflag, perm fs.FileMode) (*os.File, sys.Errno) { + f, err := os.OpenFile(path, toOsOpenFlag(oflag), perm) + // Note: This does not return a sys.File because sys.FS that returns + // one may want to hide the real OS path. For example, this is needed for + // pre-opens. 
+ return f, sys.UnwrapOSError(err) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_sun.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_sun.go new file mode 100644 index 000000000..bdf7dd84d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_sun.go @@ -0,0 +1,31 @@ +//go:build illumos || solaris + +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +const supportedSyscallOflag = sys.O_DIRECTORY | sys.O_DSYNC | sys.O_NOFOLLOW | sys.O_NONBLOCK | sys.O_RSYNC + +func withSyscallOflag(oflag sys.Oflag, flag int) int { + if oflag&sys.O_DIRECTORY != 0 { + // See https://github.com/illumos/illumos-gate/blob/edd580643f2cf1434e252cd7779e83182ea84945/usr/src/uts/common/sys/fcntl.h#L90 + flag |= 0x1000000 + } + if oflag&sys.O_DSYNC != 0 { + flag |= syscall.O_DSYNC + } + if oflag&sys.O_NOFOLLOW != 0 { + flag |= syscall.O_NOFOLLOW + } + if oflag&sys.O_NONBLOCK != 0 { + flag |= syscall.O_NONBLOCK + } + if oflag&sys.O_RSYNC != 0 { + flag |= syscall.O_RSYNC + } + return flag +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_tinygo.go new file mode 100644 index 000000000..ccf6847c0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_tinygo.go @@ -0,0 +1,25 @@ +//go:build tinygo + +package sysfs + +import ( + "io/fs" + "os" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +const supportedSyscallOflag = sys.Oflag(0) + +func withSyscallOflag(oflag sys.Oflag, flag int) int { + // O_DIRECTORY not defined + // O_DSYNC not defined + // O_NOFOLLOW not defined + // O_NONBLOCK not defined + // O_RSYNC not defined + return flag +} + +func openFile(path string, oflag sys.Oflag, perm fs.FileMode) (*os.File, sys.Errno) { + return nil, sys.ENOSYS +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_unsupported.go 
b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_unsupported.go new file mode 100644 index 000000000..9f7a6d088 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_unsupported.go @@ -0,0 +1,18 @@ +//go:build !darwin && !linux && !windows && !illumos && !solaris && !freebsd + +package sysfs + +import ( + "github.com/tetratelabs/wazero/experimental/sys" +) + +const supportedSyscallOflag = sys.Oflag(0) + +func withSyscallOflag(oflag sys.Oflag, flag int) int { + // O_DIRECTORY not defined + // O_DSYNC not defined + // O_NOFOLLOW not defined + // O_NONBLOCK not defined + // O_RSYNC not defined + return flag +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_windows.go new file mode 100644 index 000000000..717f8598a --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/open_file_windows.go @@ -0,0 +1,161 @@ +package sysfs + +import ( + "io/fs" + "os" + "strings" + "syscall" + "unsafe" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func openFile(path string, oflag sys.Oflag, perm fs.FileMode) (*os.File, sys.Errno) { + isDir := oflag&sys.O_DIRECTORY > 0 + flag := toOsOpenFlag(oflag) + + // TODO: document why we are opening twice + fd, err := open(path, flag|syscall.O_CLOEXEC, uint32(perm)) + if err == nil { + return os.NewFile(uintptr(fd), path), 0 + } + + // TODO: Set FILE_SHARE_DELETE for directory as well. + f, err := os.OpenFile(path, flag, perm) + errno := sys.UnwrapOSError(err) + if errno == 0 { + return f, 0 + } + + switch errno { + case sys.EINVAL: + // WASI expects ENOTDIR for a file path with a trailing slash. + if strings.HasSuffix(path, "/") { + errno = sys.ENOTDIR + } + // To match expectations of WASI, e.g. TinyGo TestStatBadDir, return + // ENOENT, not ENOTDIR. 
+ case sys.ENOTDIR: + errno = sys.ENOENT + case sys.ENOENT: + if isSymlink(path) { + // Either symlink or hard link not found. We change the returned + // errno depending on if it is symlink or not to have consistent + // behavior across OSes. + if isDir { + // Dangling symlink dir must raise ENOTDIR. + errno = sys.ENOTDIR + } else { + errno = sys.ELOOP + } + } + } + return f, errno +} + +const supportedSyscallOflag = sys.O_NONBLOCK + +// Map to synthetic values here https://github.com/golang/go/blob/go1.20/src/syscall/types_windows.go#L34-L48 +func withSyscallOflag(oflag sys.Oflag, flag int) int { + // O_DIRECTORY not defined in windows + // O_DSYNC not defined in windows + // O_NOFOLLOW not defined in windows + if oflag&sys.O_NONBLOCK != 0 { + flag |= syscall.O_NONBLOCK + } + // O_RSYNC not defined in windows + return flag +} + +func isSymlink(path string) bool { + if st, e := os.Lstat(path); e == nil && st.Mode()&os.ModeSymlink != 0 { + return true + } + return false +} + +// # Differences from syscall.Open +// +// This code is based on syscall.Open from the below link with some differences +// https://github.com/golang/go/blame/go1.20/src/syscall/syscall_windows.go#L308-L379 +// +// - syscall.O_CREAT doesn't imply syscall.GENERIC_WRITE as that breaks +// flag expectations in wasi. +// - add support for setting FILE_SHARE_DELETE. 
+func open(path string, mode int, perm uint32) (fd syscall.Handle, err error) { + if len(path) == 0 { + return syscall.InvalidHandle, syscall.ERROR_FILE_NOT_FOUND + } + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return syscall.InvalidHandle, err + } + var access uint32 + switch mode & (syscall.O_RDONLY | syscall.O_WRONLY | syscall.O_RDWR) { + case syscall.O_RDONLY: + access = syscall.GENERIC_READ + case syscall.O_WRONLY: + access = syscall.GENERIC_WRITE + case syscall.O_RDWR: + access = syscall.GENERIC_READ | syscall.GENERIC_WRITE + } + if mode&syscall.O_APPEND != 0 { + access &^= syscall.GENERIC_WRITE + access |= syscall.FILE_APPEND_DATA + } + sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE | syscall.FILE_SHARE_DELETE) + var sa *syscall.SecurityAttributes + if mode&syscall.O_CLOEXEC == 0 { + var _sa syscall.SecurityAttributes + _sa.Length = uint32(unsafe.Sizeof(sa)) + _sa.InheritHandle = 1 + sa = &_sa + } + var createmode uint32 + switch { + case mode&(syscall.O_CREAT|syscall.O_EXCL) == (syscall.O_CREAT | syscall.O_EXCL): + createmode = syscall.CREATE_NEW + case mode&(syscall.O_CREAT|syscall.O_TRUNC) == (syscall.O_CREAT | syscall.O_TRUNC): + createmode = syscall.CREATE_ALWAYS + case mode&syscall.O_CREAT == syscall.O_CREAT: + createmode = syscall.OPEN_ALWAYS + case mode&syscall.O_TRUNC == syscall.O_TRUNC: + createmode = syscall.TRUNCATE_EXISTING + default: + createmode = syscall.OPEN_EXISTING + } + var attrs uint32 = syscall.FILE_ATTRIBUTE_NORMAL + if perm&syscall.S_IWRITE == 0 { + attrs = syscall.FILE_ATTRIBUTE_READONLY + if createmode == syscall.CREATE_ALWAYS { + // We have been asked to create a read-only file. + // If the file already exists, the semantics of + // the Unix open system call is to preserve the + // existing permissions. If we pass CREATE_ALWAYS + // and FILE_ATTRIBUTE_READONLY to CreateFile, + // and the file already exists, CreateFile will + // change the file permissions. 
+ // Avoid that to preserve the Unix semantics. + h, e := syscall.CreateFile(pathp, access, sharemode, sa, syscall.TRUNCATE_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0) + switch e { + case syscall.ERROR_FILE_NOT_FOUND, syscall.ERROR_PATH_NOT_FOUND: + // File does not exist. These are the same + // errors as Errno.Is checks for ErrNotExist. + // Carry on to create the file. + default: + // Success or some different error. + return h, e + } + } + } + + // This shouldn't be included before 1.20 to have consistent behavior. + // https://github.com/golang/go/commit/0f0aa5d8a6a0253627d58b3aa083b24a1091933f + if createmode == syscall.OPEN_EXISTING && access == syscall.GENERIC_READ { + // Necessary for opening directory handles. + attrs |= syscall.FILE_FLAG_BACKUP_SEMANTICS + } + + h, e := syscall.CreateFile(pathp, access, sharemode, sa, createmode, attrs, 0) + return h, e +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/osfile.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/osfile.go new file mode 100644 index 000000000..490f0fa68 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/osfile.go @@ -0,0 +1,295 @@ +package sysfs + +import ( + "io" + "io/fs" + "os" + "runtime" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" + "github.com/tetratelabs/wazero/sys" +) + +func newOsFile(path string, flag experimentalsys.Oflag, perm fs.FileMode, f *os.File) fsapi.File { + // Windows cannot read files written to a directory after it was opened. + // This was noticed in #1087 in zig tests. Use a flag instead of a + // different type. + reopenDir := runtime.GOOS == "windows" + return &osFile{path: path, flag: flag, perm: perm, reopenDir: reopenDir, file: f, fd: f.Fd()} +} + +// osFile is a file opened with this package, and uses os.File or syscalls to +// implement api.File. 
+type osFile struct { + path string + flag experimentalsys.Oflag + perm fs.FileMode + file *os.File + fd uintptr + + // reopenDir is true if reopen should be called before Readdir. This flag + // is deferred until Readdir to prevent redundant rewinds. This could + // happen if Seek(0) was called twice, or if in Windows, Seek(0) was called + // before Readdir. + reopenDir bool + + // closed is true when closed was called. This ensures proper sys.EBADF + closed bool + + // cachedStat includes fields that won't change while a file is open. + cachedSt *cachedStat +} + +// cachedStat returns the cacheable parts of sys.Stat_t or an error if they +// couldn't be retrieved. +func (f *osFile) cachedStat() (dev uint64, ino sys.Inode, isDir bool, errno experimentalsys.Errno) { + if f.cachedSt == nil { + if _, errno = f.Stat(); errno != 0 { + return + } + } + return f.cachedSt.dev, f.cachedSt.ino, f.cachedSt.isDir, 0 +} + +// Dev implements the same method as documented on sys.File +func (f *osFile) Dev() (uint64, experimentalsys.Errno) { + dev, _, _, errno := f.cachedStat() + return dev, errno +} + +// Ino implements the same method as documented on sys.File +func (f *osFile) Ino() (sys.Inode, experimentalsys.Errno) { + _, ino, _, errno := f.cachedStat() + return ino, errno +} + +// IsDir implements the same method as documented on sys.File +func (f *osFile) IsDir() (bool, experimentalsys.Errno) { + _, _, isDir, errno := f.cachedStat() + return isDir, errno +} + +// IsAppend implements File.IsAppend +func (f *osFile) IsAppend() bool { + return f.flag&experimentalsys.O_APPEND == experimentalsys.O_APPEND +} + +// SetAppend implements the same method as documented on sys.File +func (f *osFile) SetAppend(enable bool) (errno experimentalsys.Errno) { + if enable { + f.flag |= experimentalsys.O_APPEND + } else { + f.flag &= ^experimentalsys.O_APPEND + } + + // Clear any create or trunc flag, as we are re-opening, not re-creating. 
+ f.flag &= ^(experimentalsys.O_CREAT | experimentalsys.O_TRUNC) + + // appendMode (bool) cannot be changed later, so we have to re-open the + // file. https://github.com/golang/go/blob/go1.20/src/os/file_unix.go#L60 + return fileError(f, f.closed, f.reopen()) +} + +// compile-time check to ensure osFile.reopen implements reopenFile. +var _ reopenFile = (*osFile)(nil).reopen + +func (f *osFile) reopen() (errno experimentalsys.Errno) { + // Clear any create flag, as we are re-opening, not re-creating. + f.flag &= ^experimentalsys.O_CREAT + + var ( + isDir bool + offset int64 + err error + ) + + isDir, errno = f.IsDir() + if errno != 0 { + return errno + } + + if !isDir { + offset, err = f.file.Seek(0, io.SeekCurrent) + if err != nil { + return experimentalsys.UnwrapOSError(err) + } + } + + _ = f.close() + f.file, errno = OpenFile(f.path, f.flag, f.perm) + if errno != 0 { + return errno + } + + if !isDir { + _, err = f.file.Seek(offset, io.SeekStart) + if err != nil { + return experimentalsys.UnwrapOSError(err) + } + } + + return 0 +} + +// IsNonblock implements the same method as documented on fsapi.File +func (f *osFile) IsNonblock() bool { + return isNonblock(f) +} + +// SetNonblock implements the same method as documented on fsapi.File +func (f *osFile) SetNonblock(enable bool) (errno experimentalsys.Errno) { + if enable { + f.flag |= experimentalsys.O_NONBLOCK + } else { + f.flag &= ^experimentalsys.O_NONBLOCK + } + if errno = setNonblock(f.fd, enable); errno != 0 { + return fileError(f, f.closed, errno) + } + return 0 +} + +// Stat implements the same method as documented on sys.File +func (f *osFile) Stat() (sys.Stat_t, experimentalsys.Errno) { + if f.closed { + return sys.Stat_t{}, experimentalsys.EBADF + } + + st, errno := statFile(f.file) + switch errno { + case 0: + f.cachedSt = &cachedStat{dev: st.Dev, ino: st.Ino, isDir: st.Mode&fs.ModeDir == fs.ModeDir} + case experimentalsys.EIO: + errno = experimentalsys.EBADF + } + return st, errno +} + +// Read 
implements the same method as documented on sys.File +func (f *osFile) Read(buf []byte) (n int, errno experimentalsys.Errno) { + if len(buf) == 0 { + return 0, 0 // Short-circuit 0-len reads. + } + if nonBlockingFileReadSupported && f.IsNonblock() { + n, errno = readFd(f.fd, buf) + } else { + n, errno = read(f.file, buf) + } + if errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + return +} + +// Pread implements the same method as documented on sys.File +func (f *osFile) Pread(buf []byte, off int64) (n int, errno experimentalsys.Errno) { + if n, errno = pread(f.file, buf, off); errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + return +} + +// Seek implements the same method as documented on sys.File +func (f *osFile) Seek(offset int64, whence int) (newOffset int64, errno experimentalsys.Errno) { + if newOffset, errno = seek(f.file, offset, whence); errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + + // If the error was trying to rewind a directory, re-open it. Notably, + // seeking to zero on a directory doesn't work on Windows with Go 1.19. + if errno == experimentalsys.EISDIR && offset == 0 && whence == io.SeekStart { + errno = 0 + f.reopenDir = true + } + } + return +} + +// Poll implements the same method as documented on fsapi.File +func (f *osFile) Poll(flag fsapi.Pflag, timeoutMillis int32) (ready bool, errno experimentalsys.Errno) { + return poll(f.fd, flag, timeoutMillis) +} + +// Readdir implements File.Readdir. Notably, this uses "Readdir", not +// "ReadDir", from os.File. +func (f *osFile) Readdir(n int) (dirents []experimentalsys.Dirent, errno experimentalsys.Errno) { + if f.reopenDir { // re-open the directory if needed. 
+ f.reopenDir = false + if errno = adjustReaddirErr(f, f.closed, f.reopen()); errno != 0 { + return + } + } + + if dirents, errno = readdir(f.file, f.path, n); errno != 0 { + errno = adjustReaddirErr(f, f.closed, errno) + } + return +} + +// Write implements the same method as documented on sys.File +func (f *osFile) Write(buf []byte) (n int, errno experimentalsys.Errno) { + if len(buf) == 0 { + return 0, 0 // Short-circuit 0-len writes. + } + if nonBlockingFileWriteSupported && f.IsNonblock() { + n, errno = writeFd(f.fd, buf) + } else if n, errno = write(f.file, buf); errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + return +} + +// Pwrite implements the same method as documented on sys.File +func (f *osFile) Pwrite(buf []byte, off int64) (n int, errno experimentalsys.Errno) { + if n, errno = pwrite(f.file, buf, off); errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + return +} + +// Truncate implements the same method as documented on sys.File +func (f *osFile) Truncate(size int64) (errno experimentalsys.Errno) { + if errno = experimentalsys.UnwrapOSError(f.file.Truncate(size)); errno != 0 { + // Defer validation overhead until we've already had an error. 
+ errno = fileError(f, f.closed, errno) + } + return +} + +// Sync implements the same method as documented on sys.File +func (f *osFile) Sync() experimentalsys.Errno { + return fsync(f.file) +} + +// Datasync implements the same method as documented on sys.File +func (f *osFile) Datasync() experimentalsys.Errno { + return datasync(f.file) +} + +// Utimens implements the same method as documented on sys.File +func (f *osFile) Utimens(atim, mtim int64) experimentalsys.Errno { + if f.closed { + return experimentalsys.EBADF + } + + err := futimens(f.fd, atim, mtim) + return experimentalsys.UnwrapOSError(err) +} + +// Close implements the same method as documented on sys.File +func (f *osFile) Close() experimentalsys.Errno { + if f.closed { + return 0 + } + f.closed = true + return f.close() +} + +func (f *osFile) close() experimentalsys.Errno { + return experimentalsys.UnwrapOSError(f.file.Close()) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll.go new file mode 100644 index 000000000..a2e1103e0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll.go @@ -0,0 +1,18 @@ +//go:build windows || (linux && !tinygo) || darwin + +package sysfs + +import ( + "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" +) + +// poll implements `Poll` as documented on sys.File via a file descriptor. 
+func poll(fd uintptr, flag fsapi.Pflag, timeoutMillis int32) (ready bool, errno sys.Errno) { + if flag != fsapi.POLLIN { + return false, sys.ENOTSUP + } + fds := []pollFd{newPollFd(fd, _POLLIN, 0)} + count, errno := _poll(fds, timeoutMillis) + return count > 0, errno +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_darwin.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_darwin.go new file mode 100644 index 000000000..1f7f89093 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_darwin.go @@ -0,0 +1,55 @@ +package sysfs + +import ( + "unsafe" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +// pollFd is the struct to query for file descriptor events using poll. +type pollFd struct { + // fd is the file descriptor. + fd int32 + // events is a bitmap containing the requested events. + events int16 + // revents is a bitmap containing the returned events. + revents int16 +} + +// newPollFd is a constructor for pollFd that abstracts the platform-specific type of file descriptors. +func newPollFd(fd uintptr, events, revents int16) pollFd { + return pollFd{fd: int32(fd), events: events, revents: revents} +} + +// _POLLIN subscribes a notification when any readable data is available. +const _POLLIN = 0x0001 + +// _poll implements poll on Darwin via the corresponding libc function. +func _poll(fds []pollFd, timeoutMillis int32) (n int, errno sys.Errno) { + var fdptr *pollFd + nfds := len(fds) + if nfds > 0 { + fdptr = &fds[0] + } + n1, _, err := syscall_syscall6( + libc_poll_trampoline_addr, + uintptr(unsafe.Pointer(fdptr)), + uintptr(nfds), + uintptr(int(timeoutMillis)), + uintptr(unsafe.Pointer(nil)), + uintptr(unsafe.Pointer(nil)), + uintptr(unsafe.Pointer(nil))) + return int(n1), sys.UnwrapOSError(err) +} + +// libc_poll_trampoline_addr is the address of the +// `libc_poll_trampoline` symbol, defined in `poll_darwin.s`. 
+// +// We use this to invoke the syscall through syscall_syscall6 imported below. +var libc_poll_trampoline_addr uintptr + +// Imports the select symbol from libc as `libc_poll`. +// +// Note: CGO mechanisms are used in darwin regardless of the CGO_ENABLED value +// or the "cgo" build flag. See /RATIONALE.md for why. +//go:cgo_import_dynamic libc_poll poll "/usr/lib/libSystem.B.dylib" diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_darwin.s b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_darwin.s new file mode 100644 index 000000000..e04fca583 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_darwin.s @@ -0,0 +1,8 @@ +// lifted from golang.org/x/sys unix +#include "textflag.h" + +TEXT libc_poll_trampoline<>(SB), NOSPLIT, $0-0 + JMP libc_poll(SB) + +GLOBL ·libc_poll_trampoline_addr(SB), RODATA, $8 +DATA ·libc_poll_trampoline_addr(SB)/8, $libc_poll_trampoline<>(SB) diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_linux.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_linux.go new file mode 100644 index 000000000..49bf4fd06 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_linux.go @@ -0,0 +1,59 @@ +//go:build !tinygo + +package sysfs + +import ( + "syscall" + "time" + "unsafe" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +// pollFd is the struct to query for file descriptor events using poll. +type pollFd struct { + // fd is the file descriptor. + fd int32 + // events is a bitmap containing the requested events. + events int16 + // revents is a bitmap containing the returned events. + revents int16 +} + +// newPollFd is a constructor for pollFd that abstracts the platform-specific type of file descriptors. +func newPollFd(fd uintptr, events, revents int16) pollFd { + return pollFd{fd: int32(fd), events: events, revents: revents} +} + +// _POLLIN subscribes a notification when any readable data is available. 
+const _POLLIN = 0x0001 + +// _poll implements poll on Linux via ppoll. +func _poll(fds []pollFd, timeoutMillis int32) (n int, errno sys.Errno) { + var ts syscall.Timespec + if timeoutMillis >= 0 { + ts = syscall.NsecToTimespec(int64(time.Duration(timeoutMillis) * time.Millisecond)) + } + return ppoll(fds, &ts) +} + +// ppoll is a poll variant that allows to subscribe to a mask of signals. +// However, we do not need such mask, so the corresponding argument is always nil. +func ppoll(fds []pollFd, timespec *syscall.Timespec) (n int, err sys.Errno) { + var fdptr *pollFd + nfd := len(fds) + if nfd != 0 { + fdptr = &fds[0] + } + + n1, _, errno := syscall.Syscall6( + uintptr(syscall.SYS_PPOLL), + uintptr(unsafe.Pointer(fdptr)), + uintptr(nfd), + uintptr(unsafe.Pointer(timespec)), + uintptr(unsafe.Pointer(nil)), // sigmask is currently always ignored + uintptr(unsafe.Pointer(nil)), + uintptr(unsafe.Pointer(nil))) + + return int(n1), sys.UnwrapOSError(errno) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_unsupported.go new file mode 100644 index 000000000..2301a067e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_unsupported.go @@ -0,0 +1,13 @@ +//go:build !(linux || darwin || windows) || tinygo + +package sysfs + +import ( + "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" +) + +// poll implements `Poll` as documented on fsapi.File via a file descriptor. 
+func poll(uintptr, fsapi.Pflag, int32) (bool, sys.Errno) { + return false, sys.ENOSYS +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_windows.go new file mode 100644 index 000000000..82c8b2baf --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/poll_windows.go @@ -0,0 +1,224 @@ +package sysfs + +import ( + "syscall" + "time" + "unsafe" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +var ( + procWSAPoll = modws2_32.NewProc("WSAPoll") + procGetNamedPipeInfo = kernel32.NewProc("GetNamedPipeInfo") +) + +const ( + // _POLLRDNORM subscribes to normal data for read. + _POLLRDNORM = 0x0100 + // _POLLRDBAND subscribes to priority band (out-of-band) data for read. + _POLLRDBAND = 0x0200 + // _POLLIN subscribes a notification when any readable data is available. + _POLLIN = (_POLLRDNORM | _POLLRDBAND) +) + +// pollFd is the struct to query for file descriptor events using poll. +type pollFd struct { + // fd is the file descriptor. + fd uintptr + // events is a bitmap containing the requested events. + events int16 + // revents is a bitmap containing the returned events. + revents int16 +} + +// newPollFd is a constructor for pollFd that abstracts the platform-specific type of file descriptors. +func newPollFd(fd uintptr, events, revents int16) pollFd { + return pollFd{fd: fd, events: events, revents: revents} +} + +// pollInterval is the interval between each calls to peekNamedPipe in selectAllHandles +const pollInterval = 100 * time.Millisecond + +// _poll implements poll on Windows, for a subset of cases. +// +// fds may contain any number of file handles, but regular files and pipes are only processed for _POLLIN. +// Stdin is a pipe, thus it is checked for readiness when present. Pipes are checked using PeekNamedPipe. +// Regular files always immediately reported as ready, regardless their actual state and timeouts. 
+// +// If n==0 it will wait for the given timeout duration, but it will return sys.ENOSYS if timeout is nil, +// i.e. it won't block indefinitely. The given ctx is used to allow for cancellation, +// and it is currently used only in tests. +// +// The implementation actually polls every 100 milliseconds (pollInterval) until it reaches the +// given timeout (in millis). +// +// The duration may be negative, in which case it will wait indefinitely. The given ctx is +// used to allow for cancellation, and it is currently used only in tests. +func _poll(fds []pollFd, timeoutMillis int32) (n int, errno sys.Errno) { + if fds == nil { + return -1, sys.ENOSYS + } + + regular, pipes, sockets, errno := partionByFtype(fds) + nregular := len(regular) + if errno != 0 { + return -1, errno + } + + // Ticker that emits at every pollInterval. + tick := time.NewTicker(pollInterval) + tickCh := tick.C + defer tick.Stop() + + // Timer that expires after the given duration. + // Initialize afterCh as nil: the select below will wait forever. + var afterCh <-chan time.Time + if timeoutMillis >= 0 { + // If duration is not nil, instantiate the timer. + after := time.NewTimer(time.Duration(timeoutMillis) * time.Millisecond) + defer after.Stop() + afterCh = after.C + } + + npipes, nsockets, errno := peekAll(pipes, sockets) + if errno != 0 { + return -1, errno + } + count := nregular + npipes + nsockets + if count > 0 { + return count, 0 + } + + for { + select { + case <-afterCh: + return 0, 0 + case <-tickCh: + npipes, nsockets, errno := peekAll(pipes, sockets) + if errno != 0 { + return -1, errno + } + count = nregular + npipes + nsockets + if count > 0 { + return count, 0 + } + } + } +} + +func peekAll(pipes, sockets []pollFd) (npipes, nsockets int, errno sys.Errno) { + npipes, errno = peekPipes(pipes) + if errno != 0 { + return + } + + // Invoke wsaPoll with a 0-timeout to avoid blocking. + // Timeouts are handled in pollWithContext instead. 
+ nsockets, errno = wsaPoll(sockets, 0) + if errno != 0 { + return + } + + count := npipes + nsockets + if count > 0 { + return + } + + return +} + +func peekPipes(fds []pollFd) (n int, errno sys.Errno) { + for _, fd := range fds { + bytes, errno := peekNamedPipe(syscall.Handle(fd.fd)) + if errno != 0 { + return -1, sys.UnwrapOSError(errno) + } + if bytes > 0 { + n++ + } + } + return +} + +// wsaPoll is the WSAPoll function from winsock2. +// +// See https://learn.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-wsapoll +func wsaPoll(fds []pollFd, timeout int) (n int, errno sys.Errno) { + if len(fds) > 0 { + sockptr := &fds[0] + ns, _, e := syscall.SyscallN( + procWSAPoll.Addr(), + uintptr(unsafe.Pointer(sockptr)), + uintptr(len(fds)), + uintptr(timeout)) + if e != 0 { + return -1, sys.UnwrapOSError(e) + } + n = int(ns) + } + return +} + +// ftype is a type of file that can be handled by poll. +type ftype uint8 + +const ( + ftype_regular ftype = iota + ftype_pipe + ftype_socket +) + +// partionByFtype checks the type of each fd in fds and returns 3 distinct partitions +// for regular files, named pipes and sockets. +func partionByFtype(fds []pollFd) (regular, pipe, socket []pollFd, errno sys.Errno) { + for _, pfd := range fds { + t, errno := ftypeOf(pfd.fd) + if errno != 0 { + return nil, nil, nil, errno + } + switch t { + case ftype_regular: + regular = append(regular, pfd) + case ftype_pipe: + pipe = append(pipe, pfd) + case ftype_socket: + socket = append(socket, pfd) + } + } + return +} + +// ftypeOf checks the type of fd and return the corresponding ftype. 
+func ftypeOf(fd uintptr) (ftype, sys.Errno) { + h := syscall.Handle(fd) + t, err := syscall.GetFileType(h) + if err != nil { + return 0, sys.UnwrapOSError(err) + } + switch t { + case syscall.FILE_TYPE_CHAR, syscall.FILE_TYPE_DISK: + return ftype_regular, 0 + case syscall.FILE_TYPE_PIPE: + if isSocket(h) { + return ftype_socket, 0 + } else { + return ftype_pipe, 0 + } + default: + return ftype_regular, 0 + } +} + +// isSocket returns true if the given file handle +// is a pipe. +func isSocket(fd syscall.Handle) bool { + r, _, errno := syscall.SyscallN( + procGetNamedPipeInfo.Addr(), + uintptr(fd), + uintptr(unsafe.Pointer(nil)), + uintptr(unsafe.Pointer(nil)), + uintptr(unsafe.Pointer(nil)), + uintptr(unsafe.Pointer(nil))) + return r == 0 || errno != 0 +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/readfs.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/readfs.go new file mode 100644 index 000000000..59e331a29 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/readfs.go @@ -0,0 +1,117 @@ +package sysfs + +import ( + "io/fs" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" +) + +type ReadFS struct { + experimentalsys.FS +} + +// OpenFile implements the same method as documented on sys.FS +func (r *ReadFS) OpenFile(path string, flag experimentalsys.Oflag, perm fs.FileMode) (experimentalsys.File, experimentalsys.Errno) { + // Mask the mutually exclusive bits as they determine write mode. + switch flag & (experimentalsys.O_RDONLY | experimentalsys.O_WRONLY | experimentalsys.O_RDWR) { + case experimentalsys.O_WRONLY, experimentalsys.O_RDWR: + // Return the correct error if a directory was opened for write. + if flag&experimentalsys.O_DIRECTORY != 0 { + return nil, experimentalsys.EISDIR + } + return nil, experimentalsys.ENOSYS + default: // sys.O_RDONLY (integer zero) so we are ok! 
+ } + + f, errno := r.FS.OpenFile(path, flag, perm) + if errno != 0 { + return nil, errno + } + return &readFile{f}, 0 +} + +// Mkdir implements the same method as documented on sys.FS +func (r *ReadFS) Mkdir(path string, perm fs.FileMode) experimentalsys.Errno { + return experimentalsys.EROFS +} + +// Chmod implements the same method as documented on sys.FS +func (r *ReadFS) Chmod(path string, perm fs.FileMode) experimentalsys.Errno { + return experimentalsys.EROFS +} + +// Rename implements the same method as documented on sys.FS +func (r *ReadFS) Rename(from, to string) experimentalsys.Errno { + return experimentalsys.EROFS +} + +// Rmdir implements the same method as documented on sys.FS +func (r *ReadFS) Rmdir(path string) experimentalsys.Errno { + return experimentalsys.EROFS +} + +// Link implements the same method as documented on sys.FS +func (r *ReadFS) Link(_, _ string) experimentalsys.Errno { + return experimentalsys.EROFS +} + +// Symlink implements the same method as documented on sys.FS +func (r *ReadFS) Symlink(_, _ string) experimentalsys.Errno { + return experimentalsys.EROFS +} + +// Unlink implements the same method as documented on sys.FS +func (r *ReadFS) Unlink(path string) experimentalsys.Errno { + return experimentalsys.EROFS +} + +// Utimens implements the same method as documented on sys.FS +func (r *ReadFS) Utimens(path string, atim, mtim int64) experimentalsys.Errno { + return experimentalsys.EROFS +} + +// compile-time check to ensure readFile implements api.File. +var _ experimentalsys.File = (*readFile)(nil) + +type readFile struct { + experimentalsys.File +} + +// Write implements the same method as documented on sys.File. +func (r *readFile) Write([]byte) (int, experimentalsys.Errno) { + return 0, r.writeErr() +} + +// Pwrite implements the same method as documented on sys.File. 
+func (r *readFile) Pwrite([]byte, int64) (n int, errno experimentalsys.Errno) { + return 0, r.writeErr() +} + +// Truncate implements the same method as documented on sys.File. +func (r *readFile) Truncate(int64) experimentalsys.Errno { + return r.writeErr() +} + +// Sync implements the same method as documented on sys.File. +func (r *readFile) Sync() experimentalsys.Errno { + return experimentalsys.EBADF +} + +// Datasync implements the same method as documented on sys.File. +func (r *readFile) Datasync() experimentalsys.Errno { + return experimentalsys.EBADF +} + +// Utimens implements the same method as documented on sys.File. +func (r *readFile) Utimens(int64, int64) experimentalsys.Errno { + return experimentalsys.EBADF +} + +func (r *readFile) writeErr() experimentalsys.Errno { + if isDir, errno := r.IsDir(); errno != 0 { + return errno + } else if isDir { + return experimentalsys.EISDIR + } + return experimentalsys.EBADF +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/rename.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/rename.go new file mode 100644 index 000000000..37c53571d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/rename.go @@ -0,0 +1,16 @@ +//go:build !windows && !plan9 && !tinygo + +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func rename(from, to string) sys.Errno { + if from == to { + return 0 + } + return sys.UnwrapOSError(syscall.Rename(from, to)) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/rename_plan9.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/rename_plan9.go new file mode 100644 index 000000000..474cc7595 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/rename_plan9.go @@ -0,0 +1,14 @@ +package sysfs + +import ( + "os" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func rename(from, to string) sys.Errno { + if from == to { + return 0 + } + return 
sys.UnwrapOSError(os.Rename(from, to)) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/rename_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/rename_windows.go new file mode 100644 index 000000000..5e8102239 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/rename_windows.go @@ -0,0 +1,55 @@ +package sysfs + +import ( + "os" + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func rename(from, to string) sys.Errno { + if from == to { + return 0 + } + + var fromIsDir, toIsDir bool + if fromStat, errno := stat(from); errno != 0 { + return errno // failed to stat from + } else { + fromIsDir = fromStat.Mode.IsDir() + } + if toStat, errno := stat(to); errno == sys.ENOENT { + return syscallRename(from, to) // file or dir to not-exist is ok + } else if errno != 0 { + return errno // failed to stat to + } else { + toIsDir = toStat.Mode.IsDir() + } + + // Now, handle known cases + switch { + case !fromIsDir && toIsDir: // file to dir + return sys.EISDIR + case !fromIsDir && !toIsDir: // file to file + // Use os.Rename instead of syscall.Rename to overwrite a file. + // This uses MoveFileEx instead of MoveFile (used by syscall.Rename). + return sys.UnwrapOSError(os.Rename(from, to)) + case fromIsDir && !toIsDir: // dir to file + return sys.ENOTDIR + default: // dir to dir + + // We can't tell if a directory is empty or not, via stat information. + // Reading the directory is expensive, as it can buffer large amounts + // of data on fail. Instead, speculatively try to remove the directory. + // This is only one syscall and won't buffer anything. 
+ if errno := rmdir(to); errno == 0 || errno == sys.ENOENT { + return syscallRename(from, to) + } else { + return errno + } + } +} + +func syscallRename(from string, to string) sys.Errno { + return sys.UnwrapOSError(syscall.Rename(from, to)) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock.go new file mode 100644 index 000000000..ab9bb1ffa --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock.go @@ -0,0 +1,187 @@ +package sysfs + +import ( + "net" + "os" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" + socketapi "github.com/tetratelabs/wazero/internal/sock" + "github.com/tetratelabs/wazero/sys" +) + +// NewTCPListenerFile creates a socketapi.TCPSock for a given *net.TCPListener. +func NewTCPListenerFile(tl *net.TCPListener) socketapi.TCPSock { + return newTCPListenerFile(tl) +} + +// baseSockFile implements base behavior for all TCPSock, TCPConn files, +// regardless the platform. +type baseSockFile struct { + experimentalsys.UnimplementedFile +} + +var _ experimentalsys.File = (*baseSockFile)(nil) + +// IsDir implements the same method as documented on File.IsDir +func (*baseSockFile) IsDir() (bool, experimentalsys.Errno) { + // We need to override this method because WASI-libc prestats the FD + // and the default impl returns ENOSYS otherwise. + return false, 0 +} + +// Stat implements the same method as documented on File.Stat +func (f *baseSockFile) Stat() (fs sys.Stat_t, errno experimentalsys.Errno) { + // The mode is not really important, but it should be neither a regular file nor a directory. + fs.Mode = os.ModeIrregular + return +} + +var _ socketapi.TCPSock = (*tcpListenerFile)(nil) + +type tcpListenerFile struct { + baseSockFile + + tl *net.TCPListener + closed bool + nonblock bool +} + +// newTCPListenerFile is a constructor for a socketapi.TCPSock. 
+// +// The current strategy is to wrap a net.TCPListener +// and invoking raw syscalls using syscallConnControl: +// this internal calls RawConn.Control(func(fd)), making sure +// that the underlying file descriptor is valid throughout +// the duration of the syscall. +func newDefaultTCPListenerFile(tl *net.TCPListener) socketapi.TCPSock { + return &tcpListenerFile{tl: tl} +} + +// Close implements the same method as documented on experimentalsys.File +func (f *tcpListenerFile) Close() experimentalsys.Errno { + if !f.closed { + return experimentalsys.UnwrapOSError(f.tl.Close()) + } + return 0 +} + +// Addr is exposed for testing. +func (f *tcpListenerFile) Addr() *net.TCPAddr { + return f.tl.Addr().(*net.TCPAddr) +} + +// IsNonblock implements the same method as documented on fsapi.File +func (f *tcpListenerFile) IsNonblock() bool { + return f.nonblock +} + +// Poll implements the same method as documented on fsapi.File +func (f *tcpListenerFile) Poll(flag fsapi.Pflag, timeoutMillis int32) (ready bool, errno experimentalsys.Errno) { + return false, experimentalsys.ENOSYS +} + +var _ socketapi.TCPConn = (*tcpConnFile)(nil) + +type tcpConnFile struct { + baseSockFile + + tc *net.TCPConn + + // nonblock is true when the underlying connection is flagged as non-blocking. + // This ensures that reads and writes return experimentalsys.EAGAIN without blocking the caller. + nonblock bool + // closed is true when closed was called. This ensures proper experimentalsys.EBADF + closed bool +} + +func newTcpConn(tc *net.TCPConn) socketapi.TCPConn { + return &tcpConnFile{tc: tc} +} + +// Read implements the same method as documented on experimentalsys.File +func (f *tcpConnFile) Read(buf []byte) (n int, errno experimentalsys.Errno) { + if len(buf) == 0 { + return 0, 0 // Short-circuit 0-len reads. 
+ } + if nonBlockingFileReadSupported && f.IsNonblock() { + n, errno = syscallConnControl(f.tc, func(fd uintptr) (int, experimentalsys.Errno) { + n, err := readSocket(fd, buf) + errno = experimentalsys.UnwrapOSError(err) + errno = fileError(f, f.closed, errno) + return n, errno + }) + } else { + n, errno = read(f.tc, buf) + } + if errno != 0 { + // Defer validation overhead until we've already had an error. + errno = fileError(f, f.closed, errno) + } + return +} + +// Write implements the same method as documented on experimentalsys.File +func (f *tcpConnFile) Write(buf []byte) (n int, errno experimentalsys.Errno) { + if nonBlockingFileWriteSupported && f.IsNonblock() { + return syscallConnControl(f.tc, func(fd uintptr) (int, experimentalsys.Errno) { + n, err := writeSocket(fd, buf) + errno = experimentalsys.UnwrapOSError(err) + errno = fileError(f, f.closed, errno) + return n, errno + }) + } else { + n, errno = write(f.tc, buf) + } + if errno != 0 { + // Defer validation overhead until we've already had an error. 
+ errno = fileError(f, f.closed, errno) + } + return +} + +// Recvfrom implements the same method as documented on socketapi.TCPConn +func (f *tcpConnFile) Recvfrom(p []byte, flags int) (n int, errno experimentalsys.Errno) { + if flags != MSG_PEEK { + errno = experimentalsys.EINVAL + return + } + return syscallConnControl(f.tc, func(fd uintptr) (int, experimentalsys.Errno) { + n, err := recvfrom(fd, p, MSG_PEEK) + errno = experimentalsys.UnwrapOSError(err) + errno = fileError(f, f.closed, errno) + return n, errno + }) +} + +// Close implements the same method as documented on experimentalsys.File +func (f *tcpConnFile) Close() experimentalsys.Errno { + return f.close() +} + +func (f *tcpConnFile) close() experimentalsys.Errno { + if f.closed { + return 0 + } + f.closed = true + return f.Shutdown(socketapi.SHUT_RDWR) +} + +// SetNonblock implements the same method as documented on fsapi.File +func (f *tcpConnFile) SetNonblock(enabled bool) (errno experimentalsys.Errno) { + f.nonblock = enabled + _, errno = syscallConnControl(f.tc, func(fd uintptr) (int, experimentalsys.Errno) { + return 0, experimentalsys.UnwrapOSError(setNonblockSocket(fd, enabled)) + }) + return +} + +// IsNonblock implements the same method as documented on fsapi.File +func (f *tcpConnFile) IsNonblock() bool { + return f.nonblock +} + +// Poll implements the same method as documented on fsapi.File +func (f *tcpConnFile) Poll(flag fsapi.Pflag, timeoutMillis int32) (ready bool, errno experimentalsys.Errno) { + return false, experimentalsys.ENOSYS +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_supported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_supported.go new file mode 100644 index 000000000..6c976fb86 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_supported.go @@ -0,0 +1,77 @@ +//go:build (linux || darwin || windows) && !tinygo + +package sysfs + +import ( + "net" + "syscall" + + experimentalsys 
"github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" + socketapi "github.com/tetratelabs/wazero/internal/sock" +) + +// Accept implements the same method as documented on socketapi.TCPSock +func (f *tcpListenerFile) Accept() (socketapi.TCPConn, experimentalsys.Errno) { + // Ensure we have an incoming connection, otherwise return immediately. + if f.nonblock { + if ready, errno := _pollSock(f.tl, fsapi.POLLIN, 0); !ready || errno != 0 { + return nil, experimentalsys.EAGAIN + } + } + + // Accept normally blocks goroutines, but we + // made sure that we have an incoming connection, + // so we should be safe. + if conn, err := f.tl.Accept(); err != nil { + return nil, experimentalsys.UnwrapOSError(err) + } else { + return newTcpConn(conn.(*net.TCPConn)), 0 + } +} + +// SetNonblock implements the same method as documented on fsapi.File +func (f *tcpListenerFile) SetNonblock(enabled bool) (errno experimentalsys.Errno) { + f.nonblock = enabled + _, errno = syscallConnControl(f.tl, func(fd uintptr) (int, experimentalsys.Errno) { + return 0, setNonblockSocket(fd, enabled) + }) + return +} + +// Shutdown implements the same method as documented on experimentalsys.Conn +func (f *tcpConnFile) Shutdown(how int) experimentalsys.Errno { + // FIXME: can userland shutdown listeners? + var err error + switch how { + case socketapi.SHUT_RD: + err = f.tc.CloseRead() + case socketapi.SHUT_WR: + err = f.tc.CloseWrite() + case socketapi.SHUT_RDWR: + return f.close() + default: + return experimentalsys.EINVAL + } + return experimentalsys.UnwrapOSError(err) +} + +// syscallConnControl extracts a syscall.RawConn from the given syscall.Conn and applies +// the given fn to a file descriptor, returning an integer or a nonzero syscall.Errno on failure. 
+// +// syscallConnControl streamlines the pattern of extracting the syscall.Rawconn, +// invoking its syscall.RawConn.Control method, then handling properly the errors that may occur +// within fn or returned by syscall.RawConn.Control itself. +func syscallConnControl(conn syscall.Conn, fn func(fd uintptr) (int, experimentalsys.Errno)) (n int, errno experimentalsys.Errno) { + syscallConn, err := conn.SyscallConn() + if err != nil { + return 0, experimentalsys.UnwrapOSError(err) + } + // Prioritize the inner errno over Control + if controlErr := syscallConn.Control(func(fd uintptr) { + n, errno = fn(fd) + }); errno == 0 { + errno = experimentalsys.UnwrapOSError(controlErr) + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_unix.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_unix.go new file mode 100644 index 000000000..99ef018a4 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_unix.go @@ -0,0 +1,49 @@ +//go:build (linux || darwin) && !tinygo + +package sysfs + +import ( + "net" + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" + socketapi "github.com/tetratelabs/wazero/internal/sock" +) + +// MSG_PEEK is the constant syscall.MSG_PEEK +const MSG_PEEK = syscall.MSG_PEEK + +func newTCPListenerFile(tl *net.TCPListener) socketapi.TCPSock { + return newDefaultTCPListenerFile(tl) +} + +func _pollSock(conn syscall.Conn, flag fsapi.Pflag, timeoutMillis int32) (bool, sys.Errno) { + n, errno := syscallConnControl(conn, func(fd uintptr) (int, sys.Errno) { + if ready, errno := poll(fd, fsapi.POLLIN, 0); !ready || errno != 0 { + return -1, errno + } else { + return 0, errno + } + }) + return n >= 0, errno +} + +func setNonblockSocket(fd uintptr, enabled bool) sys.Errno { + return sys.UnwrapOSError(setNonblock(fd, enabled)) +} + +func readSocket(fd uintptr, buf []byte) (int, sys.Errno) { + n, err := syscall.Read(int(fd), buf) + return n, 
sys.UnwrapOSError(err) +} + +func writeSocket(fd uintptr, buf []byte) (int, sys.Errno) { + n, err := syscall.Write(int(fd), buf) + return n, sys.UnwrapOSError(err) +} + +func recvfrom(fd uintptr, buf []byte, flags int32) (n int, errno sys.Errno) { + n, _, err := syscall.Recvfrom(int(fd), buf, int(flags)) + return n, sys.UnwrapOSError(err) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_unsupported.go new file mode 100644 index 000000000..8c27fed7f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_unsupported.go @@ -0,0 +1,81 @@ +//go:build (!linux && !darwin && !windows) || tinygo + +package sysfs + +import ( + "net" + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" + socketapi "github.com/tetratelabs/wazero/internal/sock" +) + +// MSG_PEEK is a filler value. 
+const MSG_PEEK = 0x2 + +func newTCPListenerFile(tl *net.TCPListener) socketapi.TCPSock { + return &unsupportedSockFile{} +} + +type unsupportedSockFile struct { + baseSockFile +} + +// Accept implements the same method as documented on socketapi.TCPSock +func (f *unsupportedSockFile) Accept() (socketapi.TCPConn, sys.Errno) { + return nil, sys.ENOSYS +} + +func _pollSock(conn syscall.Conn, flag fsapi.Pflag, timeoutMillis int32) (bool, sys.Errno) { + return false, sys.ENOTSUP +} + +func setNonblockSocket(fd uintptr, enabled bool) sys.Errno { + return sys.ENOTSUP +} + +func readSocket(fd uintptr, buf []byte) (int, sys.Errno) { + return -1, sys.ENOTSUP +} + +func writeSocket(fd uintptr, buf []byte) (int, sys.Errno) { + return -1, sys.ENOTSUP +} + +func recvfrom(fd uintptr, buf []byte, flags int32) (n int, errno sys.Errno) { + return -1, sys.ENOTSUP +} + +// syscallConnControl extracts a syscall.RawConn from the given syscall.Conn and applies +// the given fn to a file descriptor, returning an integer or a nonzero syscall.Errno on failure. +// +// syscallConnControl streamlines the pattern of extracting the syscall.Rawconn, +// invoking its syscall.RawConn.Control method, then handling properly the errors that may occur +// within fn or returned by syscall.RawConn.Control itself. +func syscallConnControl(conn syscall.Conn, fn func(fd uintptr) (int, experimentalsys.Errno)) (n int, errno sys.Errno) { + return -1, sys.ENOTSUP +} + +// Accept implements the same method as documented on socketapi.TCPSock +func (f *tcpListenerFile) Accept() (socketapi.TCPConn, experimentalsys.Errno) { + return nil, experimentalsys.ENOSYS +} + +// Shutdown implements the same method as documented on experimentalsys.Conn +func (f *tcpConnFile) Shutdown(how int) experimentalsys.Errno { + // FIXME: can userland shutdown listeners? 
+ var err error + switch how { + case socketapi.SHUT_RD: + err = f.tc.Close() + case socketapi.SHUT_WR: + err = f.tc.Close() + case socketapi.SHUT_RDWR: + return f.close() + default: + return experimentalsys.EINVAL + } + return experimentalsys.UnwrapOSError(err) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_windows.go new file mode 100644 index 000000000..703df42fc --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sock_windows.go @@ -0,0 +1,80 @@ +//go:build windows + +package sysfs + +import ( + "net" + "syscall" + "unsafe" + + "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/internal/fsapi" + socketapi "github.com/tetratelabs/wazero/internal/sock" +) + +const ( + // MSG_PEEK is the flag PEEK for syscall.Recvfrom on Windows. + // This constant is not exported on this platform. + MSG_PEEK = 0x2 + // _FIONBIO is the flag to set the O_NONBLOCK flag on socket handles using ioctlsocket. + _FIONBIO = 0x8004667e +) + +var ( + // modws2_32 is WinSock. + modws2_32 = syscall.NewLazyDLL("ws2_32.dll") + // procrecvfrom exposes recvfrom from WinSock. + procrecvfrom = modws2_32.NewProc("recvfrom") + // procioctlsocket exposes ioctlsocket from WinSock. + procioctlsocket = modws2_32.NewProc("ioctlsocket") +) + +func newTCPListenerFile(tl *net.TCPListener) socketapi.TCPSock { + return newDefaultTCPListenerFile(tl) +} + +// recvfrom exposes the underlying syscall in Windows. +// +// Note: since we are only using this to expose MSG_PEEK, +// we do not need really need all the parameters that are actually +// allowed in WinSock. +// We ignore `from *sockaddr` and `fromlen *int`. 
+func recvfrom(s uintptr, buf []byte, flags int32) (n int, errno sys.Errno) { + var _p0 *byte + if len(buf) > 0 { + _p0 = &buf[0] + } + r0, _, e1 := syscall.SyscallN( + procrecvfrom.Addr(), + s, + uintptr(unsafe.Pointer(_p0)), + uintptr(len(buf)), + uintptr(flags), + 0, // from *sockaddr (optional) + 0) // fromlen *int (optional) + return int(r0), sys.UnwrapOSError(e1) +} + +func setNonblockSocket(fd uintptr, enabled bool) sys.Errno { + opt := uint64(0) + if enabled { + opt = 1 + } + // ioctlsocket(fd, FIONBIO, &opt) + _, _, errno := syscall.SyscallN( + procioctlsocket.Addr(), + uintptr(fd), + uintptr(_FIONBIO), + uintptr(unsafe.Pointer(&opt))) + return sys.UnwrapOSError(errno) +} + +func _pollSock(conn syscall.Conn, flag fsapi.Pflag, timeoutMillis int32) (bool, sys.Errno) { + if flag != fsapi.POLLIN { + return false, sys.ENOTSUP + } + n, errno := syscallConnControl(conn, func(fd uintptr) (int, sys.Errno) { + return _poll([]pollFd{newPollFd(fd, _POLLIN, 0)}, timeoutMillis) + }) + return n > 0, errno +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat.go new file mode 100644 index 000000000..2d973b16c --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat.go @@ -0,0 +1,16 @@ +package sysfs + +import ( + "io/fs" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +func defaultStatFile(f fs.File) (sys.Stat_t, experimentalsys.Errno) { + if info, err := f.Stat(); err != nil { + return sys.Stat_t{}, experimentalsys.UnwrapOSError(err) + } else { + return sys.NewStat_t(info), 0 + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_bsd.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_bsd.go new file mode 100644 index 000000000..254e204cd --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_bsd.go @@ -0,0 +1,37 @@ +//go:build (amd64 || arm64) && (darwin 
|| freebsd) + +package sysfs + +import ( + "io/fs" + "os" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +// dirNlinkIncludesDot is true because even though os.File filters out dot +// entries, the underlying syscall.Stat includes them. +// +// Note: this is only used in tests +const dirNlinkIncludesDot = true + +func lstat(path string) (sys.Stat_t, experimentalsys.Errno) { + if info, err := os.Lstat(path); err != nil { + return sys.Stat_t{}, experimentalsys.UnwrapOSError(err) + } else { + return sys.NewStat_t(info), 0 + } +} + +func stat(path string) (sys.Stat_t, experimentalsys.Errno) { + if info, err := os.Stat(path); err != nil { + return sys.Stat_t{}, experimentalsys.UnwrapOSError(err) + } else { + return sys.NewStat_t(info), 0 + } +} + +func statFile(f fs.File) (sys.Stat_t, experimentalsys.Errno) { + return defaultStatFile(f) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_linux.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_linux.go new file mode 100644 index 000000000..fd289756d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_linux.go @@ -0,0 +1,40 @@ +//go:build (amd64 || arm64 || riscv64) && linux + +// Note: This expression is not the same as compiler support, even if it looks +// similar. Platform functions here are used in interpreter mode as well. + +package sysfs + +import ( + "io/fs" + "os" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +// dirNlinkIncludesDot is true because even though os.File filters out dot +// entries, the underlying syscall.Stat includes them. 
+// +// Note: this is only used in tests +const dirNlinkIncludesDot = true + +func lstat(path string) (sys.Stat_t, experimentalsys.Errno) { + if info, err := os.Lstat(path); err != nil { + return sys.Stat_t{}, experimentalsys.UnwrapOSError(err) + } else { + return sys.NewStat_t(info), 0 + } +} + +func stat(path string) (sys.Stat_t, experimentalsys.Errno) { + if info, err := os.Stat(path); err != nil { + return sys.Stat_t{}, experimentalsys.UnwrapOSError(err) + } else { + return sys.NewStat_t(info), 0 + } +} + +func statFile(f fs.File) (sys.Stat_t, experimentalsys.Errno) { + return defaultStatFile(f) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_unsupported.go new file mode 100644 index 000000000..4b05a8977 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_unsupported.go @@ -0,0 +1,40 @@ +//go:build (!((amd64 || arm64 || riscv64) && linux) && !((amd64 || arm64) && (darwin || freebsd)) && !((amd64 || arm64) && windows)) || js + +package sysfs + +import ( + "io/fs" + "os" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +// Note: go:build constraints must be the same as /sys.stat_unsupported.go for +// the same reasons. + +// dirNlinkIncludesDot might be true for some operating systems, which can have +// new stat_XX.go files as necessary. 
+// +// Note: this is only used in tests +const dirNlinkIncludesDot = false + +func lstat(path string) (sys.Stat_t, experimentalsys.Errno) { + if info, err := os.Lstat(path); err != nil { + return sys.Stat_t{}, experimentalsys.UnwrapOSError(err) + } else { + return sys.NewStat_t(info), 0 + } +} + +func stat(path string) (sys.Stat_t, experimentalsys.Errno) { + if info, err := os.Stat(path); err != nil { + return sys.Stat_t{}, experimentalsys.UnwrapOSError(err) + } else { + return sys.NewStat_t(info), 0 + } +} + +func statFile(f fs.File) (sys.Stat_t, experimentalsys.Errno) { + return defaultStatFile(f) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_windows.go new file mode 100644 index 000000000..4456dd782 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/stat_windows.go @@ -0,0 +1,120 @@ +//go:build (amd64 || arm64) && windows + +package sysfs + +import ( + "io/fs" + "syscall" + + experimentalsys "github.com/tetratelabs/wazero/experimental/sys" + "github.com/tetratelabs/wazero/sys" +) + +// dirNlinkIncludesDot is false because Windows does not return dot entries. +// +// Note: this is only used in tests +const dirNlinkIncludesDot = false + +func lstat(path string) (sys.Stat_t, experimentalsys.Errno) { + attrs := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS) + // Use FILE_FLAG_OPEN_REPARSE_POINT, otherwise CreateFile will follow symlink. 
+ // See https://docs.microsoft.com/en-us/windows/desktop/FileIO/symbolic-link-effects-on-file-systems-functions#createfile-and-createfiletransacted + attrs |= syscall.FILE_FLAG_OPEN_REPARSE_POINT + return statPath(attrs, path) +} + +func stat(path string) (sys.Stat_t, experimentalsys.Errno) { + attrs := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS) + return statPath(attrs, path) +} + +func statPath(createFileAttrs uint32, path string) (sys.Stat_t, experimentalsys.Errno) { + if len(path) == 0 { + return sys.Stat_t{}, experimentalsys.ENOENT + } + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return sys.Stat_t{}, experimentalsys.EINVAL + } + + // open the file handle + h, err := syscall.CreateFile(pathp, 0, 0, nil, + syscall.OPEN_EXISTING, createFileAttrs, 0) + if err != nil { + errno := experimentalsys.UnwrapOSError(err) + // To match expectations of WASI, e.g. TinyGo TestStatBadDir, return + // ENOENT, not ENOTDIR. + if errno == experimentalsys.ENOTDIR { + errno = experimentalsys.ENOENT + } + return sys.Stat_t{}, errno + } + defer syscall.CloseHandle(h) + + return statHandle(h) +} + +// fdFile allows masking the `Fd` function on os.File. +type fdFile interface { + Fd() uintptr +} + +func statFile(f fs.File) (sys.Stat_t, experimentalsys.Errno) { + if osF, ok := f.(fdFile); ok { + // Attempt to get the stat by handle, which works for normal files + st, err := statHandle(syscall.Handle(osF.Fd())) + + // ERROR_INVALID_HANDLE happens before Go 1.20. Don't fail as we only + // use that approach to fill in inode data, which is not critical. + // + // Note: statHandle uses UnwrapOSError which coerces + // ERROR_INVALID_HANDLE to EBADF. 
+ if err != experimentalsys.EBADF { + return st, err + } + } + return defaultStatFile(f) +} + +func statHandle(h syscall.Handle) (sys.Stat_t, experimentalsys.Errno) { + winFt, err := syscall.GetFileType(h) + if err != nil { + return sys.Stat_t{}, experimentalsys.UnwrapOSError(err) + } + + var fi syscall.ByHandleFileInformation + if err = syscall.GetFileInformationByHandle(h, &fi); err != nil { + return sys.Stat_t{}, experimentalsys.UnwrapOSError(err) + } + + var m fs.FileMode + if fi.FileAttributes&syscall.FILE_ATTRIBUTE_READONLY != 0 { + m |= 0o444 + } else { + m |= 0o666 + } + + switch { // check whether this is a symlink first + case fi.FileAttributes&syscall.FILE_ATTRIBUTE_REPARSE_POINT != 0: + m |= fs.ModeSymlink + case winFt == syscall.FILE_TYPE_PIPE: + m |= fs.ModeNamedPipe + case winFt == syscall.FILE_TYPE_CHAR: + m |= fs.ModeDevice | fs.ModeCharDevice + case fi.FileAttributes&syscall.FILE_ATTRIBUTE_DIRECTORY != 0: + m |= fs.ModeDir | 0o111 // e.g. 0o444 -> 0o555 + } + + st := sys.Stat_t{} + // FileIndex{High,Low} can be combined and used as a unique identifier like inode. 
+ // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/ns-fileapi-by_handle_file_information + st.Dev = uint64(fi.VolumeSerialNumber) + st.Ino = (uint64(fi.FileIndexHigh) << 32) | uint64(fi.FileIndexLow) + st.Mode = m + st.Nlink = uint64(fi.NumberOfLinks) + st.Size = int64(fi.FileSizeHigh)<<32 + int64(fi.FileSizeLow) + st.Atim = fi.LastAccessTime.Nanoseconds() + st.Mtim = fi.LastWriteTime.Nanoseconds() + st.Ctim = fi.CreationTime.Nanoseconds() + return st, 0 +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/sync.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sync.go new file mode 100644 index 000000000..86f9a0865 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sync.go @@ -0,0 +1,13 @@ +//go:build !windows + +package sysfs + +import ( + "os" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func fsync(f *os.File) sys.Errno { + return sys.UnwrapOSError(f.Sync()) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/sync_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sync_windows.go new file mode 100644 index 000000000..f288eb25b --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sync_windows.go @@ -0,0 +1,20 @@ +package sysfs + +import ( + "os" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func fsync(f *os.File) sys.Errno { + errno := sys.UnwrapOSError(f.Sync()) + // Coerce error performing stat on a directory to 0, as it won't work + // on Windows. 
+ switch errno { + case sys.EACCES /* Go 1.20 */, sys.EBADF /* Go 1.19 */ : + if st, err := f.Stat(); err == nil && st.IsDir() { + errno = 0 + } + } + return errno +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/syscall6_darwin.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/syscall6_darwin.go new file mode 100644 index 000000000..9fde5baa5 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/syscall6_darwin.go @@ -0,0 +1,13 @@ +package sysfs + +import ( + "syscall" + _ "unsafe" +) + +// syscall_syscall6 is a private symbol that we link below. We need to use this +// instead of syscall.Syscall6 because the public syscall.Syscall6 won't work +// when fn is an address. +// +//go:linkname syscall_syscall6 syscall.syscall6 +func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno) diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/sysfs.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sysfs.go new file mode 100644 index 000000000..dd0a8882e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/sysfs.go @@ -0,0 +1,6 @@ +// Package sysfs includes a low-level filesystem interface and utilities needed +// for WebAssembly host functions (ABI) such as WASI. +// +// The name sysfs was chosen because wazero's public API has a "sys" package, +// which was named after https://github.com/golang/sys. 
+package sysfs diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/unlink.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/unlink.go new file mode 100644 index 000000000..e3f051008 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/unlink.go @@ -0,0 +1,17 @@ +//go:build !windows && !plan9 && !tinygo + +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func unlink(name string) (errno sys.Errno) { + err := syscall.Unlink(name) + if errno = sys.UnwrapOSError(err); errno == sys.EPERM { + errno = sys.EISDIR + } + return errno +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/unlink_plan9.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/unlink_plan9.go new file mode 100644 index 000000000..16ed06ab2 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/unlink_plan9.go @@ -0,0 +1,12 @@ +package sysfs + +import ( + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func unlink(name string) sys.Errno { + err := syscall.Remove(name) + return sys.UnwrapOSError(err) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/unlink_windows.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/unlink_windows.go new file mode 100644 index 000000000..be31c7b91 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/unlink_windows.go @@ -0,0 +1,25 @@ +package sysfs + +import ( + "os" + "syscall" + + "github.com/tetratelabs/wazero/experimental/sys" +) + +func unlink(name string) sys.Errno { + err := syscall.Unlink(name) + if err == nil { + return 0 + } + errno := sys.UnwrapOSError(err) + if errno == sys.EBADF { + lstat, errLstat := os.Lstat(name) + if errLstat == nil && lstat.Mode()&os.ModeSymlink != 0 { + errno = sys.UnwrapOSError(os.Remove(name)) + } else { + errno = sys.EISDIR + } + } + return errno +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/u32/u32.go 
b/vendor/github.com/tetratelabs/wazero/internal/u32/u32.go new file mode 100644 index 000000000..5960a6f0c --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/u32/u32.go @@ -0,0 +1,11 @@ +package u32 + +// LeBytes returns a byte slice corresponding to the 4 bytes in the uint32 in little-endian byte order. +func LeBytes(v uint32) []byte { + return []byte{ + byte(v), + byte(v >> 8), + byte(v >> 16), + byte(v >> 24), + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/u64/u64.go b/vendor/github.com/tetratelabs/wazero/internal/u64/u64.go new file mode 100644 index 000000000..65c7cd124 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/u64/u64.go @@ -0,0 +1,15 @@ +package u64 + +// LeBytes returns a byte slice corresponding to the 8 bytes in the uint64 in little-endian byte order. +func LeBytes(v uint64) []byte { + return []byte{ + byte(v), + byte(v >> 8), + byte(v >> 16), + byte(v >> 24), + byte(v >> 32), + byte(v >> 40), + byte(v >> 48), + byte(v >> 56), + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/version/version.go b/vendor/github.com/tetratelabs/wazero/internal/version/version.go new file mode 100644 index 000000000..9261df0f7 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/version/version.go @@ -0,0 +1,52 @@ +package version + +import ( + "runtime/debug" + "strings" +) + +// Default is the default version value used when none was found. +const Default = "dev" + +// version holds the current version from the go.mod of downstream users or set by ldflag for wazero CLI. +var version string + +// GetWazeroVersion returns the current version of wazero either in the go.mod or set by ldflag for wazero CLI. +// +// If this is not CLI, this assumes that downstream users of wazero imports wazero as "github.com/tetratelabs/wazero". +// To be precise, the returned string matches the require statement there. 
+// For example, if the go.mod has "require github.com/tetratelabs/wazero 0.1.2-12314124-abcd", +// then this returns "0.1.2-12314124-abcd". +// +// Note: this is tested in ./testdata/main_test.go with a separate go.mod to pretend as the wazero user. +func GetWazeroVersion() (ret string) { + if len(version) != 0 { + return version + } + + info, ok := debug.ReadBuildInfo() + if ok { + for _, dep := range info.Deps { + // Note: here's the assumption that wazero is imported as github.com/tetratelabs/wazero. + if strings.Contains(dep.Path, "github.com/tetratelabs/wazero") { + ret = dep.Version + } + } + + // In wazero CLI, wazero is a main module, so we have to get the version info from info.Main. + if versionMissing(ret) { + ret = info.Main.Version + } + } + if versionMissing(ret) { + return Default // don't return parens + } + + // Cache for the subsequent calls. + version = ret + return ret +} + +func versionMissing(ret string) bool { + return ret == "" || ret == "(devel)" // pkg.go defaults to (devel) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/code.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/code.go new file mode 100644 index 000000000..2fac9196c --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/code.go @@ -0,0 +1,100 @@ +package binary + +import ( + "bytes" + "fmt" + "io" + "math" + + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func decodeCode(r *bytes.Reader, codeSectionStart uint64, ret *wasm.Code) (err error) { + ss, _, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("get the size of code: %w", err) + } + remaining := int64(ss) + + // Parse #locals. + ls, bytesRead, err := leb128.DecodeUint32(r) + remaining -= int64(bytesRead) + if err != nil { + return fmt.Errorf("get the size locals: %v", err) + } else if remaining < 0 { + return io.EOF + } + + // Validate the locals. 
+ bytesRead = 0 + var sum uint64 + for i := uint32(0); i < ls; i++ { + num, n, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("read n of locals: %v", err) + } else if remaining < 0 { + return io.EOF + } + + sum += uint64(num) + + b, err := r.ReadByte() + if err != nil { + return fmt.Errorf("read type of local: %v", err) + } + + bytesRead += n + 1 + switch vt := b; vt { + case wasm.ValueTypeI32, wasm.ValueTypeF32, wasm.ValueTypeI64, wasm.ValueTypeF64, + wasm.ValueTypeFuncref, wasm.ValueTypeExternref, wasm.ValueTypeV128: + default: + return fmt.Errorf("invalid local type: 0x%x", vt) + } + } + + if sum > math.MaxUint32 { + return fmt.Errorf("too many locals: %d", sum) + } + + // Rewind the buffer. + _, err = r.Seek(-int64(bytesRead), io.SeekCurrent) + if err != nil { + return err + } + + localTypes := make([]wasm.ValueType, 0, sum) + for i := uint32(0); i < ls; i++ { + num, bytesRead, err := leb128.DecodeUint32(r) + remaining -= int64(bytesRead) + 1 // +1 for the subsequent ReadByte + if err != nil { + return fmt.Errorf("read n of locals: %v", err) + } else if remaining < 0 { + return io.EOF + } + + b, err := r.ReadByte() + if err != nil { + return fmt.Errorf("read type of local: %v", err) + } + + for j := uint32(0); j < num; j++ { + localTypes = append(localTypes, b) + } + } + + bodyOffsetInCodeSection := codeSectionStart - uint64(r.Len()) + body := make([]byte, remaining) + if _, err = io.ReadFull(r, body); err != nil { + return fmt.Errorf("read body: %w", err) + } + + if endIndex := len(body) - 1; endIndex < 0 || body[endIndex] != wasm.OpcodeEnd { + return fmt.Errorf("expr not end with OpcodeEnd") + } + + ret.BodyOffsetInCodeSection = bodyOffsetInCodeSection + ret.LocalTypes = localTypes + ret.Body = body + return nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/const_expr.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/const_expr.go new file mode 100644 index 000000000..edfc0a086 --- /dev/null +++ 
b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/const_expr.go @@ -0,0 +1,105 @@ +package binary + +import ( + "bytes" + "fmt" + "io" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/ieee754" + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func decodeConstantExpression(r *bytes.Reader, enabledFeatures api.CoreFeatures, ret *wasm.ConstantExpression) error { + b, err := r.ReadByte() + if err != nil { + return fmt.Errorf("read opcode: %v", err) + } + + remainingBeforeData := int64(r.Len()) + offsetAtData := r.Size() - remainingBeforeData + + opcode := b + switch opcode { + case wasm.OpcodeI32Const: + // Treat constants as signed as their interpretation is not yet known per /RATIONALE.md + _, _, err = leb128.DecodeInt32(r) + case wasm.OpcodeI64Const: + // Treat constants as signed as their interpretation is not yet known per /RATIONALE.md + _, _, err = leb128.DecodeInt64(r) + case wasm.OpcodeF32Const: + buf := make([]byte, 4) + if _, err := io.ReadFull(r, buf); err != nil { + return fmt.Errorf("read f32 constant: %v", err) + } + _, err = ieee754.DecodeFloat32(buf) + case wasm.OpcodeF64Const: + buf := make([]byte, 8) + if _, err := io.ReadFull(r, buf); err != nil { + return fmt.Errorf("read f64 constant: %v", err) + } + _, err = ieee754.DecodeFloat64(buf) + case wasm.OpcodeGlobalGet: + _, _, err = leb128.DecodeUint32(r) + case wasm.OpcodeRefNull: + if err := enabledFeatures.RequireEnabled(api.CoreFeatureBulkMemoryOperations); err != nil { + return fmt.Errorf("ref.null is not supported as %w", err) + } + reftype, err := r.ReadByte() + if err != nil { + return fmt.Errorf("read reference type for ref.null: %w", err) + } else if reftype != wasm.RefTypeFuncref && reftype != wasm.RefTypeExternref { + return fmt.Errorf("invalid type for ref.null: 0x%x", reftype) + } + case wasm.OpcodeRefFunc: + if err := enabledFeatures.RequireEnabled(api.CoreFeatureBulkMemoryOperations); err != 
nil { + return fmt.Errorf("ref.func is not supported as %w", err) + } + // Parsing index. + _, _, err = leb128.DecodeUint32(r) + case wasm.OpcodeVecPrefix: + if err := enabledFeatures.RequireEnabled(api.CoreFeatureSIMD); err != nil { + return fmt.Errorf("vector instructions are not supported as %w", err) + } + opcode, err = r.ReadByte() + if err != nil { + return fmt.Errorf("read vector instruction opcode suffix: %w", err) + } + + if opcode != wasm.OpcodeVecV128Const { + return fmt.Errorf("invalid vector opcode for const expression: %#x", opcode) + } + + remainingBeforeData = int64(r.Len()) + offsetAtData = r.Size() - remainingBeforeData + + n, err := r.Read(make([]byte, 16)) + if err != nil { + return fmt.Errorf("read vector const instruction immediates: %w", err) + } else if n != 16 { + return fmt.Errorf("read vector const instruction immediates: needs 16 bytes but was %d bytes", n) + } + default: + return fmt.Errorf("%v for const expression opt code: %#x", ErrInvalidByte, b) + } + + if err != nil { + return fmt.Errorf("read value: %v", err) + } + + if b, err = r.ReadByte(); err != nil { + return fmt.Errorf("look for end opcode: %v", err) + } + + if b != wasm.OpcodeEnd { + return fmt.Errorf("constant expression has been not terminated") + } + + ret.Data = make([]byte, remainingBeforeData-int64(r.Len())-1) + if _, err = r.ReadAt(ret.Data, offsetAtData); err != nil { + return fmt.Errorf("error re-buffering ConstantExpression.Data") + } + ret.Opcode = opcode + return nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/custom.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/custom.go new file mode 100644 index 000000000..771f8c327 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/custom.go @@ -0,0 +1,22 @@ +package binary + +import ( + "bytes" + + "github.com/tetratelabs/wazero/internal/wasm" +) + +// decodeCustomSection deserializes the data **not** associated with the "name" key in SectionIDCustom. 
+// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#custom-section%E2%91%A0 +func decodeCustomSection(r *bytes.Reader, name string, limit uint64) (result *wasm.CustomSection, err error) { + buf := make([]byte, limit) + _, err = r.Read(buf) + + result = &wasm.CustomSection{ + Name: name, + Data: buf, + } + + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/data.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/data.go new file mode 100644 index 000000000..054ccb3c6 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/data.go @@ -0,0 +1,79 @@ +package binary + +import ( + "bytes" + "fmt" + "io" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" +) + +// dataSegmentPrefix represents three types of data segments. +// +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-section +type dataSegmentPrefix = uint32 + +const ( + // dataSegmentPrefixActive is the prefix for the version 1.0 compatible data segment, which is classified as "active" in 2.0. + dataSegmentPrefixActive dataSegmentPrefix = 0x0 + // dataSegmentPrefixPassive prefixes the "passive" data segment as in version 2.0 specification. + dataSegmentPrefixPassive dataSegmentPrefix = 0x1 + // dataSegmentPrefixActiveWithMemoryIndex is the active prefix with memory index encoded which is defined for futur use as of 2.0. 
+ dataSegmentPrefixActiveWithMemoryIndex dataSegmentPrefix = 0x2 +) + +func decodeDataSegment(r *bytes.Reader, enabledFeatures api.CoreFeatures, ret *wasm.DataSegment) (err error) { + dataSegmentPrefx, _, err := leb128.DecodeUint32(r) + if err != nil { + err = fmt.Errorf("read data segment prefix: %w", err) + return + } + + if dataSegmentPrefx != dataSegmentPrefixActive { + if err = enabledFeatures.RequireEnabled(api.CoreFeatureBulkMemoryOperations); err != nil { + err = fmt.Errorf("non-zero prefix for data segment is invalid as %w", err) + return + } + } + + switch dataSegmentPrefx { + case dataSegmentPrefixActive, + dataSegmentPrefixActiveWithMemoryIndex: + // Active data segment as in + // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-section + if dataSegmentPrefx == 0x2 { + d, _, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("read memory index: %v", err) + } else if d != 0 { + return fmt.Errorf("memory index must be zero but was %d", d) + } + } + + err = decodeConstantExpression(r, enabledFeatures, &ret.OffsetExpression) + if err != nil { + return fmt.Errorf("read offset expression: %v", err) + } + case dataSegmentPrefixPassive: + // Passive data segment doesn't need const expr nor memory index encoded. 
+ // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-section + ret.Passive = true + default: + err = fmt.Errorf("invalid data segment prefix: 0x%x", dataSegmentPrefx) + return + } + + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + err = fmt.Errorf("get the size of vector: %v", err) + return + } + + ret.Init = make([]byte, vs) + if _, err = io.ReadFull(r, ret.Init); err != nil { + err = fmt.Errorf("read bytes for init: %v", err) + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/decoder.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/decoder.go new file mode 100644 index 000000000..c4191dae9 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/decoder.go @@ -0,0 +1,193 @@ +package binary + +import ( + "bytes" + "debug/dwarf" + "errors" + "fmt" + "io" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" + "github.com/tetratelabs/wazero/internal/wasmdebug" +) + +// DecodeModule implements wasm.DecodeModule for the WebAssembly 1.0 (20191205) Binary Format +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-format%E2%91%A0 +func DecodeModule( + binary []byte, + enabledFeatures api.CoreFeatures, + memoryLimitPages uint32, + memoryCapacityFromMax, + dwarfEnabled, storeCustomSections bool, +) (*wasm.Module, error) { + r := bytes.NewReader(binary) + + // Magic number. + buf := make([]byte, 4) + if _, err := io.ReadFull(r, buf); err != nil || !bytes.Equal(buf, Magic) { + return nil, ErrInvalidMagicNumber + } + + // Version. + if _, err := io.ReadFull(r, buf); err != nil || !bytes.Equal(buf, version) { + return nil, ErrInvalidVersion + } + + memSizer := newMemorySizer(memoryLimitPages, memoryCapacityFromMax) + + m := &wasm.Module{} + var info, line, str, abbrev, ranges []byte // For DWARF Data. 
+ for { + // TODO: except custom sections, all others are required to be in order, but we aren't checking yet. + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#modules%E2%91%A0%E2%93%AA + sectionID, err := r.ReadByte() + if err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("read section id: %w", err) + } + + sectionSize, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("get size of section %s: %v", wasm.SectionIDName(sectionID), err) + } + + sectionContentStart := r.Len() + switch sectionID { + case wasm.SectionIDCustom: + // First, validate the section and determine if the section for this name has already been set + name, nameSize, decodeErr := decodeUTF8(r, "custom section name") + if decodeErr != nil { + err = decodeErr + break + } else if sectionSize < nameSize { + err = fmt.Errorf("malformed custom section %s", name) + break + } else if name == "name" && m.NameSection != nil { + err = fmt.Errorf("redundant custom section %s", name) + break + } + + // Now, either decode the NameSection or CustomSection + limit := sectionSize - nameSize + + var c *wasm.CustomSection + if name != "name" { + if storeCustomSections || dwarfEnabled { + c, err = decodeCustomSection(r, name, uint64(limit)) + if err != nil { + return nil, fmt.Errorf("failed to read custom section name[%s]: %w", name, err) + } + m.CustomSections = append(m.CustomSections, c) + if dwarfEnabled { + switch name { + case ".debug_info": + info = c.Data + case ".debug_line": + line = c.Data + case ".debug_str": + str = c.Data + case ".debug_abbrev": + abbrev = c.Data + case ".debug_ranges": + ranges = c.Data + } + } + } else { + if _, err = io.CopyN(io.Discard, r, int64(limit)); err != nil { + return nil, fmt.Errorf("failed to skip name[%s]: %w", name, err) + } + } + } else { + m.NameSection, err = decodeNameSection(r, uint64(limit)) + } + case wasm.SectionIDType: + m.TypeSection, err = decodeTypeSection(enabledFeatures, r) + case 
wasm.SectionIDImport: + m.ImportSection, m.ImportPerModule, m.ImportFunctionCount, m.ImportGlobalCount, m.ImportMemoryCount, m.ImportTableCount, err = decodeImportSection(r, memSizer, memoryLimitPages, enabledFeatures) + if err != nil { + return nil, err // avoid re-wrapping the error. + } + case wasm.SectionIDFunction: + m.FunctionSection, err = decodeFunctionSection(r) + case wasm.SectionIDTable: + m.TableSection, err = decodeTableSection(r, enabledFeatures) + case wasm.SectionIDMemory: + m.MemorySection, err = decodeMemorySection(r, enabledFeatures, memSizer, memoryLimitPages) + case wasm.SectionIDGlobal: + if m.GlobalSection, err = decodeGlobalSection(r, enabledFeatures); err != nil { + return nil, err // avoid re-wrapping the error. + } + case wasm.SectionIDExport: + m.ExportSection, m.Exports, err = decodeExportSection(r) + case wasm.SectionIDStart: + if m.StartSection != nil { + return nil, errors.New("multiple start sections are invalid") + } + m.StartSection, err = decodeStartSection(r) + case wasm.SectionIDElement: + m.ElementSection, err = decodeElementSection(r, enabledFeatures) + case wasm.SectionIDCode: + m.CodeSection, err = decodeCodeSection(r) + case wasm.SectionIDData: + m.DataSection, err = decodeDataSection(r, enabledFeatures) + case wasm.SectionIDDataCount: + if err := enabledFeatures.RequireEnabled(api.CoreFeatureBulkMemoryOperations); err != nil { + return nil, fmt.Errorf("data count section not supported as %v", err) + } + m.DataCountSection, err = decodeDataCountSection(r) + default: + err = ErrInvalidSectionID + } + + readBytes := sectionContentStart - r.Len() + if err == nil && int(sectionSize) != readBytes { + err = fmt.Errorf("invalid section length: expected to be %d but got %d", sectionSize, readBytes) + } + + if err != nil { + return nil, fmt.Errorf("section %s: %v", wasm.SectionIDName(sectionID), err) + } + } + + if dwarfEnabled { + d, _ := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str) + m.DWARFLines = 
wasmdebug.NewDWARFLines(d) + } + + functionCount, codeCount := m.SectionElementCount(wasm.SectionIDFunction), m.SectionElementCount(wasm.SectionIDCode) + if functionCount != codeCount { + return nil, fmt.Errorf("function and code section have inconsistent lengths: %d != %d", functionCount, codeCount) + } + return m, nil +} + +// memorySizer derives min, capacity and max pages from decoded wasm. +type memorySizer func(minPages uint32, maxPages *uint32) (min uint32, capacity uint32, max uint32) + +// newMemorySizer sets capacity to minPages unless max is defined and +// memoryCapacityFromMax is true. +func newMemorySizer(memoryLimitPages uint32, memoryCapacityFromMax bool) memorySizer { + return func(minPages uint32, maxPages *uint32) (min, capacity, max uint32) { + if maxPages != nil { + if memoryCapacityFromMax { + return minPages, *maxPages, *maxPages + } + // This is an invalid value: let it propagate, we will fail later. + if *maxPages > wasm.MemoryLimitPages { + return minPages, minPages, *maxPages + } + // This is a valid value, but it goes over the run-time limit: return the limit. 
+ if *maxPages > memoryLimitPages { + return minPages, minPages, memoryLimitPages + } + return minPages, minPages, *maxPages + } + if memoryCapacityFromMax { + return minPages, memoryLimitPages, memoryLimitPages + } + return minPages, minPages, memoryLimitPages + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/element.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/element.go new file mode 100644 index 000000000..7ab4b48eb --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/element.go @@ -0,0 +1,269 @@ +package binary + +import ( + "bytes" + "errors" + "fmt" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func ensureElementKindFuncRef(r *bytes.Reader) error { + elemKind, err := r.ReadByte() + if err != nil { + return fmt.Errorf("read element prefix: %w", err) + } + if elemKind != 0x0 { // ElemKind is fixed to 0x0 now: https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#element-section + return fmt.Errorf("element kind must be zero but was 0x%x", elemKind) + } + return nil +} + +func decodeElementInitValueVector(r *bytes.Reader) ([]wasm.Index, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("get size of vector: %w", err) + } + + vec := make([]wasm.Index, vs) + for i := range vec { + u32, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("read function index: %w", err) + } + + if u32 >= wasm.MaximumFunctionIndex { + return nil, fmt.Errorf("too large function index in Element init: %d", u32) + } + vec[i] = u32 + } + return vec, nil +} + +func decodeElementConstExprVector(r *bytes.Reader, elemType wasm.RefType, enabledFeatures api.CoreFeatures) ([]wasm.Index, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("failed to get the size of constexpr vector: %w", err) + } + vec := 
make([]wasm.Index, vs) + for i := range vec { + var expr wasm.ConstantExpression + err := decodeConstantExpression(r, enabledFeatures, &expr) + if err != nil { + return nil, err + } + switch expr.Opcode { + case wasm.OpcodeRefFunc: + if elemType != wasm.RefTypeFuncref { + return nil, fmt.Errorf("element type mismatch: want %s, but constexpr has funcref", wasm.RefTypeName(elemType)) + } + v, _, _ := leb128.LoadUint32(expr.Data) + if v >= wasm.MaximumFunctionIndex { + return nil, fmt.Errorf("too large function index in Element init: %d", v) + } + vec[i] = v + case wasm.OpcodeRefNull: + if elemType != expr.Data[0] { + return nil, fmt.Errorf("element type mismatch: want %s, but constexpr has %s", + wasm.RefTypeName(elemType), wasm.RefTypeName(expr.Data[0])) + } + vec[i] = wasm.ElementInitNullReference + case wasm.OpcodeGlobalGet: + i32, _, _ := leb128.LoadInt32(expr.Data) + // Resolving the reference type from globals is done at instantiation phase. See the comment on + // wasm.elementInitImportedGlobalReferenceType. + vec[i] = wasm.WrapGlobalIndexAsElementInit(wasm.Index(i32)) + default: + return nil, fmt.Errorf("const expr must be either ref.null or ref.func but was %s", wasm.InstructionName(expr.Opcode)) + } + } + return vec, nil +} + +func decodeElementRefType(r *bytes.Reader) (ret wasm.RefType, err error) { + ret, err = r.ReadByte() + if err != nil { + err = fmt.Errorf("read element ref type: %w", err) + return + } + if ret != wasm.RefTypeFuncref && ret != wasm.RefTypeExternref { + return 0, errors.New("ref type must be funcref or externref for element as of WebAssembly 2.0") + } + return +} + +const ( + // The prefix is explained at https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#element-section + + // elementSegmentPrefixLegacy is the legacy prefix and is only valid one before CoreFeatureBulkMemoryOperations. 
+ elementSegmentPrefixLegacy = iota + // elementSegmentPrefixPassiveFuncrefValueVector is the passive element whose indexes are encoded as vec(varint), and reftype is fixed to funcref. + elementSegmentPrefixPassiveFuncrefValueVector + // elementSegmentPrefixActiveFuncrefValueVectorWithTableIndex is the same as elementSegmentPrefixPassiveFuncrefValueVector but active and table index is encoded. + elementSegmentPrefixActiveFuncrefValueVectorWithTableIndex + // elementSegmentPrefixDeclarativeFuncrefValueVector is the same as elementSegmentPrefixPassiveFuncrefValueVector but declarative. + elementSegmentPrefixDeclarativeFuncrefValueVector + // elementSegmentPrefixActiveFuncrefConstExprVector is active whoce reftype is fixed to funcref and indexes are encoded as vec(const_expr). + elementSegmentPrefixActiveFuncrefConstExprVector + // elementSegmentPrefixPassiveConstExprVector is passive whoce indexes are encoded as vec(const_expr), and reftype is encoded. + elementSegmentPrefixPassiveConstExprVector + // elementSegmentPrefixPassiveConstExprVector is active whoce indexes are encoded as vec(const_expr), and reftype and table index are encoded. + elementSegmentPrefixActiveConstExprVector + // elementSegmentPrefixDeclarativeConstExprVector is declarative whoce indexes are encoded as vec(const_expr), and reftype is encoded. 
+ elementSegmentPrefixDeclarativeConstExprVector +) + +func decodeElementSegment(r *bytes.Reader, enabledFeatures api.CoreFeatures, ret *wasm.ElementSegment) error { + prefix, _, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("read element prefix: %w", err) + } + + if prefix != elementSegmentPrefixLegacy { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureBulkMemoryOperations); err != nil { + return fmt.Errorf("non-zero prefix for element segment is invalid as %w", err) + } + } + + // Encoding depends on the prefix and described at https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#element-section + switch prefix { + case elementSegmentPrefixLegacy: + // Legacy prefix which is WebAssembly 1.0 compatible. + err = decodeConstantExpression(r, enabledFeatures, &ret.OffsetExpr) + if err != nil { + return fmt.Errorf("read expr for offset: %w", err) + } + + ret.Init, err = decodeElementInitValueVector(r) + if err != nil { + return err + } + + ret.Mode = wasm.ElementModeActive + ret.Type = wasm.RefTypeFuncref + return nil + case elementSegmentPrefixPassiveFuncrefValueVector: + // Prefix 1 requires funcref. + if err = ensureElementKindFuncRef(r); err != nil { + return err + } + + ret.Init, err = decodeElementInitValueVector(r) + if err != nil { + return err + } + ret.Mode = wasm.ElementModePassive + ret.Type = wasm.RefTypeFuncref + return nil + case elementSegmentPrefixActiveFuncrefValueVectorWithTableIndex: + ret.TableIndex, _, err = leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("get size of vector: %w", err) + } + + if ret.TableIndex != 0 { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("table index must be zero but was %d: %w", ret.TableIndex, err) + } + } + + err := decodeConstantExpression(r, enabledFeatures, &ret.OffsetExpr) + if err != nil { + return fmt.Errorf("read expr for offset: %w", err) + } + + // Prefix 2 requires funcref. 
+ if err = ensureElementKindFuncRef(r); err != nil { + return err + } + + ret.Init, err = decodeElementInitValueVector(r) + if err != nil { + return err + } + + ret.Mode = wasm.ElementModeActive + ret.Type = wasm.RefTypeFuncref + return nil + case elementSegmentPrefixDeclarativeFuncrefValueVector: + // Prefix 3 requires funcref. + if err = ensureElementKindFuncRef(r); err != nil { + return err + } + ret.Init, err = decodeElementInitValueVector(r) + if err != nil { + return err + } + ret.Type = wasm.RefTypeFuncref + ret.Mode = wasm.ElementModeDeclarative + return nil + case elementSegmentPrefixActiveFuncrefConstExprVector: + err := decodeConstantExpression(r, enabledFeatures, &ret.OffsetExpr) + if err != nil { + return fmt.Errorf("read expr for offset: %w", err) + } + + ret.Init, err = decodeElementConstExprVector(r, wasm.RefTypeFuncref, enabledFeatures) + if err != nil { + return err + } + ret.Mode = wasm.ElementModeActive + ret.Type = wasm.RefTypeFuncref + return nil + case elementSegmentPrefixPassiveConstExprVector: + ret.Type, err = decodeElementRefType(r) + if err != nil { + return err + } + ret.Init, err = decodeElementConstExprVector(r, ret.Type, enabledFeatures) + if err != nil { + return err + } + ret.Mode = wasm.ElementModePassive + return nil + case elementSegmentPrefixActiveConstExprVector: + ret.TableIndex, _, err = leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("get size of vector: %w", err) + } + + if ret.TableIndex != 0 { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("table index must be zero but was %d: %w", ret.TableIndex, err) + } + } + err := decodeConstantExpression(r, enabledFeatures, &ret.OffsetExpr) + if err != nil { + return fmt.Errorf("read expr for offset: %w", err) + } + + ret.Type, err = decodeElementRefType(r) + if err != nil { + return err + } + + ret.Init, err = decodeElementConstExprVector(r, ret.Type, enabledFeatures) + if err != nil { + return err + } + + 
ret.Mode = wasm.ElementModeActive + return nil + case elementSegmentPrefixDeclarativeConstExprVector: + ret.Type, err = decodeElementRefType(r) + if err != nil { + return err + } + ret.Init, err = decodeElementConstExprVector(r, ret.Type, enabledFeatures) + if err != nil { + return err + } + + ret.Mode = wasm.ElementModeDeclarative + return nil + default: + return fmt.Errorf("invalid element segment prefix: 0x%x", prefix) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/errors.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/errors.go new file mode 100644 index 000000000..b9125b038 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/errors.go @@ -0,0 +1,11 @@ +package binary + +import "errors" + +var ( + ErrInvalidByte = errors.New("invalid byte") + ErrInvalidMagicNumber = errors.New("invalid magic number") + ErrInvalidVersion = errors.New("invalid version header") + ErrInvalidSectionID = errors.New("invalid section id") + ErrCustomSectionNotFound = errors.New("custom section not found") +) diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/export.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/export.go new file mode 100644 index 000000000..925e9c499 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/export.go @@ -0,0 +1,32 @@ +package binary + +import ( + "bytes" + "fmt" + + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func decodeExport(r *bytes.Reader, ret *wasm.Export) (err error) { + if ret.Name, _, err = decodeUTF8(r, "export name"); err != nil { + return + } + + b, err := r.ReadByte() + if err != nil { + err = fmt.Errorf("error decoding export kind: %w", err) + return + } + + ret.Type = b + switch ret.Type { + case wasm.ExternTypeFunc, wasm.ExternTypeTable, wasm.ExternTypeMemory, wasm.ExternTypeGlobal: + if ret.Index, _, err = leb128.DecodeUint32(r); err != nil { + err = 
fmt.Errorf("error decoding export index: %w", err) + } + default: + err = fmt.Errorf("%w: invalid byte for exportdesc: %#x", ErrInvalidByte, b) + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/function.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/function.go new file mode 100644 index 000000000..bb9e2b649 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/function.go @@ -0,0 +1,56 @@ +package binary + +import ( + "bytes" + "fmt" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func decodeFunctionType(enabledFeatures api.CoreFeatures, r *bytes.Reader, ret *wasm.FunctionType) (err error) { + b, err := r.ReadByte() + if err != nil { + return fmt.Errorf("read leading byte: %w", err) + } + + if b != 0x60 { + return fmt.Errorf("%w: %#x != 0x60", ErrInvalidByte, b) + } + + paramCount, _, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("could not read parameter count: %w", err) + } + + paramTypes, err := decodeValueTypes(r, paramCount) + if err != nil { + return fmt.Errorf("could not read parameter types: %w", err) + } + + resultCount, _, err := leb128.DecodeUint32(r) + if err != nil { + return fmt.Errorf("could not read result count: %w", err) + } + + // Guard >1.0 feature multi-value + if resultCount > 1 { + if err = enabledFeatures.RequireEnabled(api.CoreFeatureMultiValue); err != nil { + return fmt.Errorf("multiple result types invalid as %v", err) + } + } + + resultTypes, err := decodeValueTypes(r, resultCount) + if err != nil { + return fmt.Errorf("could not read result types: %w", err) + } + + ret.Params = paramTypes + ret.Results = resultTypes + + // cache the key for the function type + _ = ret.String() + + return nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/global.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/global.go 
new file mode 100644 index 000000000..4e1c16fda --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/global.go @@ -0,0 +1,50 @@ +package binary + +import ( + "bytes" + "fmt" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/wasm" +) + +// decodeGlobal returns the api.Global decoded with the WebAssembly 1.0 (20191205) Binary Format. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-global +func decodeGlobal(r *bytes.Reader, enabledFeatures api.CoreFeatures, ret *wasm.Global) (err error) { + ret.Type, err = decodeGlobalType(r) + if err != nil { + return err + } + + err = decodeConstantExpression(r, enabledFeatures, &ret.Init) + return +} + +// decodeGlobalType returns the wasm.GlobalType decoded with the WebAssembly 1.0 (20191205) Binary Format. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-globaltype +func decodeGlobalType(r *bytes.Reader) (wasm.GlobalType, error) { + vt, err := decodeValueTypes(r, 1) + if err != nil { + return wasm.GlobalType{}, fmt.Errorf("read value type: %w", err) + } + + ret := wasm.GlobalType{ + ValType: vt[0], + } + + b, err := r.ReadByte() + if err != nil { + return wasm.GlobalType{}, fmt.Errorf("read mutablity: %w", err) + } + + switch mut := b; mut { + case 0x00: // not mutable + case 0x01: // mutable + ret.Mutable = true + default: + return wasm.GlobalType{}, fmt.Errorf("%w for mutability: %#x != 0x00 or 0x01", ErrInvalidByte, mut) + } + return ret, nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/header.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/header.go new file mode 100644 index 000000000..29ba1b599 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/header.go @@ -0,0 +1,9 @@ +package binary + +// Magic is the 4 byte preamble (literally "\0asm") of the binary format +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-magic +var Magic = 
[]byte{0x00, 0x61, 0x73, 0x6D} + +// version is format version and doesn't change between known specification versions +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-version +var version = []byte{0x01, 0x00, 0x00, 0x00} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/import.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/import.go new file mode 100644 index 000000000..39d310c55 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/import.go @@ -0,0 +1,52 @@ +package binary + +import ( + "bytes" + "fmt" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func decodeImport( + r *bytes.Reader, + idx uint32, + memorySizer memorySizer, + memoryLimitPages uint32, + enabledFeatures api.CoreFeatures, + ret *wasm.Import, +) (err error) { + if ret.Module, _, err = decodeUTF8(r, "import module"); err != nil { + err = fmt.Errorf("import[%d] error decoding module: %w", idx, err) + return + } + + if ret.Name, _, err = decodeUTF8(r, "import name"); err != nil { + err = fmt.Errorf("import[%d] error decoding name: %w", idx, err) + return + } + + b, err := r.ReadByte() + if err != nil { + err = fmt.Errorf("import[%d] error decoding type: %w", idx, err) + return + } + ret.Type = b + switch ret.Type { + case wasm.ExternTypeFunc: + ret.DescFunc, _, err = leb128.DecodeUint32(r) + case wasm.ExternTypeTable: + err = decodeTable(r, enabledFeatures, &ret.DescTable) + case wasm.ExternTypeMemory: + ret.DescMem, err = decodeMemory(r, enabledFeatures, memorySizer, memoryLimitPages) + case wasm.ExternTypeGlobal: + ret.DescGlobal, err = decodeGlobalType(r) + default: + err = fmt.Errorf("%w: invalid byte for importdesc: %#x", ErrInvalidByte, b) + } + if err != nil { + err = fmt.Errorf("import[%d] %s[%s.%s]: %w", idx, wasm.ExternTypeName(ret.Type), ret.Module, ret.Name, err) + } + return +} diff --git 
a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/limits.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/limits.go new file mode 100644 index 000000000..ff2d73b5f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/limits.go @@ -0,0 +1,47 @@ +package binary + +import ( + "bytes" + "fmt" + + "github.com/tetratelabs/wazero/internal/leb128" +) + +// decodeLimitsType returns the `limitsType` (min, max) decoded with the WebAssembly 1.0 (20191205) Binary Format. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#limits%E2%91%A6 +// +// Extended in threads proposal: https://webassembly.github.io/threads/core/binary/types.html#limits +func decodeLimitsType(r *bytes.Reader) (min uint32, max *uint32, shared bool, err error) { + var flag byte + if flag, err = r.ReadByte(); err != nil { + err = fmt.Errorf("read leading byte: %v", err) + return + } + + switch flag { + case 0x00, 0x02: + min, _, err = leb128.DecodeUint32(r) + if err != nil { + err = fmt.Errorf("read min of limit: %v", err) + } + case 0x01, 0x03: + min, _, err = leb128.DecodeUint32(r) + if err != nil { + err = fmt.Errorf("read min of limit: %v", err) + return + } + var m uint32 + if m, _, err = leb128.DecodeUint32(r); err != nil { + err = fmt.Errorf("read max of limit: %v", err) + } else { + max = &m + } + default: + err = fmt.Errorf("%v for limits: %#x not in (0x00, 0x01, 0x02, 0x03)", ErrInvalidByte, flag) + } + + shared = flag == 0x02 || flag == 0x03 + + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/memory.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/memory.go new file mode 100644 index 000000000..e1b175123 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/memory.go @@ -0,0 +1,42 @@ +package binary + +import ( + "bytes" + "fmt" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/wasm" +) + 
+// decodeMemory returns the api.Memory decoded with the WebAssembly 1.0 (20191205) Binary Format. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-memory +func decodeMemory( + r *bytes.Reader, + enabledFeatures api.CoreFeatures, + memorySizer func(minPages uint32, maxPages *uint32) (min, capacity, max uint32), + memoryLimitPages uint32, +) (*wasm.Memory, error) { + min, maxP, shared, err := decodeLimitsType(r) + if err != nil { + return nil, err + } + + if shared { + if !enabledFeatures.IsEnabled(experimental.CoreFeaturesThreads) { + return nil, fmt.Errorf("shared memory requested but threads feature not enabled") + } + + // This restriction may be lifted in the future. + // https://webassembly.github.io/threads/core/binary/types.html#memory-types + if maxP == nil { + return nil, fmt.Errorf("shared memory requires a maximum size to be specified") + } + } + + min, capacity, max := memorySizer(min, maxP) + mem := &wasm.Memory{Min: min, Cap: capacity, Max: max, IsMaxEncoded: maxP != nil, IsShared: shared} + + return mem, mem.Validate(memoryLimitPages) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/names.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/names.go new file mode 100644 index 000000000..56fb96dc8 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/names.go @@ -0,0 +1,151 @@ +package binary + +import ( + "bytes" + "fmt" + "io" + + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" +) + +const ( + // subsectionIDModuleName contains only the module name. 
+ subsectionIDModuleName = uint8(0) + // subsectionIDFunctionNames is a map of indices to function names, in ascending order by function index + subsectionIDFunctionNames = uint8(1) + // subsectionIDLocalNames contain a map of function indices to a map of local indices to their names, in ascending + // order by function and local index + subsectionIDLocalNames = uint8(2) +) + +// decodeNameSection deserializes the data associated with the "name" key in SectionIDCustom according to the +// standard: +// +// * ModuleName decode from subsection 0 +// * FunctionNames decode from subsection 1 +// * LocalNames decode from subsection 2 +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namesec +func decodeNameSection(r *bytes.Reader, limit uint64) (result *wasm.NameSection, err error) { + // TODO: add leb128 functions that work on []byte and offset. While using a reader allows us to reuse reader-based + // leb128 functions, it is less efficient, causes untestable code and in some cases more complex vs plain []byte. 
+ result = &wasm.NameSection{} + + // subsectionID is decoded if known, and skipped if not + var subsectionID uint8 + // subsectionSize is the length to skip when the subsectionID is unknown + var subsectionSize uint32 + var bytesRead uint64 + for limit > 0 { + if subsectionID, err = r.ReadByte(); err != nil { + if err == io.EOF { + return result, nil + } + // TODO: untestable as this can't fail for a reason beside EOF reading a byte from a buffer + return nil, fmt.Errorf("failed to read a subsection ID: %w", err) + } + limit-- + + if subsectionSize, bytesRead, err = leb128.DecodeUint32(r); err != nil { + return nil, fmt.Errorf("failed to read the size of subsection[%d]: %w", subsectionID, err) + } + limit -= bytesRead + + switch subsectionID { + case subsectionIDModuleName: + if result.ModuleName, _, err = decodeUTF8(r, "module name"); err != nil { + return nil, err + } + case subsectionIDFunctionNames: + if result.FunctionNames, err = decodeFunctionNames(r); err != nil { + return nil, err + } + case subsectionIDLocalNames: + if result.LocalNames, err = decodeLocalNames(r); err != nil { + return nil, err + } + default: // Skip other subsections. + // Note: Not Seek because it doesn't err when given an offset past EOF. Rather, it leads to undefined state. 
+ if _, err = io.CopyN(io.Discard, r, int64(subsectionSize)); err != nil { + return nil, fmt.Errorf("failed to skip subsection[%d]: %w", subsectionID, err) + } + } + limit -= uint64(subsectionSize) + } + return +} + +func decodeFunctionNames(r *bytes.Reader) (wasm.NameMap, error) { + functionCount, err := decodeFunctionCount(r, subsectionIDFunctionNames) + if err != nil { + return nil, err + } + + result := make(wasm.NameMap, functionCount) + for i := uint32(0); i < functionCount; i++ { + functionIndex, err := decodeFunctionIndex(r, subsectionIDFunctionNames) + if err != nil { + return nil, err + } + + name, _, err := decodeUTF8(r, "function[%d] name", functionIndex) + if err != nil { + return nil, err + } + result[i] = wasm.NameAssoc{Index: functionIndex, Name: name} + } + return result, nil +} + +func decodeLocalNames(r *bytes.Reader) (wasm.IndirectNameMap, error) { + functionCount, err := decodeFunctionCount(r, subsectionIDLocalNames) + if err != nil { + return nil, err + } + + result := make(wasm.IndirectNameMap, functionCount) + for i := uint32(0); i < functionCount; i++ { + functionIndex, err := decodeFunctionIndex(r, subsectionIDLocalNames) + if err != nil { + return nil, err + } + + localCount, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("failed to read the local count for function[%d]: %w", functionIndex, err) + } + + locals := make(wasm.NameMap, localCount) + for j := uint32(0); j < localCount; j++ { + localIndex, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("failed to read a local index of function[%d]: %w", functionIndex, err) + } + + name, _, err := decodeUTF8(r, "function[%d] local[%d] name", functionIndex, localIndex) + if err != nil { + return nil, err + } + locals[j] = wasm.NameAssoc{Index: localIndex, Name: name} + } + result[i] = wasm.NameMapAssoc{Index: functionIndex, NameMap: locals} + } + return result, nil +} + +func decodeFunctionIndex(r *bytes.Reader, subsectionID uint8) 
(uint32, error) { + functionIndex, _, err := leb128.DecodeUint32(r) + if err != nil { + return 0, fmt.Errorf("failed to read a function index in subsection[%d]: %w", subsectionID, err) + } + return functionIndex, nil +} + +func decodeFunctionCount(r *bytes.Reader, subsectionID uint8) (uint32, error) { + functionCount, _, err := leb128.DecodeUint32(r) + if err != nil { + return 0, fmt.Errorf("failed to read the function count of subsection[%d]: %w", subsectionID, err) + } + return functionCount, nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/section.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/section.go new file mode 100644 index 000000000..622ee5923 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/section.go @@ -0,0 +1,226 @@ +package binary + +import ( + "bytes" + "fmt" + "io" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasm" +) + +func decodeTypeSection(enabledFeatures api.CoreFeatures, r *bytes.Reader) ([]wasm.FunctionType, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("get size of vector: %w", err) + } + + result := make([]wasm.FunctionType, vs) + for i := uint32(0); i < vs; i++ { + if err = decodeFunctionType(enabledFeatures, r, &result[i]); err != nil { + return nil, fmt.Errorf("read %d-th type: %v", i, err) + } + } + return result, nil +} + +// decodeImportSection decodes the decoded import segments plus the count per wasm.ExternType. 
+func decodeImportSection( + r *bytes.Reader, + memorySizer memorySizer, + memoryLimitPages uint32, + enabledFeatures api.CoreFeatures, +) (result []wasm.Import, + perModule map[string][]*wasm.Import, + funcCount, globalCount, memoryCount, tableCount wasm.Index, err error, +) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + err = fmt.Errorf("get size of vector: %w", err) + return + } + + perModule = make(map[string][]*wasm.Import) + result = make([]wasm.Import, vs) + for i := uint32(0); i < vs; i++ { + imp := &result[i] + if err = decodeImport(r, i, memorySizer, memoryLimitPages, enabledFeatures, imp); err != nil { + return + } + switch imp.Type { + case wasm.ExternTypeFunc: + imp.IndexPerType = funcCount + funcCount++ + case wasm.ExternTypeGlobal: + imp.IndexPerType = globalCount + globalCount++ + case wasm.ExternTypeMemory: + imp.IndexPerType = memoryCount + memoryCount++ + case wasm.ExternTypeTable: + imp.IndexPerType = tableCount + tableCount++ + } + perModule[imp.Module] = append(perModule[imp.Module], imp) + } + return +} + +func decodeFunctionSection(r *bytes.Reader) ([]uint32, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("get size of vector: %w", err) + } + + result := make([]uint32, vs) + for i := uint32(0); i < vs; i++ { + if result[i], _, err = leb128.DecodeUint32(r); err != nil { + return nil, fmt.Errorf("get type index: %w", err) + } + } + return result, err +} + +func decodeTableSection(r *bytes.Reader, enabledFeatures api.CoreFeatures) ([]wasm.Table, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("error reading size") + } + if vs > 1 { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return nil, fmt.Errorf("at most one table allowed in module as %w", err) + } + } + + ret := make([]wasm.Table, vs) + for i := range ret { + err = decodeTable(r, enabledFeatures, &ret[i]) + if err != nil { + return nil, err + } + } 
+ return ret, nil +} + +func decodeMemorySection( + r *bytes.Reader, + enabledFeatures api.CoreFeatures, + memorySizer memorySizer, + memoryLimitPages uint32, +) (*wasm.Memory, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("error reading size") + } + if vs > 1 { + return nil, fmt.Errorf("at most one memory allowed in module, but read %d", vs) + } else if vs == 0 { + // memory count can be zero. + return nil, nil + } + + return decodeMemory(r, enabledFeatures, memorySizer, memoryLimitPages) +} + +func decodeGlobalSection(r *bytes.Reader, enabledFeatures api.CoreFeatures) ([]wasm.Global, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("get size of vector: %w", err) + } + + result := make([]wasm.Global, vs) + for i := uint32(0); i < vs; i++ { + if err = decodeGlobal(r, enabledFeatures, &result[i]); err != nil { + return nil, fmt.Errorf("global[%d]: %w", i, err) + } + } + return result, nil +} + +func decodeExportSection(r *bytes.Reader) ([]wasm.Export, map[string]*wasm.Export, error) { + vs, _, sizeErr := leb128.DecodeUint32(r) + if sizeErr != nil { + return nil, nil, fmt.Errorf("get size of vector: %v", sizeErr) + } + + exportMap := make(map[string]*wasm.Export, vs) + exportSection := make([]wasm.Export, vs) + for i := wasm.Index(0); i < vs; i++ { + export := &exportSection[i] + err := decodeExport(r, export) + if err != nil { + return nil, nil, fmt.Errorf("read export: %w", err) + } + if _, ok := exportMap[export.Name]; ok { + return nil, nil, fmt.Errorf("export[%d] duplicates name %q", i, export.Name) + } else { + exportMap[export.Name] = export + } + } + return exportSection, exportMap, nil +} + +func decodeStartSection(r *bytes.Reader) (*wasm.Index, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("get function index: %w", err) + } + return &vs, nil +} + +func decodeElementSection(r *bytes.Reader, enabledFeatures api.CoreFeatures) 
([]wasm.ElementSegment, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("get size of vector: %w", err) + } + + result := make([]wasm.ElementSegment, vs) + for i := uint32(0); i < vs; i++ { + if err = decodeElementSegment(r, enabledFeatures, &result[i]); err != nil { + return nil, fmt.Errorf("read element: %w", err) + } + } + return result, nil +} + +func decodeCodeSection(r *bytes.Reader) ([]wasm.Code, error) { + codeSectionStart := uint64(r.Len()) + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("get size of vector: %w", err) + } + + result := make([]wasm.Code, vs) + for i := uint32(0); i < vs; i++ { + err = decodeCode(r, codeSectionStart, &result[i]) + if err != nil { + return nil, fmt.Errorf("read %d-th code segment: %v", i, err) + } + } + return result, nil +} + +func decodeDataSection(r *bytes.Reader, enabledFeatures api.CoreFeatures) ([]wasm.DataSegment, error) { + vs, _, err := leb128.DecodeUint32(r) + if err != nil { + return nil, fmt.Errorf("get size of vector: %w", err) + } + + result := make([]wasm.DataSegment, vs) + for i := uint32(0); i < vs; i++ { + if err = decodeDataSegment(r, enabledFeatures, &result[i]); err != nil { + return nil, fmt.Errorf("read data segment: %w", err) + } + } + return result, nil +} + +func decodeDataCountSection(r *bytes.Reader) (count *uint32, err error) { + v, _, err := leb128.DecodeUint32(r) + if err != nil && err != io.EOF { + // data count is optional, so EOF is fine. 
+ return nil, err + } + return &v, nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/table.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/table.go new file mode 100644 index 000000000..353ec7566 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/table.go @@ -0,0 +1,43 @@ +package binary + +import ( + "bytes" + "fmt" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/wasm" +) + +// decodeTable returns the wasm.Table decoded with the WebAssembly 1.0 (20191205) Binary Format. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-table +func decodeTable(r *bytes.Reader, enabledFeatures api.CoreFeatures, ret *wasm.Table) (err error) { + ret.Type, err = r.ReadByte() + if err != nil { + return fmt.Errorf("read leading byte: %v", err) + } + + if ret.Type != wasm.RefTypeFuncref { + if err = enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("table type funcref is invalid: %w", err) + } + } + + var shared bool + ret.Min, ret.Max, shared, err = decodeLimitsType(r) + if err != nil { + return fmt.Errorf("read limits: %v", err) + } + if ret.Min > wasm.MaximumFunctionIndex { + return fmt.Errorf("table min must be at most %d", wasm.MaximumFunctionIndex) + } + if ret.Max != nil { + if *ret.Max < ret.Min { + return fmt.Errorf("table size minimum must not be greater than maximum") + } + } + if shared { + return fmt.Errorf("tables cannot be marked as shared") + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/value.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/value.go new file mode 100644 index 000000000..755ee5ea3 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/binary/value.go @@ -0,0 +1,60 @@ +package binary + +import ( + "bytes" + "fmt" + "io" + "unicode/utf8" + "unsafe" + + "github.com/tetratelabs/wazero/internal/leb128" + 
"github.com/tetratelabs/wazero/internal/wasm" +) + +func decodeValueTypes(r *bytes.Reader, num uint32) ([]wasm.ValueType, error) { + if num == 0 { + return nil, nil + } + + ret := make([]wasm.ValueType, num) + _, err := io.ReadFull(r, ret) + if err != nil { + return nil, err + } + + for _, v := range ret { + switch v { + case wasm.ValueTypeI32, wasm.ValueTypeF32, wasm.ValueTypeI64, wasm.ValueTypeF64, + wasm.ValueTypeExternref, wasm.ValueTypeFuncref, wasm.ValueTypeV128: + default: + return nil, fmt.Errorf("invalid value type: %d", v) + } + } + return ret, nil +} + +// decodeUTF8 decodes a size prefixed string from the reader, returning it and the count of bytes read. +// contextFormat and contextArgs apply an error format when present +func decodeUTF8(r *bytes.Reader, contextFormat string, contextArgs ...interface{}) (string, uint32, error) { + size, sizeOfSize, err := leb128.DecodeUint32(r) + if err != nil { + return "", 0, fmt.Errorf("failed to read %s size: %w", fmt.Sprintf(contextFormat, contextArgs...), err) + } + + if size == 0 { + return "", uint32(sizeOfSize), nil + } + + buf := make([]byte, size) + if _, err = io.ReadFull(r, buf); err != nil { + return "", 0, fmt.Errorf("failed to read %s: %w", fmt.Sprintf(contextFormat, contextArgs...), err) + } + + if !utf8.Valid(buf) { + return "", 0, fmt.Errorf("%s is not valid UTF-8", fmt.Sprintf(contextFormat, contextArgs...)) + } + + // TODO: use unsafe.String after flooring Go 1.20. + ret := *(*string)(unsafe.Pointer(&buf)) + return ret, size + uint32(sizeOfSize), nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/counts.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/counts.go new file mode 100644 index 000000000..685a40941 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/counts.go @@ -0,0 +1,51 @@ +package wasm + +import "fmt" + +// SectionElementCount returns the count of elements in a given section ID +// +// For example... 
+// * SectionIDType returns the count of FunctionType +// * SectionIDCustom returns the count of CustomSections plus one if NameSection is present +// * SectionIDHostFunction returns the count of HostFunctionSection +// * SectionIDExport returns the count of unique export names +func (m *Module) SectionElementCount(sectionID SectionID) uint32 { // element as in vector elements! + switch sectionID { + case SectionIDCustom: + numCustomSections := uint32(len(m.CustomSections)) + if m.NameSection != nil { + numCustomSections++ + } + return numCustomSections + case SectionIDType: + return uint32(len(m.TypeSection)) + case SectionIDImport: + return uint32(len(m.ImportSection)) + case SectionIDFunction: + return uint32(len(m.FunctionSection)) + case SectionIDTable: + return uint32(len(m.TableSection)) + case SectionIDMemory: + if m.MemorySection != nil { + return 1 + } + return 0 + case SectionIDGlobal: + return uint32(len(m.GlobalSection)) + case SectionIDExport: + return uint32(len(m.ExportSection)) + case SectionIDStart: + if m.StartSection != nil { + return 1 + } + return 0 + case SectionIDElement: + return uint32(len(m.ElementSection)) + case SectionIDCode: + return uint32(len(m.CodeSection)) + case SectionIDData: + return uint32(len(m.DataSection)) + default: + panic(fmt.Errorf("BUG: unknown section: %d", sectionID)) + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go new file mode 100644 index 000000000..58a458217 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go @@ -0,0 +1,72 @@ +package wasm + +import ( + "context" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" +) + +// Engine is a Store-scoped mechanism to compile functions declared or imported by a module. +// This is a top-level type implemented by an interpreter or compiler. 
+type Engine interface { + // Close closes this engine, and releases all the compiled cache. + Close() (err error) + + // CompileModule implements the same method as documented on wasm.Engine. + CompileModule(ctx context.Context, module *Module, listeners []experimental.FunctionListener, ensureTermination bool) error + + // CompiledModuleCount is exported for testing, to track the size of the compilation cache. + CompiledModuleCount() uint32 + + // DeleteCompiledModule releases compilation caches for the given module (source). + // Note: it is safe to call this function for a module from which module instances are instantiated even when these + // module instances have outstanding calls. + DeleteCompiledModule(module *Module) + + // NewModuleEngine compiles down the function instances in a module, and returns ModuleEngine for the module. + // + // * module is the source module from which moduleFunctions are instantiated. This is used for caching. + // * instance is the *ModuleInstance which is created from `module`. + // + // Note: Input parameters must be pre-validated with wasm.Module Validate, to ensure no fields are invalid + // due to reasons such as out-of-bounds. + NewModuleEngine(module *Module, instance *ModuleInstance) (ModuleEngine, error) +} + +// ModuleEngine implements function calls for a given module. +type ModuleEngine interface { + // DoneInstantiation is called at the end of the instantiation of the module. + DoneInstantiation() + + // NewFunction returns an api.Function for the given function pointed by the given Index. + NewFunction(index Index) api.Function + + // ResolveImportedFunction is used to add imported functions needed to make this ModuleEngine fully functional. + // - `index` is the function Index of this imported function. + // - `indexInImportedModule` is the function Index of the imported function in the imported module. + // - `importedModuleEngine` is the ModuleEngine for the imported ModuleInstance. 
+ ResolveImportedFunction(index, indexInImportedModule Index, importedModuleEngine ModuleEngine) + + // ResolveImportedMemory is called when this module imports a memory from another module. + ResolveImportedMemory(importedModuleEngine ModuleEngine) + + // LookupFunction returns the FunctionModule and the Index of the function in the returned ModuleInstance at the given offset in the table. + LookupFunction(t *TableInstance, typeId FunctionTypeID, tableOffset Index) (*ModuleInstance, Index) + + // GetGlobalValue returns the value of the global variable at the given Index. + // Only called when OwnsGlobals() returns true, and must not be called for imported globals + GetGlobalValue(idx Index) (lo, hi uint64) + + // SetGlobalValue sets the value of the global variable at the given Index. + // Only called when OwnsGlobals() returns true, and must not be called for imported globals + SetGlobalValue(idx Index, lo, hi uint64) + + // OwnsGlobals returns true if this ModuleEngine owns the global variables. If true, wasm.GlobalInstance's Val,ValHi should + // not be accessed directly. + OwnsGlobals() bool + + // FunctionInstanceReference returns Reference for the given Index for a FunctionInstance. The returned values are used by + // the initialization via ElementSegment. + FunctionInstanceReference(funcIndex Index) Reference +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go new file mode 100644 index 000000000..8da689076 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go @@ -0,0 +1,2340 @@ +package wasm + +import ( + "bytes" + "errors" + "fmt" + "strconv" + "strings" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/leb128" +) + +// The wazero specific limitation described at RATIONALE.md. 
+const maximumValuesOnStack = 1 << 27 + +// validateFunction validates the instruction sequence of a function. +// following the specification https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#instructions%E2%91%A2. +// +// * idx is the index in the FunctionSection +// * functions are the function index, which is prefixed by imports. The value is the TypeSection index. +// * globals are the global index, which is prefixed by imports. +// * memory is the potentially imported memory and can be nil. +// * table is the potentially imported table and can be nil. +// * declaredFunctionIndexes is the set of function indexes declared by declarative element segments which can be acceed by OpcodeRefFunc instruction. +// +// Returns an error if the instruction sequence is not valid, +// or potentially it can exceed the maximum number of values on the stack. +func (m *Module) validateFunction(sts *stacks, enabledFeatures api.CoreFeatures, idx Index, functions []Index, + globals []GlobalType, memory *Memory, tables []Table, declaredFunctionIndexes map[Index]struct{}, br *bytes.Reader, +) error { + return m.validateFunctionWithMaxStackValues(sts, enabledFeatures, idx, functions, globals, memory, tables, maximumValuesOnStack, declaredFunctionIndexes, br) +} + +func readMemArg(pc uint64, body []byte) (align, offset uint32, read uint64, err error) { + align, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + err = fmt.Errorf("read memory align: %v", err) + return + } + read += num + + offset, num, err = leb128.LoadUint32(body[pc+num:]) + if err != nil { + err = fmt.Errorf("read memory offset: %v", err) + return + } + + read += num + return align, offset, read, nil +} + +// validateFunctionWithMaxStackValues is like validateFunction, but allows overriding maxStackValues for testing. +// +// * stacks is to track the state of Wasm value and control frame stacks at anypoint of execution, and reused to reduce allocation. 
+// * maxStackValues is the maximum height of values stack which the target is allowed to reach. +func (m *Module) validateFunctionWithMaxStackValues( + sts *stacks, + enabledFeatures api.CoreFeatures, + idx Index, + functions []Index, + globals []GlobalType, + memory *Memory, + tables []Table, + maxStackValues int, + declaredFunctionIndexes map[Index]struct{}, + br *bytes.Reader, +) error { + nonStaticLocals := make(map[Index]struct{}) + if len(m.NonStaticLocals) > 0 { + m.NonStaticLocals[idx] = nonStaticLocals + } + + functionType := &m.TypeSection[m.FunctionSection[idx]] + code := &m.CodeSection[idx] + body := code.Body + localTypes := code.LocalTypes + + sts.reset(functionType) + valueTypeStack := &sts.vs + // We start with the outermost control block which is for function return if the code branches into it. + controlBlockStack := &sts.cs + + // Now start walking through all the instructions in the body while tracking + // control blocks and value types to check the validity of all instructions. 
+ for pc := uint64(0); pc < uint64(len(body)); pc++ { + op := body[pc] + if false { + var instName string + if op == OpcodeMiscPrefix { + instName = MiscInstructionName(body[pc+1]) + } else if op == OpcodeVecPrefix { + instName = VectorInstructionName(body[pc+1]) + } else if op == OpcodeAtomicPrefix { + instName = AtomicInstructionName(body[pc+1]) + } else { + instName = InstructionName(op) + } + fmt.Printf("handling %s, stack=%s, blocks: %v\n", instName, valueTypeStack.stack, controlBlockStack) + } + + if len(controlBlockStack.stack) == 0 { + return fmt.Errorf("unexpected end of function at pc=%#x", pc) + } + + if OpcodeI32Load <= op && op <= OpcodeI64Store32 { + if memory == nil { + return fmt.Errorf("memory must exist for %s", InstructionName(op)) + } + pc++ + align, _, read, err := readMemArg(pc, body) + if err != nil { + return err + } + pc += read - 1 + switch op { + case OpcodeI32Load: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeF32Load: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeF32) + case OpcodeI32Store: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeF32Store: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeI64Load: + if 1< 64/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err 
!= nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeF64Load: + if 1< 64/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeF64) + case OpcodeI64Store: + if 1< 64/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeF64Store: + if 1< 64/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeI32Load8S: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI32Load8U: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI64Load8S, OpcodeI64Load8U: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeI32Store8: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeI64Store8: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } 
+ case OpcodeI32Load16S, OpcodeI32Load16U: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI64Load16S, OpcodeI64Load16U: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeI32Store16: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeI64Store16: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeI64Load32S, OpcodeI64Load32U: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeI64Store32: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + } + } else if OpcodeMemorySize <= op && op <= OpcodeMemoryGrow { + if memory == nil { + return fmt.Errorf("memory must exist for %s", InstructionName(op)) + } + pc++ + val, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("read immediate: %v", err) + } + if val != 0 || num != 1 { + return fmt.Errorf("memory instruction reserved bytes not zero with 1 byte") + } + switch Opcode(op) { + case OpcodeMemoryGrow: + if err := 
valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeMemorySize: + valueTypeStack.push(ValueTypeI32) + } + pc += num - 1 + } else if OpcodeI32Const <= op && op <= OpcodeF64Const { + pc++ + switch Opcode(op) { + case OpcodeI32Const: + _, num, err := leb128.LoadInt32(body[pc:]) + if err != nil { + return fmt.Errorf("read i32 immediate: %s", err) + } + pc += num - 1 + valueTypeStack.push(ValueTypeI32) + case OpcodeI64Const: + _, num, err := leb128.LoadInt64(body[pc:]) + if err != nil { + return fmt.Errorf("read i64 immediate: %v", err) + } + valueTypeStack.push(ValueTypeI64) + pc += num - 1 + case OpcodeF32Const: + valueTypeStack.push(ValueTypeF32) + pc += 3 + case OpcodeF64Const: + valueTypeStack.push(ValueTypeF64) + pc += 7 + } + } else if OpcodeLocalGet <= op && op <= OpcodeGlobalSet { + pc++ + index, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("read immediate: %v", err) + } + pc += num - 1 + switch op { + case OpcodeLocalGet: + inputLen := uint32(len(functionType.Params)) + if l := uint32(len(localTypes)) + inputLen; index >= l { + return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))", + OpcodeLocalGetName, index, l) + } + if index < inputLen { + valueTypeStack.push(functionType.Params[index]) + } else { + valueTypeStack.push(localTypes[index-inputLen]) + } + case OpcodeLocalSet: + inputLen := uint32(len(functionType.Params)) + if l := uint32(len(localTypes)) + inputLen; index >= l { + return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))", + OpcodeLocalSetName, index, l) + } + nonStaticLocals[index] = struct{}{} + var expType ValueType + if index < inputLen { + expType = functionType.Params[index] + } else { + expType = localTypes[index-inputLen] + } + if err := valueTypeStack.popAndVerifyType(expType); err != nil { + return err + } + case OpcodeLocalTee: + inputLen := 
uint32(len(functionType.Params)) + if l := uint32(len(localTypes)) + inputLen; index >= l { + return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))", + OpcodeLocalTeeName, index, l) + } + nonStaticLocals[index] = struct{}{} + var expType ValueType + if index < inputLen { + expType = functionType.Params[index] + } else { + expType = localTypes[index-inputLen] + } + if err := valueTypeStack.popAndVerifyType(expType); err != nil { + return err + } + valueTypeStack.push(expType) + case OpcodeGlobalGet: + if index >= uint32(len(globals)) { + return fmt.Errorf("invalid index for %s", OpcodeGlobalGetName) + } + valueTypeStack.push(globals[index].ValType) + case OpcodeGlobalSet: + if index >= uint32(len(globals)) { + return fmt.Errorf("invalid global index") + } else if !globals[index].Mutable { + return fmt.Errorf("%s when not mutable", OpcodeGlobalSetName) + } else if err := valueTypeStack.popAndVerifyType( + globals[index].ValType); err != nil { + return err + } + } + } else if op == OpcodeBr { + pc++ + index, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("read immediate: %v", err) + } else if int(index) >= len(controlBlockStack.stack) { + return fmt.Errorf("invalid %s operation: index out of range", OpcodeBrName) + } + pc += num - 1 + // Check type soundness. + target := &controlBlockStack.stack[len(controlBlockStack.stack)-int(index)-1] + var targetResultType []ValueType + if target.op == OpcodeLoop { + targetResultType = target.blockType.Params + } else { + targetResultType = target.blockType.Results + } + if err = valueTypeStack.popResults(op, targetResultType, false); err != nil { + return err + } + // br instruction is stack-polymorphic. 
+ valueTypeStack.unreachable() + } else if op == OpcodeBrIf { + pc++ + index, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("read immediate: %v", err) + } else if int(index) >= len(controlBlockStack.stack) { + return fmt.Errorf( + "invalid ln param given for %s: index=%d with %d for the current label stack length", + OpcodeBrIfName, index, len(controlBlockStack.stack)) + } + pc += num - 1 + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the required operand for %s", OpcodeBrIfName) + } + // Check type soundness. + target := &controlBlockStack.stack[len(controlBlockStack.stack)-int(index)-1] + var targetResultType []ValueType + if target.op == OpcodeLoop { + targetResultType = target.blockType.Params + } else { + targetResultType = target.blockType.Results + } + if err := valueTypeStack.popResults(op, targetResultType, false); err != nil { + return err + } + // Push back the result + for _, t := range targetResultType { + valueTypeStack.push(t) + } + } else if op == OpcodeBrTable { + pc++ + br.Reset(body[pc:]) + nl, num, err := leb128.DecodeUint32(br) + if err != nil { + return fmt.Errorf("read immediate: %w", err) + } + + list := make([]uint32, nl) + for i := uint32(0); i < nl; i++ { + l, n, err := leb128.DecodeUint32(br) + if err != nil { + return fmt.Errorf("read immediate: %w", err) + } + num += n + list[i] = l + } + ln, n, err := leb128.DecodeUint32(br) + if err != nil { + return fmt.Errorf("read immediate: %w", err) + } else if int(ln) >= len(controlBlockStack.stack) { + return fmt.Errorf( + "invalid ln param given for %s: ln=%d with %d for the current label stack length", + OpcodeBrTableName, ln, len(controlBlockStack.stack)) + } + pc += n + num - 1 + // Check type soundness. 
+ if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the required operand for %s", OpcodeBrTableName) + } + lnLabel := &controlBlockStack.stack[len(controlBlockStack.stack)-1-int(ln)] + var defaultLabelType []ValueType + // Below, we might modify the slice in case of unreachable. Therefore, + // we have to copy the content of block result types, otherwise the original + // function type might result in invalid value types if the block is the outermost label + // which equals the function's type. + if lnLabel.op != OpcodeLoop { // Loop operation doesn't require results since the continuation is the beginning of the loop. + defaultLabelType = make([]ValueType, len(lnLabel.blockType.Results)) + copy(defaultLabelType, lnLabel.blockType.Results) + } else { + defaultLabelType = make([]ValueType, len(lnLabel.blockType.Params)) + copy(defaultLabelType, lnLabel.blockType.Params) + } + + if enabledFeatures.IsEnabled(api.CoreFeatureReferenceTypes) { + // As of reference-types proposal, br_table on unreachable state + // can choose unknown types for expected parameter types for each label. + // https://github.com/WebAssembly/reference-types/pull/116 + for i := range defaultLabelType { + index := len(defaultLabelType) - 1 - i + exp := defaultLabelType[index] + actual, err := valueTypeStack.pop() + if err != nil { + return err + } + if actual == valueTypeUnknown { + // Re-assign the expected type to unknown. 
+ defaultLabelType[index] = valueTypeUnknown + } else if actual != exp { + return typeMismatchError(true, OpcodeBrTableName, actual, exp, i) + } + } + } else { + if err = valueTypeStack.popResults(op, defaultLabelType, false); err != nil { + return err + } + } + + for _, l := range list { + if int(l) >= len(controlBlockStack.stack) { + return fmt.Errorf("invalid l param given for %s", OpcodeBrTableName) + } + label := &controlBlockStack.stack[len(controlBlockStack.stack)-1-int(l)] + var tableLabelType []ValueType + if label.op != OpcodeLoop { + tableLabelType = label.blockType.Results + } else { + tableLabelType = label.blockType.Params + } + if len(defaultLabelType) != len(tableLabelType) { + return fmt.Errorf("inconsistent block type length for %s at %d; %v (ln=%d) != %v (l=%d)", OpcodeBrTableName, l, defaultLabelType, ln, tableLabelType, l) + } + for i := range defaultLabelType { + if defaultLabelType[i] != valueTypeUnknown && defaultLabelType[i] != tableLabelType[i] { + return fmt.Errorf("incosistent block type for %s at %d", OpcodeBrTableName, l) + } + } + } + + // br_table instruction is stack-polymorphic. 
+ valueTypeStack.unreachable() + } else if op == OpcodeCall { + pc++ + index, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("read immediate: %v", err) + } + pc += num - 1 + if int(index) >= len(functions) { + return fmt.Errorf("invalid function index") + } + funcType := &m.TypeSection[functions[index]] + for i := 0; i < len(funcType.Params); i++ { + if err := valueTypeStack.popAndVerifyType(funcType.Params[len(funcType.Params)-1-i]); err != nil { + return fmt.Errorf("type mismatch on %s operation param type: %v", OpcodeCallName, err) + } + } + for _, exp := range funcType.Results { + valueTypeStack.push(exp) + } + } else if op == OpcodeCallIndirect { + pc++ + typeIndex, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("read immediate: %v", err) + } + pc += num + + if int(typeIndex) >= len(m.TypeSection) { + return fmt.Errorf("invalid type index at %s: %d", OpcodeCallIndirectName, typeIndex) + } + + tableIndex, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("read table index: %v", err) + } + pc += num - 1 + if tableIndex != 0 { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("table index must be zero but was %d: %w", tableIndex, err) + } + } + + if tableIndex >= uint32(len(tables)) { + return fmt.Errorf("unknown table index: %d", tableIndex) + } + + table := tables[tableIndex] + if table.Type != RefTypeFuncref { + return fmt.Errorf("table is not funcref type but was %s for %s", RefTypeName(table.Type), OpcodeCallIndirectName) + } + + if err = valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the offset in table for %s", OpcodeCallIndirectName) + } + funcType := &m.TypeSection[typeIndex] + for i := 0; i < len(funcType.Params); i++ { + if err = valueTypeStack.popAndVerifyType(funcType.Params[len(funcType.Params)-1-i]); err != nil { + return fmt.Errorf("type mismatch on %s 
operation input type", OpcodeCallIndirectName) + } + } + for _, exp := range funcType.Results { + valueTypeStack.push(exp) + } + } else if OpcodeI32Eqz <= op && op <= OpcodeI64Extend32S { + switch op { + case OpcodeI32Eqz: + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeI32EqzName, err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI32Eq, OpcodeI32Ne, OpcodeI32LtS, + OpcodeI32LtU, OpcodeI32GtS, OpcodeI32GtU, OpcodeI32LeS, + OpcodeI32LeU, OpcodeI32GeS, OpcodeI32GeU: + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the 1st i32 operand for %s: %v", InstructionName(op), err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the 2nd i32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI64Eqz: + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeI64EqzName, err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI64Eq, OpcodeI64Ne, OpcodeI64LtS, + OpcodeI64LtU, OpcodeI64GtS, OpcodeI64GtU, + OpcodeI64LeS, OpcodeI64LeU, OpcodeI64GeS, OpcodeI64GeU: + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return fmt.Errorf("cannot pop the 1st i64 operand for %s: %v", InstructionName(op), err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return fmt.Errorf("cannot pop the 2nd i64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeF32Eq, OpcodeF32Ne, OpcodeF32Lt, OpcodeF32Gt, OpcodeF32Le, OpcodeF32Ge: + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return fmt.Errorf("cannot pop the 1st f32 operand for %s: %v", InstructionName(op), err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return 
fmt.Errorf("cannot pop the 2nd f32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeF64Eq, OpcodeF64Ne, OpcodeF64Lt, OpcodeF64Gt, OpcodeF64Le, OpcodeF64Ge: + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return fmt.Errorf("cannot pop the 1st f64 operand for %s: %v", InstructionName(op), err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return fmt.Errorf("cannot pop the 2nd f64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI32Clz, OpcodeI32Ctz, OpcodeI32Popcnt: + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the i32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI32Add, OpcodeI32Sub, OpcodeI32Mul, OpcodeI32DivS, + OpcodeI32DivU, OpcodeI32RemS, OpcodeI32RemU, OpcodeI32And, + OpcodeI32Or, OpcodeI32Xor, OpcodeI32Shl, OpcodeI32ShrS, + OpcodeI32ShrU, OpcodeI32Rotl, OpcodeI32Rotr: + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the 1st operand for %s: %v", InstructionName(op), err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the 2nd operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI64Clz, OpcodeI64Ctz, OpcodeI64Popcnt: + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return fmt.Errorf("cannot pop the i64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI64) + case OpcodeI64Add, OpcodeI64Sub, OpcodeI64Mul, OpcodeI64DivS, + OpcodeI64DivU, OpcodeI64RemS, OpcodeI64RemU, OpcodeI64And, + OpcodeI64Or, OpcodeI64Xor, OpcodeI64Shl, OpcodeI64ShrS, + OpcodeI64ShrU, OpcodeI64Rotl, OpcodeI64Rotr: + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return 
fmt.Errorf("cannot pop the 1st i64 operand for %s: %v", InstructionName(op), err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return fmt.Errorf("cannot pop the 2nd i64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI64) + case OpcodeF32Abs, OpcodeF32Neg, OpcodeF32Ceil, + OpcodeF32Floor, OpcodeF32Trunc, OpcodeF32Nearest, + OpcodeF32Sqrt: + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return fmt.Errorf("cannot pop the 1st f32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeF32) + case OpcodeF32Add, OpcodeF32Sub, OpcodeF32Mul, + OpcodeF32Div, OpcodeF32Min, OpcodeF32Max, + OpcodeF32Copysign: + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return fmt.Errorf("cannot pop the 1st f32 operand for %s: %v", InstructionName(op), err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return fmt.Errorf("cannot pop the 2nd f32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeF32) + case OpcodeF64Abs, OpcodeF64Neg, OpcodeF64Ceil, + OpcodeF64Floor, OpcodeF64Trunc, OpcodeF64Nearest, + OpcodeF64Sqrt: + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return fmt.Errorf("cannot pop the 1st f64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeF64) + case OpcodeF64Add, OpcodeF64Sub, OpcodeF64Mul, + OpcodeF64Div, OpcodeF64Min, OpcodeF64Max, + OpcodeF64Copysign: + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return fmt.Errorf("cannot pop the 1st f64 operand for %s: %v", InstructionName(op), err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return fmt.Errorf("cannot pop the 2nd f64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeF64) + case OpcodeI32WrapI64: + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + 
return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeI32WrapI64Name, err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI32TruncF32S, OpcodeI32TruncF32U: + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return fmt.Errorf("cannot pop the f32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI32TruncF64S, OpcodeI32TruncF64U: + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return fmt.Errorf("cannot pop the f64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI64ExtendI32S, OpcodeI64ExtendI32U: + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the i32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI64) + case OpcodeI64TruncF32S, OpcodeI64TruncF32U: + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return fmt.Errorf("cannot pop the f32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI64) + case OpcodeI64TruncF64S, OpcodeI64TruncF64U: + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return fmt.Errorf("cannot pop the f64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeI64) + case OpcodeF32ConvertI32S, OpcodeF32ConvertI32U: + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the i32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeF32) + case OpcodeF32ConvertI64S, OpcodeF32ConvertI64U: + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return fmt.Errorf("cannot pop the i64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeF32) + case OpcodeF32DemoteF64: + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return fmt.Errorf("cannot pop the operand 
for %s: %v", OpcodeF32DemoteF64Name, err) + } + valueTypeStack.push(ValueTypeF32) + case OpcodeF64ConvertI32S, OpcodeF64ConvertI32U: + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the i32 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeF64) + case OpcodeF64ConvertI64S, OpcodeF64ConvertI64U: + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return fmt.Errorf("cannot pop the i64 operand for %s: %v", InstructionName(op), err) + } + valueTypeStack.push(ValueTypeF64) + case OpcodeF64PromoteF32: + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeF64PromoteF32Name, err) + } + valueTypeStack.push(ValueTypeF64) + case OpcodeI32ReinterpretF32: + if err := valueTypeStack.popAndVerifyType(ValueTypeF32); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeI32ReinterpretF32Name, err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI64ReinterpretF64: + if err := valueTypeStack.popAndVerifyType(ValueTypeF64); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeI64ReinterpretF64Name, err) + } + valueTypeStack.push(ValueTypeI64) + case OpcodeF32ReinterpretI32: + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeF32ReinterpretI32Name, err) + } + valueTypeStack.push(ValueTypeF32) + case OpcodeF64ReinterpretI64: + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeF64ReinterpretI64Name, err) + } + valueTypeStack.push(ValueTypeF64) + case OpcodeI32Extend8S, OpcodeI32Extend16S: + if err := enabledFeatures.RequireEnabled(api.CoreFeatureSignExtensionOps); err != nil { + return fmt.Errorf("%s invalid as %v", instructionNames[op], err) + } + if err := 
valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", instructionNames[op], err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeI64Extend8S, OpcodeI64Extend16S, OpcodeI64Extend32S: + if err := enabledFeatures.RequireEnabled(api.CoreFeatureSignExtensionOps); err != nil { + return fmt.Errorf("%s invalid as %v", instructionNames[op], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", instructionNames[op], err) + } + valueTypeStack.push(ValueTypeI64) + default: + return fmt.Errorf("invalid numeric instruction 0x%x", op) + } + } else if op >= OpcodeRefNull && op <= OpcodeRefFunc { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("%s invalid as %v", instructionNames[op], err) + } + switch op { + case OpcodeRefNull: + pc++ + switch reftype := body[pc]; reftype { + case ValueTypeExternref: + valueTypeStack.push(ValueTypeExternref) + case ValueTypeFuncref: + valueTypeStack.push(ValueTypeFuncref) + default: + return fmt.Errorf("unknown type for ref.null: 0x%x", reftype) + } + case OpcodeRefIsNull: + tp, err := valueTypeStack.pop() + if err != nil { + return fmt.Errorf("cannot pop the operand for ref.is_null: %v", err) + } else if !isReferenceValueType(tp) && tp != valueTypeUnknown { + return fmt.Errorf("type mismatch: expected reference type but was %s", ValueTypeName(tp)) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeRefFunc: + pc++ + index, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read function index for ref.func: %v", err) + } + if _, ok := declaredFunctionIndexes[index]; !ok { + return fmt.Errorf("undeclared function index %d for ref.func", index) + } + pc += num - 1 + valueTypeStack.push(ValueTypeFuncref) + } + } else if op == OpcodeTableGet || op == OpcodeTableSet { + if err := 
enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("%s is invalid as %v", InstructionName(op), err) + } + pc++ + tableIndex, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("read immediate: %v", err) + } + if tableIndex >= uint32(len(tables)) { + return fmt.Errorf("table of index %d not found", tableIndex) + } + + refType := tables[tableIndex].Type + if op == OpcodeTableGet { + if err := valueTypeStack.popAndVerifyType(api.ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for table.get: %v", err) + } + valueTypeStack.push(refType) + } else { + if err := valueTypeStack.popAndVerifyType(refType); err != nil { + return fmt.Errorf("cannot pop the operand for table.set: %v", err) + } + if err := valueTypeStack.popAndVerifyType(api.ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for table.set: %v", err) + } + } + pc += num - 1 + } else if op == OpcodeMiscPrefix { + pc++ + // A misc opcode is encoded as an unsigned variable 32-bit integer. + miscOp32, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read misc opcode: %v", err) + } + pc += num - 1 + miscOpcode := byte(miscOp32) + // If the misc opcode is beyond byte range, it is highly likely this is an invalid binary, or + // it is due to the new opcode from a new proposal. In the latter case, we have to + // change the alias type of OpcodeMisc (which is currently byte) to uint32. 
+ if uint32(byte(miscOp32)) != miscOp32 { + return fmt.Errorf("invalid misc opcode: %#x", miscOp32) + } + if miscOpcode >= OpcodeMiscI32TruncSatF32S && miscOpcode <= OpcodeMiscI64TruncSatF64U { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureNonTrappingFloatToIntConversion); err != nil { + return fmt.Errorf("%s invalid as %v", miscInstructionNames[miscOpcode], err) + } + var inType, outType ValueType + switch miscOpcode { + case OpcodeMiscI32TruncSatF32S, OpcodeMiscI32TruncSatF32U: + inType, outType = ValueTypeF32, ValueTypeI32 + case OpcodeMiscI32TruncSatF64S, OpcodeMiscI32TruncSatF64U: + inType, outType = ValueTypeF64, ValueTypeI32 + case OpcodeMiscI64TruncSatF32S, OpcodeMiscI64TruncSatF32U: + inType, outType = ValueTypeF32, ValueTypeI64 + case OpcodeMiscI64TruncSatF64S, OpcodeMiscI64TruncSatF64U: + inType, outType = ValueTypeF64, ValueTypeI64 + } + if err := valueTypeStack.popAndVerifyType(inType); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", miscInstructionNames[miscOpcode], err) + } + valueTypeStack.push(outType) + } else if miscOpcode >= OpcodeMiscMemoryInit && miscOpcode <= OpcodeMiscTableCopy { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureBulkMemoryOperations); err != nil { + return fmt.Errorf("%s invalid as %v", miscInstructionNames[miscOpcode], err) + } + var params []ValueType + // Handle opcodes added in bulk-memory-operations/WebAssembly 2.0. + switch miscOpcode { + case OpcodeMiscDataDrop: + if m.DataCountSection == nil { + return fmt.Errorf("%s requires data count section", MiscInstructionName(miscOpcode)) + } + + // We need to read the index to the data section. 
+ pc++ + index, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read data segment index for %s: %v", MiscInstructionName(miscOpcode), err) + } + if int(index) >= len(m.DataSection) { + return fmt.Errorf("index %d out of range of data section(len=%d)", index, len(m.DataSection)) + } + pc += num - 1 + case OpcodeMiscMemoryInit, OpcodeMiscMemoryCopy, OpcodeMiscMemoryFill: + if memory == nil { + return fmt.Errorf("memory must exist for %s", MiscInstructionName(miscOpcode)) + } + params = []ValueType{ValueTypeI32, ValueTypeI32, ValueTypeI32} + + if miscOpcode == OpcodeMiscMemoryInit { + if m.DataCountSection == nil { + return fmt.Errorf("%s requires data count section", MiscInstructionName(miscOpcode)) + } + + // We need to read the index to the data section. + pc++ + index, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read data segment index for %s: %v", MiscInstructionName(miscOpcode), err) + } + if int(index) >= len(m.DataSection) { + return fmt.Errorf("index %d out of range of data section(len=%d)", index, len(m.DataSection)) + } + pc += num - 1 + } + + pc++ + val, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read memory index for %s: %v", MiscInstructionName(miscOpcode), err) + } + if val != 0 || num != 1 { + return fmt.Errorf("%s reserved byte must be zero encoded with 1 byte", MiscInstructionName(miscOpcode)) + } + if miscOpcode == OpcodeMiscMemoryCopy { + pc++ + // memory.copy needs two memory index which are reserved as zero. 
+ val, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read memory index for %s: %v", MiscInstructionName(miscOpcode), err) + } + if val != 0 || num != 1 { + return fmt.Errorf("%s reserved byte must be zero encoded with 1 byte", MiscInstructionName(miscOpcode)) + } + } + + case OpcodeMiscTableInit: + params = []ValueType{ValueTypeI32, ValueTypeI32, ValueTypeI32} + pc++ + elementIndex, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read element segment index for %s: %v", MiscInstructionName(miscOpcode), err) + } + if int(elementIndex) >= len(m.ElementSection) { + return fmt.Errorf("index %d out of range of element section(len=%d)", elementIndex, len(m.ElementSection)) + } + pc += num + + tableIndex, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read source table index for %s: %v", MiscInstructionName(miscOpcode), err) + } + if tableIndex != 0 { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("source table index must be zero for %s as %v", MiscInstructionName(miscOpcode), err) + } + } + if tableIndex >= uint32(len(tables)) { + return fmt.Errorf("table of index %d not found", tableIndex) + } + + if m.ElementSection[elementIndex].Type != tables[tableIndex].Type { + return fmt.Errorf("type mismatch for table.init: element type %s does not match table type %s", + RefTypeName(m.ElementSection[elementIndex].Type), + RefTypeName(tables[tableIndex].Type), + ) + } + pc += num - 1 + case OpcodeMiscElemDrop: + pc++ + elementIndex, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read element segment index for %s: %v", MiscInstructionName(miscOpcode), err) + } else if int(elementIndex) >= len(m.ElementSection) { + return fmt.Errorf("index %d out of range of element section(len=%d)", elementIndex, len(m.ElementSection)) + } + pc += num - 1 + case 
OpcodeMiscTableCopy: + params = []ValueType{ValueTypeI32, ValueTypeI32, ValueTypeI32} + pc++ + + dstTableIndex, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read destination table index for %s: %v", MiscInstructionName(miscOpcode), err) + } + if dstTableIndex != 0 { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("destination table index must be zero for %s as %v", MiscInstructionName(miscOpcode), err) + } + } + if dstTableIndex >= uint32(len(tables)) { + return fmt.Errorf("table of index %d not found", dstTableIndex) + } + pc += num + + srcTableIndex, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read source table index for %s: %v", MiscInstructionName(miscOpcode), err) + } + if srcTableIndex != 0 { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("source table index must be zero for %s as %v", MiscInstructionName(miscOpcode), err) + } + } + if srcTableIndex >= uint32(len(tables)) { + return fmt.Errorf("table of index %d not found", srcTableIndex) + } + + if tables[srcTableIndex].Type != tables[dstTableIndex].Type { + return fmt.Errorf("table type mismatch for table.copy: %s (src) != %s (dst)", + RefTypeName(tables[srcTableIndex].Type), RefTypeName(tables[dstTableIndex].Type)) + } + + pc += num - 1 + } + for _, p := range params { + if err := valueTypeStack.popAndVerifyType(p); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", miscInstructionNames[miscOpcode], err) + } + } + } else if miscOpcode >= OpcodeMiscTableGrow && miscOpcode <= OpcodeMiscTableFill { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("%s invalid as %v", miscInstructionNames[miscOpcode], err) + } + + pc++ + tableIndex, num, err := leb128.LoadUint32(body[pc:]) + if err != nil { + return fmt.Errorf("failed to read 
table index for %s: %v", MiscInstructionName(miscOpcode), err) + } + if tableIndex >= uint32(len(tables)) { + return fmt.Errorf("table of index %d not found", tableIndex) + } + pc += num - 1 + + var params, results []ValueType + reftype := tables[tableIndex].Type + if miscOpcode == OpcodeMiscTableGrow { + params = []ValueType{ValueTypeI32, reftype} + results = []ValueType{ValueTypeI32} + } else if miscOpcode == OpcodeMiscTableSize { + results = []ValueType{ValueTypeI32} + } else if miscOpcode == OpcodeMiscTableFill { + params = []ValueType{ValueTypeI32, reftype, ValueTypeI32} + } + + for _, p := range params { + if err := valueTypeStack.popAndVerifyType(p); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", miscInstructionNames[miscOpcode], err) + } + } + for _, r := range results { + valueTypeStack.push(r) + } + } else { + return fmt.Errorf("unknown misc opcode %#x", miscOpcode) + } + } else if op == OpcodeVecPrefix { + pc++ + // Vector instructions come with two bytes where the first byte is always OpcodeVecPrefix, + // and the second byte determines the actual instruction. 
+ vecOpcode := body[pc] + if err := enabledFeatures.RequireEnabled(api.CoreFeatureSIMD); err != nil { + return fmt.Errorf("%s invalid as %v", vectorInstructionName[vecOpcode], err) + } + + switch vecOpcode { + case OpcodeVecV128Const: + // Read 128-bit = 16 bytes constants + if int(pc+16) >= len(body) { + return fmt.Errorf("cannot read constant vector value for %s", vectorInstructionName[vecOpcode]) + } + pc += 16 + valueTypeStack.push(ValueTypeV128) + case OpcodeVecV128AnyTrue, OpcodeVecI8x16AllTrue, OpcodeVecI16x8AllTrue, OpcodeVecI32x4AllTrue, OpcodeVecI64x2AllTrue, + OpcodeVecI8x16BitMask, OpcodeVecI16x8BitMask, OpcodeVecI32x4BitMask, OpcodeVecI64x2BitMask: + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeI32) + case OpcodeVecV128Load, OpcodeVecV128Load8x8s, OpcodeVecV128Load8x8u, OpcodeVecV128Load16x4s, OpcodeVecV128Load16x4u, + OpcodeVecV128Load32x2s, OpcodeVecV128Load32x2u, OpcodeVecV128Load8Splat, OpcodeVecV128Load16Splat, + OpcodeVecV128Load32Splat, OpcodeVecV128Load64Splat, + OpcodeVecV128Load32zero, OpcodeVecV128Load64zero: + if memory == nil { + return fmt.Errorf("memory must exist for %s", VectorInstructionName(vecOpcode)) + } + pc++ + align, _, read, err := readMemArg(pc, body) + if err != nil { + return err + } + pc += read - 1 + var maxAlign uint32 + switch vecOpcode { + case OpcodeVecV128Load: + maxAlign = 128 / 8 + case OpcodeVecV128Load8x8s, OpcodeVecV128Load8x8u, OpcodeVecV128Load16x4s, OpcodeVecV128Load16x4u, + OpcodeVecV128Load32x2s, OpcodeVecV128Load32x2u: + maxAlign = 64 / 8 + case OpcodeVecV128Load8Splat: + maxAlign = 1 + case OpcodeVecV128Load16Splat: + maxAlign = 16 / 8 + case OpcodeVecV128Load32Splat: + maxAlign = 32 / 8 + case OpcodeVecV128Load64Splat: + maxAlign = 64 / 8 + case OpcodeVecV128Load32zero: + maxAlign = 32 / 8 + case OpcodeVecV128Load64zero: + maxAlign = 64 / 8 + } + + if 
1< maxAlign { + return fmt.Errorf("invalid memory alignment %d for %s", align, VectorInstructionName(vecOpcode)) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", VectorInstructionName(vecOpcode), err) + } + valueTypeStack.push(ValueTypeV128) + case OpcodeVecV128Store: + if memory == nil { + return fmt.Errorf("memory must exist for %s", VectorInstructionName(vecOpcode)) + } + pc++ + align, _, read, err := readMemArg(pc, body) + if err != nil { + return err + } + pc += read - 1 + if 1< 128/8 { + return fmt.Errorf("invalid memory alignment %d for %s", align, OpcodeVecV128StoreName) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeVecV128StoreName, err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", OpcodeVecV128StoreName, err) + } + case OpcodeVecV128Load8Lane, OpcodeVecV128Load16Lane, OpcodeVecV128Load32Lane, OpcodeVecV128Load64Lane: + if memory == nil { + return fmt.Errorf("memory must exist for %s", VectorInstructionName(vecOpcode)) + } + attr := vecLoadLanes[vecOpcode] + pc++ + align, _, read, err := readMemArg(pc, body) + if err != nil { + return err + } + if 1< attr.alignMax { + return fmt.Errorf("invalid memory alignment %d for %s", align, vectorInstructionName[vecOpcode]) + } + pc += read + if pc >= uint64(len(body)) { + return fmt.Errorf("lane for %s not found", OpcodeVecV128Load64LaneName) + } + lane := body[pc] + if lane >= attr.laneCeil { + return fmt.Errorf("invalid lane index %d >= %d for %s", lane, attr.laneCeil, vectorInstructionName[vecOpcode]) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + 
return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + case OpcodeVecV128Store8Lane, OpcodeVecV128Store16Lane, OpcodeVecV128Store32Lane, OpcodeVecV128Store64Lane: + if memory == nil { + return fmt.Errorf("memory must exist for %s", VectorInstructionName(vecOpcode)) + } + attr := vecStoreLanes[vecOpcode] + pc++ + align, _, read, err := readMemArg(pc, body) + if err != nil { + return err + } + if 1< attr.alignMax { + return fmt.Errorf("invalid memory alignment %d for %s", align, vectorInstructionName[vecOpcode]) + } + pc += read + if pc >= uint64(len(body)) { + return fmt.Errorf("lane for %s not found", vectorInstructionName[vecOpcode]) + } + lane := body[pc] + if lane >= attr.laneCeil { + return fmt.Errorf("invalid lane index %d >= %d for %s", lane, attr.laneCeil, vectorInstructionName[vecOpcode]) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + case OpcodeVecI8x16ExtractLaneS, + OpcodeVecI8x16ExtractLaneU, + OpcodeVecI16x8ExtractLaneS, + OpcodeVecI16x8ExtractLaneU, + OpcodeVecI32x4ExtractLane, + OpcodeVecI64x2ExtractLane, + OpcodeVecF32x4ExtractLane, + OpcodeVecF64x2ExtractLane: + pc++ + if pc >= uint64(len(body)) { + return fmt.Errorf("lane for %s not found", vectorInstructionName[vecOpcode]) + } + attr := vecExtractLanes[vecOpcode] + lane := body[pc] + if lane >= attr.laneCeil { + return fmt.Errorf("invalid lane index %d >= %d for %s", lane, attr.laneCeil, vectorInstructionName[vecOpcode]) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + 
valueTypeStack.push(attr.resultType) + case OpcodeVecI8x16ReplaceLane, OpcodeVecI16x8ReplaceLane, OpcodeVecI32x4ReplaceLane, + OpcodeVecI64x2ReplaceLane, OpcodeVecF32x4ReplaceLane, OpcodeVecF64x2ReplaceLane: + pc++ + if pc >= uint64(len(body)) { + return fmt.Errorf("lane for %s not found", vectorInstructionName[vecOpcode]) + } + attr := vecReplaceLanes[vecOpcode] + lane := body[pc] + if lane >= attr.laneCeil { + return fmt.Errorf("invalid lane index %d >= %d for %s", lane, attr.laneCeil, vectorInstructionName[vecOpcode]) + } + if err := valueTypeStack.popAndVerifyType(attr.paramType); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + case OpcodeVecI8x16Splat, OpcodeVecI16x8Splat, OpcodeVecI32x4Splat, + OpcodeVecI64x2Splat, OpcodeVecF32x4Splat, OpcodeVecF64x2Splat: + tp := vecSplatValueTypes[vecOpcode] + if err := valueTypeStack.popAndVerifyType(tp); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + case OpcodeVecI8x16Swizzle, OpcodeVecV128And, OpcodeVecV128Or, OpcodeVecV128Xor, OpcodeVecV128AndNot: + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + case OpcodeVecV128Bitselect: + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := 
valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + case OpcodeVecV128Not: + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + case OpcodeVecV128i8x16Shuffle: + pc++ + if pc+15 >= uint64(len(body)) { + return fmt.Errorf("16 lane indexes for %s not found", vectorInstructionName[vecOpcode]) + } + lanes := body[pc : pc+16] + for i, l := range lanes { + if l >= 32 { + return fmt.Errorf("invalid lane index[%d] %d >= %d for %s", i, l, 32, vectorInstructionName[vecOpcode]) + } + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + pc += 15 + case OpcodeVecI8x16Shl, OpcodeVecI8x16ShrS, OpcodeVecI8x16ShrU, + OpcodeVecI16x8Shl, OpcodeVecI16x8ShrS, OpcodeVecI16x8ShrU, + OpcodeVecI32x4Shl, OpcodeVecI32x4ShrS, OpcodeVecI32x4ShrU, + OpcodeVecI64x2Shl, OpcodeVecI64x2ShrS, OpcodeVecI64x2ShrU: + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + case 
OpcodeVecI8x16Eq, OpcodeVecI8x16Ne, OpcodeVecI8x16LtS, OpcodeVecI8x16LtU, OpcodeVecI8x16GtS, + OpcodeVecI8x16GtU, OpcodeVecI8x16LeS, OpcodeVecI8x16LeU, OpcodeVecI8x16GeS, OpcodeVecI8x16GeU, + OpcodeVecI16x8Eq, OpcodeVecI16x8Ne, OpcodeVecI16x8LtS, OpcodeVecI16x8LtU, OpcodeVecI16x8GtS, + OpcodeVecI16x8GtU, OpcodeVecI16x8LeS, OpcodeVecI16x8LeU, OpcodeVecI16x8GeS, OpcodeVecI16x8GeU, + OpcodeVecI32x4Eq, OpcodeVecI32x4Ne, OpcodeVecI32x4LtS, OpcodeVecI32x4LtU, OpcodeVecI32x4GtS, + OpcodeVecI32x4GtU, OpcodeVecI32x4LeS, OpcodeVecI32x4LeU, OpcodeVecI32x4GeS, OpcodeVecI32x4GeU, + OpcodeVecI64x2Eq, OpcodeVecI64x2Ne, OpcodeVecI64x2LtS, OpcodeVecI64x2GtS, OpcodeVecI64x2LeS, + OpcodeVecI64x2GeS, OpcodeVecF32x4Eq, OpcodeVecF32x4Ne, OpcodeVecF32x4Lt, OpcodeVecF32x4Gt, + OpcodeVecF32x4Le, OpcodeVecF32x4Ge, OpcodeVecF64x2Eq, OpcodeVecF64x2Ne, OpcodeVecF64x2Lt, + OpcodeVecF64x2Gt, OpcodeVecF64x2Le, OpcodeVecF64x2Ge, + OpcodeVecI32x4DotI16x8S, + OpcodeVecI8x16NarrowI16x8S, OpcodeVecI8x16NarrowI16x8U, OpcodeVecI16x8NarrowI32x4S, OpcodeVecI16x8NarrowI32x4U: + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + case OpcodeVecI8x16Neg, OpcodeVecI16x8Neg, OpcodeVecI32x4Neg, OpcodeVecI64x2Neg, OpcodeVecF32x4Neg, OpcodeVecF64x2Neg, + OpcodeVecF32x4Sqrt, OpcodeVecF64x2Sqrt, + OpcodeVecI8x16Abs, OpcodeVecI8x16Popcnt, OpcodeVecI16x8Abs, OpcodeVecI32x4Abs, OpcodeVecI64x2Abs, + OpcodeVecF32x4Abs, OpcodeVecF64x2Abs, + OpcodeVecF32x4Ceil, OpcodeVecF32x4Floor, OpcodeVecF32x4Trunc, OpcodeVecF32x4Nearest, + OpcodeVecF64x2Ceil, OpcodeVecF64x2Floor, OpcodeVecF64x2Trunc, OpcodeVecF64x2Nearest, + OpcodeVecI16x8ExtendLowI8x16S, OpcodeVecI16x8ExtendHighI8x16S, 
OpcodeVecI16x8ExtendLowI8x16U, OpcodeVecI16x8ExtendHighI8x16U, + OpcodeVecI32x4ExtendLowI16x8S, OpcodeVecI32x4ExtendHighI16x8S, OpcodeVecI32x4ExtendLowI16x8U, OpcodeVecI32x4ExtendHighI16x8U, + OpcodeVecI64x2ExtendLowI32x4S, OpcodeVecI64x2ExtendHighI32x4S, OpcodeVecI64x2ExtendLowI32x4U, OpcodeVecI64x2ExtendHighI32x4U, + OpcodeVecI16x8ExtaddPairwiseI8x16S, OpcodeVecI16x8ExtaddPairwiseI8x16U, + OpcodeVecI32x4ExtaddPairwiseI16x8S, OpcodeVecI32x4ExtaddPairwiseI16x8U, + OpcodeVecF64x2PromoteLowF32x4Zero, OpcodeVecF32x4DemoteF64x2Zero, + OpcodeVecF32x4ConvertI32x4S, OpcodeVecF32x4ConvertI32x4U, + OpcodeVecF64x2ConvertLowI32x4S, OpcodeVecF64x2ConvertLowI32x4U, + OpcodeVecI32x4TruncSatF32x4S, OpcodeVecI32x4TruncSatF32x4U, OpcodeVecI32x4TruncSatF64x2SZero, OpcodeVecI32x4TruncSatF64x2UZero: + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + + case OpcodeVecI8x16Add, OpcodeVecI8x16AddSatS, OpcodeVecI8x16AddSatU, OpcodeVecI8x16Sub, OpcodeVecI8x16SubSatS, OpcodeVecI8x16SubSatU, + OpcodeVecI16x8Add, OpcodeVecI16x8AddSatS, OpcodeVecI16x8AddSatU, OpcodeVecI16x8Sub, OpcodeVecI16x8SubSatS, OpcodeVecI16x8SubSatU, OpcodeVecI16x8Mul, + OpcodeVecI32x4Add, OpcodeVecI32x4Sub, OpcodeVecI32x4Mul, + OpcodeVecI64x2Add, OpcodeVecI64x2Sub, OpcodeVecI64x2Mul, + OpcodeVecF32x4Add, OpcodeVecF32x4Sub, OpcodeVecF32x4Mul, OpcodeVecF32x4Div, + OpcodeVecF64x2Add, OpcodeVecF64x2Sub, OpcodeVecF64x2Mul, OpcodeVecF64x2Div, + OpcodeVecI8x16MinS, OpcodeVecI8x16MinU, OpcodeVecI8x16MaxS, OpcodeVecI8x16MaxU, + OpcodeVecI8x16AvgrU, + OpcodeVecI16x8MinS, OpcodeVecI16x8MinU, OpcodeVecI16x8MaxS, OpcodeVecI16x8MaxU, + OpcodeVecI16x8AvgrU, + OpcodeVecI32x4MinS, OpcodeVecI32x4MinU, OpcodeVecI32x4MaxS, OpcodeVecI32x4MaxU, + OpcodeVecF32x4Min, OpcodeVecF32x4Max, OpcodeVecF64x2Min, OpcodeVecF64x2Max, + OpcodeVecF32x4Pmin, OpcodeVecF32x4Pmax, OpcodeVecF64x2Pmin, 
OpcodeVecF64x2Pmax, + OpcodeVecI16x8Q15mulrSatS, + OpcodeVecI16x8ExtMulLowI8x16S, OpcodeVecI16x8ExtMulHighI8x16S, OpcodeVecI16x8ExtMulLowI8x16U, OpcodeVecI16x8ExtMulHighI8x16U, + OpcodeVecI32x4ExtMulLowI16x8S, OpcodeVecI32x4ExtMulHighI16x8S, OpcodeVecI32x4ExtMulLowI16x8U, OpcodeVecI32x4ExtMulHighI16x8U, + OpcodeVecI64x2ExtMulLowI32x4S, OpcodeVecI64x2ExtMulHighI32x4S, OpcodeVecI64x2ExtMulLowI32x4U, OpcodeVecI64x2ExtMulHighI32x4U: + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + if err := valueTypeStack.popAndVerifyType(ValueTypeV128); err != nil { + return fmt.Errorf("cannot pop the operand for %s: %v", vectorInstructionName[vecOpcode], err) + } + valueTypeStack.push(ValueTypeV128) + default: + return fmt.Errorf("unknown SIMD instruction %s", vectorInstructionName[vecOpcode]) + } + } else if op == OpcodeBlock { + br.Reset(body[pc+1:]) + bt, num, err := DecodeBlockType(m.TypeSection, br, enabledFeatures) + if err != nil { + return fmt.Errorf("read block: %w", err) + } + controlBlockStack.push(pc, 0, 0, bt, num, 0) + if err = valueTypeStack.popParams(op, bt.Params, false); err != nil { + return err + } + // Plus we have to push any block params again. + for _, p := range bt.Params { + valueTypeStack.push(p) + } + valueTypeStack.pushStackLimit(len(bt.Params)) + pc += num + } else if op == OpcodeAtomicPrefix { + pc++ + // Atomic instructions come with two bytes where the first byte is always OpcodeAtomicPrefix, + // and the second byte determines the actual instruction. + atomicOpcode := body[pc] + if err := enabledFeatures.RequireEnabled(experimental.CoreFeaturesThreads); err != nil { + return fmt.Errorf("%s invalid as %v", atomicInstructionName[atomicOpcode], err) + } + pc++ + + if atomicOpcode == OpcodeAtomicFence { + // No memory requirement and no arguments or return, however the immediate byte value must be 0. 
+ imm := body[pc] + if imm != 0x0 { + return fmt.Errorf("invalid immediate value for %s", AtomicInstructionName(atomicOpcode)) + } + continue + } + + // All atomic operations except fence (checked above) require memory + if memory == nil { + return fmt.Errorf("memory must exist for %s", AtomicInstructionName(atomicOpcode)) + } + align, _, read, err := readMemArg(pc, body) + if err != nil { + return err + } + pc += read - 1 + switch atomicOpcode { + case OpcodeAtomicMemoryNotify: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicMemoryWait32: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicMemoryWait64: + if 1< 64/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicI32Load: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicI64Load: + if 1< 64/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + 
valueTypeStack.push(ValueTypeI64) + case OpcodeAtomicI32Load8U: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeAtomicI64Load32U: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeAtomicI32Store: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeAtomicI64Store: + if 1< 64/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeAtomicI32Store8: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeAtomicI32Store16: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeAtomicI64Store8: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeAtomicI64Store16: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := 
valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeAtomicI64Store32: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + case OpcodeAtomicI32RmwAdd, OpcodeAtomicI32RmwSub, OpcodeAtomicI32RmwAnd, OpcodeAtomicI32RmwOr, OpcodeAtomicI32RmwXor, OpcodeAtomicI32RmwXchg: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicI32Rmw8AddU, OpcodeAtomicI32Rmw8SubU, OpcodeAtomicI32Rmw8AndU, OpcodeAtomicI32Rmw8OrU, OpcodeAtomicI32Rmw8XorU, OpcodeAtomicI32Rmw8XchgU: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicI32Rmw16AddU, OpcodeAtomicI32Rmw16SubU, OpcodeAtomicI32Rmw16AndU, OpcodeAtomicI32Rmw16OrU, OpcodeAtomicI32Rmw16XorU, OpcodeAtomicI32Rmw16XchgU: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicI64RmwAdd, OpcodeAtomicI64RmwSub, OpcodeAtomicI64RmwAnd, OpcodeAtomicI64RmwOr, OpcodeAtomicI64RmwXor, OpcodeAtomicI64RmwXchg: + if 1< 64/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := 
valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeAtomicI64Rmw8AddU, OpcodeAtomicI64Rmw8SubU, OpcodeAtomicI64Rmw8AndU, OpcodeAtomicI64Rmw8OrU, OpcodeAtomicI64Rmw8XorU, OpcodeAtomicI64Rmw8XchgU: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeAtomicI64Rmw16AddU, OpcodeAtomicI64Rmw16SubU, OpcodeAtomicI64Rmw16AndU, OpcodeAtomicI64Rmw16OrU, OpcodeAtomicI64Rmw16XorU, OpcodeAtomicI64Rmw16XchgU: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeAtomicI64Rmw32AddU, OpcodeAtomicI64Rmw32SubU, OpcodeAtomicI64Rmw32AndU, OpcodeAtomicI64Rmw32OrU, OpcodeAtomicI64Rmw32XorU, OpcodeAtomicI64Rmw32XchgU: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeAtomicI32RmwCmpxchg: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicI32Rmw8CmpxchgU: + if 1< 1 { + return 
fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicI32Rmw16CmpxchgU: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI32) + case OpcodeAtomicI64RmwCmpxchg: + if 1< 64/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeAtomicI64Rmw8CmpxchgU: + if 1< 1 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case OpcodeAtomicI64Rmw16CmpxchgU: + if 1< 16/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + case 
OpcodeAtomicI64Rmw32CmpxchgU: + if 1< 32/8 { + return fmt.Errorf("invalid memory alignment") + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI64); err != nil { + return err + } + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return err + } + valueTypeStack.push(ValueTypeI64) + default: + return fmt.Errorf("invalid atomic opcode: 0x%x", atomicOpcode) + } + } else if op == OpcodeLoop { + br.Reset(body[pc+1:]) + bt, num, err := DecodeBlockType(m.TypeSection, br, enabledFeatures) + if err != nil { + return fmt.Errorf("read block: %w", err) + } + controlBlockStack.push(pc, 0, 0, bt, num, op) + if err = valueTypeStack.popParams(op, bt.Params, false); err != nil { + return err + } + // Plus we have to push any block params again. + for _, p := range bt.Params { + valueTypeStack.push(p) + } + valueTypeStack.pushStackLimit(len(bt.Params)) + pc += num + } else if op == OpcodeIf { + br.Reset(body[pc+1:]) + bt, num, err := DecodeBlockType(m.TypeSection, br, enabledFeatures) + if err != nil { + return fmt.Errorf("read block: %w", err) + } + controlBlockStack.push(pc, 0, 0, bt, num, op) + if err = valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("cannot pop the operand for 'if': %v", err) + } + if err = valueTypeStack.popParams(op, bt.Params, false); err != nil { + return err + } + // Plus we have to push any block params again. + for _, p := range bt.Params { + valueTypeStack.push(p) + } + valueTypeStack.pushStackLimit(len(bt.Params)) + pc += num + } else if op == OpcodeElse { + bl := &controlBlockStack.stack[len(controlBlockStack.stack)-1] + if bl.op != OpcodeIf { + return fmt.Errorf("else instruction must be used in if block: %#x", pc) + } + bl.op = OpcodeElse + bl.elseAt = pc + // Check the type soundness of the instructions *before* entering this else Op. 
+ if err := valueTypeStack.popResults(OpcodeIf, bl.blockType.Results, true); err != nil { + return err + } + // Before entering instructions inside else, we pop all the values pushed by then block. + valueTypeStack.resetAtStackLimit() + // Plus we have to push any block params again. + for _, p := range bl.blockType.Params { + valueTypeStack.push(p) + } + } else if op == OpcodeEnd { + bl := controlBlockStack.pop() + bl.endAt = pc + + // OpcodeEnd can end a block or the function itself. Check to see what it is: + + ifMissingElse := bl.op == OpcodeIf && bl.elseAt <= bl.startAt + if ifMissingElse { + // If this is the end of block without else, the number of block's results and params must be same. + // Otherwise, the value stack would result in the inconsistent state at runtime. + if !bytes.Equal(bl.blockType.Results, bl.blockType.Params) { + return typeCountError(false, OpcodeElseName, bl.blockType.Params, bl.blockType.Results) + } + // -1 skips else, to handle if block without else properly. + bl.elseAt = bl.endAt - 1 + } + + // Determine the block context + ctx := "" // the outer-most block: the function return + if bl.op == OpcodeIf && !ifMissingElse && bl.elseAt > 0 { + ctx = OpcodeElseName + } else if bl.op != 0 { + ctx = InstructionName(bl.op) + } + + // Check return types match + if err := valueTypeStack.requireStackValues(false, ctx, bl.blockType.Results, true); err != nil { + return err + } + + // Put the result types at the end after resetting at the stack limit + // since we might have Any type between the limit and the current top. + valueTypeStack.resetAtStackLimit() + for _, exp := range bl.blockType.Results { + valueTypeStack.push(exp) + } + // We exit if/loop/block, so reset the constraints on the stack manipulation + // on values previously pushed by outer blocks. 
+ valueTypeStack.popStackLimit() + } else if op == OpcodeReturn { + // Same formatting as OpcodeEnd on the outer-most block + if err := valueTypeStack.requireStackValues(false, "", functionType.Results, false); err != nil { + return err + } + // return instruction is stack-polymorphic. + valueTypeStack.unreachable() + } else if op == OpcodeDrop { + _, err := valueTypeStack.pop() + if err != nil { + return fmt.Errorf("invalid drop: %v", err) + } + } else if op == OpcodeSelect || op == OpcodeTypedSelect { + if err := valueTypeStack.popAndVerifyType(ValueTypeI32); err != nil { + return fmt.Errorf("type mismatch on 3rd select operand: %v", err) + } + v1, err := valueTypeStack.pop() + if err != nil { + return fmt.Errorf("invalid select: %v", err) + } + v2, err := valueTypeStack.pop() + if err != nil { + return fmt.Errorf("invalid select: %v", err) + } + + if op == OpcodeTypedSelect { + if err := enabledFeatures.RequireEnabled(api.CoreFeatureReferenceTypes); err != nil { + return fmt.Errorf("%s is invalid as %w", InstructionName(op), err) + } + pc++ + if numTypeImmeidates := body[pc]; numTypeImmeidates != 1 { + return fmt.Errorf("too many type immediates for %s", InstructionName(op)) + } + pc++ + tp := body[pc] + if tp != ValueTypeI32 && tp != ValueTypeI64 && tp != ValueTypeF32 && tp != ValueTypeF64 && + tp != api.ValueTypeExternref && tp != ValueTypeFuncref && tp != ValueTypeV128 { + return fmt.Errorf("invalid type %s for %s", ValueTypeName(tp), OpcodeTypedSelectName) + } + } else if isReferenceValueType(v1) || isReferenceValueType(v2) { + return fmt.Errorf("reference types cannot be used for non typed select instruction") + } + + if v1 != v2 && v1 != valueTypeUnknown && v2 != valueTypeUnknown { + return fmt.Errorf("type mismatch on 1st and 2nd select operands") + } + if v1 == valueTypeUnknown { + valueTypeStack.push(v2) + } else { + valueTypeStack.push(v1) + } + } else if op == OpcodeUnreachable { + // unreachable instruction is stack-polymorphic. 
+ valueTypeStack.unreachable() + } else if op == OpcodeNop { + } else { + return fmt.Errorf("invalid instruction 0x%x", op) + } + } + + if len(controlBlockStack.stack) > 0 { + return fmt.Errorf("ill-nested block exists") + } + if valueTypeStack.maximumStackPointer > maxStackValues { + return fmt.Errorf("function may have %d stack values, which exceeds limit %d", valueTypeStack.maximumStackPointer, maxStackValues) + } + return nil +} + +var vecExtractLanes = [...]struct { + laneCeil byte + resultType ValueType +}{ + OpcodeVecI8x16ExtractLaneS: {laneCeil: 16, resultType: ValueTypeI32}, + OpcodeVecI8x16ExtractLaneU: {laneCeil: 16, resultType: ValueTypeI32}, + OpcodeVecI16x8ExtractLaneS: {laneCeil: 8, resultType: ValueTypeI32}, + OpcodeVecI16x8ExtractLaneU: {laneCeil: 8, resultType: ValueTypeI32}, + OpcodeVecI32x4ExtractLane: {laneCeil: 4, resultType: ValueTypeI32}, + OpcodeVecI64x2ExtractLane: {laneCeil: 2, resultType: ValueTypeI64}, + OpcodeVecF32x4ExtractLane: {laneCeil: 4, resultType: ValueTypeF32}, + OpcodeVecF64x2ExtractLane: {laneCeil: 2, resultType: ValueTypeF64}, +} + +var vecReplaceLanes = [...]struct { + laneCeil byte + paramType ValueType +}{ + OpcodeVecI8x16ReplaceLane: {laneCeil: 16, paramType: ValueTypeI32}, + OpcodeVecI16x8ReplaceLane: {laneCeil: 8, paramType: ValueTypeI32}, + OpcodeVecI32x4ReplaceLane: {laneCeil: 4, paramType: ValueTypeI32}, + OpcodeVecI64x2ReplaceLane: {laneCeil: 2, paramType: ValueTypeI64}, + OpcodeVecF32x4ReplaceLane: {laneCeil: 4, paramType: ValueTypeF32}, + OpcodeVecF64x2ReplaceLane: {laneCeil: 2, paramType: ValueTypeF64}, +} + +var vecStoreLanes = [...]struct { + alignMax uint32 + laneCeil byte +}{ + OpcodeVecV128Store64Lane: {alignMax: 64 / 8, laneCeil: 128 / 64}, + OpcodeVecV128Store32Lane: {alignMax: 32 / 8, laneCeil: 128 / 32}, + OpcodeVecV128Store16Lane: {alignMax: 16 / 8, laneCeil: 128 / 16}, + OpcodeVecV128Store8Lane: {alignMax: 1, laneCeil: 128 / 8}, +} + +var vecLoadLanes = [...]struct { + alignMax uint32 + laneCeil byte 
}{
	OpcodeVecV128Load64Lane: {alignMax: 64 / 8, laneCeil: 128 / 64},
	OpcodeVecV128Load32Lane: {alignMax: 32 / 8, laneCeil: 128 / 32},
	OpcodeVecV128Load16Lane: {alignMax: 16 / 8, laneCeil: 128 / 16},
	OpcodeVecV128Load8Lane:  {alignMax: 1, laneCeil: 128 / 8},
}

// vecSplatValueTypes maps each vector splat opcode to the scalar value type
// it consumes from the stack.
var vecSplatValueTypes = [...]ValueType{
	OpcodeVecI8x16Splat: ValueTypeI32,
	OpcodeVecI16x8Splat: ValueTypeI32,
	OpcodeVecI32x4Splat: ValueTypeI32,
	OpcodeVecI64x2Splat: ValueTypeI64,
	OpcodeVecF32x4Splat: ValueTypeF32,
	OpcodeVecF64x2Splat: ValueTypeF64,
}

// stacks bundles the value-type stack and the control-block stack used during
// function validation so both can be reset and reused across functions.
type stacks struct {
	vs valueTypeStack
	cs controlBlockStack
}

// reset prepares both stacks for validating a new function of the given type,
// keeping the previously allocated backing storage to avoid reallocation.
func (sts *stacks) reset(functionType *FunctionType) {
	// Reset valueStack for reuse.
	sts.vs.stack = sts.vs.stack[:0]
	sts.vs.stackLimits = sts.vs.stackLimits[:0]
	sts.vs.maximumStackPointer = 0
	sts.cs.stack = sts.cs.stack[:0]
	// The outermost frame represents the function itself (op left as zero).
	sts.cs.stack = append(sts.cs.stack, controlBlock{blockType: functionType})
}

// controlBlockStack tracks the currently open block/loop/if control frames.
type controlBlockStack struct {
	stack []controlBlock
}

// pop removes the innermost frame and returns a pointer to it; the pointer
// remains valid because the backing array still retains the element.
func (s *controlBlockStack) pop() *controlBlock {
	tail := len(s.stack) - 1
	ret := &s.stack[tail]
	s.stack = s.stack[:tail]
	return ret
}

// push opens a new control frame with the given bookkeeping fields.
func (s *controlBlockStack) push(startAt, elseAt, endAt uint64, blockType *FunctionType, blockTypeBytes uint64, op Opcode) {
	s.stack = append(s.stack, controlBlock{
		startAt:        startAt,
		elseAt:         elseAt,
		endAt:          endAt,
		blockType:      blockType,
		blockTypeBytes: blockTypeBytes,
		op:             op,
	})
}

// valueTypeStack models the operand stack during validation: stack holds the
// value types currently pushed, stackLimits holds the stack "floor" of each
// open control frame, and maximumStackPointer records the deepest stack seen.
type valueTypeStack struct {
	stack               []ValueType
	stackLimits         []int
	maximumStackPointer int
	// requireStackValuesTmp is used in requireStackValues function to reduce the allocation.
	requireStackValuesTmp []ValueType
}

// Only used in the analyzeFunction below.
+const valueTypeUnknown = ValueType(0xFF) + +func (s *valueTypeStack) tryPop() (vt ValueType, limit int, ok bool) { + if len(s.stackLimits) > 0 { + limit = s.stackLimits[len(s.stackLimits)-1] + } + stackLen := len(s.stack) + if stackLen <= limit { + return + } else if stackLen == limit+1 && s.stack[limit] == valueTypeUnknown { + vt = valueTypeUnknown + ok = true + return + } else { + vt = s.stack[stackLen-1] + s.stack = s.stack[:stackLen-1] + ok = true + return + } +} + +func (s *valueTypeStack) pop() (ValueType, error) { + if vt, limit, ok := s.tryPop(); ok { + return vt, nil + } else { + return 0, fmt.Errorf("invalid operation: trying to pop at %d with limit %d", len(s.stack), limit) + } +} + +// popAndVerifyType returns an error if the stack value is unexpected. +func (s *valueTypeStack) popAndVerifyType(expected ValueType) error { + have, _, ok := s.tryPop() + if !ok { + return fmt.Errorf("%s missing", ValueTypeName(expected)) + } + if have != expected && have != valueTypeUnknown && expected != valueTypeUnknown { + return fmt.Errorf("type mismatch: expected %s, but was %s", ValueTypeName(expected), ValueTypeName(have)) + } + return nil +} + +func (s *valueTypeStack) push(v ValueType) { + s.stack = append(s.stack, v) + if sp := len(s.stack); sp > s.maximumStackPointer { + s.maximumStackPointer = sp + } +} + +func (s *valueTypeStack) unreachable() { + s.resetAtStackLimit() + s.stack = append(s.stack, valueTypeUnknown) +} + +func (s *valueTypeStack) resetAtStackLimit() { + if len(s.stackLimits) != 0 { + s.stack = s.stack[:s.stackLimits[len(s.stackLimits)-1]] + } else { + s.stack = s.stack[:0] + } +} + +func (s *valueTypeStack) popStackLimit() { + if len(s.stackLimits) != 0 { + s.stackLimits = s.stackLimits[:len(s.stackLimits)-1] + } +} + +// pushStackLimit pushes the control frame's bottom of the stack. 
+func (s *valueTypeStack) pushStackLimit(params int) { + limit := len(s.stack) - params + s.stackLimits = append(s.stackLimits, limit) +} + +func (s *valueTypeStack) popParams(oc Opcode, want []ValueType, checkAboveLimit bool) error { + return s.requireStackValues(true, InstructionName(oc), want, checkAboveLimit) +} + +func (s *valueTypeStack) popResults(oc Opcode, want []ValueType, checkAboveLimit bool) error { + return s.requireStackValues(false, InstructionName(oc), want, checkAboveLimit) +} + +func (s *valueTypeStack) requireStackValues( + isParam bool, + context string, + want []ValueType, + checkAboveLimit bool, +) error { + limit := 0 + if len(s.stackLimits) > 0 { + limit = s.stackLimits[len(s.stackLimits)-1] + } + // Iterate backwards as we are comparing the desired slice against stack value types. + countWanted := len(want) + + // First, check if there are enough values on the stack. + s.requireStackValuesTmp = s.requireStackValuesTmp[:0] + for i := countWanted - 1; i >= 0; i-- { + popped, _, ok := s.tryPop() + if !ok { + if len(s.requireStackValuesTmp) > len(want) { + return typeCountError(isParam, context, s.requireStackValuesTmp, want) + } + return typeCountError(isParam, context, s.requireStackValuesTmp, want) + } + s.requireStackValuesTmp = append(s.requireStackValuesTmp, popped) + } + + // Now, check if there are too many values. + if checkAboveLimit { + if !(limit == len(s.stack) || (limit+1 == len(s.stack) && s.stack[limit] == valueTypeUnknown)) { + return typeCountError(isParam, context, append(s.stack, want...), want) + } + } + + // Finally, check the types of the values: + for i, v := range s.requireStackValuesTmp { + nextWant := want[countWanted-i-1] // have is in reverse order (stack) + if v != nextWant && v != valueTypeUnknown && nextWant != valueTypeUnknown { + return typeMismatchError(isParam, context, v, nextWant, i) + } + } + return nil +} + +// typeMismatchError returns an error similar to go compiler's error on type mismatch. 
+func typeMismatchError(isParam bool, context string, have ValueType, want ValueType, i int) error { + var ret strings.Builder + ret.WriteString("cannot use ") + ret.WriteString(ValueTypeName(have)) + if context != "" { + ret.WriteString(" in ") + ret.WriteString(context) + ret.WriteString(" block") + } + if isParam { + ret.WriteString(" as param") + } else { + ret.WriteString(" as result") + } + ret.WriteString("[") + ret.WriteString(strconv.Itoa(i)) + ret.WriteString("] type ") + ret.WriteString(ValueTypeName(want)) + return errors.New(ret.String()) +} + +// typeCountError returns an error similar to go compiler's error on type count mismatch. +func typeCountError(isParam bool, context string, have []ValueType, want []ValueType) error { + var ret strings.Builder + if len(have) > len(want) { + ret.WriteString("too many ") + } else { + ret.WriteString("not enough ") + } + if isParam { + ret.WriteString("params") + } else { + ret.WriteString("results") + } + if context != "" { + if isParam { + ret.WriteString(" for ") + } else { + ret.WriteString(" in ") + } + ret.WriteString(context) + ret.WriteString(" block") + } + ret.WriteString("\n\thave (") + writeValueTypes(have, &ret) + ret.WriteString(")\n\twant (") + writeValueTypes(want, &ret) + ret.WriteByte(')') + return errors.New(ret.String()) +} + +func writeValueTypes(vts []ValueType, ret *strings.Builder) { + switch len(vts) { + case 0: + case 1: + ret.WriteString(ValueTypeName(vts[0])) + default: + ret.WriteString(ValueTypeName(vts[0])) + for _, vt := range vts[1:] { + ret.WriteString(", ") + ret.WriteString(ValueTypeName(vt)) + } + } +} + +func (s *valueTypeStack) String() string { + var typeStrs, limits []string + for _, v := range s.stack { + var str string + if v == valueTypeUnknown { + str = "unknown" + } else { + str = ValueTypeName(v) + } + typeStrs = append(typeStrs, str) + } + for _, d := range s.stackLimits { + limits = append(limits, fmt.Sprintf("%d", d)) + } + return fmt.Sprintf("{stack: [%s], limits: 
[%s]}", + strings.Join(typeStrs, ", "), strings.Join(limits, ",")) +} + +type controlBlock struct { + startAt, elseAt, endAt uint64 + blockType *FunctionType + blockTypeBytes uint64 + // op is zero when the outermost block + op Opcode +} + +// DecodeBlockType decodes the type index from a positive 33-bit signed integer. Negative numbers indicate up to one +// WebAssembly 1.0 (20191205) compatible result type. Positive numbers are decoded when `enabledFeatures` include +// CoreFeatureMultiValue and include an index in the Module.TypeSection. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-blocktype +// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/multi-value/Overview.md +func DecodeBlockType(types []FunctionType, r *bytes.Reader, enabledFeatures api.CoreFeatures) (*FunctionType, uint64, error) { + raw, num, err := leb128.DecodeInt33AsInt64(r) + if err != nil { + return nil, 0, fmt.Errorf("decode int33: %w", err) + } + + var ret *FunctionType + switch raw { + case -64: // 0x40 in original byte = nil + ret = blockType_v_v + case -1: // 0x7f in original byte = i32 + ret = blockType_v_i32 + case -2: // 0x7e in original byte = i64 + ret = blockType_v_i64 + case -3: // 0x7d in original byte = f32 + ret = blockType_v_f32 + case -4: // 0x7c in original byte = f64 + ret = blockType_v_f64 + case -5: // 0x7b in original byte = v128 + ret = blockType_v_v128 + case -16: // 0x70 in original byte = funcref + ret = blockType_v_funcref + case -17: // 0x6f in original byte = externref + ret = blockType_v_externref + default: + if err = enabledFeatures.RequireEnabled(api.CoreFeatureMultiValue); err != nil { + return nil, num, fmt.Errorf("block with function type return invalid as %v", err) + } + if raw < 0 || (raw >= int64(len(types))) { + return nil, 0, fmt.Errorf("type index out of range: %d", raw) + } + ret = &types[raw] + } + return ret, num, err +} + +// These block types are defined as globals in order to avoid allocations in 
DecodeBlockType. +var ( + blockType_v_v = &FunctionType{} + blockType_v_i32 = &FunctionType{Results: []ValueType{ValueTypeI32}, ResultNumInUint64: 1} + blockType_v_i64 = &FunctionType{Results: []ValueType{ValueTypeI64}, ResultNumInUint64: 1} + blockType_v_f32 = &FunctionType{Results: []ValueType{ValueTypeF32}, ResultNumInUint64: 1} + blockType_v_f64 = &FunctionType{Results: []ValueType{ValueTypeF64}, ResultNumInUint64: 1} + blockType_v_v128 = &FunctionType{Results: []ValueType{ValueTypeV128}, ResultNumInUint64: 2} + blockType_v_funcref = &FunctionType{Results: []ValueType{ValueTypeFuncref}, ResultNumInUint64: 1} + blockType_v_externref = &FunctionType{Results: []ValueType{ValueTypeExternref}, ResultNumInUint64: 1} +) + +// SplitCallStack returns the input stack resliced to the count of params and +// results, or errors if it isn't long enough for either. +func SplitCallStack(ft *FunctionType, stack []uint64) (params []uint64, results []uint64, err error) { + stackLen := len(stack) + if n := ft.ParamNumInUint64; n > stackLen { + return nil, nil, fmt.Errorf("need %d params, but stack size is %d", n, stackLen) + } else if n > 0 { + params = stack[:n] + } + if n := ft.ResultNumInUint64; n > stackLen { + return nil, nil, fmt.Errorf("need %d results, but stack size is %d", n, stackLen) + } else if n > 0 { + results = stack[:n] + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/function_definition.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/function_definition.go new file mode 100644 index 000000000..c5f6e9121 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/function_definition.go @@ -0,0 +1,188 @@ +package wasm + +import ( + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/internalapi" + "github.com/tetratelabs/wazero/internal/wasmdebug" +) + +// ImportedFunctions returns the definitions of each imported function. 
+// +// Note: Unlike ExportedFunctions, there is no unique constraint on imports. +func (m *Module) ImportedFunctions() (ret []api.FunctionDefinition) { + for i := uint32(0); i < m.ImportFunctionCount; i++ { + ret = append(ret, m.FunctionDefinition(i)) + } + return +} + +// ExportedFunctions returns the definitions of each exported function. +func (m *Module) ExportedFunctions() map[string]api.FunctionDefinition { + ret := map[string]api.FunctionDefinition{} + for i := range m.ExportSection { + exp := &m.ExportSection[i] + if exp.Type == ExternTypeFunc { + d := m.FunctionDefinition(exp.Index) + ret[exp.Name] = d + } + } + return ret +} + +// FunctionDefinition returns the FunctionDefinition for the given `index`. +func (m *Module) FunctionDefinition(index Index) *FunctionDefinition { + // TODO: function initialization is lazy, but bulk. Make it per function. + m.buildFunctionDefinitions() + return &m.FunctionDefinitionSection[index] +} + +// buildFunctionDefinitions generates function metadata that can be parsed from +// the module. This must be called after all validation. 
+func (m *Module) buildFunctionDefinitions() { + m.functionDefinitionSectionInitOnce.Do(m.buildFunctionDefinitionsOnce) +} + +func (m *Module) buildFunctionDefinitionsOnce() { + var moduleName string + var functionNames NameMap + var localNames, resultNames IndirectNameMap + if m.NameSection != nil { + moduleName = m.NameSection.ModuleName + functionNames = m.NameSection.FunctionNames + localNames = m.NameSection.LocalNames + resultNames = m.NameSection.ResultNames + } + + importCount := m.ImportFunctionCount + m.FunctionDefinitionSection = make([]FunctionDefinition, importCount+uint32(len(m.FunctionSection))) + + importFuncIdx := Index(0) + for i := range m.ImportSection { + imp := &m.ImportSection[i] + if imp.Type != ExternTypeFunc { + continue + } + + def := &m.FunctionDefinitionSection[importFuncIdx] + def.importDesc = imp + def.index = importFuncIdx + def.Functype = &m.TypeSection[imp.DescFunc] + importFuncIdx++ + } + + for codeIndex, typeIndex := range m.FunctionSection { + code := &m.CodeSection[codeIndex] + idx := importFuncIdx + Index(codeIndex) + def := &m.FunctionDefinitionSection[idx] + def.index = idx + def.Functype = &m.TypeSection[typeIndex] + def.goFunc = code.GoFunc + } + + n, nLen := 0, len(functionNames) + for i := range m.FunctionDefinitionSection { + d := &m.FunctionDefinitionSection[i] + // The function name section begins with imports, but can be sparse. + // This keeps track of how far in the name section we've searched. + funcIdx := d.index + var funcName string + for ; n < nLen; n++ { + next := &functionNames[n] + if next.Index > funcIdx { + break // we have function names, but starting at a later index. 
+ } else if next.Index == funcIdx { + funcName = next.Name + break + } + } + + d.moduleName = moduleName + d.name = funcName + d.Debugname = wasmdebug.FuncName(moduleName, funcName, funcIdx) + d.paramNames = paramNames(localNames, funcIdx, len(d.Functype.Params)) + d.resultNames = paramNames(resultNames, funcIdx, len(d.Functype.Results)) + + for i := range m.ExportSection { + e := &m.ExportSection[i] + if e.Type == ExternTypeFunc && e.Index == funcIdx { + d.exportNames = append(d.exportNames, e.Name) + } + } + } +} + +// FunctionDefinition implements api.FunctionDefinition +type FunctionDefinition struct { + internalapi.WazeroOnlyType + moduleName string + index Index + name string + // Debugname is exported for testing purpose. + Debugname string + goFunc interface{} + // Functype is exported for testing purpose. + Functype *FunctionType + importDesc *Import + exportNames []string + paramNames []string + resultNames []string +} + +// ModuleName implements the same method as documented on api.FunctionDefinition. +func (f *FunctionDefinition) ModuleName() string { + return f.moduleName +} + +// Index implements the same method as documented on api.FunctionDefinition. +func (f *FunctionDefinition) Index() uint32 { + return f.index +} + +// Name implements the same method as documented on api.FunctionDefinition. +func (f *FunctionDefinition) Name() string { + return f.name +} + +// DebugName implements the same method as documented on api.FunctionDefinition. +func (f *FunctionDefinition) DebugName() string { + return f.Debugname +} + +// Import implements the same method as documented on api.FunctionDefinition. +func (f *FunctionDefinition) Import() (moduleName, name string, isImport bool) { + if f.importDesc != nil { + importDesc := f.importDesc + moduleName, name, isImport = importDesc.Module, importDesc.Name, true + } + return +} + +// ExportNames implements the same method as documented on api.FunctionDefinition. 
+func (f *FunctionDefinition) ExportNames() []string { + return f.exportNames +} + +// GoFunction implements the same method as documented on api.FunctionDefinition. +func (f *FunctionDefinition) GoFunction() interface{} { + return f.goFunc +} + +// ParamTypes implements api.FunctionDefinition ParamTypes. +func (f *FunctionDefinition) ParamTypes() []ValueType { + return f.Functype.Params +} + +// ParamNames implements the same method as documented on api.FunctionDefinition. +func (f *FunctionDefinition) ParamNames() []string { + return f.paramNames +} + +// ResultTypes implements api.FunctionDefinition ResultTypes. +func (f *FunctionDefinition) ResultTypes() []ValueType { + return f.Functype.Results +} + +// ResultNames implements the same method as documented on api.FunctionDefinition. +func (f *FunctionDefinition) ResultNames() []string { + return f.resultNames +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/global.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/global.go new file mode 100644 index 000000000..abaa2d1f9 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/global.go @@ -0,0 +1,55 @@ +package wasm + +import ( + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/internalapi" +) + +// constantGlobal wraps GlobalInstance to implement api.Global. +type constantGlobal struct { + internalapi.WazeroOnlyType + g *GlobalInstance +} + +// Type implements api.Global. +func (g constantGlobal) Type() api.ValueType { + return g.g.Type.ValType +} + +// Get implements api.Global. +func (g constantGlobal) Get() uint64 { + ret, _ := g.g.Value() + return ret +} + +// String implements api.Global. +func (g constantGlobal) String() string { + return g.g.String() +} + +// mutableGlobal extends constantGlobal to allow updates. +type mutableGlobal struct { + internalapi.WazeroOnlyType + g *GlobalInstance +} + +// Type implements api.Global. 
+func (g mutableGlobal) Type() api.ValueType { + return g.g.Type.ValType +} + +// Get implements api.Global. +func (g mutableGlobal) Get() uint64 { + ret, _ := g.g.Value() + return ret +} + +// String implements api.Global. +func (g mutableGlobal) String() string { + return g.g.String() +} + +// Set implements the same method as documented on api.MutableGlobal. +func (g mutableGlobal) Set(v uint64) { + g.g.SetValue(v, 0) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/gofunc.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/gofunc.go new file mode 100644 index 000000000..9510c2588 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/gofunc.go @@ -0,0 +1,279 @@ +package wasm + +import ( + "bytes" + "context" + "errors" + "fmt" + "math" + "reflect" + + "github.com/tetratelabs/wazero/api" +) + +type paramsKind byte + +const ( + paramsKindNoContext paramsKind = iota + paramsKindContext + paramsKindContextModule +) + +// Below are reflection code to get the interface type used to parse functions and set values. + +var ( + moduleType = reflect.TypeOf((*api.Module)(nil)).Elem() + goContextType = reflect.TypeOf((*context.Context)(nil)).Elem() + errorType = reflect.TypeOf((*error)(nil)).Elem() +) + +// compile-time check to ensure reflectGoModuleFunction implements +// api.GoModuleFunction. +var _ api.GoModuleFunction = (*reflectGoModuleFunction)(nil) + +type reflectGoModuleFunction struct { + fn *reflect.Value + params, results []ValueType +} + +// Call implements the same method as documented on api.GoModuleFunction. +func (f *reflectGoModuleFunction) Call(ctx context.Context, mod api.Module, stack []uint64) { + callGoFunc(ctx, mod, f.fn, stack) +} + +// EqualTo is exposed for testing. 
+func (f *reflectGoModuleFunction) EqualTo(that interface{}) bool { + if f2, ok := that.(*reflectGoModuleFunction); !ok { + return false + } else { + // TODO compare reflect pointers + return bytes.Equal(f.params, f2.params) && bytes.Equal(f.results, f2.results) + } +} + +// compile-time check to ensure reflectGoFunction implements api.GoFunction. +var _ api.GoFunction = (*reflectGoFunction)(nil) + +type reflectGoFunction struct { + fn *reflect.Value + pk paramsKind + params, results []ValueType +} + +// EqualTo is exposed for testing. +func (f *reflectGoFunction) EqualTo(that interface{}) bool { + if f2, ok := that.(*reflectGoFunction); !ok { + return false + } else { + // TODO compare reflect pointers + return f.pk == f2.pk && + bytes.Equal(f.params, f2.params) && bytes.Equal(f.results, f2.results) + } +} + +// Call implements the same method as documented on api.GoFunction. +func (f *reflectGoFunction) Call(ctx context.Context, stack []uint64) { + if f.pk == paramsKindNoContext { + ctx = nil + } + callGoFunc(ctx, nil, f.fn, stack) +} + +// callGoFunc executes the reflective function by converting params to Go +// types. The results of the function call are converted back to api.ValueType. 
+func callGoFunc(ctx context.Context, mod api.Module, fn *reflect.Value, stack []uint64) { + tp := fn.Type() + + var in []reflect.Value + pLen := tp.NumIn() + if pLen != 0 { + in = make([]reflect.Value, pLen) + + i := 0 + if ctx != nil { + in[0] = newContextVal(ctx) + i++ + } + if mod != nil { + in[1] = newModuleVal(mod) + i++ + } + + for j := 0; i < pLen; i++ { + next := tp.In(i) + val := reflect.New(next).Elem() + k := next.Kind() + raw := stack[j] + j++ + + switch k { + case reflect.Float32: + val.SetFloat(float64(math.Float32frombits(uint32(raw)))) + case reflect.Float64: + val.SetFloat(math.Float64frombits(raw)) + case reflect.Uint32, reflect.Uint64, reflect.Uintptr: + val.SetUint(raw) + case reflect.Int32, reflect.Int64: + val.SetInt(int64(raw)) + default: + panic(fmt.Errorf("BUG: param[%d] has an invalid type: %v", i, k)) + } + in[i] = val + } + } + + // Execute the host function and push back the call result onto the stack. + for i, ret := range fn.Call(in) { + switch ret.Kind() { + case reflect.Float32: + stack[i] = uint64(math.Float32bits(float32(ret.Float()))) + case reflect.Float64: + stack[i] = math.Float64bits(ret.Float()) + case reflect.Uint32, reflect.Uint64, reflect.Uintptr: + stack[i] = ret.Uint() + case reflect.Int32, reflect.Int64: + stack[i] = uint64(ret.Int()) + default: + panic(fmt.Errorf("BUG: result[%d] has an invalid type: %v", i, ret.Kind())) + } + } +} + +func newContextVal(ctx context.Context) reflect.Value { + val := reflect.New(goContextType).Elem() + val.Set(reflect.ValueOf(ctx)) + return val +} + +func newModuleVal(m api.Module) reflect.Value { + val := reflect.New(moduleType).Elem() + val.Set(reflect.ValueOf(m)) + return val +} + +// MustParseGoReflectFuncCode parses Code from the go function or panics. +// +// Exposing this simplifies FunctionDefinition of host functions in built-in host +// modules and tests. 
+func MustParseGoReflectFuncCode(fn interface{}) Code { + _, _, code, err := parseGoReflectFunc(fn) + if err != nil { + panic(err) + } + return code +} + +func parseGoReflectFunc(fn interface{}) (params, results []ValueType, code Code, err error) { + fnV := reflect.ValueOf(fn) + p := fnV.Type() + + if fnV.Kind() != reflect.Func { + err = fmt.Errorf("kind != func: %s", fnV.Kind().String()) + return + } + + pk, kindErr := kind(p) + if kindErr != nil { + err = kindErr + return + } + + pOffset := 0 + switch pk { + case paramsKindNoContext: + case paramsKindContext: + pOffset = 1 + case paramsKindContextModule: + pOffset = 2 + } + + pCount := p.NumIn() - pOffset + if pCount > 0 { + params = make([]ValueType, pCount) + } + for i := 0; i < len(params); i++ { + pI := p.In(i + pOffset) + if t, ok := getTypeOf(pI.Kind()); ok { + params[i] = t + continue + } + + // Now, we will definitely err, decide which message is best + var arg0Type reflect.Type + if hc := pI.Implements(moduleType); hc { + arg0Type = moduleType + } else if gc := pI.Implements(goContextType); gc { + arg0Type = goContextType + } + + if arg0Type != nil { + err = fmt.Errorf("param[%d] is a %s, which may be defined only once as param[0]", i+pOffset, arg0Type) + } else { + err = fmt.Errorf("param[%d] is unsupported: %s", i+pOffset, pI.Kind()) + } + return + } + + rCount := p.NumOut() + if rCount > 0 { + results = make([]ValueType, rCount) + } + for i := 0; i < len(results); i++ { + rI := p.Out(i) + if t, ok := getTypeOf(rI.Kind()); ok { + results[i] = t + continue + } + + // Now, we will definitely err, decide which message is best + if rI.Implements(errorType) { + err = fmt.Errorf("result[%d] is an error, which is unsupported", i) + } else { + err = fmt.Errorf("result[%d] is unsupported: %s", i, rI.Kind()) + } + return + } + + code = Code{} + if pk == paramsKindContextModule { + code.GoFunc = &reflectGoModuleFunction{fn: &fnV, params: params, results: results} + } else { + code.GoFunc = &reflectGoFunction{pk: 
pk, fn: &fnV, params: params, results: results} + } + return +} + +func kind(p reflect.Type) (paramsKind, error) { + pCount := p.NumIn() + if pCount > 0 && p.In(0).Kind() == reflect.Interface { + p0 := p.In(0) + if p0.Implements(moduleType) { + return 0, errors.New("invalid signature: api.Module parameter must be preceded by context.Context") + } else if p0.Implements(goContextType) { + if pCount >= 2 && p.In(1).Implements(moduleType) { + return paramsKindContextModule, nil + } + return paramsKindContext, nil + } + } + // Without context param allows portability with reflective runtimes. + // This allows people to more easily port to wazero. + return paramsKindNoContext, nil +} + +func getTypeOf(kind reflect.Kind) (ValueType, bool) { + switch kind { + case reflect.Float64: + return ValueTypeF64, true + case reflect.Float32: + return ValueTypeF32, true + case reflect.Int32, reflect.Uint32: + return ValueTypeI32, true + case reflect.Int64, reflect.Uint64: + return ValueTypeI64, true + case reflect.Uintptr: + return ValueTypeExternref, true + default: + return 0x00, false + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/host.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/host.go new file mode 100644 index 000000000..bca686d1d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/host.go @@ -0,0 +1,179 @@ +package wasm + +import ( + "errors" + "fmt" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/wasmdebug" +) + +type HostFuncExporter interface { + ExportHostFunc(*HostFunc) +} + +// HostFunc is a function with an inlined type, used for NewHostModule. +// Any corresponding FunctionType will be reused or added to the Module. +type HostFunc struct { + // ExportName is the only value returned by api.FunctionDefinition. + ExportName string + + // Name is equivalent to the same method on api.FunctionDefinition. 
+ Name string + + // ParamTypes is equivalent to the same method on api.FunctionDefinition. + ParamTypes []ValueType + + // ParamNames is equivalent to the same method on api.FunctionDefinition. + ParamNames []string + + // ResultTypes is equivalent to the same method on api.FunctionDefinition. + ResultTypes []ValueType + + // ResultNames is equivalent to the same method on api.FunctionDefinition. + ResultNames []string + + // Code is the equivalent function in the SectionIDCode. + Code Code +} + +// WithGoModuleFunc returns a copy of the function, replacing its Code.GoFunc. +func (f *HostFunc) WithGoModuleFunc(fn api.GoModuleFunc) *HostFunc { + ret := *f + ret.Code.GoFunc = fn + return &ret +} + +// NewHostModule is defined internally for use in WASI tests and to keep the code size in the root directory small. +func NewHostModule( + moduleName string, + exportNames []string, + nameToHostFunc map[string]*HostFunc, + enabledFeatures api.CoreFeatures, +) (m *Module, err error) { + if moduleName != "" { + m = &Module{NameSection: &NameSection{ModuleName: moduleName}} + } else { + return nil, errors.New("a module name must not be empty") + } + + if exportCount := uint32(len(nameToHostFunc)); exportCount > 0 { + m.ExportSection = make([]Export, 0, exportCount) + m.Exports = make(map[string]*Export, exportCount) + if err = addFuncs(m, exportNames, nameToHostFunc, enabledFeatures); err != nil { + return + } + } + + m.IsHostModule = true + // Uses the address of *wasm.Module as the module ID so that host functions can have each state per compilation. + // Downside of this is that compilation cache on host functions (trampoline codes for Go functions and + // Wasm codes for Wasm-implemented host functions) are not available and compiles each time. On the other hand, + // compilation of host modules is not costly as it's merely small trampolines vs the real-world native Wasm binary. 
+ // TODO: refactor engines so that we can properly cache compiled machine codes for host modules. + m.AssignModuleID([]byte(fmt.Sprintf("@@@@@@@@%p", m)), // @@@@@@@@ = any 8 bytes different from Wasm header. + nil, false) + return +} + +func addFuncs( + m *Module, + exportNames []string, + nameToHostFunc map[string]*HostFunc, + enabledFeatures api.CoreFeatures, +) (err error) { + if m.NameSection == nil { + m.NameSection = &NameSection{} + } + moduleName := m.NameSection.ModuleName + + for _, k := range exportNames { + hf := nameToHostFunc[k] + if hf.Name == "" { + hf.Name = k // default name to export name + } + switch hf.Code.GoFunc.(type) { + case api.GoModuleFunction, api.GoFunction: + continue // already parsed + } + + // Resolve the code using reflection + hf.ParamTypes, hf.ResultTypes, hf.Code, err = parseGoReflectFunc(hf.Code.GoFunc) + if err != nil { + return fmt.Errorf("func[%s.%s] %w", moduleName, k, err) + } + + // Assign names to the function, if they exist. + params := hf.ParamTypes + if paramNames := hf.ParamNames; paramNames != nil { + if paramNamesLen := len(paramNames); paramNamesLen != len(params) { + return fmt.Errorf("func[%s.%s] has %d params, but %d params names", moduleName, k, paramNamesLen, len(params)) + } + } + + results := hf.ResultTypes + if resultNames := hf.ResultNames; resultNames != nil { + if resultNamesLen := len(resultNames); resultNamesLen != len(results) { + return fmt.Errorf("func[%s.%s] has %d results, but %d results names", moduleName, k, resultNamesLen, len(results)) + } + } + } + + funcCount := uint32(len(exportNames)) + m.NameSection.FunctionNames = make([]NameAssoc, 0, funcCount) + m.FunctionSection = make([]Index, 0, funcCount) + m.CodeSection = make([]Code, 0, funcCount) + + idx := Index(0) + for _, name := range exportNames { + hf := nameToHostFunc[name] + debugName := wasmdebug.FuncName(moduleName, name, idx) + typeIdx, typeErr := m.maybeAddType(hf.ParamTypes, hf.ResultTypes, enabledFeatures) + if typeErr != nil { 
+ return fmt.Errorf("func[%s] %v", debugName, typeErr) + } + m.FunctionSection = append(m.FunctionSection, typeIdx) + m.CodeSection = append(m.CodeSection, hf.Code) + + export := hf.ExportName + m.ExportSection = append(m.ExportSection, Export{Type: ExternTypeFunc, Name: export, Index: idx}) + m.Exports[export] = &m.ExportSection[len(m.ExportSection)-1] + m.NameSection.FunctionNames = append(m.NameSection.FunctionNames, NameAssoc{Index: idx, Name: hf.Name}) + + if len(hf.ParamNames) > 0 { + localNames := NameMapAssoc{Index: idx} + for i, n := range hf.ParamNames { + localNames.NameMap = append(localNames.NameMap, NameAssoc{Index: Index(i), Name: n}) + } + m.NameSection.LocalNames = append(m.NameSection.LocalNames, localNames) + } + if len(hf.ResultNames) > 0 { + resultNames := NameMapAssoc{Index: idx} + for i, n := range hf.ResultNames { + resultNames.NameMap = append(resultNames.NameMap, NameAssoc{Index: Index(i), Name: n}) + } + m.NameSection.ResultNames = append(m.NameSection.ResultNames, resultNames) + } + idx++ + } + return nil +} + +func (m *Module) maybeAddType(params, results []ValueType, enabledFeatures api.CoreFeatures) (Index, error) { + if len(results) > 1 { + // Guard >1.0 feature multi-value + if err := enabledFeatures.RequireEnabled(api.CoreFeatureMultiValue); err != nil { + return 0, fmt.Errorf("multiple result types invalid as %v", err) + } + } + for i := range m.TypeSection { + t := &m.TypeSection[i] + if t.EqualsSignature(params, results) { + return Index(i), nil + } + } + + result := m.SectionElementCount(SectionIDType) + m.TypeSection = append(m.TypeSection, FunctionType{Params: params, Results: results}) + return result, nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/instruction.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/instruction.go new file mode 100644 index 000000000..67f196b8b --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/instruction.go @@ -0,0 +1,1866 @@ +package wasm + +// 
Opcode is the binary Opcode of an instruction. See also InstructionName +type Opcode = byte + +const ( + // OpcodeUnreachable causes an unconditional trap. + OpcodeUnreachable Opcode = 0x00 + // OpcodeNop does nothing + OpcodeNop Opcode = 0x01 + // OpcodeBlock brackets a sequence of instructions. A branch instruction on an if label breaks out to after its + // OpcodeEnd. + OpcodeBlock Opcode = 0x02 + // OpcodeLoop brackets a sequence of instructions. A branch instruction on a loop label will jump back to the + // beginning of its block. + OpcodeLoop Opcode = 0x03 + // OpcodeIf brackets a sequence of instructions. When the top of the stack evaluates to 1, the block is executed. + // Zero jumps to the optional OpcodeElse. A branch instruction on an if label breaks out to after its OpcodeEnd. + OpcodeIf Opcode = 0x04 + // OpcodeElse brackets a sequence of instructions enclosed by an OpcodeIf. A branch instruction on a then label + // breaks out to after the OpcodeEnd on the enclosing OpcodeIf. + OpcodeElse Opcode = 0x05 + // OpcodeEnd terminates a control instruction OpcodeBlock, OpcodeLoop or OpcodeIf. + OpcodeEnd Opcode = 0x0b + + // OpcodeBr is a stack-polymorphic opcode that performs an unconditional branch. How the stack is modified depends + // on whether the "br" is enclosed by a loop, and if CoreFeatureMultiValue is enabled. + // + // Here are the rules in pseudocode about how the stack is modified based on the "br" operand L (label): + // if L is loop: append(L.originalStackWithoutInputs, N-values popped from the stack) where N == L.inputs + // else: append(L.originalStackWithoutInputs, N-values popped from the stack) where N == L.results + // + // In WebAssembly 1.0 (20191205), N can be zero or one. When CoreFeatureMultiValue is enabled, N can be more than one, + // depending on the type use of the label L. 
+ // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefsyntax-instr-controlmathsfbrl + OpcodeBr Opcode = 0x0c + + OpcodeBrIf Opcode = 0x0d + OpcodeBrTable Opcode = 0x0e + OpcodeReturn Opcode = 0x0f + OpcodeCall Opcode = 0x10 + OpcodeCallIndirect Opcode = 0x11 + + // parametric instructions + + OpcodeDrop Opcode = 0x1a + OpcodeSelect Opcode = 0x1b + OpcodeTypedSelect Opcode = 0x1c + + // variable instructions + + OpcodeLocalGet Opcode = 0x20 + OpcodeLocalSet Opcode = 0x21 + OpcodeLocalTee Opcode = 0x22 + OpcodeGlobalGet Opcode = 0x23 + OpcodeGlobalSet Opcode = 0x24 + + // Below are toggled with CoreFeatureReferenceTypes + + OpcodeTableGet Opcode = 0x25 + OpcodeTableSet Opcode = 0x26 + + // memory instructions + + OpcodeI32Load Opcode = 0x28 + OpcodeI64Load Opcode = 0x29 + OpcodeF32Load Opcode = 0x2a + OpcodeF64Load Opcode = 0x2b + OpcodeI32Load8S Opcode = 0x2c + OpcodeI32Load8U Opcode = 0x2d + OpcodeI32Load16S Opcode = 0x2e + OpcodeI32Load16U Opcode = 0x2f + OpcodeI64Load8S Opcode = 0x30 + OpcodeI64Load8U Opcode = 0x31 + OpcodeI64Load16S Opcode = 0x32 + OpcodeI64Load16U Opcode = 0x33 + OpcodeI64Load32S Opcode = 0x34 + OpcodeI64Load32U Opcode = 0x35 + OpcodeI32Store Opcode = 0x36 + OpcodeI64Store Opcode = 0x37 + OpcodeF32Store Opcode = 0x38 + OpcodeF64Store Opcode = 0x39 + OpcodeI32Store8 Opcode = 0x3a + OpcodeI32Store16 Opcode = 0x3b + OpcodeI64Store8 Opcode = 0x3c + OpcodeI64Store16 Opcode = 0x3d + OpcodeI64Store32 Opcode = 0x3e + OpcodeMemorySize Opcode = 0x3f + OpcodeMemoryGrow Opcode = 0x40 + + // const instructions + + OpcodeI32Const Opcode = 0x41 + OpcodeI64Const Opcode = 0x42 + OpcodeF32Const Opcode = 0x43 + OpcodeF64Const Opcode = 0x44 + + // numeric instructions + + OpcodeI32Eqz Opcode = 0x45 + OpcodeI32Eq Opcode = 0x46 + OpcodeI32Ne Opcode = 0x47 + OpcodeI32LtS Opcode = 0x48 + OpcodeI32LtU Opcode = 0x49 + OpcodeI32GtS Opcode = 0x4a + OpcodeI32GtU Opcode = 0x4b + OpcodeI32LeS Opcode = 0x4c + OpcodeI32LeU Opcode = 0x4d + OpcodeI32GeS Opcode 
= 0x4e + OpcodeI32GeU Opcode = 0x4f + + OpcodeI64Eqz Opcode = 0x50 + OpcodeI64Eq Opcode = 0x51 + OpcodeI64Ne Opcode = 0x52 + OpcodeI64LtS Opcode = 0x53 + OpcodeI64LtU Opcode = 0x54 + OpcodeI64GtS Opcode = 0x55 + OpcodeI64GtU Opcode = 0x56 + OpcodeI64LeS Opcode = 0x57 + OpcodeI64LeU Opcode = 0x58 + OpcodeI64GeS Opcode = 0x59 + OpcodeI64GeU Opcode = 0x5a + + OpcodeF32Eq Opcode = 0x5b + OpcodeF32Ne Opcode = 0x5c + OpcodeF32Lt Opcode = 0x5d + OpcodeF32Gt Opcode = 0x5e + OpcodeF32Le Opcode = 0x5f + OpcodeF32Ge Opcode = 0x60 + + OpcodeF64Eq Opcode = 0x61 + OpcodeF64Ne Opcode = 0x62 + OpcodeF64Lt Opcode = 0x63 + OpcodeF64Gt Opcode = 0x64 + OpcodeF64Le Opcode = 0x65 + OpcodeF64Ge Opcode = 0x66 + + OpcodeI32Clz Opcode = 0x67 + OpcodeI32Ctz Opcode = 0x68 + OpcodeI32Popcnt Opcode = 0x69 + OpcodeI32Add Opcode = 0x6a + OpcodeI32Sub Opcode = 0x6b + OpcodeI32Mul Opcode = 0x6c + OpcodeI32DivS Opcode = 0x6d + OpcodeI32DivU Opcode = 0x6e + OpcodeI32RemS Opcode = 0x6f + OpcodeI32RemU Opcode = 0x70 + OpcodeI32And Opcode = 0x71 + OpcodeI32Or Opcode = 0x72 + OpcodeI32Xor Opcode = 0x73 + OpcodeI32Shl Opcode = 0x74 + OpcodeI32ShrS Opcode = 0x75 + OpcodeI32ShrU Opcode = 0x76 + OpcodeI32Rotl Opcode = 0x77 + OpcodeI32Rotr Opcode = 0x78 + + OpcodeI64Clz Opcode = 0x79 + OpcodeI64Ctz Opcode = 0x7a + OpcodeI64Popcnt Opcode = 0x7b + OpcodeI64Add Opcode = 0x7c + OpcodeI64Sub Opcode = 0x7d + OpcodeI64Mul Opcode = 0x7e + OpcodeI64DivS Opcode = 0x7f + OpcodeI64DivU Opcode = 0x80 + OpcodeI64RemS Opcode = 0x81 + OpcodeI64RemU Opcode = 0x82 + OpcodeI64And Opcode = 0x83 + OpcodeI64Or Opcode = 0x84 + OpcodeI64Xor Opcode = 0x85 + OpcodeI64Shl Opcode = 0x86 + OpcodeI64ShrS Opcode = 0x87 + OpcodeI64ShrU Opcode = 0x88 + OpcodeI64Rotl Opcode = 0x89 + OpcodeI64Rotr Opcode = 0x8a + + OpcodeF32Abs Opcode = 0x8b + OpcodeF32Neg Opcode = 0x8c + OpcodeF32Ceil Opcode = 0x8d + OpcodeF32Floor Opcode = 0x8e + OpcodeF32Trunc Opcode = 0x8f + OpcodeF32Nearest Opcode = 0x90 + OpcodeF32Sqrt Opcode = 0x91 + OpcodeF32Add Opcode 
= 0x92 + OpcodeF32Sub Opcode = 0x93 + OpcodeF32Mul Opcode = 0x94 + OpcodeF32Div Opcode = 0x95 + OpcodeF32Min Opcode = 0x96 + OpcodeF32Max Opcode = 0x97 + OpcodeF32Copysign Opcode = 0x98 + + OpcodeF64Abs Opcode = 0x99 + OpcodeF64Neg Opcode = 0x9a + OpcodeF64Ceil Opcode = 0x9b + OpcodeF64Floor Opcode = 0x9c + OpcodeF64Trunc Opcode = 0x9d + OpcodeF64Nearest Opcode = 0x9e + OpcodeF64Sqrt Opcode = 0x9f + OpcodeF64Add Opcode = 0xa0 + OpcodeF64Sub Opcode = 0xa1 + OpcodeF64Mul Opcode = 0xa2 + OpcodeF64Div Opcode = 0xa3 + OpcodeF64Min Opcode = 0xa4 + OpcodeF64Max Opcode = 0xa5 + OpcodeF64Copysign Opcode = 0xa6 + + OpcodeI32WrapI64 Opcode = 0xa7 + OpcodeI32TruncF32S Opcode = 0xa8 + OpcodeI32TruncF32U Opcode = 0xa9 + OpcodeI32TruncF64S Opcode = 0xaa + OpcodeI32TruncF64U Opcode = 0xab + + OpcodeI64ExtendI32S Opcode = 0xac + OpcodeI64ExtendI32U Opcode = 0xad + OpcodeI64TruncF32S Opcode = 0xae + OpcodeI64TruncF32U Opcode = 0xaf + OpcodeI64TruncF64S Opcode = 0xb0 + OpcodeI64TruncF64U Opcode = 0xb1 + + OpcodeF32ConvertI32S Opcode = 0xb2 + OpcodeF32ConvertI32U Opcode = 0xb3 + OpcodeF32ConvertI64S Opcode = 0xb4 + OpcodeF32ConvertI64U Opcode = 0xb5 + OpcodeF32DemoteF64 Opcode = 0xb6 + + OpcodeF64ConvertI32S Opcode = 0xb7 + OpcodeF64ConvertI32U Opcode = 0xb8 + OpcodeF64ConvertI64S Opcode = 0xb9 + OpcodeF64ConvertI64U Opcode = 0xba + OpcodeF64PromoteF32 Opcode = 0xbb + + OpcodeI32ReinterpretF32 Opcode = 0xbc + OpcodeI64ReinterpretF64 Opcode = 0xbd + OpcodeF32ReinterpretI32 Opcode = 0xbe + OpcodeF64ReinterpretI64 Opcode = 0xbf + + // OpcodeRefNull pushes a null reference value whose type is specified by immediate to this opcode. + // This is defined in the reference-types proposal, but necessary for CoreFeatureBulkMemoryOperations as well. + // + // Currently only supported in the constant expression in element segments. + OpcodeRefNull = 0xd0 + // OpcodeRefIsNull pops a reference value, and pushes 1 if it is null, 0 otherwise. 
+ // This is defined in the reference-types proposal, but necessary for CoreFeatureBulkMemoryOperations as well. + // + // Currently not supported. + OpcodeRefIsNull = 0xd1 + // OpcodeRefFunc pushes a funcref value whose index equals the immediate to this opcode. + // This is defined in the reference-types proposal, but necessary for CoreFeatureBulkMemoryOperations as well. + // + // Currently, this is only supported in the constant expression in element segments. + OpcodeRefFunc = 0xd2 + + // Below are toggled with CoreFeatureSignExtensionOps + + // OpcodeI32Extend8S extends a signed 8-bit integer to a 32-bit integer. + // Note: This is dependent on the flag CoreFeatureSignExtensionOps + OpcodeI32Extend8S Opcode = 0xc0 + + // OpcodeI32Extend16S extends a signed 16-bit integer to a 32-bit integer. + // Note: This is dependent on the flag CoreFeatureSignExtensionOps + OpcodeI32Extend16S Opcode = 0xc1 + + // OpcodeI64Extend8S extends a signed 8-bit integer to a 64-bit integer. + // Note: This is dependent on the flag CoreFeatureSignExtensionOps + OpcodeI64Extend8S Opcode = 0xc2 + + // OpcodeI64Extend16S extends a signed 16-bit integer to a 64-bit integer. + // Note: This is dependent on the flag CoreFeatureSignExtensionOps + OpcodeI64Extend16S Opcode = 0xc3 + + // OpcodeI64Extend32S extends a signed 32-bit integer to a 64-bit integer. + // Note: This is dependent on the flag CoreFeatureSignExtensionOps + OpcodeI64Extend32S Opcode = 0xc4 + + // OpcodeMiscPrefix is the prefix of various multi-byte opcodes. + // Introduced in CoreFeatureNonTrappingFloatToIntConversion, but used in other + // features, such as CoreFeatureBulkMemoryOperations. + OpcodeMiscPrefix Opcode = 0xfc + + // OpcodeVecPrefix is the prefix of all vector isntructions introduced in + // CoreFeatureSIMD. + OpcodeVecPrefix Opcode = 0xfd + + // OpcodeAtomicPrefix is the prefix of all atomic instructions introduced in + // CoreFeatureThreads. 
+ OpcodeAtomicPrefix Opcode = 0xfe +) + +// OpcodeMisc represents opcodes of the miscellaneous operations. +// Such an operations has multi-byte encoding which is prefixed by OpcodeMiscPrefix. +type OpcodeMisc = byte + +const ( + // Below are toggled with CoreFeatureNonTrappingFloatToIntConversion. + // https://github.com/WebAssembly/spec/blob/ce4b6c4d47eb06098cc7ab2e81f24748da822f20/proposals/nontrapping-float-to-int-conversion/Overview.md + + OpcodeMiscI32TruncSatF32S OpcodeMisc = 0x00 + OpcodeMiscI32TruncSatF32U OpcodeMisc = 0x01 + OpcodeMiscI32TruncSatF64S OpcodeMisc = 0x02 + OpcodeMiscI32TruncSatF64U OpcodeMisc = 0x03 + OpcodeMiscI64TruncSatF32S OpcodeMisc = 0x04 + OpcodeMiscI64TruncSatF32U OpcodeMisc = 0x05 + OpcodeMiscI64TruncSatF64S OpcodeMisc = 0x06 + OpcodeMiscI64TruncSatF64U OpcodeMisc = 0x07 + + // Below are toggled with CoreFeatureBulkMemoryOperations. + // Opcodes are those new in document/core/appendix/index-instructions.rst (the commit that merged the feature). + // See https://github.com/WebAssembly/spec/commit/7fa2f20a6df4cf1c114582c8cb60f5bfcdbf1be1 + // See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + + OpcodeMiscMemoryInit OpcodeMisc = 0x08 + OpcodeMiscDataDrop OpcodeMisc = 0x09 + OpcodeMiscMemoryCopy OpcodeMisc = 0x0a + OpcodeMiscMemoryFill OpcodeMisc = 0x0b + OpcodeMiscTableInit OpcodeMisc = 0x0c + OpcodeMiscElemDrop OpcodeMisc = 0x0d + OpcodeMiscTableCopy OpcodeMisc = 0x0e + + // Below are toggled with CoreFeatureReferenceTypes + + OpcodeMiscTableGrow OpcodeMisc = 0x0f + OpcodeMiscTableSize OpcodeMisc = 0x10 + OpcodeMiscTableFill OpcodeMisc = 0x11 +) + +// OpcodeVec represents an opcode of a vector instructions which has +// multi-byte encoding and is prefixed by OpcodeMiscPrefix. +// +// These opcodes are toggled with CoreFeatureSIMD. +type OpcodeVec = byte + +const ( + // Loads and stores. 
+ + OpcodeVecV128Load OpcodeVec = 0x00 + OpcodeVecV128Load8x8s OpcodeVec = 0x01 + OpcodeVecV128Load8x8u OpcodeVec = 0x02 + OpcodeVecV128Load16x4s OpcodeVec = 0x03 + OpcodeVecV128Load16x4u OpcodeVec = 0x04 + OpcodeVecV128Load32x2s OpcodeVec = 0x05 + OpcodeVecV128Load32x2u OpcodeVec = 0x06 + OpcodeVecV128Load8Splat OpcodeVec = 0x07 + OpcodeVecV128Load16Splat OpcodeVec = 0x08 + OpcodeVecV128Load32Splat OpcodeVec = 0x09 + OpcodeVecV128Load64Splat OpcodeVec = 0x0a + + OpcodeVecV128Load32zero OpcodeVec = 0x5c + OpcodeVecV128Load64zero OpcodeVec = 0x5d + + OpcodeVecV128Store OpcodeVec = 0x0b + OpcodeVecV128Load8Lane OpcodeVec = 0x54 + OpcodeVecV128Load16Lane OpcodeVec = 0x55 + OpcodeVecV128Load32Lane OpcodeVec = 0x56 + OpcodeVecV128Load64Lane OpcodeVec = 0x57 + OpcodeVecV128Store8Lane OpcodeVec = 0x58 + OpcodeVecV128Store16Lane OpcodeVec = 0x59 + OpcodeVecV128Store32Lane OpcodeVec = 0x5a + OpcodeVecV128Store64Lane OpcodeVec = 0x5b + + // OpcodeVecV128Const is the vector const instruction. + OpcodeVecV128Const OpcodeVec = 0x0c + + // OpcodeVecV128i8x16Shuffle is the vector shuffle instruction. + OpcodeVecV128i8x16Shuffle OpcodeVec = 0x0d + + // Extrac and replaces. + + OpcodeVecI8x16ExtractLaneS OpcodeVec = 0x15 + OpcodeVecI8x16ExtractLaneU OpcodeVec = 0x16 + OpcodeVecI8x16ReplaceLane OpcodeVec = 0x17 + OpcodeVecI16x8ExtractLaneS OpcodeVec = 0x18 + OpcodeVecI16x8ExtractLaneU OpcodeVec = 0x19 + OpcodeVecI16x8ReplaceLane OpcodeVec = 0x1a + OpcodeVecI32x4ExtractLane OpcodeVec = 0x1b + OpcodeVecI32x4ReplaceLane OpcodeVec = 0x1c + OpcodeVecI64x2ExtractLane OpcodeVec = 0x1d + OpcodeVecI64x2ReplaceLane OpcodeVec = 0x1e + OpcodeVecF32x4ExtractLane OpcodeVec = 0x1f + OpcodeVecF32x4ReplaceLane OpcodeVec = 0x20 + OpcodeVecF64x2ExtractLane OpcodeVec = 0x21 + OpcodeVecF64x2ReplaceLane OpcodeVec = 0x22 + + // Splat and swizzle. 
+ + OpcodeVecI8x16Swizzle OpcodeVec = 0x0e + OpcodeVecI8x16Splat OpcodeVec = 0x0f + OpcodeVecI16x8Splat OpcodeVec = 0x10 + OpcodeVecI32x4Splat OpcodeVec = 0x11 + OpcodeVecI64x2Splat OpcodeVec = 0x12 + OpcodeVecF32x4Splat OpcodeVec = 0x13 + OpcodeVecF64x2Splat OpcodeVec = 0x14 + + // i8 comparisons. + + OpcodeVecI8x16Eq OpcodeVec = 0x23 + OpcodeVecI8x16Ne OpcodeVec = 0x24 + OpcodeVecI8x16LtS OpcodeVec = 0x25 + OpcodeVecI8x16LtU OpcodeVec = 0x26 + OpcodeVecI8x16GtS OpcodeVec = 0x27 + OpcodeVecI8x16GtU OpcodeVec = 0x28 + OpcodeVecI8x16LeS OpcodeVec = 0x29 + OpcodeVecI8x16LeU OpcodeVec = 0x2a + OpcodeVecI8x16GeS OpcodeVec = 0x2b + OpcodeVecI8x16GeU OpcodeVec = 0x2c + + // i16 comparisons. + + OpcodeVecI16x8Eq OpcodeVec = 0x2d + OpcodeVecI16x8Ne OpcodeVec = 0x2e + OpcodeVecI16x8LtS OpcodeVec = 0x2f + OpcodeVecI16x8LtU OpcodeVec = 0x30 + OpcodeVecI16x8GtS OpcodeVec = 0x31 + OpcodeVecI16x8GtU OpcodeVec = 0x32 + OpcodeVecI16x8LeS OpcodeVec = 0x33 + OpcodeVecI16x8LeU OpcodeVec = 0x34 + OpcodeVecI16x8GeS OpcodeVec = 0x35 + OpcodeVecI16x8GeU OpcodeVec = 0x36 + + // i32 comparisons. + + OpcodeVecI32x4Eq OpcodeVec = 0x37 + OpcodeVecI32x4Ne OpcodeVec = 0x38 + OpcodeVecI32x4LtS OpcodeVec = 0x39 + OpcodeVecI32x4LtU OpcodeVec = 0x3a + OpcodeVecI32x4GtS OpcodeVec = 0x3b + OpcodeVecI32x4GtU OpcodeVec = 0x3c + OpcodeVecI32x4LeS OpcodeVec = 0x3d + OpcodeVecI32x4LeU OpcodeVec = 0x3e + OpcodeVecI32x4GeS OpcodeVec = 0x3f + OpcodeVecI32x4GeU OpcodeVec = 0x40 + + // i64 comparisons. + + OpcodeVecI64x2Eq OpcodeVec = 0xd6 + OpcodeVecI64x2Ne OpcodeVec = 0xd7 + OpcodeVecI64x2LtS OpcodeVec = 0xd8 + OpcodeVecI64x2GtS OpcodeVec = 0xd9 + OpcodeVecI64x2LeS OpcodeVec = 0xda + OpcodeVecI64x2GeS OpcodeVec = 0xdb + + // f32 comparisons. + + OpcodeVecF32x4Eq OpcodeVec = 0x41 + OpcodeVecF32x4Ne OpcodeVec = 0x42 + OpcodeVecF32x4Lt OpcodeVec = 0x43 + OpcodeVecF32x4Gt OpcodeVec = 0x44 + OpcodeVecF32x4Le OpcodeVec = 0x45 + OpcodeVecF32x4Ge OpcodeVec = 0x46 + + // f64 comparisons. 
+ + OpcodeVecF64x2Eq OpcodeVec = 0x47 + OpcodeVecF64x2Ne OpcodeVec = 0x48 + OpcodeVecF64x2Lt OpcodeVec = 0x49 + OpcodeVecF64x2Gt OpcodeVec = 0x4a + OpcodeVecF64x2Le OpcodeVec = 0x4b + OpcodeVecF64x2Ge OpcodeVec = 0x4c + + // v128 logical instructions. + + OpcodeVecV128Not OpcodeVec = 0x4d + OpcodeVecV128And OpcodeVec = 0x4e + OpcodeVecV128AndNot OpcodeVec = 0x4f + OpcodeVecV128Or OpcodeVec = 0x50 + OpcodeVecV128Xor OpcodeVec = 0x51 + OpcodeVecV128Bitselect OpcodeVec = 0x52 + OpcodeVecV128AnyTrue OpcodeVec = 0x53 + + // i8 misc. + + OpcodeVecI8x16Abs OpcodeVec = 0x60 + OpcodeVecI8x16Neg OpcodeVec = 0x61 + OpcodeVecI8x16Popcnt OpcodeVec = 0x62 + OpcodeVecI8x16AllTrue OpcodeVec = 0x63 + OpcodeVecI8x16BitMask OpcodeVec = 0x64 + OpcodeVecI8x16NarrowI16x8S OpcodeVec = 0x65 + OpcodeVecI8x16NarrowI16x8U OpcodeVec = 0x66 + + OpcodeVecI8x16Shl OpcodeVec = 0x6b + OpcodeVecI8x16ShrS OpcodeVec = 0x6c + OpcodeVecI8x16ShrU OpcodeVec = 0x6d + OpcodeVecI8x16Add OpcodeVec = 0x6e + OpcodeVecI8x16AddSatS OpcodeVec = 0x6f + + OpcodeVecI8x16AddSatU OpcodeVec = 0x70 + OpcodeVecI8x16Sub OpcodeVec = 0x71 + OpcodeVecI8x16SubSatS OpcodeVec = 0x72 + OpcodeVecI8x16SubSatU OpcodeVec = 0x73 + OpcodeVecI8x16MinS OpcodeVec = 0x76 + OpcodeVecI8x16MinU OpcodeVec = 0x77 + OpcodeVecI8x16MaxS OpcodeVec = 0x78 + OpcodeVecI8x16MaxU OpcodeVec = 0x79 + OpcodeVecI8x16AvgrU OpcodeVec = 0x7b + + // i16 misc. 
+ + OpcodeVecI16x8ExtaddPairwiseI8x16S OpcodeVec = 0x7c + OpcodeVecI16x8ExtaddPairwiseI8x16U OpcodeVec = 0x7d + OpcodeVecI16x8Abs OpcodeVec = 0x80 + OpcodeVecI16x8Neg OpcodeVec = 0x81 + OpcodeVecI16x8Q15mulrSatS OpcodeVec = 0x82 + OpcodeVecI16x8AllTrue OpcodeVec = 0x83 + OpcodeVecI16x8BitMask OpcodeVec = 0x84 + OpcodeVecI16x8NarrowI32x4S OpcodeVec = 0x85 + OpcodeVecI16x8NarrowI32x4U OpcodeVec = 0x86 + OpcodeVecI16x8ExtendLowI8x16S OpcodeVec = 0x87 + OpcodeVecI16x8ExtendHighI8x16S OpcodeVec = 0x88 + OpcodeVecI16x8ExtendLowI8x16U OpcodeVec = 0x89 + OpcodeVecI16x8ExtendHighI8x16U OpcodeVec = 0x8a + OpcodeVecI16x8Shl OpcodeVec = 0x8b + OpcodeVecI16x8ShrS OpcodeVec = 0x8c + OpcodeVecI16x8ShrU OpcodeVec = 0x8d + OpcodeVecI16x8Add OpcodeVec = 0x8e + OpcodeVecI16x8AddSatS OpcodeVec = 0x8f + OpcodeVecI16x8AddSatU OpcodeVec = 0x90 + OpcodeVecI16x8Sub OpcodeVec = 0x91 + OpcodeVecI16x8SubSatS OpcodeVec = 0x92 + OpcodeVecI16x8SubSatU OpcodeVec = 0x93 + OpcodeVecI16x8Mul OpcodeVec = 0x95 + OpcodeVecI16x8MinS OpcodeVec = 0x96 + OpcodeVecI16x8MinU OpcodeVec = 0x97 + OpcodeVecI16x8MaxS OpcodeVec = 0x98 + OpcodeVecI16x8MaxU OpcodeVec = 0x99 + OpcodeVecI16x8AvgrU OpcodeVec = 0x9b + OpcodeVecI16x8ExtMulLowI8x16S OpcodeVec = 0x9c + OpcodeVecI16x8ExtMulHighI8x16S OpcodeVec = 0x9d + OpcodeVecI16x8ExtMulLowI8x16U OpcodeVec = 0x9e + OpcodeVecI16x8ExtMulHighI8x16U OpcodeVec = 0x9f + + // i32 misc. 
+ + OpcodeVecI32x4ExtaddPairwiseI16x8S OpcodeVec = 0x7e + OpcodeVecI32x4ExtaddPairwiseI16x8U OpcodeVec = 0x7f + OpcodeVecI32x4Abs OpcodeVec = 0xa0 + OpcodeVecI32x4Neg OpcodeVec = 0xa1 + OpcodeVecI32x4AllTrue OpcodeVec = 0xa3 + OpcodeVecI32x4BitMask OpcodeVec = 0xa4 + OpcodeVecI32x4ExtendLowI16x8S OpcodeVec = 0xa7 + OpcodeVecI32x4ExtendHighI16x8S OpcodeVec = 0xa8 + OpcodeVecI32x4ExtendLowI16x8U OpcodeVec = 0xa9 + OpcodeVecI32x4ExtendHighI16x8U OpcodeVec = 0xaa + OpcodeVecI32x4Shl OpcodeVec = 0xab + OpcodeVecI32x4ShrS OpcodeVec = 0xac + OpcodeVecI32x4ShrU OpcodeVec = 0xad + OpcodeVecI32x4Add OpcodeVec = 0xae + OpcodeVecI32x4Sub OpcodeVec = 0xb1 + OpcodeVecI32x4Mul OpcodeVec = 0xb5 + OpcodeVecI32x4MinS OpcodeVec = 0xb6 + OpcodeVecI32x4MinU OpcodeVec = 0xb7 + OpcodeVecI32x4MaxS OpcodeVec = 0xb8 + OpcodeVecI32x4MaxU OpcodeVec = 0xb9 + OpcodeVecI32x4DotI16x8S OpcodeVec = 0xba + OpcodeVecI32x4ExtMulLowI16x8S OpcodeVec = 0xbc + OpcodeVecI32x4ExtMulHighI16x8S OpcodeVec = 0xbd + OpcodeVecI32x4ExtMulLowI16x8U OpcodeVec = 0xbe + OpcodeVecI32x4ExtMulHighI16x8U OpcodeVec = 0xbf + + // i64 misc. + + OpcodeVecI64x2Abs OpcodeVec = 0xc0 + OpcodeVecI64x2Neg OpcodeVec = 0xc1 + OpcodeVecI64x2AllTrue OpcodeVec = 0xc3 + OpcodeVecI64x2BitMask OpcodeVec = 0xc4 + OpcodeVecI64x2ExtendLowI32x4S OpcodeVec = 0xc7 + OpcodeVecI64x2ExtendHighI32x4S OpcodeVec = 0xc8 + OpcodeVecI64x2ExtendLowI32x4U OpcodeVec = 0xc9 + OpcodeVecI64x2ExtendHighI32x4U OpcodeVec = 0xca + OpcodeVecI64x2Shl OpcodeVec = 0xcb + OpcodeVecI64x2ShrS OpcodeVec = 0xcc + OpcodeVecI64x2ShrU OpcodeVec = 0xcd + OpcodeVecI64x2Add OpcodeVec = 0xce + OpcodeVecI64x2Sub OpcodeVec = 0xd1 + OpcodeVecI64x2Mul OpcodeVec = 0xd5 + OpcodeVecI64x2ExtMulLowI32x4S OpcodeVec = 0xdc + OpcodeVecI64x2ExtMulHighI32x4S OpcodeVec = 0xdd + OpcodeVecI64x2ExtMulLowI32x4U OpcodeVec = 0xde + OpcodeVecI64x2ExtMulHighI32x4U OpcodeVec = 0xdf + + // f32 misc. 
+ + OpcodeVecF32x4Ceil OpcodeVec = 0x67 + OpcodeVecF32x4Floor OpcodeVec = 0x68 + OpcodeVecF32x4Trunc OpcodeVec = 0x69 + OpcodeVecF32x4Nearest OpcodeVec = 0x6a + OpcodeVecF32x4Abs OpcodeVec = 0xe0 + OpcodeVecF32x4Neg OpcodeVec = 0xe1 + OpcodeVecF32x4Sqrt OpcodeVec = 0xe3 + OpcodeVecF32x4Add OpcodeVec = 0xe4 + OpcodeVecF32x4Sub OpcodeVec = 0xe5 + OpcodeVecF32x4Mul OpcodeVec = 0xe6 + OpcodeVecF32x4Div OpcodeVec = 0xe7 + OpcodeVecF32x4Min OpcodeVec = 0xe8 + OpcodeVecF32x4Max OpcodeVec = 0xe9 + OpcodeVecF32x4Pmin OpcodeVec = 0xea + OpcodeVecF32x4Pmax OpcodeVec = 0xeb + + // f64 misc. + + OpcodeVecF64x2Ceil OpcodeVec = 0x74 + OpcodeVecF64x2Floor OpcodeVec = 0x75 + OpcodeVecF64x2Trunc OpcodeVec = 0x7a + OpcodeVecF64x2Nearest OpcodeVec = 0x94 + OpcodeVecF64x2Abs OpcodeVec = 0xec + OpcodeVecF64x2Neg OpcodeVec = 0xed + OpcodeVecF64x2Sqrt OpcodeVec = 0xef + OpcodeVecF64x2Add OpcodeVec = 0xf0 + OpcodeVecF64x2Sub OpcodeVec = 0xf1 + OpcodeVecF64x2Mul OpcodeVec = 0xf2 + OpcodeVecF64x2Div OpcodeVec = 0xf3 + OpcodeVecF64x2Min OpcodeVec = 0xf4 + OpcodeVecF64x2Max OpcodeVec = 0xf5 + OpcodeVecF64x2Pmin OpcodeVec = 0xf6 + OpcodeVecF64x2Pmax OpcodeVec = 0xf7 + + // conversions. + + OpcodeVecI32x4TruncSatF32x4S OpcodeVec = 0xf8 + OpcodeVecI32x4TruncSatF32x4U OpcodeVec = 0xf9 + OpcodeVecF32x4ConvertI32x4S OpcodeVec = 0xfa + OpcodeVecF32x4ConvertI32x4U OpcodeVec = 0xfb + OpcodeVecI32x4TruncSatF64x2SZero OpcodeVec = 0xfc + OpcodeVecI32x4TruncSatF64x2UZero OpcodeVec = 0xfd + OpcodeVecF64x2ConvertLowI32x4S OpcodeVec = 0xfe + OpcodeVecF64x2ConvertLowI32x4U OpcodeVec = 0xff + OpcodeVecF32x4DemoteF64x2Zero OpcodeVec = 0x5e + OpcodeVecF64x2PromoteLowF32x4Zero OpcodeVec = 0x5f +) + +// OpcodeAtomic represents an opcode of atomic instructions which has +// multi-byte encoding and is prefixed by OpcodeAtomicPrefix. +// +// These opcodes are toggled with CoreFeaturesThreads. +type OpcodeAtomic = byte + +const ( + // OpcodeAtomicMemoryNotify represents the instruction memory.atomic.notify. 
+ OpcodeAtomicMemoryNotify OpcodeAtomic = 0x00 + // OpcodeAtomicMemoryWait32 represents the instruction memory.atomic.wait32. + OpcodeAtomicMemoryWait32 OpcodeAtomic = 0x01 + // OpcodeAtomicMemoryWait64 represents the instruction memory.atomic.wait64. + OpcodeAtomicMemoryWait64 OpcodeAtomic = 0x02 + // OpcodeAtomicFence represents the instruction atomic.fence. + OpcodeAtomicFence OpcodeAtomic = 0x03 + + // OpcodeAtomicI32Load represents the instruction i32.atomic.load. + OpcodeAtomicI32Load OpcodeAtomic = 0x10 + // OpcodeAtomicI64Load represents the instruction i64.atomic.load. + OpcodeAtomicI64Load OpcodeAtomic = 0x11 + // OpcodeAtomicI32Load8U represents the instruction i32.atomic.load8_u. + OpcodeAtomicI32Load8U OpcodeAtomic = 0x12 + // OpcodeAtomicI32Load16U represents the instruction i32.atomic.load16_u. + OpcodeAtomicI32Load16U OpcodeAtomic = 0x13 + // OpcodeAtomicI64Load8U represents the instruction i64.atomic.load8_u. + OpcodeAtomicI64Load8U OpcodeAtomic = 0x14 + // OpcodeAtomicI64Load16U represents the instruction i64.atomic.load16_u. + OpcodeAtomicI64Load16U OpcodeAtomic = 0x15 + // OpcodeAtomicI64Load32U represents the instruction i64.atomic.load32_u. + OpcodeAtomicI64Load32U OpcodeAtomic = 0x16 + // OpcodeAtomicI32Store represents the instruction i32.atomic.store. + OpcodeAtomicI32Store OpcodeAtomic = 0x17 + // OpcodeAtomicI64Store represents the instruction i64.atomic.store. + OpcodeAtomicI64Store OpcodeAtomic = 0x18 + // OpcodeAtomicI32Store8 represents the instruction i32.atomic.store8. + OpcodeAtomicI32Store8 OpcodeAtomic = 0x19 + // OpcodeAtomicI32Store16 represents the instruction i32.atomic.store16. + OpcodeAtomicI32Store16 OpcodeAtomic = 0x1a + // OpcodeAtomicI64Store8 represents the instruction i64.atomic.store8. + OpcodeAtomicI64Store8 OpcodeAtomic = 0x1b + // OpcodeAtomicI64Store16 represents the instruction i64.atomic.store16. 
+ OpcodeAtomicI64Store16 OpcodeAtomic = 0x1c + // OpcodeAtomicI64Store32 represents the instruction i64.atomic.store32. + OpcodeAtomicI64Store32 OpcodeAtomic = 0x1d + + // OpcodeAtomicI32RmwAdd represents the instruction i32.atomic.rmw.add. + OpcodeAtomicI32RmwAdd OpcodeAtomic = 0x1e + // OpcodeAtomicI64RmwAdd represents the instruction i64.atomic.rmw.add. + OpcodeAtomicI64RmwAdd OpcodeAtomic = 0x1f + // OpcodeAtomicI32Rmw8AddU represents the instruction i32.atomic.rmw8.add_u. + OpcodeAtomicI32Rmw8AddU OpcodeAtomic = 0x20 + // OpcodeAtomicI32Rmw16AddU represents the instruction i32.atomic.rmw16.add_u. + OpcodeAtomicI32Rmw16AddU OpcodeAtomic = 0x21 + // OpcodeAtomicI64Rmw8AddU represents the instruction i64.atomic.rmw8.add_u. + OpcodeAtomicI64Rmw8AddU OpcodeAtomic = 0x22 + // OpcodeAtomicI64Rmw16AddU represents the instruction i64.atomic.rmw16.add_u. + OpcodeAtomicI64Rmw16AddU OpcodeAtomic = 0x23 + // OpcodeAtomicI64Rmw32AddU represents the instruction i64.atomic.rmw32.add_u. + OpcodeAtomicI64Rmw32AddU OpcodeAtomic = 0x24 + + // OpcodeAtomicI32RmwSub represents the instruction i32.atomic.rmw.sub. + OpcodeAtomicI32RmwSub OpcodeAtomic = 0x25 + // OpcodeAtomicI64RmwSub represents the instruction i64.atomic.rmw.sub. + OpcodeAtomicI64RmwSub OpcodeAtomic = 0x26 + // OpcodeAtomicI32Rmw8SubU represents the instruction i32.atomic.rmw8.sub_u. + OpcodeAtomicI32Rmw8SubU OpcodeAtomic = 0x27 + // OpcodeAtomicI32Rmw16SubU represents the instruction i32.atomic.rmw16.sub_u. + OpcodeAtomicI32Rmw16SubU OpcodeAtomic = 0x28 + // OpcodeAtomicI64Rmw8SubU represents the instruction i64.atomic.rmw8.sub_u. + OpcodeAtomicI64Rmw8SubU OpcodeAtomic = 0x29 + // OpcodeAtomicI64Rmw16SubU represents the instruction i64.atomic.rmw16.sub_u. + OpcodeAtomicI64Rmw16SubU OpcodeAtomic = 0x2a + // OpcodeAtomicI64Rmw32SubU represents the instruction i64.atomic.rmw32.sub_u. + OpcodeAtomicI64Rmw32SubU OpcodeAtomic = 0x2b + + // OpcodeAtomicI32RmwAnd represents the instruction i32.atomic.rmw.and. 
+ OpcodeAtomicI32RmwAnd OpcodeAtomic = 0x2c + // OpcodeAtomicI64RmwAnd represents the instruction i64.atomic.rmw.and. + OpcodeAtomicI64RmwAnd OpcodeAtomic = 0x2d + // OpcodeAtomicI32Rmw8AndU represents the instruction i32.atomic.rmw8.and_u. + OpcodeAtomicI32Rmw8AndU OpcodeAtomic = 0x2e + // OpcodeAtomicI32Rmw16AndU represents the instruction i32.atomic.rmw16.and_u. + OpcodeAtomicI32Rmw16AndU OpcodeAtomic = 0x2f + // OpcodeAtomicI64Rmw8AndU represents the instruction i64.atomic.rmw8.and_u. + OpcodeAtomicI64Rmw8AndU OpcodeAtomic = 0x30 + // OpcodeAtomicI64Rmw16AndU represents the instruction i64.atomic.rmw16.and_u. + OpcodeAtomicI64Rmw16AndU OpcodeAtomic = 0x31 + // OpcodeAtomicI64Rmw32AndU represents the instruction i64.atomic.rmw32.and_u. + OpcodeAtomicI64Rmw32AndU OpcodeAtomic = 0x32 + + // OpcodeAtomicI32RmwOr represents the instruction i32.atomic.rmw.or. + OpcodeAtomicI32RmwOr OpcodeAtomic = 0x33 + // OpcodeAtomicI64RmwOr represents the instruction i64.atomic.rmw.or. + OpcodeAtomicI64RmwOr OpcodeAtomic = 0x34 + // OpcodeAtomicI32Rmw8OrU represents the instruction i32.atomic.rmw8.or_u. + OpcodeAtomicI32Rmw8OrU OpcodeAtomic = 0x35 + // OpcodeAtomicI32Rmw16OrU represents the instruction i32.atomic.rmw16.or_u. + OpcodeAtomicI32Rmw16OrU OpcodeAtomic = 0x36 + // OpcodeAtomicI64Rmw8OrU represents the instruction i64.atomic.rmw8.or_u. + OpcodeAtomicI64Rmw8OrU OpcodeAtomic = 0x37 + // OpcodeAtomicI64Rmw16OrU represents the instruction i64.atomic.rmw16.or_u. + OpcodeAtomicI64Rmw16OrU OpcodeAtomic = 0x38 + // OpcodeAtomicI64Rmw32OrU represents the instruction i64.atomic.rmw32.or_u. + OpcodeAtomicI64Rmw32OrU OpcodeAtomic = 0x39 + + // OpcodeAtomicI32RmwXor represents the instruction i32.atomic.rmw.xor. + OpcodeAtomicI32RmwXor OpcodeAtomic = 0x3a + // OpcodeAtomicI64RmwXor represents the instruction i64.atomic.rmw.xor. + OpcodeAtomicI64RmwXor OpcodeAtomic = 0x3b + // OpcodeAtomicI32Rmw8XorU represents the instruction i32.atomic.rmw8.xor_u. 
+ OpcodeAtomicI32Rmw8XorU OpcodeAtomic = 0x3c + // OpcodeAtomicI32Rmw16XorU represents the instruction i32.atomic.rmw16.xor_u. + OpcodeAtomicI32Rmw16XorU OpcodeAtomic = 0x3d + // OpcodeAtomicI64Rmw8XorU represents the instruction i64.atomic.rmw8.xor_u. + OpcodeAtomicI64Rmw8XorU OpcodeAtomic = 0x3e + // OpcodeAtomicI64Rmw16XorU represents the instruction i64.atomic.rmw16.xor_u. + OpcodeAtomicI64Rmw16XorU OpcodeAtomic = 0x3f + // OpcodeAtomicI64Rmw32XorU represents the instruction i64.atomic.rmw32.xor_u. + OpcodeAtomicI64Rmw32XorU OpcodeAtomic = 0x40 + + // OpcodeAtomicI32RmwXchg represents the instruction i32.atomic.rmw.xchg. + OpcodeAtomicI32RmwXchg OpcodeAtomic = 0x41 + // OpcodeAtomicI64RmwXchg represents the instruction i64.atomic.rmw.xchg. + OpcodeAtomicI64RmwXchg OpcodeAtomic = 0x42 + // OpcodeAtomicI32Rmw8XchgU represents the instruction i32.atomic.rmw8.xchg_u. + OpcodeAtomicI32Rmw8XchgU OpcodeAtomic = 0x43 + // OpcodeAtomicI32Rmw16XchgU represents the instruction i32.atomic.rmw16.xchg_u. + OpcodeAtomicI32Rmw16XchgU OpcodeAtomic = 0x44 + // OpcodeAtomicI64Rmw8XchgU represents the instruction i64.atomic.rmw8.xchg_u. + OpcodeAtomicI64Rmw8XchgU OpcodeAtomic = 0x45 + // OpcodeAtomicI64Rmw16XchgU represents the instruction i64.atomic.rmw16.xchg_u. + OpcodeAtomicI64Rmw16XchgU OpcodeAtomic = 0x46 + // OpcodeAtomicI64Rmw32XchgU represents the instruction i64.atomic.rmw32.xchg_u. + OpcodeAtomicI64Rmw32XchgU OpcodeAtomic = 0x47 + + // OpcodeAtomicI32RmwCmpxchg represents the instruction i32.atomic.rmw.cmpxchg. + OpcodeAtomicI32RmwCmpxchg OpcodeAtomic = 0x48 + // OpcodeAtomicI64RmwCmpxchg represents the instruction i64.atomic.rmw.cmpxchg. + OpcodeAtomicI64RmwCmpxchg OpcodeAtomic = 0x49 + // OpcodeAtomicI32Rmw8CmpxchgU represents the instruction i32.atomic.rmw8.cmpxchg_u. + OpcodeAtomicI32Rmw8CmpxchgU OpcodeAtomic = 0x4a + // OpcodeAtomicI32Rmw16CmpxchgU represents the instruction i32.atomic.rmw16.cmpxchg_u. 
+ OpcodeAtomicI32Rmw16CmpxchgU OpcodeAtomic = 0x4b + // OpcodeAtomicI64Rmw8CmpxchgU represents the instruction i64.atomic.rmw8.cmpxchg_u. + OpcodeAtomicI64Rmw8CmpxchgU OpcodeAtomic = 0x4c + // OpcodeAtomicI64Rmw16CmpxchgU represents the instruction i64.atomic.rmw16.cmpxchg_u. + OpcodeAtomicI64Rmw16CmpxchgU OpcodeAtomic = 0x4d + // OpcodeAtomicI64Rmw32CmpxchgU represents the instruction i64.atomic.rmw32.cmpxchg_u. + OpcodeAtomicI64Rmw32CmpxchgU OpcodeAtomic = 0x4e +) + +const ( + OpcodeUnreachableName = "unreachable" + OpcodeNopName = "nop" + OpcodeBlockName = "block" + OpcodeLoopName = "loop" + OpcodeIfName = "if" + OpcodeElseName = "else" + OpcodeEndName = "end" + OpcodeBrName = "br" + OpcodeBrIfName = "br_if" + OpcodeBrTableName = "br_table" + OpcodeReturnName = "return" + OpcodeCallName = "call" + OpcodeCallIndirectName = "call_indirect" + OpcodeDropName = "drop" + OpcodeSelectName = "select" + OpcodeTypedSelectName = "typed_select" + OpcodeLocalGetName = "local.get" + OpcodeLocalSetName = "local.set" + OpcodeLocalTeeName = "local.tee" + OpcodeGlobalGetName = "global.get" + OpcodeGlobalSetName = "global.set" + OpcodeI32LoadName = "i32.load" + OpcodeI64LoadName = "i64.load" + OpcodeF32LoadName = "f32.load" + OpcodeF64LoadName = "f64.load" + OpcodeI32Load8SName = "i32.load8_s" + OpcodeI32Load8UName = "i32.load8_u" + OpcodeI32Load16SName = "i32.load16_s" + OpcodeI32Load16UName = "i32.load16_u" + OpcodeI64Load8SName = "i64.load8_s" + OpcodeI64Load8UName = "i64.load8_u" + OpcodeI64Load16SName = "i64.load16_s" + OpcodeI64Load16UName = "i64.load16_u" + OpcodeI64Load32SName = "i64.load32_s" + OpcodeI64Load32UName = "i64.load32_u" + OpcodeI32StoreName = "i32.store" + OpcodeI64StoreName = "i64.store" + OpcodeF32StoreName = "f32.store" + OpcodeF64StoreName = "f64.store" + OpcodeI32Store8Name = "i32.store8" + OpcodeI32Store16Name = "i32.store16" + OpcodeI64Store8Name = "i64.store8" + OpcodeI64Store16Name = "i64.store16" + OpcodeI64Store32Name = "i64.store32" + 
OpcodeMemorySizeName = "memory.size" + OpcodeMemoryGrowName = "memory.grow" + OpcodeI32ConstName = "i32.const" + OpcodeI64ConstName = "i64.const" + OpcodeF32ConstName = "f32.const" + OpcodeF64ConstName = "f64.const" + OpcodeI32EqzName = "i32.eqz" + OpcodeI32EqName = "i32.eq" + OpcodeI32NeName = "i32.ne" + OpcodeI32LtSName = "i32.lt_s" + OpcodeI32LtUName = "i32.lt_u" + OpcodeI32GtSName = "i32.gt_s" + OpcodeI32GtUName = "i32.gt_u" + OpcodeI32LeSName = "i32.le_s" + OpcodeI32LeUName = "i32.le_u" + OpcodeI32GeSName = "i32.ge_s" + OpcodeI32GeUName = "i32.ge_u" + OpcodeI64EqzName = "i64.eqz" + OpcodeI64EqName = "i64.eq" + OpcodeI64NeName = "i64.ne" + OpcodeI64LtSName = "i64.lt_s" + OpcodeI64LtUName = "i64.lt_u" + OpcodeI64GtSName = "i64.gt_s" + OpcodeI64GtUName = "i64.gt_u" + OpcodeI64LeSName = "i64.le_s" + OpcodeI64LeUName = "i64.le_u" + OpcodeI64GeSName = "i64.ge_s" + OpcodeI64GeUName = "i64.ge_u" + OpcodeF32EqName = "f32.eq" + OpcodeF32NeName = "f32.ne" + OpcodeF32LtName = "f32.lt" + OpcodeF32GtName = "f32.gt" + OpcodeF32LeName = "f32.le" + OpcodeF32GeName = "f32.ge" + OpcodeF64EqName = "f64.eq" + OpcodeF64NeName = "f64.ne" + OpcodeF64LtName = "f64.lt" + OpcodeF64GtName = "f64.gt" + OpcodeF64LeName = "f64.le" + OpcodeF64GeName = "f64.ge" + OpcodeI32ClzName = "i32.clz" + OpcodeI32CtzName = "i32.ctz" + OpcodeI32PopcntName = "i32.popcnt" + OpcodeI32AddName = "i32.add" + OpcodeI32SubName = "i32.sub" + OpcodeI32MulName = "i32.mul" + OpcodeI32DivSName = "i32.div_s" + OpcodeI32DivUName = "i32.div_u" + OpcodeI32RemSName = "i32.rem_s" + OpcodeI32RemUName = "i32.rem_u" + OpcodeI32AndName = "i32.and" + OpcodeI32OrName = "i32.or" + OpcodeI32XorName = "i32.xor" + OpcodeI32ShlName = "i32.shl" + OpcodeI32ShrSName = "i32.shr_s" + OpcodeI32ShrUName = "i32.shr_u" + OpcodeI32RotlName = "i32.rotl" + OpcodeI32RotrName = "i32.rotr" + OpcodeI64ClzName = "i64.clz" + OpcodeI64CtzName = "i64.ctz" + OpcodeI64PopcntName = "i64.popcnt" + OpcodeI64AddName = "i64.add" + OpcodeI64SubName = "i64.sub" 
+ OpcodeI64MulName = "i64.mul" + OpcodeI64DivSName = "i64.div_s" + OpcodeI64DivUName = "i64.div_u" + OpcodeI64RemSName = "i64.rem_s" + OpcodeI64RemUName = "i64.rem_u" + OpcodeI64AndName = "i64.and" + OpcodeI64OrName = "i64.or" + OpcodeI64XorName = "i64.xor" + OpcodeI64ShlName = "i64.shl" + OpcodeI64ShrSName = "i64.shr_s" + OpcodeI64ShrUName = "i64.shr_u" + OpcodeI64RotlName = "i64.rotl" + OpcodeI64RotrName = "i64.rotr" + OpcodeF32AbsName = "f32.abs" + OpcodeF32NegName = "f32.neg" + OpcodeF32CeilName = "f32.ceil" + OpcodeF32FloorName = "f32.floor" + OpcodeF32TruncName = "f32.trunc" + OpcodeF32NearestName = "f32.nearest" + OpcodeF32SqrtName = "f32.sqrt" + OpcodeF32AddName = "f32.add" + OpcodeF32SubName = "f32.sub" + OpcodeF32MulName = "f32.mul" + OpcodeF32DivName = "f32.div" + OpcodeF32MinName = "f32.min" + OpcodeF32MaxName = "f32.max" + OpcodeF32CopysignName = "f32.copysign" + OpcodeF64AbsName = "f64.abs" + OpcodeF64NegName = "f64.neg" + OpcodeF64CeilName = "f64.ceil" + OpcodeF64FloorName = "f64.floor" + OpcodeF64TruncName = "f64.trunc" + OpcodeF64NearestName = "f64.nearest" + OpcodeF64SqrtName = "f64.sqrt" + OpcodeF64AddName = "f64.add" + OpcodeF64SubName = "f64.sub" + OpcodeF64MulName = "f64.mul" + OpcodeF64DivName = "f64.div" + OpcodeF64MinName = "f64.min" + OpcodeF64MaxName = "f64.max" + OpcodeF64CopysignName = "f64.copysign" + OpcodeI32WrapI64Name = "i32.wrap_i64" + OpcodeI32TruncF32SName = "i32.trunc_f32_s" + OpcodeI32TruncF32UName = "i32.trunc_f32_u" + OpcodeI32TruncF64SName = "i32.trunc_f64_s" + OpcodeI32TruncF64UName = "i32.trunc_f64_u" + OpcodeI64ExtendI32SName = "i64.extend_i32_s" + OpcodeI64ExtendI32UName = "i64.extend_i32_u" + OpcodeI64TruncF32SName = "i64.trunc_f32_s" + OpcodeI64TruncF32UName = "i64.trunc_f32_u" + OpcodeI64TruncF64SName = "i64.trunc_f64_s" + OpcodeI64TruncF64UName = "i64.trunc_f64_u" + OpcodeF32ConvertI32SName = "f32.convert_i32_s" + OpcodeF32ConvertI32UName = "f32.convert_i32_u" + OpcodeF32ConvertI64SName = "f32.convert_i64_s" + 
OpcodeF32ConvertI64UName = "f32.convert_i64u" + OpcodeF32DemoteF64Name = "f32.demote_f64" + OpcodeF64ConvertI32SName = "f64.convert_i32_s" + OpcodeF64ConvertI32UName = "f64.convert_i32_u" + OpcodeF64ConvertI64SName = "f64.convert_i64_s" + OpcodeF64ConvertI64UName = "f64.convert_i64_u" + OpcodeF64PromoteF32Name = "f64.promote_f32" + OpcodeI32ReinterpretF32Name = "i32.reinterpret_f32" + OpcodeI64ReinterpretF64Name = "i64.reinterpret_f64" + OpcodeF32ReinterpretI32Name = "f32.reinterpret_i32" + OpcodeF64ReinterpretI64Name = "f64.reinterpret_i64" + + OpcodeRefNullName = "ref.null" + OpcodeRefIsNullName = "ref.is_null" + OpcodeRefFuncName = "ref.func" + + OpcodeTableGetName = "table.get" + OpcodeTableSetName = "table.set" + + // Below are toggled with CoreFeatureSignExtensionOps + + OpcodeI32Extend8SName = "i32.extend8_s" + OpcodeI32Extend16SName = "i32.extend16_s" + OpcodeI64Extend8SName = "i64.extend8_s" + OpcodeI64Extend16SName = "i64.extend16_s" + OpcodeI64Extend32SName = "i64.extend32_s" + + OpcodeMiscPrefixName = "misc_prefix" + OpcodeVecPrefixName = "vector_prefix" + OpcodeAtomicPrefixName = "atomic_prefix" +) + +var instructionNames = [256]string{ + OpcodeUnreachable: OpcodeUnreachableName, + OpcodeNop: OpcodeNopName, + OpcodeBlock: OpcodeBlockName, + OpcodeLoop: OpcodeLoopName, + OpcodeIf: OpcodeIfName, + OpcodeElse: OpcodeElseName, + OpcodeEnd: OpcodeEndName, + OpcodeBr: OpcodeBrName, + OpcodeBrIf: OpcodeBrIfName, + OpcodeBrTable: OpcodeBrTableName, + OpcodeReturn: OpcodeReturnName, + OpcodeCall: OpcodeCallName, + OpcodeCallIndirect: OpcodeCallIndirectName, + OpcodeDrop: OpcodeDropName, + OpcodeSelect: OpcodeSelectName, + OpcodeTypedSelect: OpcodeTypedSelectName, + OpcodeLocalGet: OpcodeLocalGetName, + OpcodeLocalSet: OpcodeLocalSetName, + OpcodeLocalTee: OpcodeLocalTeeName, + OpcodeGlobalGet: OpcodeGlobalGetName, + OpcodeGlobalSet: OpcodeGlobalSetName, + OpcodeI32Load: OpcodeI32LoadName, + OpcodeI64Load: OpcodeI64LoadName, + OpcodeF32Load: OpcodeF32LoadName, + 
OpcodeF64Load: OpcodeF64LoadName, + OpcodeI32Load8S: OpcodeI32Load8SName, + OpcodeI32Load8U: OpcodeI32Load8UName, + OpcodeI32Load16S: OpcodeI32Load16SName, + OpcodeI32Load16U: OpcodeI32Load16UName, + OpcodeI64Load8S: OpcodeI64Load8SName, + OpcodeI64Load8U: OpcodeI64Load8UName, + OpcodeI64Load16S: OpcodeI64Load16SName, + OpcodeI64Load16U: OpcodeI64Load16UName, + OpcodeI64Load32S: OpcodeI64Load32SName, + OpcodeI64Load32U: OpcodeI64Load32UName, + OpcodeI32Store: OpcodeI32StoreName, + OpcodeI64Store: OpcodeI64StoreName, + OpcodeF32Store: OpcodeF32StoreName, + OpcodeF64Store: OpcodeF64StoreName, + OpcodeI32Store8: OpcodeI32Store8Name, + OpcodeI32Store16: OpcodeI32Store16Name, + OpcodeI64Store8: OpcodeI64Store8Name, + OpcodeI64Store16: OpcodeI64Store16Name, + OpcodeI64Store32: OpcodeI64Store32Name, + OpcodeMemorySize: OpcodeMemorySizeName, + OpcodeMemoryGrow: OpcodeMemoryGrowName, + OpcodeI32Const: OpcodeI32ConstName, + OpcodeI64Const: OpcodeI64ConstName, + OpcodeF32Const: OpcodeF32ConstName, + OpcodeF64Const: OpcodeF64ConstName, + OpcodeI32Eqz: OpcodeI32EqzName, + OpcodeI32Eq: OpcodeI32EqName, + OpcodeI32Ne: OpcodeI32NeName, + OpcodeI32LtS: OpcodeI32LtSName, + OpcodeI32LtU: OpcodeI32LtUName, + OpcodeI32GtS: OpcodeI32GtSName, + OpcodeI32GtU: OpcodeI32GtUName, + OpcodeI32LeS: OpcodeI32LeSName, + OpcodeI32LeU: OpcodeI32LeUName, + OpcodeI32GeS: OpcodeI32GeSName, + OpcodeI32GeU: OpcodeI32GeUName, + OpcodeI64Eqz: OpcodeI64EqzName, + OpcodeI64Eq: OpcodeI64EqName, + OpcodeI64Ne: OpcodeI64NeName, + OpcodeI64LtS: OpcodeI64LtSName, + OpcodeI64LtU: OpcodeI64LtUName, + OpcodeI64GtS: OpcodeI64GtSName, + OpcodeI64GtU: OpcodeI64GtUName, + OpcodeI64LeS: OpcodeI64LeSName, + OpcodeI64LeU: OpcodeI64LeUName, + OpcodeI64GeS: OpcodeI64GeSName, + OpcodeI64GeU: OpcodeI64GeUName, + OpcodeF32Eq: OpcodeF32EqName, + OpcodeF32Ne: OpcodeF32NeName, + OpcodeF32Lt: OpcodeF32LtName, + OpcodeF32Gt: OpcodeF32GtName, + OpcodeF32Le: OpcodeF32LeName, + OpcodeF32Ge: OpcodeF32GeName, + OpcodeF64Eq: 
OpcodeF64EqName, + OpcodeF64Ne: OpcodeF64NeName, + OpcodeF64Lt: OpcodeF64LtName, + OpcodeF64Gt: OpcodeF64GtName, + OpcodeF64Le: OpcodeF64LeName, + OpcodeF64Ge: OpcodeF64GeName, + OpcodeI32Clz: OpcodeI32ClzName, + OpcodeI32Ctz: OpcodeI32CtzName, + OpcodeI32Popcnt: OpcodeI32PopcntName, + OpcodeI32Add: OpcodeI32AddName, + OpcodeI32Sub: OpcodeI32SubName, + OpcodeI32Mul: OpcodeI32MulName, + OpcodeI32DivS: OpcodeI32DivSName, + OpcodeI32DivU: OpcodeI32DivUName, + OpcodeI32RemS: OpcodeI32RemSName, + OpcodeI32RemU: OpcodeI32RemUName, + OpcodeI32And: OpcodeI32AndName, + OpcodeI32Or: OpcodeI32OrName, + OpcodeI32Xor: OpcodeI32XorName, + OpcodeI32Shl: OpcodeI32ShlName, + OpcodeI32ShrS: OpcodeI32ShrSName, + OpcodeI32ShrU: OpcodeI32ShrUName, + OpcodeI32Rotl: OpcodeI32RotlName, + OpcodeI32Rotr: OpcodeI32RotrName, + OpcodeI64Clz: OpcodeI64ClzName, + OpcodeI64Ctz: OpcodeI64CtzName, + OpcodeI64Popcnt: OpcodeI64PopcntName, + OpcodeI64Add: OpcodeI64AddName, + OpcodeI64Sub: OpcodeI64SubName, + OpcodeI64Mul: OpcodeI64MulName, + OpcodeI64DivS: OpcodeI64DivSName, + OpcodeI64DivU: OpcodeI64DivUName, + OpcodeI64RemS: OpcodeI64RemSName, + OpcodeI64RemU: OpcodeI64RemUName, + OpcodeI64And: OpcodeI64AndName, + OpcodeI64Or: OpcodeI64OrName, + OpcodeI64Xor: OpcodeI64XorName, + OpcodeI64Shl: OpcodeI64ShlName, + OpcodeI64ShrS: OpcodeI64ShrSName, + OpcodeI64ShrU: OpcodeI64ShrUName, + OpcodeI64Rotl: OpcodeI64RotlName, + OpcodeI64Rotr: OpcodeI64RotrName, + OpcodeF32Abs: OpcodeF32AbsName, + OpcodeF32Neg: OpcodeF32NegName, + OpcodeF32Ceil: OpcodeF32CeilName, + OpcodeF32Floor: OpcodeF32FloorName, + OpcodeF32Trunc: OpcodeF32TruncName, + OpcodeF32Nearest: OpcodeF32NearestName, + OpcodeF32Sqrt: OpcodeF32SqrtName, + OpcodeF32Add: OpcodeF32AddName, + OpcodeF32Sub: OpcodeF32SubName, + OpcodeF32Mul: OpcodeF32MulName, + OpcodeF32Div: OpcodeF32DivName, + OpcodeF32Min: OpcodeF32MinName, + OpcodeF32Max: OpcodeF32MaxName, + OpcodeF32Copysign: OpcodeF32CopysignName, + OpcodeF64Abs: OpcodeF64AbsName, + OpcodeF64Neg: 
OpcodeF64NegName, + OpcodeF64Ceil: OpcodeF64CeilName, + OpcodeF64Floor: OpcodeF64FloorName, + OpcodeF64Trunc: OpcodeF64TruncName, + OpcodeF64Nearest: OpcodeF64NearestName, + OpcodeF64Sqrt: OpcodeF64SqrtName, + OpcodeF64Add: OpcodeF64AddName, + OpcodeF64Sub: OpcodeF64SubName, + OpcodeF64Mul: OpcodeF64MulName, + OpcodeF64Div: OpcodeF64DivName, + OpcodeF64Min: OpcodeF64MinName, + OpcodeF64Max: OpcodeF64MaxName, + OpcodeF64Copysign: OpcodeF64CopysignName, + OpcodeI32WrapI64: OpcodeI32WrapI64Name, + OpcodeI32TruncF32S: OpcodeI32TruncF32SName, + OpcodeI32TruncF32U: OpcodeI32TruncF32UName, + OpcodeI32TruncF64S: OpcodeI32TruncF64SName, + OpcodeI32TruncF64U: OpcodeI32TruncF64UName, + OpcodeI64ExtendI32S: OpcodeI64ExtendI32SName, + OpcodeI64ExtendI32U: OpcodeI64ExtendI32UName, + OpcodeI64TruncF32S: OpcodeI64TruncF32SName, + OpcodeI64TruncF32U: OpcodeI64TruncF32UName, + OpcodeI64TruncF64S: OpcodeI64TruncF64SName, + OpcodeI64TruncF64U: OpcodeI64TruncF64UName, + OpcodeF32ConvertI32S: OpcodeF32ConvertI32SName, + OpcodeF32ConvertI32U: OpcodeF32ConvertI32UName, + OpcodeF32ConvertI64S: OpcodeF32ConvertI64SName, + OpcodeF32ConvertI64U: OpcodeF32ConvertI64UName, + OpcodeF32DemoteF64: OpcodeF32DemoteF64Name, + OpcodeF64ConvertI32S: OpcodeF64ConvertI32SName, + OpcodeF64ConvertI32U: OpcodeF64ConvertI32UName, + OpcodeF64ConvertI64S: OpcodeF64ConvertI64SName, + OpcodeF64ConvertI64U: OpcodeF64ConvertI64UName, + OpcodeF64PromoteF32: OpcodeF64PromoteF32Name, + OpcodeI32ReinterpretF32: OpcodeI32ReinterpretF32Name, + OpcodeI64ReinterpretF64: OpcodeI64ReinterpretF64Name, + OpcodeF32ReinterpretI32: OpcodeF32ReinterpretI32Name, + OpcodeF64ReinterpretI64: OpcodeF64ReinterpretI64Name, + + OpcodeRefNull: OpcodeRefNullName, + OpcodeRefIsNull: OpcodeRefIsNullName, + OpcodeRefFunc: OpcodeRefFuncName, + + OpcodeTableGet: OpcodeTableGetName, + OpcodeTableSet: OpcodeTableSetName, + + // Below are toggled with CoreFeatureSignExtensionOps + + OpcodeI32Extend8S: OpcodeI32Extend8SName, + OpcodeI32Extend16S: 
OpcodeI32Extend16SName, + OpcodeI64Extend8S: OpcodeI64Extend8SName, + OpcodeI64Extend16S: OpcodeI64Extend16SName, + OpcodeI64Extend32S: OpcodeI64Extend32SName, + + OpcodeMiscPrefix: OpcodeMiscPrefixName, + OpcodeVecPrefix: OpcodeVecPrefixName, +} + +// InstructionName returns the instruction corresponding to this binary Opcode. +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#a7-index-of-instructions +func InstructionName(oc Opcode) string { + return instructionNames[oc] +} + +const ( + OpcodeI32TruncSatF32SName = "i32.trunc_sat_f32_s" + OpcodeI32TruncSatF32UName = "i32.trunc_sat_f32_u" + OpcodeI32TruncSatF64SName = "i32.trunc_sat_f64_s" + OpcodeI32TruncSatF64UName = "i32.trunc_sat_f64_u" + OpcodeI64TruncSatF32SName = "i64.trunc_sat_f32_s" + OpcodeI64TruncSatF32UName = "i64.trunc_sat_f32_u" + OpcodeI64TruncSatF64SName = "i64.trunc_sat_f64_s" + OpcodeI64TruncSatF64UName = "i64.trunc_sat_f64_u" + + OpcodeMemoryInitName = "memory.init" + OpcodeDataDropName = "data.drop" + OpcodeMemoryCopyName = "memory.copy" + OpcodeMemoryFillName = "memory.fill" + OpcodeTableInitName = "table.init" + OpcodeElemDropName = "elem.drop" + OpcodeTableCopyName = "table.copy" + OpcodeTableGrowName = "table.grow" + OpcodeTableSizeName = "table.size" + OpcodeTableFillName = "table.fill" +) + +var miscInstructionNames = [256]string{ + OpcodeMiscI32TruncSatF32S: OpcodeI32TruncSatF32SName, + OpcodeMiscI32TruncSatF32U: OpcodeI32TruncSatF32UName, + OpcodeMiscI32TruncSatF64S: OpcodeI32TruncSatF64SName, + OpcodeMiscI32TruncSatF64U: OpcodeI32TruncSatF64UName, + OpcodeMiscI64TruncSatF32S: OpcodeI64TruncSatF32SName, + OpcodeMiscI64TruncSatF32U: OpcodeI64TruncSatF32UName, + OpcodeMiscI64TruncSatF64S: OpcodeI64TruncSatF64SName, + OpcodeMiscI64TruncSatF64U: OpcodeI64TruncSatF64UName, + + OpcodeMiscMemoryInit: OpcodeMemoryInitName, + OpcodeMiscDataDrop: OpcodeDataDropName, + OpcodeMiscMemoryCopy: OpcodeMemoryCopyName, + OpcodeMiscMemoryFill: OpcodeMemoryFillName, + OpcodeMiscTableInit: 
OpcodeTableInitName, + OpcodeMiscElemDrop: OpcodeElemDropName, + OpcodeMiscTableCopy: OpcodeTableCopyName, + OpcodeMiscTableGrow: OpcodeTableGrowName, + OpcodeMiscTableSize: OpcodeTableSizeName, + OpcodeMiscTableFill: OpcodeTableFillName, +} + +// MiscInstructionName returns the instruction corresponding to this miscellaneous Opcode. +func MiscInstructionName(oc OpcodeMisc) string { + return miscInstructionNames[oc] +} + +const ( + OpcodeVecV128LoadName = "v128.load" + OpcodeVecV128Load8x8SName = "v128.load8x8_s" + OpcodeVecV128Load8x8UName = "v128.load8x8_u" + OpcodeVecV128Load16x4SName = "v128.load16x4_s" + OpcodeVecV128Load16x4UName = "v128.load16x4_u" + OpcodeVecV128Load32x2SName = "v128.load32x2_s" + OpcodeVecV128Load32x2UName = "v128.load32x2_u" + OpcodeVecV128Load8SplatName = "v128.load8_splat" + OpcodeVecV128Load16SplatName = "v128.load16_splat" + OpcodeVecV128Load32SplatName = "v128.load32_splat" + OpcodeVecV128Load64SplatName = "v128.load64_splat" + OpcodeVecV128Load32zeroName = "v128.load32_zero" + OpcodeVecV128Load64zeroName = "v128.load64_zero" + OpcodeVecV128StoreName = "v128.store" + OpcodeVecV128Load8LaneName = "v128.load8_lane" + OpcodeVecV128Load16LaneName = "v128.load16_lane" + OpcodeVecV128Load32LaneName = "v128.load32_lane" + OpcodeVecV128Load64LaneName = "v128.load64_lane" + OpcodeVecV128Store8LaneName = "v128.store8_lane" + OpcodeVecV128Store16LaneName = "v128.store16_lane" + OpcodeVecV128Store32LaneName = "v128.store32_lane" + OpcodeVecV128Store64LaneName = "v128.store64_lane" + OpcodeVecV128ConstName = "v128.const" + OpcodeVecV128i8x16ShuffleName = "v128.shuffle" + OpcodeVecI8x16ExtractLaneSName = "i8x16.extract_lane_s" + OpcodeVecI8x16ExtractLaneUName = "i8x16.extract_lane_u" + OpcodeVecI8x16ReplaceLaneName = "i8x16.replace_lane" + OpcodeVecI16x8ExtractLaneSName = "i16x8.extract_lane_s" + OpcodeVecI16x8ExtractLaneUName = "i16x8.extract_lane_u" + OpcodeVecI16x8ReplaceLaneName = "i16x8.replace_lane" + OpcodeVecI32x4ExtractLaneName = 
"i32x4.extract_lane" + OpcodeVecI32x4ReplaceLaneName = "i32x4.replace_lane" + OpcodeVecI64x2ExtractLaneName = "i64x2.extract_lane" + OpcodeVecI64x2ReplaceLaneName = "i64x2.replace_lane" + OpcodeVecF32x4ExtractLaneName = "f32x4.extract_lane" + OpcodeVecF32x4ReplaceLaneName = "f32x4.replace_lane" + OpcodeVecF64x2ExtractLaneName = "f64x2.extract_lane" + OpcodeVecF64x2ReplaceLaneName = "f64x2.replace_lane" + OpcodeVecI8x16SwizzleName = "i8x16.swizzle" + OpcodeVecI8x16SplatName = "i8x16.splat" + OpcodeVecI16x8SplatName = "i16x8.splat" + OpcodeVecI32x4SplatName = "i32x4.splat" + OpcodeVecI64x2SplatName = "i64x2.splat" + OpcodeVecF32x4SplatName = "f32x4.splat" + OpcodeVecF64x2SplatName = "f64x2.splat" + OpcodeVecI8x16EqName = "i8x16.eq" + OpcodeVecI8x16NeName = "i8x16.ne" + OpcodeVecI8x16LtSName = "i8x16.lt_s" + OpcodeVecI8x16LtUName = "i8x16.lt_u" + OpcodeVecI8x16GtSName = "i8x16.gt_s" + OpcodeVecI8x16GtUName = "i8x16.gt_u" + OpcodeVecI8x16LeSName = "i8x16.le_s" + OpcodeVecI8x16LeUName = "i8x16.le_u" + OpcodeVecI8x16GeSName = "i8x16.ge_s" + OpcodeVecI8x16GeUName = "i8x16.ge_u" + OpcodeVecI16x8EqName = "i16x8.eq" + OpcodeVecI16x8NeName = "i16x8.ne" + OpcodeVecI16x8LtSName = "i16x8.lt_s" + OpcodeVecI16x8LtUName = "i16x8.lt_u" + OpcodeVecI16x8GtSName = "i16x8.gt_s" + OpcodeVecI16x8GtUName = "i16x8.gt_u" + OpcodeVecI16x8LeSName = "i16x8.le_s" + OpcodeVecI16x8LeUName = "i16x8.le_u" + OpcodeVecI16x8GeSName = "i16x8.ge_s" + OpcodeVecI16x8GeUName = "i16x8.ge_u" + OpcodeVecI32x4EqName = "i32x4.eq" + OpcodeVecI32x4NeName = "i32x4.ne" + OpcodeVecI32x4LtSName = "i32x4.lt_s" + OpcodeVecI32x4LtUName = "i32x4.lt_u" + OpcodeVecI32x4GtSName = "i32x4.gt_s" + OpcodeVecI32x4GtUName = "i32x4.gt_u" + OpcodeVecI32x4LeSName = "i32x4.le_s" + OpcodeVecI32x4LeUName = "i32x4.le_u" + OpcodeVecI32x4GeSName = "i32x4.ge_s" + OpcodeVecI32x4GeUName = "i32x4.ge_u" + OpcodeVecI64x2EqName = "i64x2.eq" + OpcodeVecI64x2NeName = "i64x2.ne" + OpcodeVecI64x2LtSName = "i64x2.lt" + OpcodeVecI64x2GtSName = 
"i64x2.gt" + OpcodeVecI64x2LeSName = "i64x2.le" + OpcodeVecI64x2GeSName = "i64x2.ge" + OpcodeVecF32x4EqName = "f32x4.eq" + OpcodeVecF32x4NeName = "f32x4.ne" + OpcodeVecF32x4LtName = "f32x4.lt" + OpcodeVecF32x4GtName = "f32x4.gt" + OpcodeVecF32x4LeName = "f32x4.le" + OpcodeVecF32x4GeName = "f32x4.ge" + OpcodeVecF64x2EqName = "f64x2.eq" + OpcodeVecF64x2NeName = "f64x2.ne" + OpcodeVecF64x2LtName = "f64x2.lt" + OpcodeVecF64x2GtName = "f64x2.gt" + OpcodeVecF64x2LeName = "f64x2.le" + OpcodeVecF64x2GeName = "f64x2.ge" + OpcodeVecV128NotName = "v128.not" + OpcodeVecV128AndName = "v128.and" + OpcodeVecV128AndNotName = "v128.andnot" + OpcodeVecV128OrName = "v128.or" + OpcodeVecV128XorName = "v128.xor" + OpcodeVecV128BitselectName = "v128.bitselect" + OpcodeVecV128AnyTrueName = "v128.any_true" + OpcodeVecI8x16AbsName = "i8x16.abs" + OpcodeVecI8x16NegName = "i8x16.neg" + OpcodeVecI8x16PopcntName = "i8x16.popcnt" + OpcodeVecI8x16AllTrueName = "i8x16.all_true" + OpcodeVecI8x16BitMaskName = "i8x16.bitmask" + OpcodeVecI8x16NarrowI16x8SName = "i8x16.narrow_i16x8_s" + OpcodeVecI8x16NarrowI16x8UName = "i8x16.narrow_i16x8_u" + OpcodeVecI8x16ShlName = "i8x16.shl" + OpcodeVecI8x16ShrSName = "i8x16.shr_s" + OpcodeVecI8x16ShrUName = "i8x16.shr_u" + OpcodeVecI8x16AddName = "i8x16.add" + OpcodeVecI8x16AddSatSName = "i8x16.add_sat_s" + OpcodeVecI8x16AddSatUName = "i8x16.add_sat_u" + OpcodeVecI8x16SubName = "i8x16.sub" + OpcodeVecI8x16SubSatSName = "i8x16.sub_s" + OpcodeVecI8x16SubSatUName = "i8x16.sub_u" + OpcodeVecI8x16MinSName = "i8x16.min_s" + OpcodeVecI8x16MinUName = "i8x16.min_u" + OpcodeVecI8x16MaxSName = "i8x16.max_s" + OpcodeVecI8x16MaxUName = "i8x16.max_u" + OpcodeVecI8x16AvgrUName = "i8x16.avgr_u" + OpcodeVecI16x8ExtaddPairwiseI8x16SName = "i16x8.extadd_pairwise_i8x16_s" + OpcodeVecI16x8ExtaddPairwiseI8x16UName = "i16x8.extadd_pairwise_i8x16_u" + OpcodeVecI16x8AbsName = "i16x8.abs" + OpcodeVecI16x8NegName = "i16x8.neg" + OpcodeVecI16x8Q15mulrSatSName = "i16x8.q15mulr_sat_s" + 
OpcodeVecI16x8AllTrueName = "i16x8.all_true" + OpcodeVecI16x8BitMaskName = "i16x8.bitmask" + OpcodeVecI16x8NarrowI32x4SName = "i16x8.narrow_i32x4_s" + OpcodeVecI16x8NarrowI32x4UName = "i16x8.narrow_i32x4_u" + OpcodeVecI16x8ExtendLowI8x16SName = "i16x8.extend_low_i8x16_s" + OpcodeVecI16x8ExtendHighI8x16SName = "i16x8.extend_high_i8x16_s" + OpcodeVecI16x8ExtendLowI8x16UName = "i16x8.extend_low_i8x16_u" + OpcodeVecI16x8ExtendHighI8x16UName = "i16x8.extend_high_i8x16_u" + OpcodeVecI16x8ShlName = "i16x8.shl" + OpcodeVecI16x8ShrSName = "i16x8.shr_s" + OpcodeVecI16x8ShrUName = "i16x8.shr_u" + OpcodeVecI16x8AddName = "i16x8.add" + OpcodeVecI16x8AddSatSName = "i16x8.add_sat_s" + OpcodeVecI16x8AddSatUName = "i16x8.add_sat_u" + OpcodeVecI16x8SubName = "i16x8.sub" + OpcodeVecI16x8SubSatSName = "i16x8.sub_sat_s" + OpcodeVecI16x8SubSatUName = "i16x8.sub_sat_u" + OpcodeVecI16x8MulName = "i16x8.mul" + OpcodeVecI16x8MinSName = "i16x8.min_s" + OpcodeVecI16x8MinUName = "i16x8.min_u" + OpcodeVecI16x8MaxSName = "i16x8.max_s" + OpcodeVecI16x8MaxUName = "i16x8.max_u" + OpcodeVecI16x8AvgrUName = "i16x8.avgr_u" + OpcodeVecI16x8ExtMulLowI8x16SName = "i16x8.extmul_low_i8x16_s" + OpcodeVecI16x8ExtMulHighI8x16SName = "i16x8.extmul_high_i8x16_s" + OpcodeVecI16x8ExtMulLowI8x16UName = "i16x8.extmul_low_i8x16_u" + OpcodeVecI16x8ExtMulHighI8x16UName = "i16x8.extmul_high_i8x16_u" + OpcodeVecI32x4ExtaddPairwiseI16x8SName = "i32x4.extadd_pairwise_i16x8_s" + OpcodeVecI32x4ExtaddPairwiseI16x8UName = "i32x4.extadd_pairwise_i16x8_u" + OpcodeVecI32x4AbsName = "i32x4.abs" + OpcodeVecI32x4NegName = "i32x4.neg" + OpcodeVecI32x4AllTrueName = "i32x4.all_true" + OpcodeVecI32x4BitMaskName = "i32x4.bitmask" + OpcodeVecI32x4ExtendLowI16x8SName = "i32x4.extend_low_i16x8_s" + OpcodeVecI32x4ExtendHighI16x8SName = "i32x4.extend_high_i16x8_s" + OpcodeVecI32x4ExtendLowI16x8UName = "i32x4.extend_low_i16x8_u" + OpcodeVecI32x4ExtendHighI16x8UName = "i32x4.extend_high_i16x8_u" + OpcodeVecI32x4ShlName = "i32x4.shl" + 
OpcodeVecI32x4ShrSName = "i32x4.shr_s" + OpcodeVecI32x4ShrUName = "i32x4.shr_u" + OpcodeVecI32x4AddName = "i32x4.add" + OpcodeVecI32x4SubName = "i32x4.sub" + OpcodeVecI32x4MulName = "i32x4.mul" + OpcodeVecI32x4MinSName = "i32x4.min_s" + OpcodeVecI32x4MinUName = "i32x4.min_u" + OpcodeVecI32x4MaxSName = "i32x4.max_s" + OpcodeVecI32x4MaxUName = "i32x4.max_u" + OpcodeVecI32x4DotI16x8SName = "i32x4.dot_i16x8_s" + OpcodeVecI32x4ExtMulLowI16x8SName = "i32x4.extmul_low_i16x8_s" + OpcodeVecI32x4ExtMulHighI16x8SName = "i32x4.extmul_high_i16x8_s" + OpcodeVecI32x4ExtMulLowI16x8UName = "i32x4.extmul_low_i16x8_u" + OpcodeVecI32x4ExtMulHighI16x8UName = "i32x4.extmul_high_i16x8_u" + OpcodeVecI64x2AbsName = "i64x2.abs" + OpcodeVecI64x2NegName = "i64x2.neg" + OpcodeVecI64x2AllTrueName = "i64x2.all_true" + OpcodeVecI64x2BitMaskName = "i64x2.bitmask" + OpcodeVecI64x2ExtendLowI32x4SName = "i64x2.extend_low_i32x4_s" + OpcodeVecI64x2ExtendHighI32x4SName = "i64x2.extend_high_i32x4_s" + OpcodeVecI64x2ExtendLowI32x4UName = "i64x2.extend_low_i32x4_u" + OpcodeVecI64x2ExtendHighI32x4UName = "i64x2.extend_high_i32x4_u" + OpcodeVecI64x2ShlName = "i64x2.shl" + OpcodeVecI64x2ShrSName = "i64x2.shr_s" + OpcodeVecI64x2ShrUName = "i64x2.shr_u" + OpcodeVecI64x2AddName = "i64x2.add" + OpcodeVecI64x2SubName = "i64x2.sub" + OpcodeVecI64x2MulName = "i64x2.mul" + OpcodeVecI64x2ExtMulLowI32x4SName = "i64x2.extmul_low_i32x4_s" + OpcodeVecI64x2ExtMulHighI32x4SName = "i64x2.extmul_high_i32x4_s" + OpcodeVecI64x2ExtMulLowI32x4UName = "i64x2.extmul_low_i32x4_u" + OpcodeVecI64x2ExtMulHighI32x4UName = "i64x2.extmul_high_i32x4_u" + OpcodeVecF32x4CeilName = "f32x4.ceil" + OpcodeVecF32x4FloorName = "f32x4.floor" + OpcodeVecF32x4TruncName = "f32x4.trunc" + OpcodeVecF32x4NearestName = "f32x4.nearest" + OpcodeVecF32x4AbsName = "f32x4.abs" + OpcodeVecF32x4NegName = "f32x4.neg" + OpcodeVecF32x4SqrtName = "f32x4.sqrt" + OpcodeVecF32x4AddName = "f32x4.add" + OpcodeVecF32x4SubName = "f32x4.sub" + OpcodeVecF32x4MulName = 
"f32x4.mul" + OpcodeVecF32x4DivName = "f32x4.div" + OpcodeVecF32x4MinName = "f32x4.min" + OpcodeVecF32x4MaxName = "f32x4.max" + OpcodeVecF32x4PminName = "f32x4.pmin" + OpcodeVecF32x4PmaxName = "f32x4.pmax" + OpcodeVecF64x2CeilName = "f64x2.ceil" + OpcodeVecF64x2FloorName = "f64x2.floor" + OpcodeVecF64x2TruncName = "f64x2.trunc" + OpcodeVecF64x2NearestName = "f64x2.nearest" + OpcodeVecF64x2AbsName = "f64x2.abs" + OpcodeVecF64x2NegName = "f64x2.neg" + OpcodeVecF64x2SqrtName = "f64x2.sqrt" + OpcodeVecF64x2AddName = "f64x2.add" + OpcodeVecF64x2SubName = "f64x2.sub" + OpcodeVecF64x2MulName = "f64x2.mul" + OpcodeVecF64x2DivName = "f64x2.div" + OpcodeVecF64x2MinName = "f64x2.min" + OpcodeVecF64x2MaxName = "f64x2.max" + OpcodeVecF64x2PminName = "f64x2.pmin" + OpcodeVecF64x2PmaxName = "f64x2.pmax" + OpcodeVecI32x4TruncSatF32x4SName = "i32x4.trunc_sat_f32x4_s" + OpcodeVecI32x4TruncSatF32x4UName = "i32x4.trunc_sat_f32x4_u" + OpcodeVecF32x4ConvertI32x4SName = "f32x4.convert_i32x4_s" + OpcodeVecF32x4ConvertI32x4UName = "f32x4.convert_i32x4_u" + OpcodeVecI32x4TruncSatF64x2SZeroName = "i32x4.trunc_sat_f64x2_s_zero" + OpcodeVecI32x4TruncSatF64x2UZeroName = "i32x4.trunc_sat_f64x2_u_zero" + OpcodeVecF64x2ConvertLowI32x4SName = "f64x2.convert_low_i32x4_s" + OpcodeVecF64x2ConvertLowI32x4UName = "f64x2.convert_low_i32x4_u" + OpcodeVecF32x4DemoteF64x2ZeroName = "f32x4.demote_f64x2_zero" + OpcodeVecF64x2PromoteLowF32x4ZeroName = "f64x2.promote_low_f32x4" +) + +var vectorInstructionName = map[OpcodeVec]string{ + OpcodeVecV128Load: OpcodeVecV128LoadName, + OpcodeVecV128Load8x8s: OpcodeVecV128Load8x8SName, + OpcodeVecV128Load8x8u: OpcodeVecV128Load8x8UName, + OpcodeVecV128Load16x4s: OpcodeVecV128Load16x4SName, + OpcodeVecV128Load16x4u: OpcodeVecV128Load16x4UName, + OpcodeVecV128Load32x2s: OpcodeVecV128Load32x2SName, + OpcodeVecV128Load32x2u: OpcodeVecV128Load32x2UName, + OpcodeVecV128Load8Splat: OpcodeVecV128Load8SplatName, + OpcodeVecV128Load16Splat: OpcodeVecV128Load16SplatName, + 
OpcodeVecV128Load32Splat: OpcodeVecV128Load32SplatName, + OpcodeVecV128Load64Splat: OpcodeVecV128Load64SplatName, + OpcodeVecV128Load32zero: OpcodeVecV128Load32zeroName, + OpcodeVecV128Load64zero: OpcodeVecV128Load64zeroName, + OpcodeVecV128Store: OpcodeVecV128StoreName, + OpcodeVecV128Load8Lane: OpcodeVecV128Load8LaneName, + OpcodeVecV128Load16Lane: OpcodeVecV128Load16LaneName, + OpcodeVecV128Load32Lane: OpcodeVecV128Load32LaneName, + OpcodeVecV128Load64Lane: OpcodeVecV128Load64LaneName, + OpcodeVecV128Store8Lane: OpcodeVecV128Store8LaneName, + OpcodeVecV128Store16Lane: OpcodeVecV128Store16LaneName, + OpcodeVecV128Store32Lane: OpcodeVecV128Store32LaneName, + OpcodeVecV128Store64Lane: OpcodeVecV128Store64LaneName, + OpcodeVecV128Const: OpcodeVecV128ConstName, + OpcodeVecV128i8x16Shuffle: OpcodeVecV128i8x16ShuffleName, + OpcodeVecI8x16ExtractLaneS: OpcodeVecI8x16ExtractLaneSName, + OpcodeVecI8x16ExtractLaneU: OpcodeVecI8x16ExtractLaneUName, + OpcodeVecI8x16ReplaceLane: OpcodeVecI8x16ReplaceLaneName, + OpcodeVecI16x8ExtractLaneS: OpcodeVecI16x8ExtractLaneSName, + OpcodeVecI16x8ExtractLaneU: OpcodeVecI16x8ExtractLaneUName, + OpcodeVecI16x8ReplaceLane: OpcodeVecI16x8ReplaceLaneName, + OpcodeVecI32x4ExtractLane: OpcodeVecI32x4ExtractLaneName, + OpcodeVecI32x4ReplaceLane: OpcodeVecI32x4ReplaceLaneName, + OpcodeVecI64x2ExtractLane: OpcodeVecI64x2ExtractLaneName, + OpcodeVecI64x2ReplaceLane: OpcodeVecI64x2ReplaceLaneName, + OpcodeVecF32x4ExtractLane: OpcodeVecF32x4ExtractLaneName, + OpcodeVecF32x4ReplaceLane: OpcodeVecF32x4ReplaceLaneName, + OpcodeVecF64x2ExtractLane: OpcodeVecF64x2ExtractLaneName, + OpcodeVecF64x2ReplaceLane: OpcodeVecF64x2ReplaceLaneName, + OpcodeVecI8x16Swizzle: OpcodeVecI8x16SwizzleName, + OpcodeVecI8x16Splat: OpcodeVecI8x16SplatName, + OpcodeVecI16x8Splat: OpcodeVecI16x8SplatName, + OpcodeVecI32x4Splat: OpcodeVecI32x4SplatName, + OpcodeVecI64x2Splat: OpcodeVecI64x2SplatName, + OpcodeVecF32x4Splat: OpcodeVecF32x4SplatName, + OpcodeVecF64x2Splat: 
OpcodeVecF64x2SplatName, + OpcodeVecI8x16Eq: OpcodeVecI8x16EqName, + OpcodeVecI8x16Ne: OpcodeVecI8x16NeName, + OpcodeVecI8x16LtS: OpcodeVecI8x16LtSName, + OpcodeVecI8x16LtU: OpcodeVecI8x16LtUName, + OpcodeVecI8x16GtS: OpcodeVecI8x16GtSName, + OpcodeVecI8x16GtU: OpcodeVecI8x16GtUName, + OpcodeVecI8x16LeS: OpcodeVecI8x16LeSName, + OpcodeVecI8x16LeU: OpcodeVecI8x16LeUName, + OpcodeVecI8x16GeS: OpcodeVecI8x16GeSName, + OpcodeVecI8x16GeU: OpcodeVecI8x16GeUName, + OpcodeVecI16x8Eq: OpcodeVecI16x8EqName, + OpcodeVecI16x8Ne: OpcodeVecI16x8NeName, + OpcodeVecI16x8LtS: OpcodeVecI16x8LtSName, + OpcodeVecI16x8LtU: OpcodeVecI16x8LtUName, + OpcodeVecI16x8GtS: OpcodeVecI16x8GtSName, + OpcodeVecI16x8GtU: OpcodeVecI16x8GtUName, + OpcodeVecI16x8LeS: OpcodeVecI16x8LeSName, + OpcodeVecI16x8LeU: OpcodeVecI16x8LeUName, + OpcodeVecI16x8GeS: OpcodeVecI16x8GeSName, + OpcodeVecI16x8GeU: OpcodeVecI16x8GeUName, + OpcodeVecI32x4Eq: OpcodeVecI32x4EqName, + OpcodeVecI32x4Ne: OpcodeVecI32x4NeName, + OpcodeVecI32x4LtS: OpcodeVecI32x4LtSName, + OpcodeVecI32x4LtU: OpcodeVecI32x4LtUName, + OpcodeVecI32x4GtS: OpcodeVecI32x4GtSName, + OpcodeVecI32x4GtU: OpcodeVecI32x4GtUName, + OpcodeVecI32x4LeS: OpcodeVecI32x4LeSName, + OpcodeVecI32x4LeU: OpcodeVecI32x4LeUName, + OpcodeVecI32x4GeS: OpcodeVecI32x4GeSName, + OpcodeVecI32x4GeU: OpcodeVecI32x4GeUName, + OpcodeVecI64x2Eq: OpcodeVecI64x2EqName, + OpcodeVecI64x2Ne: OpcodeVecI64x2NeName, + OpcodeVecI64x2LtS: OpcodeVecI64x2LtSName, + OpcodeVecI64x2GtS: OpcodeVecI64x2GtSName, + OpcodeVecI64x2LeS: OpcodeVecI64x2LeSName, + OpcodeVecI64x2GeS: OpcodeVecI64x2GeSName, + OpcodeVecF32x4Eq: OpcodeVecF32x4EqName, + OpcodeVecF32x4Ne: OpcodeVecF32x4NeName, + OpcodeVecF32x4Lt: OpcodeVecF32x4LtName, + OpcodeVecF32x4Gt: OpcodeVecF32x4GtName, + OpcodeVecF32x4Le: OpcodeVecF32x4LeName, + OpcodeVecF32x4Ge: OpcodeVecF32x4GeName, + OpcodeVecF64x2Eq: OpcodeVecF64x2EqName, + OpcodeVecF64x2Ne: OpcodeVecF64x2NeName, + OpcodeVecF64x2Lt: OpcodeVecF64x2LtName, + OpcodeVecF64x2Gt: 
OpcodeVecF64x2GtName, + OpcodeVecF64x2Le: OpcodeVecF64x2LeName, + OpcodeVecF64x2Ge: OpcodeVecF64x2GeName, + OpcodeVecV128Not: OpcodeVecV128NotName, + OpcodeVecV128And: OpcodeVecV128AndName, + OpcodeVecV128AndNot: OpcodeVecV128AndNotName, + OpcodeVecV128Or: OpcodeVecV128OrName, + OpcodeVecV128Xor: OpcodeVecV128XorName, + OpcodeVecV128Bitselect: OpcodeVecV128BitselectName, + OpcodeVecV128AnyTrue: OpcodeVecV128AnyTrueName, + OpcodeVecI8x16Abs: OpcodeVecI8x16AbsName, + OpcodeVecI8x16Neg: OpcodeVecI8x16NegName, + OpcodeVecI8x16Popcnt: OpcodeVecI8x16PopcntName, + OpcodeVecI8x16AllTrue: OpcodeVecI8x16AllTrueName, + OpcodeVecI8x16BitMask: OpcodeVecI8x16BitMaskName, + OpcodeVecI8x16NarrowI16x8S: OpcodeVecI8x16NarrowI16x8SName, + OpcodeVecI8x16NarrowI16x8U: OpcodeVecI8x16NarrowI16x8UName, + OpcodeVecI8x16Shl: OpcodeVecI8x16ShlName, + OpcodeVecI8x16ShrS: OpcodeVecI8x16ShrSName, + OpcodeVecI8x16ShrU: OpcodeVecI8x16ShrUName, + OpcodeVecI8x16Add: OpcodeVecI8x16AddName, + OpcodeVecI8x16AddSatS: OpcodeVecI8x16AddSatSName, + OpcodeVecI8x16AddSatU: OpcodeVecI8x16AddSatUName, + OpcodeVecI8x16Sub: OpcodeVecI8x16SubName, + OpcodeVecI8x16SubSatS: OpcodeVecI8x16SubSatSName, + OpcodeVecI8x16SubSatU: OpcodeVecI8x16SubSatUName, + OpcodeVecI8x16MinS: OpcodeVecI8x16MinSName, + OpcodeVecI8x16MinU: OpcodeVecI8x16MinUName, + OpcodeVecI8x16MaxS: OpcodeVecI8x16MaxSName, + OpcodeVecI8x16MaxU: OpcodeVecI8x16MaxUName, + OpcodeVecI8x16AvgrU: OpcodeVecI8x16AvgrUName, + OpcodeVecI16x8ExtaddPairwiseI8x16S: OpcodeVecI16x8ExtaddPairwiseI8x16SName, + OpcodeVecI16x8ExtaddPairwiseI8x16U: OpcodeVecI16x8ExtaddPairwiseI8x16UName, + OpcodeVecI16x8Abs: OpcodeVecI16x8AbsName, + OpcodeVecI16x8Neg: OpcodeVecI16x8NegName, + OpcodeVecI16x8Q15mulrSatS: OpcodeVecI16x8Q15mulrSatSName, + OpcodeVecI16x8AllTrue: OpcodeVecI16x8AllTrueName, + OpcodeVecI16x8BitMask: OpcodeVecI16x8BitMaskName, + OpcodeVecI16x8NarrowI32x4S: OpcodeVecI16x8NarrowI32x4SName, + OpcodeVecI16x8NarrowI32x4U: OpcodeVecI16x8NarrowI32x4UName, + 
OpcodeVecI16x8ExtendLowI8x16S: OpcodeVecI16x8ExtendLowI8x16SName, + OpcodeVecI16x8ExtendHighI8x16S: OpcodeVecI16x8ExtendHighI8x16SName, + OpcodeVecI16x8ExtendLowI8x16U: OpcodeVecI16x8ExtendLowI8x16UName, + OpcodeVecI16x8ExtendHighI8x16U: OpcodeVecI16x8ExtendHighI8x16UName, + OpcodeVecI16x8Shl: OpcodeVecI16x8ShlName, + OpcodeVecI16x8ShrS: OpcodeVecI16x8ShrSName, + OpcodeVecI16x8ShrU: OpcodeVecI16x8ShrUName, + OpcodeVecI16x8Add: OpcodeVecI16x8AddName, + OpcodeVecI16x8AddSatS: OpcodeVecI16x8AddSatSName, + OpcodeVecI16x8AddSatU: OpcodeVecI16x8AddSatUName, + OpcodeVecI16x8Sub: OpcodeVecI16x8SubName, + OpcodeVecI16x8SubSatS: OpcodeVecI16x8SubSatSName, + OpcodeVecI16x8SubSatU: OpcodeVecI16x8SubSatUName, + OpcodeVecI16x8Mul: OpcodeVecI16x8MulName, + OpcodeVecI16x8MinS: OpcodeVecI16x8MinSName, + OpcodeVecI16x8MinU: OpcodeVecI16x8MinUName, + OpcodeVecI16x8MaxS: OpcodeVecI16x8MaxSName, + OpcodeVecI16x8MaxU: OpcodeVecI16x8MaxUName, + OpcodeVecI16x8AvgrU: OpcodeVecI16x8AvgrUName, + OpcodeVecI16x8ExtMulLowI8x16S: OpcodeVecI16x8ExtMulLowI8x16SName, + OpcodeVecI16x8ExtMulHighI8x16S: OpcodeVecI16x8ExtMulHighI8x16SName, + OpcodeVecI16x8ExtMulLowI8x16U: OpcodeVecI16x8ExtMulLowI8x16UName, + OpcodeVecI16x8ExtMulHighI8x16U: OpcodeVecI16x8ExtMulHighI8x16UName, + OpcodeVecI32x4ExtaddPairwiseI16x8S: OpcodeVecI32x4ExtaddPairwiseI16x8SName, + OpcodeVecI32x4ExtaddPairwiseI16x8U: OpcodeVecI32x4ExtaddPairwiseI16x8UName, + OpcodeVecI32x4Abs: OpcodeVecI32x4AbsName, + OpcodeVecI32x4Neg: OpcodeVecI32x4NegName, + OpcodeVecI32x4AllTrue: OpcodeVecI32x4AllTrueName, + OpcodeVecI32x4BitMask: OpcodeVecI32x4BitMaskName, + OpcodeVecI32x4ExtendLowI16x8S: OpcodeVecI32x4ExtendLowI16x8SName, + OpcodeVecI32x4ExtendHighI16x8S: OpcodeVecI32x4ExtendHighI16x8SName, + OpcodeVecI32x4ExtendLowI16x8U: OpcodeVecI32x4ExtendLowI16x8UName, + OpcodeVecI32x4ExtendHighI16x8U: OpcodeVecI32x4ExtendHighI16x8UName, + OpcodeVecI32x4Shl: OpcodeVecI32x4ShlName, + OpcodeVecI32x4ShrS: OpcodeVecI32x4ShrSName, + OpcodeVecI32x4ShrU: 
OpcodeVecI32x4ShrUName, + OpcodeVecI32x4Add: OpcodeVecI32x4AddName, + OpcodeVecI32x4Sub: OpcodeVecI32x4SubName, + OpcodeVecI32x4Mul: OpcodeVecI32x4MulName, + OpcodeVecI32x4MinS: OpcodeVecI32x4MinSName, + OpcodeVecI32x4MinU: OpcodeVecI32x4MinUName, + OpcodeVecI32x4MaxS: OpcodeVecI32x4MaxSName, + OpcodeVecI32x4MaxU: OpcodeVecI32x4MaxUName, + OpcodeVecI32x4DotI16x8S: OpcodeVecI32x4DotI16x8SName, + OpcodeVecI32x4ExtMulLowI16x8S: OpcodeVecI32x4ExtMulLowI16x8SName, + OpcodeVecI32x4ExtMulHighI16x8S: OpcodeVecI32x4ExtMulHighI16x8SName, + OpcodeVecI32x4ExtMulLowI16x8U: OpcodeVecI32x4ExtMulLowI16x8UName, + OpcodeVecI32x4ExtMulHighI16x8U: OpcodeVecI32x4ExtMulHighI16x8UName, + OpcodeVecI64x2Abs: OpcodeVecI64x2AbsName, + OpcodeVecI64x2Neg: OpcodeVecI64x2NegName, + OpcodeVecI64x2AllTrue: OpcodeVecI64x2AllTrueName, + OpcodeVecI64x2BitMask: OpcodeVecI64x2BitMaskName, + OpcodeVecI64x2ExtendLowI32x4S: OpcodeVecI64x2ExtendLowI32x4SName, + OpcodeVecI64x2ExtendHighI32x4S: OpcodeVecI64x2ExtendHighI32x4SName, + OpcodeVecI64x2ExtendLowI32x4U: OpcodeVecI64x2ExtendLowI32x4UName, + OpcodeVecI64x2ExtendHighI32x4U: OpcodeVecI64x2ExtendHighI32x4UName, + OpcodeVecI64x2Shl: OpcodeVecI64x2ShlName, + OpcodeVecI64x2ShrS: OpcodeVecI64x2ShrSName, + OpcodeVecI64x2ShrU: OpcodeVecI64x2ShrUName, + OpcodeVecI64x2Add: OpcodeVecI64x2AddName, + OpcodeVecI64x2Sub: OpcodeVecI64x2SubName, + OpcodeVecI64x2Mul: OpcodeVecI64x2MulName, + OpcodeVecI64x2ExtMulLowI32x4S: OpcodeVecI64x2ExtMulLowI32x4SName, + OpcodeVecI64x2ExtMulHighI32x4S: OpcodeVecI64x2ExtMulHighI32x4SName, + OpcodeVecI64x2ExtMulLowI32x4U: OpcodeVecI64x2ExtMulLowI32x4UName, + OpcodeVecI64x2ExtMulHighI32x4U: OpcodeVecI64x2ExtMulHighI32x4UName, + OpcodeVecF32x4Ceil: OpcodeVecF32x4CeilName, + OpcodeVecF32x4Floor: OpcodeVecF32x4FloorName, + OpcodeVecF32x4Trunc: OpcodeVecF32x4TruncName, + OpcodeVecF32x4Nearest: OpcodeVecF32x4NearestName, + OpcodeVecF32x4Abs: OpcodeVecF32x4AbsName, + OpcodeVecF32x4Neg: OpcodeVecF32x4NegName, + OpcodeVecF32x4Sqrt: 
OpcodeVecF32x4SqrtName, + OpcodeVecF32x4Add: OpcodeVecF32x4AddName, + OpcodeVecF32x4Sub: OpcodeVecF32x4SubName, + OpcodeVecF32x4Mul: OpcodeVecF32x4MulName, + OpcodeVecF32x4Div: OpcodeVecF32x4DivName, + OpcodeVecF32x4Min: OpcodeVecF32x4MinName, + OpcodeVecF32x4Max: OpcodeVecF32x4MaxName, + OpcodeVecF32x4Pmin: OpcodeVecF32x4PminName, + OpcodeVecF32x4Pmax: OpcodeVecF32x4PmaxName, + OpcodeVecF64x2Ceil: OpcodeVecF64x2CeilName, + OpcodeVecF64x2Floor: OpcodeVecF64x2FloorName, + OpcodeVecF64x2Trunc: OpcodeVecF64x2TruncName, + OpcodeVecF64x2Nearest: OpcodeVecF64x2NearestName, + OpcodeVecF64x2Abs: OpcodeVecF64x2AbsName, + OpcodeVecF64x2Neg: OpcodeVecF64x2NegName, + OpcodeVecF64x2Sqrt: OpcodeVecF64x2SqrtName, + OpcodeVecF64x2Add: OpcodeVecF64x2AddName, + OpcodeVecF64x2Sub: OpcodeVecF64x2SubName, + OpcodeVecF64x2Mul: OpcodeVecF64x2MulName, + OpcodeVecF64x2Div: OpcodeVecF64x2DivName, + OpcodeVecF64x2Min: OpcodeVecF64x2MinName, + OpcodeVecF64x2Max: OpcodeVecF64x2MaxName, + OpcodeVecF64x2Pmin: OpcodeVecF64x2PminName, + OpcodeVecF64x2Pmax: OpcodeVecF64x2PmaxName, + OpcodeVecI32x4TruncSatF32x4S: OpcodeVecI32x4TruncSatF32x4SName, + OpcodeVecI32x4TruncSatF32x4U: OpcodeVecI32x4TruncSatF32x4UName, + OpcodeVecF32x4ConvertI32x4S: OpcodeVecF32x4ConvertI32x4SName, + OpcodeVecF32x4ConvertI32x4U: OpcodeVecF32x4ConvertI32x4UName, + OpcodeVecI32x4TruncSatF64x2SZero: OpcodeVecI32x4TruncSatF64x2SZeroName, + OpcodeVecI32x4TruncSatF64x2UZero: OpcodeVecI32x4TruncSatF64x2UZeroName, + OpcodeVecF64x2ConvertLowI32x4S: OpcodeVecF64x2ConvertLowI32x4SName, + OpcodeVecF64x2ConvertLowI32x4U: OpcodeVecF64x2ConvertLowI32x4UName, + OpcodeVecF32x4DemoteF64x2Zero: OpcodeVecF32x4DemoteF64x2ZeroName, + OpcodeVecF64x2PromoteLowF32x4Zero: OpcodeVecF64x2PromoteLowF32x4ZeroName, +} + +// VectorInstructionName returns the instruction name corresponding to the vector Opcode. 
+func VectorInstructionName(oc OpcodeVec) (ret string) { + return vectorInstructionName[oc] +} + +const ( + OpcodeAtomicMemoryNotifyName = "memory.atomic.notify" + OpcodeAtomicMemoryWait32Name = "memory.atomic.wait32" + OpcodeAtomicMemoryWait64Name = "memory.atomic.wait64" + OpcodeAtomicFenceName = "atomic.fence" + + OpcodeAtomicI32LoadName = "i32.atomic.load" + OpcodeAtomicI64LoadName = "i64.atomic.load" + OpcodeAtomicI32Load8UName = "i32.atomic.load8_u" + OpcodeAtomicI32Load16UName = "i32.atomic.load16_u" + OpcodeAtomicI64Load8UName = "i64.atomic.load8_u" + OpcodeAtomicI64Load16UName = "i64.atomic.load16_u" + OpcodeAtomicI64Load32UName = "i64.atomic.load32_u" + OpcodeAtomicI32StoreName = "i32.atomic.store" + OpcodeAtomicI64StoreName = "i64.atomic.store" + OpcodeAtomicI32Store8Name = "i32.atomic.store8" + OpcodeAtomicI32Store16Name = "i32.atomic.store16" + OpcodeAtomicI64Store8Name = "i64.atomic.store8" + OpcodeAtomicI64Store16Name = "i64.atomic.store16" + OpcodeAtomicI64Store32Name = "i64.atomic.store32" + + OpcodeAtomicI32RmwAddName = "i32.atomic.rmw.add" + OpcodeAtomicI64RmwAddName = "i64.atomic.rmw.add" + OpcodeAtomicI32Rmw8AddUName = "i32.atomic.rmw8.add_u" + OpcodeAtomicI32Rmw16AddUName = "i32.atomic.rmw16.add_u" + OpcodeAtomicI64Rmw8AddUName = "i64.atomic.rmw8.add_u" + OpcodeAtomicI64Rmw16AddUName = "i64.atomic.rmw16.add_u" + OpcodeAtomicI64Rmw32AddUName = "i64.atomic.rmw32.add_u" + + OpcodeAtomicI32RmwSubName = "i32.atomic.rmw.sub" + OpcodeAtomicI64RmwSubName = "i64.atomic.rmw.sub" + OpcodeAtomicI32Rmw8SubUName = "i32.atomic.rmw8.sub_u" + OpcodeAtomicI32Rmw16SubUName = "i32.atomic.rmw16.sub_u" + OpcodeAtomicI64Rmw8SubUName = "i64.atomic.rmw8.sub_u" + OpcodeAtomicI64Rmw16SubUName = "i64.atomic.rmw16.sub_u" + OpcodeAtomicI64Rmw32SubUName = "i64.atomic.rmw32.sub_u" + + OpcodeAtomicI32RmwAndName = "i32.atomic.rmw.and" + OpcodeAtomicI64RmwAndName = "i64.atomic.rmw.and" + OpcodeAtomicI32Rmw8AndUName = "i32.atomic.rmw8.and_u" + OpcodeAtomicI32Rmw16AndUName = 
"i32.atomic.rmw16.and_u" + OpcodeAtomicI64Rmw8AndUName = "i64.atomic.rmw8.and_u" + OpcodeAtomicI64Rmw16AndUName = "i64.atomic.rmw16.and_u" + OpcodeAtomicI64Rmw32AndUName = "i64.atomic.rmw32.and_u" + + OpcodeAtomicI32RmwOrName = "i32.atomic.rmw.or" + OpcodeAtomicI64RmwOrName = "i64.atomic.rmw.or" + OpcodeAtomicI32Rmw8OrUName = "i32.atomic.rmw8.or_u" + OpcodeAtomicI32Rmw16OrUName = "i32.atomic.rmw16.or_u" + OpcodeAtomicI64Rmw8OrUName = "i64.atomic.rmw8.or_u" + OpcodeAtomicI64Rmw16OrUName = "i64.atomic.rmw16.or_u" + OpcodeAtomicI64Rmw32OrUName = "i64.atomic.rmw32.or_u" + + OpcodeAtomicI32RmwXorName = "i32.atomic.rmw.xor" + OpcodeAtomicI64RmwXorName = "i64.atomic.rmw.xor" + OpcodeAtomicI32Rmw8XorUName = "i32.atomic.rmw8.xor_u" + OpcodeAtomicI32Rmw16XorUName = "i32.atomic.rmw16.xor_u" + OpcodeAtomicI64Rmw8XorUName = "i64.atomic.rmw8.xor_u" + OpcodeAtomicI64Rmw16XorUName = "i64.atomic.rmw16.xor_u" + OpcodeAtomicI64Rmw32XorUName = "i64.atomic.rmw32.xor_u" + + OpcodeAtomicI32RmwXchgName = "i32.atomic.rmw.xchg" + OpcodeAtomicI64RmwXchgName = "i64.atomic.rmw.xchg" + OpcodeAtomicI32Rmw8XchgUName = "i32.atomic.rmw8.xchg_u" + OpcodeAtomicI32Rmw16XchgUName = "i32.atomic.rmw16.xchg_u" + OpcodeAtomicI64Rmw8XchgUName = "i64.atomic.rmw8.xchg_u" + OpcodeAtomicI64Rmw16XchgUName = "i64.atomic.rmw16.xchg_u" + OpcodeAtomicI64Rmw32XchgUName = "i64.atomic.rmw32.xchg_u" + + OpcodeAtomicI32RmwCmpxchgName = "i32.atomic.rmw.cmpxchg" + OpcodeAtomicI64RmwCmpxchgName = "i64.atomic.rmw.cmpxchg" + OpcodeAtomicI32Rmw8CmpxchgUName = "i32.atomic.rmw8.cmpxchg_u" + OpcodeAtomicI32Rmw16CmpxchgUName = "i32.atomic.rmw16.cmpxchg_u" + OpcodeAtomicI64Rmw8CmpxchgUName = "i64.atomic.rmw8.cmpxchg_u" + OpcodeAtomicI64Rmw16CmpxchgUName = "i64.atomic.rmw16.cmpxchg_u" + OpcodeAtomicI64Rmw32CmpxchgUName = "i64.atomic.rmw32.cmpxchg_u" +) + +var atomicInstructionName = map[OpcodeAtomic]string{ + OpcodeAtomicMemoryNotify: OpcodeAtomicMemoryNotifyName, + OpcodeAtomicMemoryWait32: OpcodeAtomicMemoryWait32Name, + 
OpcodeAtomicMemoryWait64: OpcodeAtomicMemoryWait64Name, + OpcodeAtomicFence: OpcodeAtomicFenceName, + + OpcodeAtomicI32Load: OpcodeAtomicI32LoadName, + OpcodeAtomicI64Load: OpcodeAtomicI64LoadName, + OpcodeAtomicI32Load8U: OpcodeAtomicI32Load8UName, + OpcodeAtomicI32Load16U: OpcodeAtomicI32Load16UName, + OpcodeAtomicI64Load8U: OpcodeAtomicI64Load8UName, + OpcodeAtomicI64Load16U: OpcodeAtomicI64Load16UName, + OpcodeAtomicI64Load32U: OpcodeAtomicI64Load32UName, + OpcodeAtomicI32Store: OpcodeAtomicI32StoreName, + OpcodeAtomicI64Store: OpcodeAtomicI64StoreName, + OpcodeAtomicI32Store8: OpcodeAtomicI32Store8Name, + OpcodeAtomicI32Store16: OpcodeAtomicI32Store16Name, + OpcodeAtomicI64Store8: OpcodeAtomicI64Store8Name, + OpcodeAtomicI64Store16: OpcodeAtomicI64Store16Name, + OpcodeAtomicI64Store32: OpcodeAtomicI64Store32Name, + + OpcodeAtomicI32RmwAdd: OpcodeAtomicI32RmwAddName, + OpcodeAtomicI64RmwAdd: OpcodeAtomicI64RmwAddName, + OpcodeAtomicI32Rmw8AddU: OpcodeAtomicI32Rmw8AddUName, + OpcodeAtomicI32Rmw16AddU: OpcodeAtomicI32Rmw16AddUName, + OpcodeAtomicI64Rmw8AddU: OpcodeAtomicI64Rmw8AddUName, + OpcodeAtomicI64Rmw16AddU: OpcodeAtomicI64Rmw16AddUName, + OpcodeAtomicI64Rmw32AddU: OpcodeAtomicI64Rmw32AddUName, + + OpcodeAtomicI32RmwSub: OpcodeAtomicI32RmwSubName, + OpcodeAtomicI64RmwSub: OpcodeAtomicI64RmwSubName, + OpcodeAtomicI32Rmw8SubU: OpcodeAtomicI32Rmw8SubUName, + OpcodeAtomicI32Rmw16SubU: OpcodeAtomicI32Rmw16SubUName, + OpcodeAtomicI64Rmw8SubU: OpcodeAtomicI64Rmw8SubUName, + OpcodeAtomicI64Rmw16SubU: OpcodeAtomicI64Rmw16SubUName, + OpcodeAtomicI64Rmw32SubU: OpcodeAtomicI64Rmw32SubUName, + + OpcodeAtomicI32RmwAnd: OpcodeAtomicI32RmwAndName, + OpcodeAtomicI64RmwAnd: OpcodeAtomicI64RmwAndName, + OpcodeAtomicI32Rmw8AndU: OpcodeAtomicI32Rmw8AndUName, + OpcodeAtomicI32Rmw16AndU: OpcodeAtomicI32Rmw16AndUName, + OpcodeAtomicI64Rmw8AndU: OpcodeAtomicI64Rmw8AndUName, + OpcodeAtomicI64Rmw16AndU: OpcodeAtomicI64Rmw16AndUName, + OpcodeAtomicI64Rmw32AndU: 
OpcodeAtomicI64Rmw32AndUName, + + OpcodeAtomicI32RmwOr: OpcodeAtomicI32RmwOrName, + OpcodeAtomicI64RmwOr: OpcodeAtomicI64RmwOrName, + OpcodeAtomicI32Rmw8OrU: OpcodeAtomicI32Rmw8OrUName, + OpcodeAtomicI32Rmw16OrU: OpcodeAtomicI32Rmw16OrUName, + OpcodeAtomicI64Rmw8OrU: OpcodeAtomicI64Rmw8OrUName, + OpcodeAtomicI64Rmw16OrU: OpcodeAtomicI64Rmw16OrUName, + OpcodeAtomicI64Rmw32OrU: OpcodeAtomicI64Rmw32OrUName, + + OpcodeAtomicI32RmwXor: OpcodeAtomicI32RmwXorName, + OpcodeAtomicI64RmwXor: OpcodeAtomicI64RmwXorName, + OpcodeAtomicI32Rmw8XorU: OpcodeAtomicI32Rmw8XorUName, + OpcodeAtomicI32Rmw16XorU: OpcodeAtomicI32Rmw16XorUName, + OpcodeAtomicI64Rmw8XorU: OpcodeAtomicI64Rmw8XorUName, + OpcodeAtomicI64Rmw16XorU: OpcodeAtomicI64Rmw16XorUName, + OpcodeAtomicI64Rmw32XorU: OpcodeAtomicI64Rmw32XorUName, + + OpcodeAtomicI32RmwXchg: OpcodeAtomicI32RmwXchgName, + OpcodeAtomicI64RmwXchg: OpcodeAtomicI64RmwXchgName, + OpcodeAtomicI32Rmw8XchgU: OpcodeAtomicI32Rmw8XchgUName, + OpcodeAtomicI32Rmw16XchgU: OpcodeAtomicI32Rmw16XchgUName, + OpcodeAtomicI64Rmw8XchgU: OpcodeAtomicI64Rmw8XchgUName, + OpcodeAtomicI64Rmw16XchgU: OpcodeAtomicI64Rmw16XchgUName, + OpcodeAtomicI64Rmw32XchgU: OpcodeAtomicI64Rmw32XchgUName, + + OpcodeAtomicI32RmwCmpxchg: OpcodeAtomicI32RmwCmpxchgName, + OpcodeAtomicI64RmwCmpxchg: OpcodeAtomicI64RmwCmpxchgName, + OpcodeAtomicI32Rmw8CmpxchgU: OpcodeAtomicI32Rmw8CmpxchgUName, + OpcodeAtomicI32Rmw16CmpxchgU: OpcodeAtomicI32Rmw16CmpxchgUName, + OpcodeAtomicI64Rmw8CmpxchgU: OpcodeAtomicI64Rmw8CmpxchgUName, + OpcodeAtomicI64Rmw16CmpxchgU: OpcodeAtomicI64Rmw16CmpxchgUName, + OpcodeAtomicI64Rmw32CmpxchgU: OpcodeAtomicI64Rmw32CmpxchgUName, +} + +// AtomicInstructionName returns the instruction name corresponding to the atomic Opcode. 
+func AtomicInstructionName(oc OpcodeAtomic) (ret string) { + return atomicInstructionName[oc] +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go new file mode 100644 index 000000000..5cc5012da --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go @@ -0,0 +1,461 @@ +package wasm + +import ( + "container/list" + "encoding/binary" + "fmt" + "math" + "reflect" + "sync" + "sync/atomic" + "time" + "unsafe" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/internalapi" + "github.com/tetratelabs/wazero/internal/wasmruntime" +) + +const ( + // MemoryPageSize is the unit of memory length in WebAssembly, + // and is defined as 2^16 = 65536. + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-instances%E2%91%A0 + MemoryPageSize = uint32(65536) + // MemoryLimitPages is maximum number of pages defined (2^16). + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem + MemoryLimitPages = uint32(65536) + // MemoryPageSizeInBits satisfies the relation: "1 << MemoryPageSizeInBits == MemoryPageSize". + MemoryPageSizeInBits = 16 +) + +// compile-time check to ensure MemoryInstance implements api.Memory +var _ api.Memory = &MemoryInstance{} + +type waiters struct { + mux sync.Mutex + l *list.List +} + +// MemoryInstance represents a memory instance in a store, and implements api.Memory. +// +// Note: In WebAssembly 1.0 (20191205), there may be up to one Memory per store, which means the precise memory is always +// wasm.Store Memories index zero: `store.Memories[0]` +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-instances%E2%91%A0. +type MemoryInstance struct { + internalapi.WazeroOnlyType + + Buffer []byte + Min, Cap, Max uint32 + Shared bool + // definition is known at compile time. 
+ definition api.MemoryDefinition + + // Mux is used in interpreter mode to prevent overlapping calls to atomic instructions, + // introduced with WebAssembly threads proposal. + Mux sync.Mutex + + // waiters implements atomic wait and notify. It is implemented similarly to golang.org/x/sync/semaphore, + // with a fixed weight of 1 and no spurious notifications. + waiters sync.Map + + expBuffer experimental.LinearMemory +} + +// NewMemoryInstance creates a new instance based on the parameters in the SectionIDMemory. +func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) *MemoryInstance { + minBytes := MemoryPagesToBytesNum(memSec.Min) + capBytes := MemoryPagesToBytesNum(memSec.Cap) + maxBytes := MemoryPagesToBytesNum(memSec.Max) + + var buffer []byte + var expBuffer experimental.LinearMemory + if allocator != nil { + expBuffer = allocator.Allocate(capBytes, maxBytes) + buffer = expBuffer.Reallocate(minBytes) + } else if memSec.IsShared { + // Shared memory needs a fixed buffer, so allocate with the maximum size. + // + // The rationale as to why we can simply use make([]byte) to a fixed buffer is that Go's GC is non-relocating. + // That is not a part of Go spec, but is well-known thing in Go community (wazero's compiler heavily relies on it!) + // * https://github.com/go4org/unsafe-assume-no-moving-gc + // + // Also, allocating Max here isn't harmful as the Go runtime uses mmap for large allocations, therefore, + // the memory buffer allocation here is virtual and doesn't consume physical memory until it's used. 
+ // * https://github.com/golang/go/blob/8121604559035734c9677d5281bbdac8b1c17a1e/src/runtime/malloc.go#L1059 + // * https://github.com/golang/go/blob/8121604559035734c9677d5281bbdac8b1c17a1e/src/runtime/malloc.go#L1165 + buffer = make([]byte, minBytes, maxBytes) + } else { + buffer = make([]byte, minBytes, capBytes) + } + return &MemoryInstance{ + Buffer: buffer, + Min: memSec.Min, + Cap: memoryBytesNumToPages(uint64(cap(buffer))), + Max: memSec.Max, + Shared: memSec.IsShared, + expBuffer: expBuffer, + } +} + +// Definition implements the same method as documented on api.Memory. +func (m *MemoryInstance) Definition() api.MemoryDefinition { + return m.definition +} + +// Size implements the same method as documented on api.Memory. +func (m *MemoryInstance) Size() uint32 { + return uint32(len(m.Buffer)) +} + +// ReadByte implements the same method as documented on api.Memory. +func (m *MemoryInstance) ReadByte(offset uint32) (byte, bool) { + if !m.hasSize(offset, 1) { + return 0, false + } + return m.Buffer[offset], true +} + +// ReadUint16Le implements the same method as documented on api.Memory. +func (m *MemoryInstance) ReadUint16Le(offset uint32) (uint16, bool) { + if !m.hasSize(offset, 2) { + return 0, false + } + return binary.LittleEndian.Uint16(m.Buffer[offset : offset+2]), true +} + +// ReadUint32Le implements the same method as documented on api.Memory. +func (m *MemoryInstance) ReadUint32Le(offset uint32) (uint32, bool) { + return m.readUint32Le(offset) +} + +// ReadFloat32Le implements the same method as documented on api.Memory. +func (m *MemoryInstance) ReadFloat32Le(offset uint32) (float32, bool) { + v, ok := m.readUint32Le(offset) + if !ok { + return 0, false + } + return math.Float32frombits(v), true +} + +// ReadUint64Le implements the same method as documented on api.Memory. 
+func (m *MemoryInstance) ReadUint64Le(offset uint32) (uint64, bool) { + return m.readUint64Le(offset) +} + +// ReadFloat64Le implements the same method as documented on api.Memory. +func (m *MemoryInstance) ReadFloat64Le(offset uint32) (float64, bool) { + v, ok := m.readUint64Le(offset) + if !ok { + return 0, false + } + return math.Float64frombits(v), true +} + +// Read implements the same method as documented on api.Memory. +func (m *MemoryInstance) Read(offset, byteCount uint32) ([]byte, bool) { + if !m.hasSize(offset, uint64(byteCount)) { + return nil, false + } + return m.Buffer[offset : offset+byteCount : offset+byteCount], true +} + +// WriteByte implements the same method as documented on api.Memory. +func (m *MemoryInstance) WriteByte(offset uint32, v byte) bool { + if !m.hasSize(offset, 1) { + return false + } + m.Buffer[offset] = v + return true +} + +// WriteUint16Le implements the same method as documented on api.Memory. +func (m *MemoryInstance) WriteUint16Le(offset uint32, v uint16) bool { + if !m.hasSize(offset, 2) { + return false + } + binary.LittleEndian.PutUint16(m.Buffer[offset:], v) + return true +} + +// WriteUint32Le implements the same method as documented on api.Memory. +func (m *MemoryInstance) WriteUint32Le(offset, v uint32) bool { + return m.writeUint32Le(offset, v) +} + +// WriteFloat32Le implements the same method as documented on api.Memory. +func (m *MemoryInstance) WriteFloat32Le(offset uint32, v float32) bool { + return m.writeUint32Le(offset, math.Float32bits(v)) +} + +// WriteUint64Le implements the same method as documented on api.Memory. +func (m *MemoryInstance) WriteUint64Le(offset uint32, v uint64) bool { + return m.writeUint64Le(offset, v) +} + +// WriteFloat64Le implements the same method as documented on api.Memory. +func (m *MemoryInstance) WriteFloat64Le(offset uint32, v float64) bool { + return m.writeUint64Le(offset, math.Float64bits(v)) +} + +// Write implements the same method as documented on api.Memory. 
+func (m *MemoryInstance) Write(offset uint32, val []byte) bool { + if !m.hasSize(offset, uint64(len(val))) { + return false + } + copy(m.Buffer[offset:], val) + return true +} + +// WriteString implements the same method as documented on api.Memory. +func (m *MemoryInstance) WriteString(offset uint32, val string) bool { + if !m.hasSize(offset, uint64(len(val))) { + return false + } + copy(m.Buffer[offset:], val) + return true +} + +// MemoryPagesToBytesNum converts the given pages into the number of bytes contained in these pages. +func MemoryPagesToBytesNum(pages uint32) (bytesNum uint64) { + return uint64(pages) << MemoryPageSizeInBits +} + +// Grow implements the same method as documented on api.Memory. +func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) { + currentPages := m.Pages() + if delta == 0 { + return currentPages, true + } + + // If exceeds the max of memory size, we push -1 according to the spec. + newPages := currentPages + delta + if newPages > m.Max || int32(delta) < 0 { + return 0, false + } else if m.expBuffer != nil { + buffer := m.expBuffer.Reallocate(MemoryPagesToBytesNum(newPages)) + if m.Shared { + if unsafe.SliceData(buffer) != unsafe.SliceData(m.Buffer) { + panic("shared memory cannot move, this is a bug in the memory allocator") + } + // We assume grow is called under a guest lock. + // But the memory length is accessed elsewhere, + // so use atomic to make the new length visible across threads. + atomicStoreLengthAndCap(&m.Buffer, uintptr(len(buffer)), uintptr(cap(buffer))) + m.Cap = memoryBytesNumToPages(uint64(cap(buffer))) + } else { + m.Buffer = buffer + m.Cap = newPages + } + return currentPages, true + } else if newPages > m.Cap { // grow the memory. + if m.Shared { + panic("shared memory cannot be grown, this is a bug in wazero") + } + m.Buffer = append(m.Buffer, make([]byte, MemoryPagesToBytesNum(delta))...) + m.Cap = newPages + return currentPages, true + } else { // We already have the capacity we need. 
+ if m.Shared { + // We assume grow is called under a guest lock. + // But the memory length is accessed elsewhere, + // so use atomic to make the new length visible across threads. + atomicStoreLength(&m.Buffer, uintptr(MemoryPagesToBytesNum(newPages))) + } else { + m.Buffer = m.Buffer[:MemoryPagesToBytesNum(newPages)] + } + return currentPages, true + } +} + +// Pages implements the same method as documented on api.Memory. +func (m *MemoryInstance) Pages() (result uint32) { + return memoryBytesNumToPages(uint64(len(m.Buffer))) +} + +// PagesToUnitOfBytes converts the pages to a human-readable form similar to what's specified. e.g. 1 -> "64Ki" +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-instances%E2%91%A0 +func PagesToUnitOfBytes(pages uint32) string { + k := pages * 64 + if k < 1024 { + return fmt.Sprintf("%d Ki", k) + } + m := k / 1024 + if m < 1024 { + return fmt.Sprintf("%d Mi", m) + } + g := m / 1024 + if g < 1024 { + return fmt.Sprintf("%d Gi", g) + } + return fmt.Sprintf("%d Ti", g/1024) +} + +// Below are raw functions used to implement the api.Memory API: + +// Uses atomic write to update the length of a slice. +func atomicStoreLengthAndCap(slice *[]byte, length uintptr, cap uintptr) { + slicePtr := (*reflect.SliceHeader)(unsafe.Pointer(slice)) + capPtr := (*uintptr)(unsafe.Pointer(&slicePtr.Cap)) + atomic.StoreUintptr(capPtr, cap) + lenPtr := (*uintptr)(unsafe.Pointer(&slicePtr.Len)) + atomic.StoreUintptr(lenPtr, length) +} + +// Uses atomic write to update the length of a slice. +func atomicStoreLength(slice *[]byte, length uintptr) { + slicePtr := (*reflect.SliceHeader)(unsafe.Pointer(slice)) + lenPtr := (*uintptr)(unsafe.Pointer(&slicePtr.Len)) + atomic.StoreUintptr(lenPtr, length) +} + +// memoryBytesNumToPages converts the given number of bytes into the number of pages. 
+func memoryBytesNumToPages(bytesNum uint64) (pages uint32) { + return uint32(bytesNum >> MemoryPageSizeInBits) +} + +// hasSize returns true if Len is sufficient for byteCount at the given offset. +// +// Note: This is always fine, because memory can grow, but never shrink. +func (m *MemoryInstance) hasSize(offset uint32, byteCount uint64) bool { + return uint64(offset)+byteCount <= uint64(len(m.Buffer)) // uint64 prevents overflow on add +} + +// readUint32Le implements ReadUint32Le without using a context. This is extracted as both ints and floats are stored in +// memory as uint32le. +func (m *MemoryInstance) readUint32Le(offset uint32) (uint32, bool) { + if !m.hasSize(offset, 4) { + return 0, false + } + return binary.LittleEndian.Uint32(m.Buffer[offset : offset+4]), true +} + +// readUint64Le implements ReadUint64Le without using a context. This is extracted as both ints and floats are stored in +// memory as uint64le. +func (m *MemoryInstance) readUint64Le(offset uint32) (uint64, bool) { + if !m.hasSize(offset, 8) { + return 0, false + } + return binary.LittleEndian.Uint64(m.Buffer[offset : offset+8]), true +} + +// writeUint32Le implements WriteUint32Le without using a context. This is extracted as both ints and floats are stored +// in memory as uint32le. +func (m *MemoryInstance) writeUint32Le(offset uint32, v uint32) bool { + if !m.hasSize(offset, 4) { + return false + } + binary.LittleEndian.PutUint32(m.Buffer[offset:], v) + return true +} + +// writeUint64Le implements WriteUint64Le without using a context. This is extracted as both ints and floats are stored +// in memory as uint64le. +func (m *MemoryInstance) writeUint64Le(offset uint32, v uint64) bool { + if !m.hasSize(offset, 8) { + return false + } + binary.LittleEndian.PutUint64(m.Buffer[offset:], v) + return true +} + +// Wait32 suspends the caller until the offset is notified by a different agent. 
+func (m *MemoryInstance) Wait32(offset uint32, exp uint32, timeout int64, reader func(mem *MemoryInstance, offset uint32) uint32) uint64 { + w := m.getWaiters(offset) + w.mux.Lock() + + cur := reader(m, offset) + if cur != exp { + w.mux.Unlock() + return 1 + } + + return m.wait(w, timeout) +} + +// Wait64 suspends the caller until the offset is notified by a different agent. +func (m *MemoryInstance) Wait64(offset uint32, exp uint64, timeout int64, reader func(mem *MemoryInstance, offset uint32) uint64) uint64 { + w := m.getWaiters(offset) + w.mux.Lock() + + cur := reader(m, offset) + if cur != exp { + w.mux.Unlock() + return 1 + } + + return m.wait(w, timeout) +} + +func (m *MemoryInstance) wait(w *waiters, timeout int64) uint64 { + if w.l == nil { + w.l = list.New() + } + + // The specification requires a trap if the number of existing waiters + 1 == 2^32, so we add a check here. + // In practice, it is unlikely the application would ever accumulate such a large number of waiters as it + // indicates several GB of RAM used just for the list of waiters. + // https://github.com/WebAssembly/threads/blob/main/proposals/threads/Overview.md#wait + if uint64(w.l.Len()+1) == 1<<32 { + w.mux.Unlock() + panic(wasmruntime.ErrRuntimeTooManyWaiters) + } + + ready := make(chan struct{}) + elem := w.l.PushBack(ready) + w.mux.Unlock() + + if timeout < 0 { + <-ready + return 0 + } else { + select { + case <-ready: + return 0 + case <-time.After(time.Duration(timeout)): + // While we could see if the channel completed by now and ignore the timeout, similar to x/sync/semaphore, + // the Wasm spec doesn't specify this behavior, so we keep things simple by prioritizing the timeout. + w.mux.Lock() + w.l.Remove(elem) + w.mux.Unlock() + return 2 + } + } +} + +func (m *MemoryInstance) getWaiters(offset uint32) *waiters { + wAny, ok := m.waiters.Load(offset) + if !ok { + // The first time an address is waited on, simultaneous waits will cause extra allocations. 
+ // Further operations will be loaded above, which is also the general pattern of usage with + // mutexes. + wAny, _ = m.waiters.LoadOrStore(offset, &waiters{}) + } + + return wAny.(*waiters) +} + +// Notify wakes up at most count waiters at the given offset. +func (m *MemoryInstance) Notify(offset uint32, count uint32) uint32 { + wAny, ok := m.waiters.Load(offset) + if !ok { + return 0 + } + w := wAny.(*waiters) + + w.mux.Lock() + defer w.mux.Unlock() + if w.l == nil { + return 0 + } + + res := uint32(0) + for num := w.l.Len(); num > 0 && res < count; num = w.l.Len() { + w := w.l.Remove(w.l.Front()).(chan struct{}) + close(w) + res++ + } + + return res +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory_definition.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory_definition.go new file mode 100644 index 000000000..03d6fd303 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory_definition.go @@ -0,0 +1,128 @@ +package wasm + +import ( + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/internalapi" +) + +// ImportedMemories implements the same method as documented on wazero.CompiledModule. +func (m *Module) ImportedMemories() (ret []api.MemoryDefinition) { + for i := range m.MemoryDefinitionSection { + d := &m.MemoryDefinitionSection[i] + if d.importDesc != nil { + ret = append(ret, d) + } + } + return +} + +// ExportedMemories implements the same method as documented on wazero.CompiledModule. +func (m *Module) ExportedMemories() map[string]api.MemoryDefinition { + ret := map[string]api.MemoryDefinition{} + for i := range m.MemoryDefinitionSection { + d := &m.MemoryDefinitionSection[i] + for _, e := range d.exportNames { + ret[e] = d + } + } + return ret +} + +// BuildMemoryDefinitions generates memory metadata that can be parsed from +// the module. This must be called after all validation. +// +// Note: This is exported for wazero.Runtime `CompileModule`. 
+func (m *Module) BuildMemoryDefinitions() { + var moduleName string + if m.NameSection != nil { + moduleName = m.NameSection.ModuleName + } + + memoryCount := m.ImportMemoryCount + if m.MemorySection != nil { + memoryCount++ + } + + if memoryCount == 0 { + return + } + + m.MemoryDefinitionSection = make([]MemoryDefinition, 0, memoryCount) + importMemIdx := Index(0) + for i := range m.ImportSection { + imp := &m.ImportSection[i] + if imp.Type != ExternTypeMemory { + continue + } + + m.MemoryDefinitionSection = append(m.MemoryDefinitionSection, MemoryDefinition{ + importDesc: &[2]string{imp.Module, imp.Name}, + index: importMemIdx, + memory: imp.DescMem, + }) + importMemIdx++ + } + + if m.MemorySection != nil { + m.MemoryDefinitionSection = append(m.MemoryDefinitionSection, MemoryDefinition{ + index: importMemIdx, + memory: m.MemorySection, + }) + } + + for i := range m.MemoryDefinitionSection { + d := &m.MemoryDefinitionSection[i] + d.moduleName = moduleName + for i := range m.ExportSection { + e := &m.ExportSection[i] + if e.Type == ExternTypeMemory && e.Index == d.index { + d.exportNames = append(d.exportNames, e.Name) + } + } + } +} + +// MemoryDefinition implements api.MemoryDefinition +type MemoryDefinition struct { + internalapi.WazeroOnlyType + moduleName string + index Index + importDesc *[2]string + exportNames []string + memory *Memory +} + +// ModuleName implements the same method as documented on api.MemoryDefinition. +func (f *MemoryDefinition) ModuleName() string { + return f.moduleName +} + +// Index implements the same method as documented on api.MemoryDefinition. +func (f *MemoryDefinition) Index() uint32 { + return f.index +} + +// Import implements the same method as documented on api.MemoryDefinition. 
+func (f *MemoryDefinition) Import() (moduleName, name string, isImport bool) { + if importDesc := f.importDesc; importDesc != nil { + moduleName, name, isImport = importDesc[0], importDesc[1], true + } + return +} + +// ExportNames implements the same method as documented on api.MemoryDefinition. +func (f *MemoryDefinition) ExportNames() []string { + return f.exportNames +} + +// Min implements the same method as documented on api.MemoryDefinition. +func (f *MemoryDefinition) Min() uint32 { + return f.memory.Min +} + +// Max implements the same method as documented on api.MemoryDefinition. +func (f *MemoryDefinition) Max() (max uint32, encoded bool) { + max = f.memory.Max + encoded = f.memory.IsMaxEncoded + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go new file mode 100644 index 000000000..68573b918 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go @@ -0,0 +1,1083 @@ +package wasm + +import ( + "bytes" + "crypto/sha256" + "encoding/binary" + "errors" + "fmt" + "io" + "sort" + "strings" + "sync" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/ieee754" + "github.com/tetratelabs/wazero/internal/leb128" + "github.com/tetratelabs/wazero/internal/wasmdebug" +) + +// Module is a WebAssembly binary representation. +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#modules%E2%91%A8 +// +// Differences from the specification: +// * NameSection is the only key ("name") decoded from the SectionIDCustom. +// * ExportSection is represented as a map for lookup convenience. +// * Code.GoFunc is contains any go `func`. It may be present when Code.Body is not. +type Module struct { + // TypeSection contains the unique FunctionType of functions imported or defined in this module. 
+ // + // Note: Currently, there is no type ambiguity in the index as WebAssembly 1.0 only defines function type. + // In the future, other types may be introduced to support CoreFeatures such as module linking. + // + // Note: In the Binary Format, this is SectionIDType. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#types%E2%91%A0%E2%91%A0 + TypeSection []FunctionType + + // ImportSection contains imported functions, tables, memories or globals required for instantiation + // (Store.Instantiate). + // + // Note: there are no unique constraints relating to the two-level namespace of Import.Module and Import.Name. + // + // Note: In the Binary Format, this is SectionIDImport. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#import-section%E2%91%A0 + ImportSection []Import + // ImportFunctionCount ImportGlobalCount ImportMemoryCount, and ImportTableCount are + // the cached import count per ExternType set during decoding. + ImportFunctionCount, + ImportGlobalCount, + ImportMemoryCount, + ImportTableCount Index + // ImportPerModule maps a module name to the list of Import to be imported from the module. + // This is used to do fast import resolution during instantiation. + ImportPerModule map[string][]*Import + + // FunctionSection contains the index in TypeSection of each function defined in this module. + // + // Note: The function Index space begins with imported functions and ends with those defined in this module. + // For example, if there are two imported functions and one defined in this module, the function Index 3 is defined + // in this module at FunctionSection[0]. + // + // Note: FunctionSection is index correlated with the CodeSection. If given the same position, e.g. 2, a function + // type is at TypeSection[FunctionSection[2]], while its locals and body are at CodeSection[2]. + // + // Note: In the Binary Format, this is SectionIDFunction. 
+ // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#function-section%E2%91%A0 + FunctionSection []Index + + // TableSection contains each table defined in this module. + // + // Note: The table Index space begins with imported tables and ends with those defined in this module. + // For example, if there are two imported tables and one defined in this module, the table Index 3 is defined in + // this module at TableSection[0]. + // + // Note: Version 1.0 (20191205) of the WebAssembly spec allows at most one table definition per module, so the + // length of the TableSection can be zero or one, and can only be one if there is no imported table. + // + // Note: In the Binary Format, this is SectionIDTable. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#table-section%E2%91%A0 + TableSection []Table + + // MemorySection contains each memory defined in this module. + // + // Note: The memory Index space begins with imported memories and ends with those defined in this module. + // For example, if there are two imported memories and one defined in this module, the memory Index 3 is defined in + // this module at TableSection[0]. + // + // Note: Version 1.0 (20191205) of the WebAssembly spec allows at most one memory definition per module, so the + // length of the MemorySection can be zero or one, and can only be one if there is no imported memory. + // + // Note: In the Binary Format, this is SectionIDMemory. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-section%E2%91%A0 + MemorySection *Memory + + // GlobalSection contains each global defined in this module. + // + // Global indexes are offset by any imported globals because the global index begins with imports, followed by + // ones defined in this module. For example, if there are two imported globals and three defined in this module, the + // global at index 3 is defined in this module at GlobalSection[0]. 
+ // + // Note: In the Binary Format, this is SectionIDGlobal. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#global-section%E2%91%A0 + GlobalSection []Global + + // ExportSection contains each export defined in this module. + // + // Note: In the Binary Format, this is SectionIDExport. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0 + ExportSection []Export + // Exports maps a name to Export, and is convenient for fast look up of exported instances at runtime. + // Each item of this map points to an element of ExportSection. + Exports map[string]*Export + + // StartSection is the index of a function to call before returning from Store.Instantiate. + // + // Note: The index here is not the position in the FunctionSection, rather in the function index, which + // begins with imported functions. + // + // Note: In the Binary Format, this is SectionIDStart. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#start-section%E2%91%A0 + StartSection *Index + + // Note: In the Binary Format, this is SectionIDElement. + ElementSection []ElementSegment + + // CodeSection is index-correlated with FunctionSection and contains each + // function's locals and body. + // + // When present, the HostFunctionSection of the same index must be nil. + // + // Note: In the Binary Format, this is SectionIDCode. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#code-section%E2%91%A0 + CodeSection []Code + + // Note: In the Binary Format, this is SectionIDData. + DataSection []DataSegment + + // NameSection is set when the SectionIDCustom "name" was successfully decoded from the binary format. + // + // Note: This is the only SectionIDCustom defined in the WebAssembly 1.0 (20191205) Binary Format. + // Others are skipped as they are not used in wazero. 
+ // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#name-section%E2%91%A0 + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#custom-section%E2%91%A0 + NameSection *NameSection + + // CustomSections are set when the SectionIDCustom other than "name" were successfully decoded from the binary format. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#custom-section%E2%91%A0 + CustomSections []*CustomSection + + // DataCountSection is the optional section and holds the number of data segments in the data section. + // + // Note: This may exist in WebAssembly 2.0 or WebAssembly 1.0 with CoreFeatureBulkMemoryOperations. + // See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-count-section + // See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + DataCountSection *uint32 + + // ID is the sha256 value of the source wasm plus the configurations which affect the runtime representation of + // Wasm binary. This is only used for caching. + ID ModuleID + + // IsHostModule true if this is the host module, false otherwise. + IsHostModule bool + + // functionDefinitionSectionInitOnce guards FunctionDefinitionSection so that it is initialized exactly once. + functionDefinitionSectionInitOnce sync.Once + + // FunctionDefinitionSection is a wazero-specific section. + FunctionDefinitionSection []FunctionDefinition + + // MemoryDefinitionSection is a wazero-specific section. + MemoryDefinitionSection []MemoryDefinition + + // DWARFLines is used to emit DWARF based stack trace. This is created from the multiple custom sections + // as described in https://yurydelendik.github.io/webassembly-dwarf/, though it is not specified in the Wasm + // specification: https://github.com/WebAssembly/debugging/issues/1 + DWARFLines *wasmdebug.DWARFLines + + // NonStaticLocals collects the local indexes that will change its value through either local.get or local.tee. 
+ NonStaticLocals []map[Index]struct{} +} + +// ModuleID represents sha256 hash value uniquely assigned to Module. +type ModuleID = [sha256.Size]byte + +// The wazero specific limitation described at RATIONALE.md. +// TL;DR; We multiply by 8 (to get offsets in bytes) and the multiplication result must be less than 32bit max +const ( + MaximumGlobals = uint32(1 << 27) + MaximumFunctionIndex = uint32(1 << 27) + MaximumTableIndex = uint32(1 << 27) +) + +// AssignModuleID calculates a sha256 checksum on `wasm` and other args, and set Module.ID to the result. +// See the doc on Module.ID on what it's used for. +func (m *Module) AssignModuleID(wasm []byte, listeners []experimental.FunctionListener, withEnsureTermination bool) { + h := sha256.New() + h.Write(wasm) + // Use the pre-allocated space backed by m.ID below. + + // Write the existence of listeners to the checksum per function. + for i, l := range listeners { + binary.LittleEndian.PutUint32(m.ID[:], uint32(i)) + m.ID[4] = boolToByte(l != nil) + h.Write(m.ID[:5]) + } + // Write the flag of ensureTermination to the checksum. + m.ID[0] = boolToByte(withEnsureTermination) + h.Write(m.ID[:1]) + // Get checksum by passing the slice underlying m.ID. + h.Sum(m.ID[:0]) +} + +func boolToByte(b bool) (ret byte) { + if b { + ret = 1 + } + return +} + +// typeOfFunction returns the wasm.FunctionType for the given function space index or nil. +func (m *Module) typeOfFunction(funcIdx Index) *FunctionType { + typeSectionLength, importedFunctionCount := uint32(len(m.TypeSection)), m.ImportFunctionCount + if funcIdx < importedFunctionCount { + // Imports are not exclusively functions. This is the current function index in the loop. 
+ cur := Index(0) + for i := range m.ImportSection { + imp := &m.ImportSection[i] + if imp.Type != ExternTypeFunc { + continue + } + if funcIdx == cur { + if imp.DescFunc >= typeSectionLength { + return nil + } + return &m.TypeSection[imp.DescFunc] + } + cur++ + } + } + + funcSectionIdx := funcIdx - m.ImportFunctionCount + if funcSectionIdx >= uint32(len(m.FunctionSection)) { + return nil + } + typeIdx := m.FunctionSection[funcSectionIdx] + if typeIdx >= typeSectionLength { + return nil + } + return &m.TypeSection[typeIdx] +} + +func (m *Module) Validate(enabledFeatures api.CoreFeatures) error { + for i := range m.TypeSection { + tp := &m.TypeSection[i] + tp.CacheNumInUint64() + } + + if err := m.validateStartSection(); err != nil { + return err + } + + functions, globals, memory, tables, err := m.AllDeclarations() + if err != nil { + return err + } + + if err = m.validateImports(enabledFeatures); err != nil { + return err + } + + if err = m.validateGlobals(globals, uint32(len(functions)), MaximumGlobals); err != nil { + return err + } + + if err = m.validateMemory(memory, globals, enabledFeatures); err != nil { + return err + } + + if err = m.validateExports(enabledFeatures, functions, globals, memory, tables); err != nil { + return err + } + + if m.CodeSection != nil { + if err = m.validateFunctions(enabledFeatures, functions, globals, memory, tables, MaximumFunctionIndex); err != nil { + return err + } + } // No need to validate host functions as NewHostModule validates + + if err = m.validateTable(enabledFeatures, tables, MaximumTableIndex); err != nil { + return err + } + + if err = m.validateDataCountSection(); err != nil { + return err + } + return nil +} + +func (m *Module) validateStartSection() error { + // Check the start function is valid. 
+ // TODO: this should be verified during decode so that errors have the correct source positions + if m.StartSection != nil { + startIndex := *m.StartSection + ft := m.typeOfFunction(startIndex) + if ft == nil { // TODO: move this check to decoder so that a module can never be decoded invalidly + return fmt.Errorf("invalid start function: func[%d] has an invalid type", startIndex) + } + if len(ft.Params) > 0 || len(ft.Results) > 0 { + return fmt.Errorf("invalid start function: func[%d] must have an empty (nullary) signature: %s", startIndex, ft) + } + } + return nil +} + +func (m *Module) validateGlobals(globals []GlobalType, numFuncts, maxGlobals uint32) error { + if uint32(len(globals)) > maxGlobals { + return fmt.Errorf("too many globals in a module") + } + + // Global initialization constant expression can only reference the imported globals. + // See the note on https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#constant-expressions%E2%91%A0 + importedGlobals := globals[:m.ImportGlobalCount] + for i := range m.GlobalSection { + g := &m.GlobalSection[i] + if err := validateConstExpression(importedGlobals, numFuncts, &g.Init, g.Type.ValType); err != nil { + return err + } + } + return nil +} + +func (m *Module) validateFunctions(enabledFeatures api.CoreFeatures, functions []Index, globals []GlobalType, memory *Memory, tables []Table, maximumFunctionIndex uint32) error { + if uint32(len(functions)) > maximumFunctionIndex { + return fmt.Errorf("too many functions (%d) in a module", len(functions)) + } + + functionCount := m.SectionElementCount(SectionIDFunction) + codeCount := m.SectionElementCount(SectionIDCode) + if functionCount == 0 && codeCount == 0 { + return nil + } + + typeCount := m.SectionElementCount(SectionIDType) + if codeCount != functionCount { + return fmt.Errorf("code count (%d) != function count (%d)", codeCount, functionCount) + } + + declaredFuncIndexes, err := m.declaredFunctionIndexes() + if err != nil { + return err + } + + // Create 
bytes.Reader once as it causes allocation, and + // we frequently need it (e.g. on every If instruction). + br := bytes.NewReader(nil) + // Also, we reuse the stacks across multiple function validations to reduce allocations. + vs := &stacks{} + // Non-static locals are gathered during validation and used in the down-stream compilation. + m.NonStaticLocals = make([]map[Index]struct{}, len(m.FunctionSection)) + for idx, typeIndex := range m.FunctionSection { + if typeIndex >= typeCount { + return fmt.Errorf("invalid %s: type section index %d out of range", m.funcDesc(SectionIDFunction, Index(idx)), typeIndex) + } + c := &m.CodeSection[idx] + if c.GoFunc != nil { + continue + } + if err = m.validateFunction(vs, enabledFeatures, Index(idx), functions, globals, memory, tables, declaredFuncIndexes, br); err != nil { + return fmt.Errorf("invalid %s: %w", m.funcDesc(SectionIDFunction, Index(idx)), err) + } + } + return nil +} + +// declaredFunctionIndexes returns a set of function indexes that can be used as an immediate for OpcodeRefFunc instruction. +// +// The criteria for which function indexes can be available for that instruction is vague in the spec: +// +// - "References: the list of function indices that occur in the module outside functions and can hence be used to form references inside them." +// - https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/valid/conventions.html#contexts +// - "Ref is the set funcidx(module with functions=ε, start=ε) , i.e., the set of function indices occurring in the module, except in its functions or start function." 
+// - https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/valid/modules.html#valid-module +// +// To clarify, we reverse-engineer logic required to pass the WebAssembly Core specification 2.0 test suite: +// https://github.com/WebAssembly/spec/blob/d39195773112a22b245ffbe864bab6d1182ccb06/test/core/ref_func.wast#L78-L115 +// +// To summarize, the function indexes OpcodeRefFunc can refer include: +// - existing in an element section regardless of its mode (active, passive, declarative). +// - defined as globals whose value type is ValueRefFunc. +// - used as an exported function. +// +// See https://github.com/WebAssembly/reference-types/issues/31 +// See https://github.com/WebAssembly/reference-types/issues/76 +func (m *Module) declaredFunctionIndexes() (ret map[Index]struct{}, err error) { + ret = map[uint32]struct{}{} + + for i := range m.ExportSection { + exp := &m.ExportSection[i] + if exp.Type == ExternTypeFunc { + ret[exp.Index] = struct{}{} + } + } + + for i := range m.GlobalSection { + g := &m.GlobalSection[i] + if g.Init.Opcode == OpcodeRefFunc { + var index uint32 + index, _, err = leb128.LoadUint32(g.Init.Data) + if err != nil { + err = fmt.Errorf("%s[%d] failed to initialize: %w", SectionIDName(SectionIDGlobal), i, err) + return + } + ret[index] = struct{}{} + } + } + + for i := range m.ElementSection { + elem := &m.ElementSection[i] + for _, index := range elem.Init { + if index != ElementInitNullReference { + ret[index] = struct{}{} + } + } + } + return +} + +func (m *Module) funcDesc(sectionID SectionID, sectionIndex Index) string { + // Try to improve the error message by collecting any exports: + var exportNames []string + funcIdx := sectionIndex + m.ImportFunctionCount + for i := range m.ExportSection { + exp := &m.ExportSection[i] + if exp.Index == funcIdx && exp.Type == ExternTypeFunc { + exportNames = append(exportNames, fmt.Sprintf("%q", exp.Name)) + } + } + sectionIDName := SectionIDName(sectionID) + if exportNames == nil { + return 
fmt.Sprintf("%s[%d]", sectionIDName, sectionIndex) + } + sort.Strings(exportNames) // go map keys do not iterate consistently + return fmt.Sprintf("%s[%d] export[%s]", sectionIDName, sectionIndex, strings.Join(exportNames, ",")) +} + +func (m *Module) validateMemory(memory *Memory, globals []GlobalType, _ api.CoreFeatures) error { + var activeElementCount int + for i := range m.DataSection { + d := &m.DataSection[i] + if !d.IsPassive() { + activeElementCount++ + } + } + if activeElementCount > 0 && memory == nil { + return fmt.Errorf("unknown memory") + } + + // Constant expression can only reference imported globals. + // https://github.com/WebAssembly/spec/blob/5900d839f38641989a9d8df2df4aee0513365d39/test/core/data.wast#L84-L91 + importedGlobals := globals[:m.ImportGlobalCount] + for i := range m.DataSection { + d := &m.DataSection[i] + if !d.IsPassive() { + if err := validateConstExpression(importedGlobals, 0, &d.OffsetExpression, ValueTypeI32); err != nil { + return fmt.Errorf("calculate offset: %w", err) + } + } + } + return nil +} + +func (m *Module) validateImports(enabledFeatures api.CoreFeatures) error { + for i := range m.ImportSection { + imp := &m.ImportSection[i] + if imp.Module == "" { + return fmt.Errorf("import[%d] has an empty module name", i) + } + switch imp.Type { + case ExternTypeFunc: + if int(imp.DescFunc) >= len(m.TypeSection) { + return fmt.Errorf("invalid import[%q.%q] function: type index out of range", imp.Module, imp.Name) + } + case ExternTypeGlobal: + if !imp.DescGlobal.Mutable { + continue + } + if err := enabledFeatures.RequireEnabled(api.CoreFeatureMutableGlobal); err != nil { + return fmt.Errorf("invalid import[%q.%q] global: %w", imp.Module, imp.Name, err) + } + } + } + return nil +} + +func (m *Module) validateExports(enabledFeatures api.CoreFeatures, functions []Index, globals []GlobalType, memory *Memory, tables []Table) error { + for i := range m.ExportSection { + exp := &m.ExportSection[i] + index := exp.Index + switch 
exp.Type { + case ExternTypeFunc: + if index >= uint32(len(functions)) { + return fmt.Errorf("unknown function for export[%q]", exp.Name) + } + case ExternTypeGlobal: + if index >= uint32(len(globals)) { + return fmt.Errorf("unknown global for export[%q]", exp.Name) + } + if !globals[index].Mutable { + continue + } + if err := enabledFeatures.RequireEnabled(api.CoreFeatureMutableGlobal); err != nil { + return fmt.Errorf("invalid export[%q] global[%d]: %w", exp.Name, index, err) + } + case ExternTypeMemory: + if index > 0 || memory == nil { + return fmt.Errorf("memory for export[%q] out of range", exp.Name) + } + case ExternTypeTable: + if index >= uint32(len(tables)) { + return fmt.Errorf("table for export[%q] out of range", exp.Name) + } + } + } + return nil +} + +func validateConstExpression(globals []GlobalType, numFuncs uint32, expr *ConstantExpression, expectedType ValueType) (err error) { + var actualType ValueType + switch expr.Opcode { + case OpcodeI32Const: + // Treat constants as signed as their interpretation is not yet known per /RATIONALE.md + _, _, err = leb128.LoadInt32(expr.Data) + if err != nil { + return fmt.Errorf("read i32: %w", err) + } + actualType = ValueTypeI32 + case OpcodeI64Const: + // Treat constants as signed as their interpretation is not yet known per /RATIONALE.md + _, _, err = leb128.LoadInt64(expr.Data) + if err != nil { + return fmt.Errorf("read i64: %w", err) + } + actualType = ValueTypeI64 + case OpcodeF32Const: + _, err = ieee754.DecodeFloat32(expr.Data) + if err != nil { + return fmt.Errorf("read f32: %w", err) + } + actualType = ValueTypeF32 + case OpcodeF64Const: + _, err = ieee754.DecodeFloat64(expr.Data) + if err != nil { + return fmt.Errorf("read f64: %w", err) + } + actualType = ValueTypeF64 + case OpcodeGlobalGet: + id, _, err := leb128.LoadUint32(expr.Data) + if err != nil { + return fmt.Errorf("read index of global: %w", err) + } + if uint32(len(globals)) <= id { + return fmt.Errorf("global index out of range") + } + 
actualType = globals[id].ValType + case OpcodeRefNull: + if len(expr.Data) == 0 { + return fmt.Errorf("read reference type for ref.null: %w", io.ErrShortBuffer) + } + reftype := expr.Data[0] + if reftype != RefTypeFuncref && reftype != RefTypeExternref { + return fmt.Errorf("invalid type for ref.null: 0x%x", reftype) + } + actualType = reftype + case OpcodeRefFunc: + index, _, err := leb128.LoadUint32(expr.Data) + if err != nil { + return fmt.Errorf("read i32: %w", err) + } else if index >= numFuncs { + return fmt.Errorf("ref.func index out of range [%d] with length %d", index, numFuncs-1) + } + actualType = ValueTypeFuncref + case OpcodeVecV128Const: + if len(expr.Data) != 16 { + return fmt.Errorf("%s needs 16 bytes but was %d bytes", OpcodeVecV128ConstName, len(expr.Data)) + } + actualType = ValueTypeV128 + default: + return fmt.Errorf("invalid opcode for const expression: 0x%x", expr.Opcode) + } + + if actualType != expectedType { + return fmt.Errorf("const expression type mismatch expected %s but got %s", + ValueTypeName(expectedType), ValueTypeName(actualType)) + } + return nil +} + +func (m *Module) validateDataCountSection() (err error) { + if m.DataCountSection != nil && int(*m.DataCountSection) != len(m.DataSection) { + err = fmt.Errorf("data count section (%d) doesn't match the length of data section (%d)", + *m.DataCountSection, len(m.DataSection)) + } + return +} + +func (m *ModuleInstance) buildGlobals(module *Module, funcRefResolver func(funcIndex Index) Reference) { + importedGlobals := m.Globals[:module.ImportGlobalCount] + + me := m.Engine + engineOwnGlobal := me.OwnsGlobals() + for i := Index(0); i < Index(len(module.GlobalSection)); i++ { + gs := &module.GlobalSection[i] + g := &GlobalInstance{} + if engineOwnGlobal { + g.Me = me + g.Index = i + module.ImportGlobalCount + } + m.Globals[i+module.ImportGlobalCount] = g + g.Type = gs.Type + g.initialize(importedGlobals, &gs.Init, funcRefResolver) + } +} + +func paramNames(localNames IndirectNameMap, 
funcIdx uint32, paramLen int) []string { + for i := range localNames { + nm := &localNames[i] + // Only build parameter names if we have one for each. + if nm.Index != funcIdx || len(nm.NameMap) < paramLen { + continue + } + + ret := make([]string, paramLen) + for j := range nm.NameMap { + p := &nm.NameMap[j] + if int(p.Index) < paramLen { + ret[p.Index] = p.Name + } + } + return ret + } + return nil +} + +func (m *ModuleInstance) buildMemory(module *Module, allocator experimental.MemoryAllocator) { + memSec := module.MemorySection + if memSec != nil { + m.MemoryInstance = NewMemoryInstance(memSec, allocator) + m.MemoryInstance.definition = &module.MemoryDefinitionSection[0] + } +} + +// Index is the offset in an index, not necessarily an absolute position in a Module section. This is because +// indexs are often preceded by a corresponding type in the Module.ImportSection. +// +// For example, the function index starts with any ExternTypeFunc in the Module.ImportSection followed by +// the Module.FunctionSection +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-index +type Index = uint32 + +// FunctionType is a possibly empty function signature. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#function-types%E2%91%A0 +type FunctionType struct { + // Params are the possibly empty sequence of value types accepted by a function with this signature. + Params []ValueType + + // Results are the possibly empty sequence of value types returned by a function with this signature. + // + // Note: In WebAssembly 1.0 (20191205), there can be at most one result. + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#result-types%E2%91%A0 + Results []ValueType + + // string is cached as it is used both for String and key + string string + + // ParamNumInUint64 is the number of uint64 values requires to represent the Wasm param type. 
+ ParamNumInUint64 int + + // ResultsNumInUint64 is the number of uint64 values requires to represent the Wasm result type. + ResultNumInUint64 int +} + +func (f *FunctionType) CacheNumInUint64() { + if f.ParamNumInUint64 == 0 { + for _, tp := range f.Params { + f.ParamNumInUint64++ + if tp == ValueTypeV128 { + f.ParamNumInUint64++ + } + } + } + + if f.ResultNumInUint64 == 0 { + for _, tp := range f.Results { + f.ResultNumInUint64++ + if tp == ValueTypeV128 { + f.ResultNumInUint64++ + } + } + } +} + +// EqualsSignature returns true if the function type has the same parameters and results. +func (f *FunctionType) EqualsSignature(params []ValueType, results []ValueType) bool { + return bytes.Equal(f.Params, params) && bytes.Equal(f.Results, results) +} + +// key gets or generates the key for Store.typeIDs. e.g. "i32_v" for one i32 parameter and no (void) result. +func (f *FunctionType) key() string { + if f.string != "" { + return f.string + } + var ret string + for _, b := range f.Params { + ret += ValueTypeName(b) + } + if len(f.Params) == 0 { + ret += "v_" + } else { + ret += "_" + } + for _, b := range f.Results { + ret += ValueTypeName(b) + } + if len(f.Results) == 0 { + ret += "v" + } + f.string = ret + return ret +} + +// String implements fmt.Stringer. 
+func (f *FunctionType) String() string { + return f.key() +} + +// Import is the binary representation of an import indicated by Type +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-import +type Import struct { + Type ExternType + // Module is the possibly empty primary namespace of this import + Module string + // Module is the possibly empty secondary namespace of this import + Name string + // DescFunc is the index in Module.TypeSection when Type equals ExternTypeFunc + DescFunc Index + // DescTable is the inlined Table when Type equals ExternTypeTable + DescTable Table + // DescMem is the inlined Memory when Type equals ExternTypeMemory + DescMem *Memory + // DescGlobal is the inlined GlobalType when Type equals ExternTypeGlobal + DescGlobal GlobalType + // IndexPerType has the index of this import per ExternType. + IndexPerType Index +} + +// Memory describes the limits of pages (64KB) in a memory. +type Memory struct { + Min, Cap, Max uint32 + // IsMaxEncoded true if the Max is encoded in the original binary. + IsMaxEncoded bool + // IsShared true if the memory is shared for access from multiple agents. + IsShared bool +} + +// Validate ensures values assigned to Min, Cap and Max are within valid thresholds. 
+func (m *Memory) Validate(memoryLimitPages uint32) error { + min, capacity, max := m.Min, m.Cap, m.Max + + if max > memoryLimitPages { + return fmt.Errorf("max %d pages (%s) over limit of %d pages (%s)", + max, PagesToUnitOfBytes(max), memoryLimitPages, PagesToUnitOfBytes(memoryLimitPages)) + } else if min > memoryLimitPages { + return fmt.Errorf("min %d pages (%s) over limit of %d pages (%s)", + min, PagesToUnitOfBytes(min), memoryLimitPages, PagesToUnitOfBytes(memoryLimitPages)) + } else if min > max { + return fmt.Errorf("min %d pages (%s) > max %d pages (%s)", + min, PagesToUnitOfBytes(min), max, PagesToUnitOfBytes(max)) + } else if capacity < min { + return fmt.Errorf("capacity %d pages (%s) less than minimum %d pages (%s)", + capacity, PagesToUnitOfBytes(capacity), min, PagesToUnitOfBytes(min)) + } else if capacity > memoryLimitPages { + return fmt.Errorf("capacity %d pages (%s) over limit of %d pages (%s)", + capacity, PagesToUnitOfBytes(capacity), memoryLimitPages, PagesToUnitOfBytes(memoryLimitPages)) + } + return nil +} + +type GlobalType struct { + ValType ValueType + Mutable bool +} + +type Global struct { + Type GlobalType + Init ConstantExpression +} + +type ConstantExpression struct { + Opcode Opcode + Data []byte +} + +// Export is the binary representation of an export indicated by Type +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-export +type Export struct { + Type ExternType + + // Name is what the host refers to this definition as. + Name string + + // Index is the index of the definition to export, the index is by Type + // e.g. If ExternTypeFunc, this is a position in the function index. + Index Index +} + +// Code is an entry in the Module.CodeSection containing the locals and body of the function. +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-code +type Code struct { + // LocalTypes are any function-scoped variables in insertion order. 
+ // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-local + LocalTypes []ValueType + + // Body is a sequence of expressions ending in OpcodeEnd + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-expr + Body []byte + + // GoFunc is non-nil when IsHostFunction and defined in go, either + // api.GoFunction or api.GoModuleFunction. When present, LocalTypes and Body must + // be nil. + // + // Note: This has no serialization format, so is not encodable. + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#host-functions%E2%91%A2 + GoFunc interface{} + + // BodyOffsetInCodeSection is the offset of the beginning of the body in the code section. + // This is used for DWARF based stack trace where a program counter represents an offset in code section. + BodyOffsetInCodeSection uint64 +} + +type DataSegment struct { + OffsetExpression ConstantExpression + Init []byte + Passive bool +} + +// IsPassive returns true if this data segment is "passive" in the sense that memory offset and +// index is determined at runtime and used by OpcodeMemoryInitName instruction in the bulk memory +// operations proposal. +// +// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions +func (d *DataSegment) IsPassive() bool { + return d.Passive +} + +// NameSection represent the known custom name subsections defined in the WebAssembly Binary Format +// +// Note: This can be nil if no names were decoded for any reason including configuration. +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#name-section%E2%91%A0 +type NameSection struct { + // ModuleName is the symbolic identifier for a module. e.g. math + // + // Note: This can be empty for any reason including configuration. + ModuleName string + + // FunctionNames is an association of a function index to its symbolic identifier. e.g. 
add + // + // * the key (idx) is in the function index, where module defined functions are preceded by imported ones. + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#functions%E2%91%A7 + // + // For example, assuming the below text format is the second import, you would expect FunctionNames[1] = "mul" + // (import "Math" "Mul" (func $mul (param $x f32) (param $y f32) (result f32))) + // + // Note: FunctionNames are only used for debugging. At runtime, functions are called based on raw numeric index. + // Note: This can be nil for any reason including configuration. + FunctionNames NameMap + + // LocalNames contains symbolic names for function parameters or locals that have one. + // + // Note: In the Text Format, function local names can inherit parameter + // names from their type. Here are some examples: + // * (module (import (func (param $x i32) (param i32))) (func (type 0))) = [{0, {x,0}}] + // * (module (import (func (param i32) (param $y i32))) (func (type 0) (local $z i32))) = [0, [{y,1},{z,2}]] + // * (module (func (param $x i32) (local $y i32) (local $z i32))) = [{x,0},{y,1},{z,2}] + // + // Note: LocalNames are only used for debugging. At runtime, locals are called based on raw numeric index. + // Note: This can be nil for any reason including configuration. + LocalNames IndirectNameMap + + // ResultNames is a wazero-specific mechanism to store result names. + ResultNames IndirectNameMap +} + +// CustomSection contains the name and raw data of a custom section. +type CustomSection struct { + Name string + Data []byte +} + +// NameMap associates an index with any associated names. +// +// Note: Often the index bridges multiple sections. For example, the function index starts with any +// ExternTypeFunc in the Module.ImportSection followed by the Module.FunctionSection +// +// Note: NameMap is unique by NameAssoc.Index, but NameAssoc.Name needn't be unique. 
+// Note: When encoding in the Binary format, this must be ordered by NameAssoc.Index +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namemap +type NameMap []NameAssoc + +type NameAssoc struct { + Index Index + Name string +} + +// IndirectNameMap associates an index with an association of names. +// +// Note: IndirectNameMap is unique by NameMapAssoc.Index, but NameMapAssoc.NameMap needn't be unique. +// Note: When encoding in the Binary format, this must be ordered by NameMapAssoc.Index +// https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-indirectnamemap +type IndirectNameMap []NameMapAssoc + +type NameMapAssoc struct { + Index Index + NameMap NameMap +} + +// AllDeclarations returns all declarations for functions, globals, memories and tables in a module including imported ones. +func (m *Module) AllDeclarations() (functions []Index, globals []GlobalType, memory *Memory, tables []Table, err error) { + for i := range m.ImportSection { + imp := &m.ImportSection[i] + switch imp.Type { + case ExternTypeFunc: + functions = append(functions, imp.DescFunc) + case ExternTypeGlobal: + globals = append(globals, imp.DescGlobal) + case ExternTypeMemory: + memory = imp.DescMem + case ExternTypeTable: + tables = append(tables, imp.DescTable) + } + } + + functions = append(functions, m.FunctionSection...) + for i := range m.GlobalSection { + g := &m.GlobalSection[i] + globals = append(globals, g.Type) + } + if m.MemorySection != nil { + if memory != nil { // shouldn't be possible due to Validate + err = errors.New("at most one table allowed in module") + return + } + memory = m.MemorySection + } + if m.TableSection != nil { + tables = append(tables, m.TableSection...) + } + return +} + +// SectionID identifies the sections of a Module in the WebAssembly 1.0 (20191205) Binary Format. 
+// +// Note: these are defined in the wasm package, instead of the binary package, as a key per section is needed regardless +// of format, and deferring to the binary type avoids confusion. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#sections%E2%91%A0 +type SectionID = byte + +const ( + // SectionIDCustom includes the standard defined NameSection and possibly others not defined in the standard. + SectionIDCustom SectionID = iota // don't add anything not in https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#sections%E2%91%A0 + SectionIDType + SectionIDImport + SectionIDFunction + SectionIDTable + SectionIDMemory + SectionIDGlobal + SectionIDExport + SectionIDStart + SectionIDElement + SectionIDCode + SectionIDData + + // SectionIDDataCount may exist in WebAssembly 2.0 or WebAssembly 1.0 with CoreFeatureBulkMemoryOperations enabled. + // + // See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-count-section + // See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions + SectionIDDataCount +) + +// SectionIDName returns the canonical name of a module section. +// https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#sections%E2%91%A0 +func SectionIDName(sectionID SectionID) string { + switch sectionID { + case SectionIDCustom: + return "custom" + case SectionIDType: + return "type" + case SectionIDImport: + return "import" + case SectionIDFunction: + return "function" + case SectionIDTable: + return "table" + case SectionIDMemory: + return "memory" + case SectionIDGlobal: + return "global" + case SectionIDExport: + return "export" + case SectionIDStart: + return "start" + case SectionIDElement: + return "element" + case SectionIDCode: + return "code" + case SectionIDData: + return "data" + case SectionIDDataCount: + return "data_count" + } + return "unknown" +} + +// ValueType is an alias of api.ValueType defined to simplify imports. 
+type ValueType = api.ValueType + +const ( + ValueTypeI32 = api.ValueTypeI32 + ValueTypeI64 = api.ValueTypeI64 + ValueTypeF32 = api.ValueTypeF32 + ValueTypeF64 = api.ValueTypeF64 + // TODO: ValueTypeV128 is not exposed in the api pkg yet. + ValueTypeV128 ValueType = 0x7b + // TODO: ValueTypeFuncref is not exposed in the api pkg yet. + ValueTypeFuncref ValueType = 0x70 + ValueTypeExternref = api.ValueTypeExternref +) + +// ValueTypeName is an alias of api.ValueTypeName defined to simplify imports. +func ValueTypeName(t ValueType) string { + if t == ValueTypeFuncref { + return "funcref" + } else if t == ValueTypeV128 { + return "v128" + } + return api.ValueTypeName(t) +} + +func isReferenceValueType(vt ValueType) bool { + return vt == ValueTypeExternref || vt == ValueTypeFuncref +} + +// ExternType is an alias of api.ExternType defined to simplify imports. +type ExternType = api.ExternType + +const ( + ExternTypeFunc = api.ExternTypeFunc + ExternTypeFuncName = api.ExternTypeFuncName + ExternTypeTable = api.ExternTypeTable + ExternTypeTableName = api.ExternTypeTableName + ExternTypeMemory = api.ExternTypeMemory + ExternTypeMemoryName = api.ExternTypeMemoryName + ExternTypeGlobal = api.ExternTypeGlobal + ExternTypeGlobalName = api.ExternTypeGlobalName +) + +// ExternTypeName is an alias of api.ExternTypeName defined to simplify imports. +func ExternTypeName(t ValueType) string { + return api.ExternTypeName(t) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/module_instance.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/module_instance.go new file mode 100644 index 000000000..20c733e6f --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/module_instance.go @@ -0,0 +1,251 @@ +package wasm + +import ( + "context" + "errors" + "fmt" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/sys" +) + +// FailIfClosed returns a sys.ExitError if CloseWithExitCode was called. 
+func (m *ModuleInstance) FailIfClosed() (err error) { + if closed := m.Closed.Load(); closed != 0 { + switch closed & exitCodeFlagMask { + case exitCodeFlagResourceClosed: + case exitCodeFlagResourceNotClosed: + // This happens when this module is closed asynchronously in CloseModuleOnCanceledOrTimeout, + // and the closure of resources have been deferred here. + _ = m.ensureResourcesClosed(context.Background()) + } + return sys.NewExitError(uint32(closed >> 32)) // Unpack the high order bits as the exit code. + } + return nil +} + +// CloseModuleOnCanceledOrTimeout take a context `ctx`, which might be a Cancel or Timeout context, +// and spawns the Goroutine to check the context is canceled ot deadline exceeded. If it reaches +// one of the conditions, it sets the appropriate exit code. +// +// Callers of this function must invoke the returned context.CancelFunc to release the spawned Goroutine. +func (m *ModuleInstance) CloseModuleOnCanceledOrTimeout(ctx context.Context) context.CancelFunc { + // Creating an empty channel in this case is a bit more efficient than + // creating a context.Context and canceling it with the same effect. We + // really just need to be notified when to stop listening to the users + // context. Closing the channel will unblock the select in the goroutine + // causing it to return an stop listening to ctx.Done(). + cancelChan := make(chan struct{}) + go m.closeModuleOnCanceledOrTimeout(ctx, cancelChan) + return func() { close(cancelChan) } +} + +// closeModuleOnCanceledOrTimeout is extracted from CloseModuleOnCanceledOrTimeout for testing. +func (m *ModuleInstance) closeModuleOnCanceledOrTimeout(ctx context.Context, cancelChan <-chan struct{}) { + select { + case <-ctx.Done(): + select { + case <-cancelChan: + // In some cases by the time this goroutine is scheduled, the caller + // has already closed both the context and the cancelChan. 
In this + // case go will randomize which branch of the outer select to enter + // and we don't want to close the module. + default: + // This is the same logic as CloseWithCtxErr except this calls closeWithExitCodeWithoutClosingResource + // so that we can defer the resource closure in FailIfClosed. + switch { + case errors.Is(ctx.Err(), context.Canceled): + // TODO: figure out how to report error here. + _ = m.closeWithExitCodeWithoutClosingResource(sys.ExitCodeContextCanceled) + case errors.Is(ctx.Err(), context.DeadlineExceeded): + // TODO: figure out how to report error here. + _ = m.closeWithExitCodeWithoutClosingResource(sys.ExitCodeDeadlineExceeded) + } + } + case <-cancelChan: + } +} + +// CloseWithCtxErr closes the module with an exit code based on the type of +// error reported by the context. +// +// If the context's error is unknown or nil, the module does not close. +func (m *ModuleInstance) CloseWithCtxErr(ctx context.Context) { + switch { + case errors.Is(ctx.Err(), context.Canceled): + // TODO: figure out how to report error here. + _ = m.CloseWithExitCode(ctx, sys.ExitCodeContextCanceled) + case errors.Is(ctx.Err(), context.DeadlineExceeded): + // TODO: figure out how to report error here. + _ = m.CloseWithExitCode(ctx, sys.ExitCodeDeadlineExceeded) + } +} + +// Name implements the same method as documented on api.Module +func (m *ModuleInstance) Name() string { + return m.ModuleName +} + +// String implements the same method as documented on api.Module +func (m *ModuleInstance) String() string { + return fmt.Sprintf("Module[%s]", m.Name()) +} + +// Close implements the same method as documented on api.Module. +func (m *ModuleInstance) Close(ctx context.Context) (err error) { + return m.CloseWithExitCode(ctx, 0) +} + +// CloseWithExitCode implements the same method as documented on api.Module. 
+func (m *ModuleInstance) CloseWithExitCode(ctx context.Context, exitCode uint32) (err error) { + if !m.setExitCode(exitCode, exitCodeFlagResourceClosed) { + return nil // not an error to have already closed + } + _ = m.s.deleteModule(m) + return m.ensureResourcesClosed(ctx) +} + +// IsClosed implements the same method as documented on api.Module. +func (m *ModuleInstance) IsClosed() bool { + return m.Closed.Load() != 0 +} + +func (m *ModuleInstance) closeWithExitCodeWithoutClosingResource(exitCode uint32) (err error) { + if !m.setExitCode(exitCode, exitCodeFlagResourceNotClosed) { + return nil // not an error to have already closed + } + _ = m.s.deleteModule(m) + return nil +} + +// closeWithExitCode is the same as CloseWithExitCode besides this doesn't delete it from Store.moduleList. +func (m *ModuleInstance) closeWithExitCode(ctx context.Context, exitCode uint32) (err error) { + if !m.setExitCode(exitCode, exitCodeFlagResourceClosed) { + return nil // not an error to have already closed + } + return m.ensureResourcesClosed(ctx) +} + +type exitCodeFlag = uint64 + +const exitCodeFlagMask = 0xff + +const ( + // exitCodeFlagResourceClosed indicates that the module was closed and resources were already closed. + exitCodeFlagResourceClosed = 1 << iota + // exitCodeFlagResourceNotClosed indicates that the module was closed while resources are not closed yet. + exitCodeFlagResourceNotClosed +) + +func (m *ModuleInstance) setExitCode(exitCode uint32, flag exitCodeFlag) bool { + closed := flag | uint64(exitCode)<<32 // Store exitCode as high-order bits. + return m.Closed.CompareAndSwap(0, closed) +} + +// ensureResourcesClosed ensures that resources assigned to ModuleInstance is released. +// Only one call will happen per module, due to external atomic guards on Closed. 
+func (m *ModuleInstance) ensureResourcesClosed(ctx context.Context) (err error) { + if closeNotifier := m.CloseNotifier; closeNotifier != nil { // experimental + closeNotifier.CloseNotify(ctx, uint32(m.Closed.Load()>>32)) + m.CloseNotifier = nil + } + + if sysCtx := m.Sys; sysCtx != nil { // nil if from HostModuleBuilder + err = sysCtx.FS().Close() + m.Sys = nil + } + + if mem := m.MemoryInstance; mem != nil { + if mem.expBuffer != nil { + mem.expBuffer.Free() + mem.expBuffer = nil + } + } + + if m.CodeCloser != nil { + if e := m.CodeCloser.Close(ctx); err == nil { + err = e + } + m.CodeCloser = nil + } + return err +} + +// Memory implements the same method as documented on api.Module. +func (m *ModuleInstance) Memory() api.Memory { + return m.MemoryInstance +} + +// ExportedMemory implements the same method as documented on api.Module. +func (m *ModuleInstance) ExportedMemory(name string) api.Memory { + _, err := m.getExport(name, ExternTypeMemory) + if err != nil { + return nil + } + // We Assume that we have at most one memory. + return m.MemoryInstance +} + +// ExportedMemoryDefinitions implements the same method as documented on +// api.Module. +func (m *ModuleInstance) ExportedMemoryDefinitions() map[string]api.MemoryDefinition { + // Special case as we currently only support one memory. + if mem := m.MemoryInstance; mem != nil { + // Now, find out if it is exported + for name, exp := range m.Exports { + if exp.Type == ExternTypeMemory { + return map[string]api.MemoryDefinition{name: mem.definition} + } + } + } + return map[string]api.MemoryDefinition{} +} + +// ExportedFunction implements the same method as documented on api.Module. +func (m *ModuleInstance) ExportedFunction(name string) api.Function { + exp, err := m.getExport(name, ExternTypeFunc) + if err != nil { + return nil + } + return m.Engine.NewFunction(exp.Index) +} + +// ExportedFunctionDefinitions implements the same method as documented on +// api.Module. 
+func (m *ModuleInstance) ExportedFunctionDefinitions() map[string]api.FunctionDefinition { + result := map[string]api.FunctionDefinition{} + for name, exp := range m.Exports { + if exp.Type == ExternTypeFunc { + result[name] = m.Source.FunctionDefinition(exp.Index) + } + } + return result +} + +// GlobalVal is an internal hack to get the lower 64 bits of a global. +func (m *ModuleInstance) GlobalVal(idx Index) uint64 { + return m.Globals[idx].Val +} + +// ExportedGlobal implements the same method as documented on api.Module. +func (m *ModuleInstance) ExportedGlobal(name string) api.Global { + exp, err := m.getExport(name, ExternTypeGlobal) + if err != nil { + return nil + } + g := m.Globals[exp.Index] + if g.Type.Mutable { + return mutableGlobal{g: g} + } + return constantGlobal{g: g} +} + +// NumGlobal implements experimental.InternalModule. +func (m *ModuleInstance) NumGlobal() int { + return len(m.Globals) +} + +// Global implements experimental.InternalModule. +func (m *ModuleInstance) Global(idx int) api.Global { + return constantGlobal{g: m.Globals[idx]} +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/module_instance_lookup.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/module_instance_lookup.go new file mode 100644 index 000000000..442d26a22 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/module_instance_lookup.go @@ -0,0 +1,73 @@ +package wasm + +import ( + "context" + "fmt" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/internalapi" +) + +// LookupFunction looks up the table by the given index, and returns the api.Function implementation if found, +// otherwise this panics according to the same semantics as call_indirect instruction. +// Currently, this is only used by emscripten which needs to do call_indirect-like operation in the host function. 
+func (m *ModuleInstance) LookupFunction(t *TableInstance, typeId FunctionTypeID, tableOffset Index) api.Function { + fm, index := m.Engine.LookupFunction(t, typeId, tableOffset) + if source := fm.Source; source.IsHostModule { + // This case, the found function is a host function stored in the table. Generally, Engine.NewFunction are only + // responsible for calling Wasm-defined functions (not designed for calling Go functions!). Hence we need to wrap + // the host function as a special case. + def := &source.FunctionDefinitionSection[index] + goF := source.CodeSection[index].GoFunc + switch typed := goF.(type) { + case api.GoFunction: + // GoFunction doesn't need looked up module. + return &lookedUpGoFunction{def: def, g: goFunctionAsGoModuleFunction(typed)} + case api.GoModuleFunction: + return &lookedUpGoFunction{def: def, lookedUpModule: m, g: typed} + default: + panic(fmt.Sprintf("unexpected GoFunc type: %T", goF)) + } + } else { + return fm.Engine.NewFunction(index) + } +} + +// lookedUpGoFunction implements lookedUpGoModuleFunction. +type lookedUpGoFunction struct { + internalapi.WazeroOnly + def *FunctionDefinition + // lookedUpModule is the *ModuleInstance from which this Go function is looked up, i.e. owner of the table. + lookedUpModule *ModuleInstance + g api.GoModuleFunction +} + +// goFunctionAsGoModuleFunction converts api.GoFunction to api.GoModuleFunction which ignores the api.Module argument. +func goFunctionAsGoModuleFunction(g api.GoFunction) api.GoModuleFunction { + return api.GoModuleFunc(func(ctx context.Context, _ api.Module, stack []uint64) { + g.Call(ctx, stack) + }) +} + +// Definition implements api.Function. +func (l *lookedUpGoFunction) Definition() api.FunctionDefinition { return l.def } + +// Call implements api.Function. 
+func (l *lookedUpGoFunction) Call(ctx context.Context, params ...uint64) ([]uint64, error) { + typ := l.def.Functype + stackSize := typ.ParamNumInUint64 + rn := typ.ResultNumInUint64 + if rn > stackSize { + stackSize = rn + } + stack := make([]uint64, stackSize) + copy(stack, params) + return stack[:rn], l.CallWithStack(ctx, stack) +} + +// CallWithStack implements api.Function. +func (l *lookedUpGoFunction) CallWithStack(ctx context.Context, stack []uint64) error { + // The Go host function always needs to access caller's module, in this case the one holding the table. + l.g.Call(ctx, l.lookedUpModule, stack) + return nil +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/store.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/store.go new file mode 100644 index 000000000..1db661e85 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/store.go @@ -0,0 +1,668 @@ +package wasm + +import ( + "context" + "encoding/binary" + "fmt" + "sync" + "sync/atomic" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/expctxkeys" + "github.com/tetratelabs/wazero/internal/internalapi" + "github.com/tetratelabs/wazero/internal/leb128" + internalsys "github.com/tetratelabs/wazero/internal/sys" + "github.com/tetratelabs/wazero/sys" +) + +// nameToModuleShrinkThreshold is the size the nameToModule map can grow to +// before it starts to be monitored for shrinking. +// The capacity will never be smaller than this once the threshold is met. +const nameToModuleShrinkThreshold = 100 + +type ( + // Store is the runtime representation of "instantiated" Wasm module and objects. + // Multiple modules can be instantiated within a single store, and each instance, + // (e.g. function instance) can be referenced by other module instances in a Store via Module.ImportSection. + // + // Every type whose name ends with "Instance" suffix belongs to exactly one store. 
+ // + // Note that store is not thread (concurrency) safe, meaning that using single Store + // via multiple goroutines might result in race conditions. In that case, the invocation + // and access to any methods and field of Store must be guarded by mutex. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#store%E2%91%A0 + Store struct { + // moduleList ensures modules are closed in reverse initialization order. + moduleList *ModuleInstance // guarded by mux + + // nameToModule holds the instantiated Wasm modules by module name from Instantiate. + // It ensures no race conditions instantiating two modules of the same name. + nameToModule map[string]*ModuleInstance // guarded by mux + + // nameToModuleCap tracks the growth of the nameToModule map in order to + // track when to shrink it. + nameToModuleCap int // guarded by mux + + // EnabledFeatures are read-only to allow optimizations. + EnabledFeatures api.CoreFeatures + + // Engine is a global context for a Store which is in responsible for compilation and execution of Wasm modules. + Engine Engine + + // typeIDs maps each FunctionType.String() to a unique FunctionTypeID. This is used at runtime to + // do type-checks on indirect function calls. + typeIDs map[string]FunctionTypeID + + // functionMaxTypes represents the limit on the number of function types in a store. + // Note: this is fixed to 2^27 but have this a field for testability. + functionMaxTypes uint32 + + // mux is used to guard the fields from concurrent access. + mux sync.RWMutex + } + + // ModuleInstance represents instantiated wasm module. + // The difference from the spec is that in wazero, a ModuleInstance holds pointers + // to the instances, rather than "addresses" (i.e. index to Store.Functions, Globals, etc) for convenience. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-moduleinst + // + // This implements api.Module. 
+ ModuleInstance struct { + internalapi.WazeroOnlyType + + ModuleName string + Exports map[string]*Export + Globals []*GlobalInstance + MemoryInstance *MemoryInstance + Tables []*TableInstance + + // Engine implements function calls for this module. + Engine ModuleEngine + + // TypeIDs is index-correlated with types and holds typeIDs which is uniquely assigned to a type by store. + // This is necessary to achieve fast runtime type checking for indirect function calls at runtime. + TypeIDs []FunctionTypeID + + // DataInstances holds data segments bytes of the module. + // This is only used by bulk memory operations. + // + // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/runtime.html#data-instances + DataInstances []DataInstance + + // ElementInstances holds the element instance, and each holds the references to either functions + // or external objects (unimplemented). + ElementInstances []ElementInstance + + // Sys is exposed for use in special imports such as WASI, assemblyscript. + // + // # Notes + // + // - This is a part of ModuleInstance so that scope and Close is coherent. + // - This is not exposed outside this repository (as a host function + // parameter) because we haven't thought through capabilities based + // security implications. + Sys *internalsys.Context + + // Closed is used both to guard moduleEngine.CloseWithExitCode and to store the exit code. + // + // The update value is closedType + exitCode << 32. This ensures an exit code of zero isn't mistaken for never closed. + // + // Note: Exclusively reading and updating this with atomics guarantees cross-goroutine observations. + // See /RATIONALE.md + Closed atomic.Uint64 + + // CodeCloser is non-nil when the code should be closed after this module. + CodeCloser api.Closer + + // s is the Store on which this module is instantiated. + s *Store + // prev and next hold the nodes in the linked list of ModuleInstance held by Store. 
+ prev, next *ModuleInstance + // Source is a pointer to the Module from which this ModuleInstance derives. + Source *Module + + // CloseNotifier is an experimental hook called once on close. + CloseNotifier experimental.CloseNotifier + } + + // DataInstance holds bytes corresponding to the data segment in a module. + // + // https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/runtime.html#data-instances + DataInstance = []byte + + // GlobalInstance represents a global instance in a store. + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#global-instances%E2%91%A0 + GlobalInstance struct { + Type GlobalType + // Val holds a 64-bit representation of the actual value. + // If me is non-nil, the value will not be updated and the current value is stored in the module engine. + Val uint64 + // ValHi is only used for vector type globals, and holds the higher bits of the vector. + // If me is non-nil, the value will not be updated and the current value is stored in the module engine. + ValHi uint64 + // Me is the module engine that owns this global instance. + // The .Val and .ValHi fields are only valid when me is nil. + // If me is non-nil, the value is stored in the module engine. + Me ModuleEngine + Index Index + } + + // FunctionTypeID is a uniquely assigned integer for a function type. + // This is wazero specific runtime object and specific to a store, + // and used at runtime to do type-checks on indirect function calls. + FunctionTypeID uint32 +) + +// The wazero specific limitations described at RATIONALE.md. +const maximumFunctionTypes = 1 << 27 + +// GetFunctionTypeID is used by emscripten. +func (m *ModuleInstance) GetFunctionTypeID(t *FunctionType) FunctionTypeID { + id, err := m.s.GetFunctionTypeID(t) + if err != nil { + // This is not recoverable in practice since the only error GetFunctionTypeID returns is + // when there's too many function types in the store. 
+ panic(err) + } + return id +} + +func (m *ModuleInstance) buildElementInstances(elements []ElementSegment) { + m.ElementInstances = make([][]Reference, len(elements)) + for i, elm := range elements { + if elm.Type == RefTypeFuncref && elm.Mode == ElementModePassive { + // Only passive elements can be access as element instances. + // See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/syntax/modules.html#element-segments + inits := elm.Init + inst := make([]Reference, len(inits)) + m.ElementInstances[i] = inst + for j, idx := range inits { + if index, ok := unwrapElementInitGlobalReference(idx); ok { + global := m.Globals[index] + inst[j] = Reference(global.Val) + } else { + if idx != ElementInitNullReference { + inst[j] = m.Engine.FunctionInstanceReference(idx) + } + } + } + } + } +} + +func (m *ModuleInstance) applyElements(elems []ElementSegment) { + for elemI := range elems { + elem := &elems[elemI] + if !elem.IsActive() || + // Per https://github.com/WebAssembly/spec/issues/1427 init can be no-op. + len(elem.Init) == 0 { + continue + } + var offset uint32 + if elem.OffsetExpr.Opcode == OpcodeGlobalGet { + // Ignore error as it's already validated. + globalIdx, _, _ := leb128.LoadUint32(elem.OffsetExpr.Data) + global := m.Globals[globalIdx] + offset = uint32(global.Val) + } else { + // Ignore error as it's already validated. + o, _, _ := leb128.LoadInt32(elem.OffsetExpr.Data) + offset = uint32(o) + } + + table := m.Tables[elem.TableIndex] + references := table.References + if int(offset)+len(elem.Init) > len(references) { + // ErrElementOffsetOutOfBounds is the error raised when the active element offset exceeds the table length. + // Before CoreFeatureReferenceTypes, this was checked statically before instantiation, after the proposal, + // this must be raised as runtime error (as in assert_trap in spectest), not even an instantiation error. 
+ // https://github.com/WebAssembly/spec/blob/d39195773112a22b245ffbe864bab6d1182ccb06/test/core/linking.wast#L264-L274 + // + // In wazero, we ignore it since in any way, the instantiated module and engines are fine and can be used + // for function invocations. + return + } + + if table.Type == RefTypeExternref { + for i := 0; i < len(elem.Init); i++ { + references[offset+uint32(i)] = Reference(0) + } + } else { + for i, init := range elem.Init { + if init == ElementInitNullReference { + continue + } + + var ref Reference + if index, ok := unwrapElementInitGlobalReference(init); ok { + global := m.Globals[index] + ref = Reference(global.Val) + } else { + ref = m.Engine.FunctionInstanceReference(index) + } + references[offset+uint32(i)] = ref + } + } + } +} + +// validateData ensures that data segments are valid in terms of memory boundary. +// Note: this is used only when bulk-memory/reference type feature is disabled. +func (m *ModuleInstance) validateData(data []DataSegment) (err error) { + for i := range data { + d := &data[i] + if !d.IsPassive() { + offset := int(executeConstExpressionI32(m.Globals, &d.OffsetExpression)) + ceil := offset + len(d.Init) + if offset < 0 || ceil > len(m.MemoryInstance.Buffer) { + return fmt.Errorf("%s[%d]: out of bounds memory access", SectionIDName(SectionIDData), i) + } + } + } + return +} + +// applyData uses the given data segments and mutate the memory according to the initial contents on it +// and populate the `DataInstances`. This is called after all the validation phase passes and out of +// bounds memory access error here is not a validation error, but rather a runtime error. 
+func (m *ModuleInstance) applyData(data []DataSegment) error { + m.DataInstances = make([][]byte, len(data)) + for i := range data { + d := &data[i] + m.DataInstances[i] = d.Init + if !d.IsPassive() { + offset := executeConstExpressionI32(m.Globals, &d.OffsetExpression) + if offset < 0 || int(offset)+len(d.Init) > len(m.MemoryInstance.Buffer) { + return fmt.Errorf("%s[%d]: out of bounds memory access", SectionIDName(SectionIDData), i) + } + copy(m.MemoryInstance.Buffer[offset:], d.Init) + } + } + return nil +} + +// GetExport returns an export of the given name and type or errs if not exported or the wrong type. +func (m *ModuleInstance) getExport(name string, et ExternType) (*Export, error) { + exp, ok := m.Exports[name] + if !ok { + return nil, fmt.Errorf("%q is not exported in module %q", name, m.ModuleName) + } + if exp.Type != et { + return nil, fmt.Errorf("export %q in module %q is a %s, not a %s", name, m.ModuleName, ExternTypeName(exp.Type), ExternTypeName(et)) + } + return exp, nil +} + +func NewStore(enabledFeatures api.CoreFeatures, engine Engine) *Store { + return &Store{ + nameToModule: map[string]*ModuleInstance{}, + nameToModuleCap: nameToModuleShrinkThreshold, + EnabledFeatures: enabledFeatures, + Engine: engine, + typeIDs: map[string]FunctionTypeID{}, + functionMaxTypes: maximumFunctionTypes, + } +} + +// Instantiate uses name instead of the Module.NameSection ModuleName as it allows instantiating the same module under +// different names safely and concurrently. +// +// * ctx: the default context used for function calls. +// * name: the name of the module. +// * sys: the system context, which will be closed (SysContext.Close) on ModuleInstance.Close. +// +// Note: Module.Validate must be called prior to instantiation. 
+func (s *Store) Instantiate( + ctx context.Context, + module *Module, + name string, + sys *internalsys.Context, + typeIDs []FunctionTypeID, +) (*ModuleInstance, error) { + // Instantiate the module and add it to the store so that other modules can import it. + m, err := s.instantiate(ctx, module, name, sys, typeIDs) + if err != nil { + return nil, err + } + + // Now that the instantiation is complete without error, add it. + if err = s.registerModule(m); err != nil { + _ = m.Close(ctx) + return nil, err + } + return m, nil +} + +func (s *Store) instantiate( + ctx context.Context, + module *Module, + name string, + sysCtx *internalsys.Context, + typeIDs []FunctionTypeID, +) (m *ModuleInstance, err error) { + m = &ModuleInstance{ModuleName: name, TypeIDs: typeIDs, Sys: sysCtx, s: s, Source: module} + + m.Tables = make([]*TableInstance, int(module.ImportTableCount)+len(module.TableSection)) + m.Globals = make([]*GlobalInstance, int(module.ImportGlobalCount)+len(module.GlobalSection)) + m.Engine, err = s.Engine.NewModuleEngine(module, m) + if err != nil { + return nil, err + } + + if err = m.resolveImports(module); err != nil { + return nil, err + } + + err = m.buildTables(module, + // As of reference-types proposal, boundary check must be done after instantiation. + s.EnabledFeatures.IsEnabled(api.CoreFeatureReferenceTypes)) + if err != nil { + return nil, err + } + + allocator, _ := ctx.Value(expctxkeys.MemoryAllocatorKey{}).(experimental.MemoryAllocator) + + m.buildGlobals(module, m.Engine.FunctionInstanceReference) + m.buildMemory(module, allocator) + m.Exports = module.Exports + for _, exp := range m.Exports { + if exp.Type == ExternTypeTable { + t := m.Tables[exp.Index] + t.involvingModuleInstances = append(t.involvingModuleInstances, m) + } + } + + // As of reference types proposal, data segment validation must happen after instantiation, + // and the side effect must persist even if there's out of bounds error after instantiation. 
+ // https://github.com/WebAssembly/spec/blob/d39195773112a22b245ffbe864bab6d1182ccb06/test/core/linking.wast#L395-L405 + if !s.EnabledFeatures.IsEnabled(api.CoreFeatureReferenceTypes) { + if err = m.validateData(module.DataSection); err != nil { + return nil, err + } + } + + // After engine creation, we can create the funcref element instances and initialize funcref type globals. + m.buildElementInstances(module.ElementSection) + + // Now all the validation passes, we are safe to mutate memory instances (possibly imported ones). + if err = m.applyData(module.DataSection); err != nil { + return nil, err + } + + m.applyElements(module.ElementSection) + + m.Engine.DoneInstantiation() + + // Execute the start function. + if module.StartSection != nil { + funcIdx := *module.StartSection + ce := m.Engine.NewFunction(funcIdx) + _, err = ce.Call(ctx) + if exitErr, ok := err.(*sys.ExitError); ok { // Don't wrap an exit error! + return nil, exitErr + } else if err != nil { + return nil, fmt.Errorf("start %s failed: %w", module.funcDesc(SectionIDFunction, funcIdx), err) + } + } + return +} + +func (m *ModuleInstance) resolveImports(module *Module) (err error) { + for moduleName, imports := range module.ImportPerModule { + var importedModule *ModuleInstance + importedModule, err = m.s.module(moduleName) + if err != nil { + return err + } + + for _, i := range imports { + var imported *Export + imported, err = importedModule.getExport(i.Name, i.Type) + if err != nil { + return + } + + switch i.Type { + case ExternTypeFunc: + expectedType := &module.TypeSection[i.DescFunc] + src := importedModule.Source + actual := src.typeOfFunction(imported.Index) + if !actual.EqualsSignature(expectedType.Params, expectedType.Results) { + err = errorInvalidImport(i, fmt.Errorf("signature mismatch: %s != %s", expectedType, actual)) + return + } + + m.Engine.ResolveImportedFunction(i.IndexPerType, imported.Index, importedModule.Engine) + case ExternTypeTable: + expected := i.DescTable + 
importedTable := importedModule.Tables[imported.Index] + if expected.Type != importedTable.Type { + err = errorInvalidImport(i, fmt.Errorf("table type mismatch: %s != %s", + RefTypeName(expected.Type), RefTypeName(importedTable.Type))) + return + } + + if expected.Min > importedTable.Min { + err = errorMinSizeMismatch(i, expected.Min, importedTable.Min) + return + } + + if expected.Max != nil { + expectedMax := *expected.Max + if importedTable.Max == nil { + err = errorNoMax(i, expectedMax) + return + } else if expectedMax < *importedTable.Max { + err = errorMaxSizeMismatch(i, expectedMax, *importedTable.Max) + return + } + } + m.Tables[i.IndexPerType] = importedTable + importedTable.involvingModuleInstancesMutex.Lock() + if len(importedTable.involvingModuleInstances) == 0 { + panic("BUG: involvingModuleInstances must not be nil when it's imported") + } + importedTable.involvingModuleInstances = append(importedTable.involvingModuleInstances, m) + importedTable.involvingModuleInstancesMutex.Unlock() + case ExternTypeMemory: + expected := i.DescMem + importedMemory := importedModule.MemoryInstance + + if expected.Min > memoryBytesNumToPages(uint64(len(importedMemory.Buffer))) { + err = errorMinSizeMismatch(i, expected.Min, importedMemory.Min) + return + } + + if expected.Max < importedMemory.Max { + err = errorMaxSizeMismatch(i, expected.Max, importedMemory.Max) + return + } + m.MemoryInstance = importedMemory + m.Engine.ResolveImportedMemory(importedModule.Engine) + case ExternTypeGlobal: + expected := i.DescGlobal + importedGlobal := importedModule.Globals[imported.Index] + + if expected.Mutable != importedGlobal.Type.Mutable { + err = errorInvalidImport(i, fmt.Errorf("mutability mismatch: %t != %t", + expected.Mutable, importedGlobal.Type.Mutable)) + return + } + + if expected.ValType != importedGlobal.Type.ValType { + err = errorInvalidImport(i, fmt.Errorf("value type mismatch: %s != %s", + ValueTypeName(expected.ValType), 
ValueTypeName(importedGlobal.Type.ValType))) + return + } + m.Globals[i.IndexPerType] = importedGlobal + } + } + } + return +} + +func errorMinSizeMismatch(i *Import, expected, actual uint32) error { + return errorInvalidImport(i, fmt.Errorf("minimum size mismatch: %d > %d", expected, actual)) +} + +func errorNoMax(i *Import, expected uint32) error { + return errorInvalidImport(i, fmt.Errorf("maximum size mismatch: %d, but actual has no max", expected)) +} + +func errorMaxSizeMismatch(i *Import, expected, actual uint32) error { + return errorInvalidImport(i, fmt.Errorf("maximum size mismatch: %d < %d", expected, actual)) +} + +func errorInvalidImport(i *Import, err error) error { + return fmt.Errorf("import %s[%s.%s]: %w", ExternTypeName(i.Type), i.Module, i.Name, err) +} + +// executeConstExpressionI32 executes the ConstantExpression which returns ValueTypeI32. +// The validity of the expression is ensured when calling this function as this is only called +// during instantiation phrase, and the validation happens in compilation (validateConstExpression). +func executeConstExpressionI32(importedGlobals []*GlobalInstance, expr *ConstantExpression) (ret int32) { + switch expr.Opcode { + case OpcodeI32Const: + ret, _, _ = leb128.LoadInt32(expr.Data) + case OpcodeGlobalGet: + id, _, _ := leb128.LoadUint32(expr.Data) + g := importedGlobals[id] + ret = int32(g.Val) + } + return +} + +// initialize initializes the value of this global instance given the const expr and imported globals. +// funcRefResolver is called to get the actual funcref (engine specific) from the OpcodeRefFunc const expr. +// +// Global initialization constant expression can only reference the imported globals. 
+// See the note on https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#constant-expressions%E2%91%A0 +func (g *GlobalInstance) initialize(importedGlobals []*GlobalInstance, expr *ConstantExpression, funcRefResolver func(funcIndex Index) Reference) { + switch expr.Opcode { + case OpcodeI32Const: + // Treat constants as signed as their interpretation is not yet known per /RATIONALE.md + v, _, _ := leb128.LoadInt32(expr.Data) + g.Val = uint64(uint32(v)) + case OpcodeI64Const: + // Treat constants as signed as their interpretation is not yet known per /RATIONALE.md + v, _, _ := leb128.LoadInt64(expr.Data) + g.Val = uint64(v) + case OpcodeF32Const: + g.Val = uint64(binary.LittleEndian.Uint32(expr.Data)) + case OpcodeF64Const: + g.Val = binary.LittleEndian.Uint64(expr.Data) + case OpcodeGlobalGet: + id, _, _ := leb128.LoadUint32(expr.Data) + importedG := importedGlobals[id] + switch importedG.Type.ValType { + case ValueTypeI32: + g.Val = uint64(uint32(importedG.Val)) + case ValueTypeI64: + g.Val = importedG.Val + case ValueTypeF32: + g.Val = importedG.Val + case ValueTypeF64: + g.Val = importedG.Val + case ValueTypeV128: + g.Val, g.ValHi = importedG.Val, importedG.ValHi + case ValueTypeFuncref, ValueTypeExternref: + g.Val = importedG.Val + } + case OpcodeRefNull: + switch expr.Data[0] { + case ValueTypeExternref, ValueTypeFuncref: + g.Val = 0 // Reference types are opaque 64bit pointer at runtime. + } + case OpcodeRefFunc: + v, _, _ := leb128.LoadUint32(expr.Data) + g.Val = uint64(funcRefResolver(v)) + case OpcodeVecV128Const: + g.Val, g.ValHi = binary.LittleEndian.Uint64(expr.Data[0:8]), binary.LittleEndian.Uint64(expr.Data[8:16]) + } +} + +// String implements api.Global. 
+func (g *GlobalInstance) String() string { + switch g.Type.ValType { + case ValueTypeI32, ValueTypeI64: + return fmt.Sprintf("global(%d)", g.Val) + case ValueTypeF32: + return fmt.Sprintf("global(%f)", api.DecodeF32(g.Val)) + case ValueTypeF64: + return fmt.Sprintf("global(%f)", api.DecodeF64(g.Val)) + default: + panic(fmt.Errorf("BUG: unknown value type %X", g.Type.ValType)) + } +} + +func (g *GlobalInstance) Value() (uint64, uint64) { + if g.Me != nil { + return g.Me.GetGlobalValue(g.Index) + } + return g.Val, g.ValHi +} + +func (g *GlobalInstance) SetValue(lo, hi uint64) { + if g.Me != nil { + g.Me.SetGlobalValue(g.Index, lo, hi) + } else { + g.Val, g.ValHi = lo, hi + } +} + +func (s *Store) GetFunctionTypeIDs(ts []FunctionType) ([]FunctionTypeID, error) { + ret := make([]FunctionTypeID, len(ts)) + for i := range ts { + t := &ts[i] + inst, err := s.GetFunctionTypeID(t) + if err != nil { + return nil, err + } + ret[i] = inst + } + return ret, nil +} + +func (s *Store) GetFunctionTypeID(t *FunctionType) (FunctionTypeID, error) { + s.mux.RLock() + key := t.key() + id, ok := s.typeIDs[key] + s.mux.RUnlock() + if !ok { + s.mux.Lock() + defer s.mux.Unlock() + // Check again in case another goroutine has already added the type. + if id, ok = s.typeIDs[key]; ok { + return id, nil + } + l := len(s.typeIDs) + if uint32(l) >= s.functionMaxTypes { + return 0, fmt.Errorf("too many function types in a store") + } + id = FunctionTypeID(l) + s.typeIDs[key] = id + } + return id, nil +} + +// CloseWithExitCode implements the same method as documented on wazero.Runtime. +func (s *Store) CloseWithExitCode(ctx context.Context, exitCode uint32) (err error) { + s.mux.Lock() + defer s.mux.Unlock() + // Close modules in reverse initialization order. + for m := s.moduleList; m != nil; m = m.next { + // If closing this module errs, proceed anyway to close the others. + if e := m.closeWithExitCode(ctx, exitCode); e != nil && err == nil { + // TODO: use multiple errors handling in Go 1.20. 
+ err = e // first error + } + } + s.moduleList = nil + s.nameToModule = nil + s.nameToModuleCap = 0 + s.typeIDs = nil + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/store_module_list.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/store_module_list.go new file mode 100644 index 000000000..17c63e38e --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/store_module_list.go @@ -0,0 +1,97 @@ +package wasm + +import ( + "errors" + "fmt" + + "github.com/tetratelabs/wazero/api" +) + +// deleteModule makes the moduleName available for instantiation again. +func (s *Store) deleteModule(m *ModuleInstance) error { + s.mux.Lock() + defer s.mux.Unlock() + + // Remove this module name. + if m.prev != nil { + m.prev.next = m.next + } + if m.next != nil { + m.next.prev = m.prev + } + if s.moduleList == m { + s.moduleList = m.next + } + // Clear the m state so it does not enter any other branch + // on subsequent calls to deleteModule. + m.prev = nil + m.next = nil + + if m.ModuleName != "" { + delete(s.nameToModule, m.ModuleName) + + // Shrink the map if it's allocated more than twice the size of the list + newCap := len(s.nameToModule) + if newCap < nameToModuleShrinkThreshold { + newCap = nameToModuleShrinkThreshold + } + if newCap*2 <= s.nameToModuleCap { + nameToModule := make(map[string]*ModuleInstance, newCap) + for k, v := range s.nameToModule { + nameToModule[k] = v + } + s.nameToModule = nameToModule + s.nameToModuleCap = newCap + } + } + return nil +} + +// module returns the module of the given name or error if not in this store +func (s *Store) module(moduleName string) (*ModuleInstance, error) { + s.mux.RLock() + defer s.mux.RUnlock() + m, ok := s.nameToModule[moduleName] + if !ok { + return nil, fmt.Errorf("module[%s] not instantiated", moduleName) + } + return m, nil +} + +// registerModule registers a ModuleInstance into the store. 
+// This makes the ModuleInstance visible for import if it's not anonymous, and ensures it is closed when the store is. +func (s *Store) registerModule(m *ModuleInstance) error { + s.mux.Lock() + defer s.mux.Unlock() + + if s.nameToModule == nil { + return errors.New("already closed") + } + + if m.ModuleName != "" { + if _, ok := s.nameToModule[m.ModuleName]; ok { + return fmt.Errorf("module[%s] has already been instantiated", m.ModuleName) + } + s.nameToModule[m.ModuleName] = m + if len(s.nameToModule) > s.nameToModuleCap { + s.nameToModuleCap = len(s.nameToModule) + } + } + + // Add the newest node to the moduleNamesList as the head. + m.next = s.moduleList + if m.next != nil { + m.next.prev = m + } + s.moduleList = m + return nil +} + +// Module implements wazero.Runtime Module +func (s *Store) Module(moduleName string) api.Module { + m, err := s.module(moduleName) + if err != nil { + return nil + } + return m +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/table.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/table.go new file mode 100644 index 000000000..2123693c6 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/table.go @@ -0,0 +1,339 @@ +package wasm + +import ( + "fmt" + "math" + "sync" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/leb128" +) + +// Table describes the limits of elements and its type in a table. +type Table struct { + Min uint32 + Max *uint32 + Type RefType +} + +// RefType is either RefTypeFuncref or RefTypeExternref as of WebAssembly core 2.0. +type RefType = byte + +const ( + // RefTypeFuncref represents a reference to a function. + RefTypeFuncref = ValueTypeFuncref + // RefTypeExternref represents a reference to a host object, which is not currently supported in wazero. 
+ RefTypeExternref = ValueTypeExternref +) + +func RefTypeName(t RefType) (ret string) { + switch t { + case RefTypeFuncref: + ret = "funcref" + case RefTypeExternref: + ret = "externref" + default: + ret = fmt.Sprintf("unknown(0x%x)", t) + } + return +} + +// ElementMode represents a mode of element segment which is either active, passive or declarative. +// +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/syntax/modules.html#element-segments +type ElementMode = byte + +const ( + // ElementModeActive is the mode which requires the runtime to initialize table with the contents in .Init field combined with OffsetExpr. + ElementModeActive ElementMode = iota + // ElementModePassive is the mode which doesn't require the runtime to initialize table, and only used with OpcodeTableInitName. + ElementModePassive + // ElementModeDeclarative is introduced in reference-types proposal which can be used to declare function indexes used by OpcodeRefFunc. + ElementModeDeclarative +) + +// ElementSegment are initialization instructions for a TableInstance +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-elem +type ElementSegment struct { + // OffsetExpr returns the table element offset to apply to Init indices. + // Note: This can be validated prior to instantiation unless it includes OpcodeGlobalGet (an imported global). + OffsetExpr ConstantExpression + + // TableIndex is the table's index to which this element segment is applied. + // Note: This is used if and only if the Mode is active. + TableIndex Index + + // Followings are set/used regardless of the Mode. + + // Init indices are (nullable) table elements where each index is the function index by which the module initialize the table. + Init []Index + + // Type holds the type of this element segment, which is the RefType in WebAssembly 2.0. + Type RefType + + // Mode is the mode of this element segment. 
+ Mode ElementMode +} + +const ( + // ElementInitNullReference represents the null reference in ElementSegment's Init. + // In Wasm spec, an init item represents either Function's Index or null reference, + // and in wazero, we limit the maximum number of functions available in a module to + // MaximumFunctionIndex. Therefore, it is safe to use 1 << 31 to represent the null + // reference in Element segments. + ElementInitNullReference Index = 1 << 31 + // elementInitImportedGlobalReferenceType represents an init item which is resolved via an imported global constexpr. + // The actual function reference stored at Global is only known at instantiation-time, so we set this flag + // to items of ElementSegment.Init at binary decoding, and unwrap this flag at instantiation to resolve the value. + // + // This might collide the init element resolved via ref.func instruction which is resolved with the func index at decoding, + // but in practice, that is not allowed in wazero thanks to our limit MaximumFunctionIndex. Thus, it is safe to set this flag + // in init element to indicate as such. + elementInitImportedGlobalReferenceType Index = 1 << 30 +) + +// unwrapElementInitGlobalReference takes an item of the init vector of an ElementSegment, +// and returns the Global index if it is supposed to get generated from a global. +// ok is true if the given init item is as such. +func unwrapElementInitGlobalReference(init Index) (_ Index, ok bool) { + if init&elementInitImportedGlobalReferenceType == elementInitImportedGlobalReferenceType { + return init &^ elementInitImportedGlobalReferenceType, true + } + return init, false +} + +// WrapGlobalIndexAsElementInit wraps the given index as an init item which is resolved via an imported global value. +// See the comments on elementInitImportedGlobalReferenceType for more details. 
+func WrapGlobalIndexAsElementInit(init Index) Index { + return init | elementInitImportedGlobalReferenceType +} + +// IsActive returns true if the element segment is "active" mode which requires the runtime to initialize table +// with the contents in .Init field. +func (e *ElementSegment) IsActive() bool { + return e.Mode == ElementModeActive +} + +// TableInstance represents a table of (RefTypeFuncref) elements in a module. +// +// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#table-instances%E2%91%A0 +type TableInstance struct { + // References holds references whose type is either RefTypeFuncref or RefTypeExternref (unsupported). + // + // Currently, only function references are supported. + References []Reference + + // Min is the minimum (function) elements in this table and cannot grow to accommodate ElementSegment. + Min uint32 + + // Max if present is the maximum (function) elements in this table, or nil if unbounded. + Max *uint32 + + // Type is either RefTypeFuncref or RefTypeExternRef. + Type RefType + + // The following is only used when the table is exported. + + // involvingModuleInstances is a set of module instances which are involved in the table instance. + // This is critical for safety purpose because once a table is imported, it can hold any reference to + // any function in the owner and importing module instances. Therefore, these module instance, + // transitively the compiled modules, must be alive as long as the table instance is alive. + involvingModuleInstances []*ModuleInstance + // involvingModuleInstancesMutex is a mutex to protect involvingModuleInstances. + involvingModuleInstancesMutex sync.RWMutex +} + +// ElementInstance represents an element instance in a module. +// +// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/runtime.html#element-instances +type ElementInstance = []Reference + +// Reference is the runtime representation of RefType which is either RefTypeFuncref or RefTypeExternref. 
+type Reference = uintptr + +// validateTable ensures any ElementSegment is valid. This caches results via Module.validatedActiveElementSegments. +// Note: limitsType are validated by decoders, so not re-validated here. +func (m *Module) validateTable(enabledFeatures api.CoreFeatures, tables []Table, maximumTableIndex uint32) error { + if len(tables) > int(maximumTableIndex) { + return fmt.Errorf("too many tables in a module: %d given with limit %d", len(tables), maximumTableIndex) + } + + importedTableCount := m.ImportTableCount + + // Create bounds checks as these can err prior to instantiation + funcCount := m.ImportFunctionCount + m.SectionElementCount(SectionIDFunction) + globalsCount := m.ImportGlobalCount + m.SectionElementCount(SectionIDGlobal) + + // Now, we have to figure out which table elements can be resolved before instantiation and also fail early if there + // are any imported globals that are known to be invalid by their declarations. + for i := range m.ElementSection { + elem := &m.ElementSection[i] + idx := Index(i) + initCount := uint32(len(elem.Init)) + + // Any offset applied is to the element, not the function index: validate here if the funcidx is sound. 
+ for ei, init := range elem.Init { + if init == ElementInitNullReference { + continue + } + index, ok := unwrapElementInitGlobalReference(init) + if ok { + if index >= globalsCount { + return fmt.Errorf("%s[%d].init[%d] global index %d out of range", SectionIDName(SectionIDElement), idx, ei, index) + } + } else { + if elem.Type == RefTypeExternref { + return fmt.Errorf("%s[%d].init[%d] must be ref.null but was %d", SectionIDName(SectionIDElement), idx, ei, init) + } + if index >= funcCount { + return fmt.Errorf("%s[%d].init[%d] func index %d out of range", SectionIDName(SectionIDElement), idx, ei, index) + } + } + } + + if elem.IsActive() { + if len(tables) <= int(elem.TableIndex) { + return fmt.Errorf("unknown table %d as active element target", elem.TableIndex) + } + + t := tables[elem.TableIndex] + if t.Type != elem.Type { + return fmt.Errorf("element type mismatch: table has %s but element has %s", + RefTypeName(t.Type), RefTypeName(elem.Type), + ) + } + + // global.get needs to be discovered during initialization + oc := elem.OffsetExpr.Opcode + if oc == OpcodeGlobalGet { + globalIdx, _, err := leb128.LoadUint32(elem.OffsetExpr.Data) + if err != nil { + return fmt.Errorf("%s[%d] couldn't read global.get parameter: %w", SectionIDName(SectionIDElement), idx, err) + } else if err = m.verifyImportGlobalI32(SectionIDElement, idx, globalIdx); err != nil { + return err + } + } else if oc == OpcodeI32Const { + // Per https://github.com/WebAssembly/spec/blob/wg-1.0/test/core/elem.wast#L117 we must pass if imported + // table has set its min=0. Per https://github.com/WebAssembly/spec/blob/wg-1.0/test/core/elem.wast#L142, we + // have to do fail if module-defined min=0. 
+ if !enabledFeatures.IsEnabled(api.CoreFeatureReferenceTypes) && elem.TableIndex >= importedTableCount { + // Treat constants as signed as their interpretation is not yet known per /RATIONALE.md + o, _, err := leb128.LoadInt32(elem.OffsetExpr.Data) + if err != nil { + return fmt.Errorf("%s[%d] couldn't read i32.const parameter: %w", SectionIDName(SectionIDElement), idx, err) + } + offset := Index(o) + if err = checkSegmentBounds(t.Min, uint64(initCount)+uint64(offset), idx); err != nil { + return err + } + } + } else { + return fmt.Errorf("%s[%d] has an invalid const expression: %s", SectionIDName(SectionIDElement), idx, InstructionName(oc)) + } + } + } + return nil +} + +// buildTable returns TableInstances if the module defines or imports a table. +// - importedTables: returned as `tables` unmodified. +// - importedGlobals: include all instantiated, imported globals. +// +// If the result `init` is non-nil, it is the `tableInit` parameter of Engine.NewModuleEngine. +// +// Note: An error is only possible when an ElementSegment.OffsetExpr is out of range of the TableInstance.Min. +func (m *ModuleInstance) buildTables(module *Module, skipBoundCheck bool) (err error) { + idx := module.ImportTableCount + for i := range module.TableSection { + tsec := &module.TableSection[i] + // The module defining the table is the one that sets its Min/Max etc. + m.Tables[idx] = &TableInstance{ + References: make([]Reference, tsec.Min), Min: tsec.Min, Max: tsec.Max, + Type: tsec.Type, + } + idx++ + } + + if !skipBoundCheck { + for elemI := range module.ElementSection { // Do not loop over the value since elementSegments is a slice of value. + elem := &module.ElementSection[elemI] + table := m.Tables[elem.TableIndex] + var offset uint32 + if elem.OffsetExpr.Opcode == OpcodeGlobalGet { + // Ignore error as it's already validated. 
+ globalIdx, _, _ := leb128.LoadUint32(elem.OffsetExpr.Data) + global := m.Globals[globalIdx] + offset = uint32(global.Val) + } else { // i32.const + // Ignore error as it's already validated. + o, _, _ := leb128.LoadInt32(elem.OffsetExpr.Data) + offset = uint32(o) + } + + // Check to see if we are out-of-bounds + initCount := uint64(len(elem.Init)) + if err = checkSegmentBounds(table.Min, uint64(offset)+initCount, Index(elemI)); err != nil { + return + } + } + } + return +} + +// checkSegmentBounds fails if the capacity needed for an ElementSegment.Init is larger than limitsType.Min +// +// WebAssembly 1.0 (20191205) doesn't forbid growing to accommodate element segments, and spectests are inconsistent. +// For example, the spectests enforce elements within Table limitsType.Min, but ignore Import.DescTable min. What this +// means is we have to delay offset checks on imported tables until we link to them. +// e.g. https://github.com/WebAssembly/spec/blob/wg-1.0/test/core/elem.wast#L117 wants pass on min=0 for import +// e.g. 
https://github.com/WebAssembly/spec/blob/wg-1.0/test/core/elem.wast#L142 wants fail on min=0 module-defined +func checkSegmentBounds(min uint32, requireMin uint64, idx Index) error { // uint64 in case offset was set to -1 + if requireMin > uint64(min) { + return fmt.Errorf("%s[%d].init exceeds min table size", SectionIDName(SectionIDElement), idx) + } + return nil +} + +func (m *Module) verifyImportGlobalI32(sectionID SectionID, sectionIdx Index, idx uint32) error { + ig := uint32(math.MaxUint32) // +1 == 0 + for i := range m.ImportSection { + imp := &m.ImportSection[i] + if imp.Type == ExternTypeGlobal { + ig++ + if ig == idx { + if imp.DescGlobal.ValType != ValueTypeI32 { + return fmt.Errorf("%s[%d] (global.get %d): import[%d].global.ValType != i32", SectionIDName(sectionID), sectionIdx, idx, i) + } + return nil + } + } + } + return fmt.Errorf("%s[%d] (global.get %d): out of range of imported globals", SectionIDName(sectionID), sectionIdx, idx) +} + +// Grow appends the `initialRef` by `delta` times into the References slice. +// Returns -1 if the operation is not valid, otherwise the old length of the table. +// +// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/instructions.html#xref-syntax-instructions-syntax-instr-table-mathsf-table-grow-x +func (t *TableInstance) Grow(delta uint32, initialRef Reference) (currentLen uint32) { + currentLen = uint32(len(t.References)) + if delta == 0 { + return + } + + if newLen := int64(currentLen) + int64(delta); // adding as 64bit ints to avoid overflow. + newLen >= math.MaxUint32 || (t.Max != nil && newLen > int64(*t.Max)) { + return 0xffffffff // = -1 in signed 32-bit integer. + } + t.References = append(t.References, make([]uintptr, delta)...) + + // Uses the copy trick for faster filling the new region with the initial value. 
+ // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d + newRegion := t.References[currentLen:] + newRegion[0] = initialRef + for i := 1; i < len(newRegion); i *= 2 { + copy(newRegion[i:], newRegion[:i]) + } + return +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/debug.go b/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/debug.go new file mode 100644 index 000000000..ff0e0cccc --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/debug.go @@ -0,0 +1,170 @@ +// Package wasmdebug contains utilities used to give consistent search keys between stack traces and error messages. +// Note: This is named wasmdebug to avoid conflicts with the normal go module. +// Note: This only imports "api" as importing "wasm" would create a cyclic dependency. +package wasmdebug + +import ( + "fmt" + "runtime" + "runtime/debug" + "strconv" + "strings" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/internal/wasmruntime" + "github.com/tetratelabs/wazero/sys" +) + +// FuncName returns the naming convention of "moduleName.funcName". +// +// - moduleName is the possibly empty name the module was instantiated with. +// - funcName is the name in the Custom Name section. +// - funcIdx is the position in the function index, prefixed with +// imported functions. +// +// Note: "moduleName.$funcIdx" is used when the funcName is empty, as commonly +// the case in TinyGo. +func FuncName(moduleName, funcName string, funcIdx uint32) string { + var ret strings.Builder + + // Start module.function + ret.WriteString(moduleName) + ret.WriteByte('.') + if funcName == "" { + ret.WriteByte('$') + ret.WriteString(strconv.Itoa(int(funcIdx))) + } else { + ret.WriteString(funcName) + } + + return ret.String() +} + +// signature returns a formatted signature similar to how it is defined in Go. 
+// +// * paramTypes should be from wasm.FunctionType +// * resultTypes should be from wasm.FunctionType +// TODO: add paramNames +func signature(funcName string, paramTypes []api.ValueType, resultTypes []api.ValueType) string { + var ret strings.Builder + ret.WriteString(funcName) + + // Start params + ret.WriteByte('(') + paramCount := len(paramTypes) + switch paramCount { + case 0: + case 1: + ret.WriteString(api.ValueTypeName(paramTypes[0])) + default: + ret.WriteString(api.ValueTypeName(paramTypes[0])) + for _, vt := range paramTypes[1:] { + ret.WriteByte(',') + ret.WriteString(api.ValueTypeName(vt)) + } + } + ret.WriteByte(')') + + // Start results + resultCount := len(resultTypes) + switch resultCount { + case 0: + case 1: + ret.WriteByte(' ') + ret.WriteString(api.ValueTypeName(resultTypes[0])) + default: // As this is used for errors, don't panic if there are multiple returns, even if that's invalid! + ret.WriteByte(' ') + ret.WriteByte('(') + ret.WriteString(api.ValueTypeName(resultTypes[0])) + for _, vt := range resultTypes[1:] { + ret.WriteByte(',') + ret.WriteString(api.ValueTypeName(vt)) + } + ret.WriteByte(')') + } + + return ret.String() +} + +// ErrorBuilder helps build consistent errors, particularly adding a WASM stack trace. +// +// AddFrame should be called beginning at the frame that panicked until no more frames exist. Once done, call Format. +type ErrorBuilder interface { + // AddFrame adds the next frame. + // + // * funcName should be from FuncName + // * paramTypes should be from wasm.FunctionType + // * resultTypes should be from wasm.FunctionType + // * sources is the source code information for this frame and can be empty. + // + // Note: paramTypes and resultTypes are present because signature misunderstanding, mismatch or overflow are common. + AddFrame(funcName string, paramTypes, resultTypes []api.ValueType, sources []string) + + // FromRecovered returns an error with the wasm stack trace appended to it. 
+ FromRecovered(recovered interface{}) error +} + +func NewErrorBuilder() ErrorBuilder { + return &stackTrace{} +} + +type stackTrace struct { + // frameCount is the number of stack frame currently pushed into lines. + frameCount int + // lines contains the stack trace and possibly the inlined source code information. + lines []string +} + +// GoRuntimeErrorTracePrefix is the prefix coming before the Go runtime stack trace included in the face of runtime.Error. +// This is exported for testing purpose. +const GoRuntimeErrorTracePrefix = "Go runtime stack trace:" + +func (s *stackTrace) FromRecovered(recovered interface{}) error { + if false { + debug.PrintStack() + } + + if exitErr, ok := recovered.(*sys.ExitError); ok { // Don't wrap an exit error! + return exitErr + } + + stack := strings.Join(s.lines, "\n\t") + + // If the error was internal, don't mention it was recovered. + if wasmErr, ok := recovered.(*wasmruntime.Error); ok { + return fmt.Errorf("wasm error: %w\nwasm stack trace:\n\t%s", wasmErr, stack) + } + + // If we have a runtime.Error, something severe happened which should include the stack trace. This could be + // a nil pointer from wazero or a user-defined function from HostModuleBuilder. + if runtimeErr, ok := recovered.(runtime.Error); ok { + return fmt.Errorf("%w (recovered by wazero)\nwasm stack trace:\n\t%s\n\n%s\n%s", + runtimeErr, stack, GoRuntimeErrorTracePrefix, debug.Stack()) + } + + // At this point we expect the error was from a function defined by HostModuleBuilder that intentionally called panic. + if runtimeErr, ok := recovered.(error); ok { // e.g. panic(errors.New("whoops")) + return fmt.Errorf("%w (recovered by wazero)\nwasm stack trace:\n\t%s", runtimeErr, stack) + } else { // e.g. panic("whoops") + return fmt.Errorf("%v (recovered by wazero)\nwasm stack trace:\n\t%s", recovered, stack) + } +} + +// MaxFrames is the maximum number of frames to include in the stack trace. 
+const MaxFrames = 30 + +// AddFrame implements ErrorBuilder.AddFrame +func (s *stackTrace) AddFrame(funcName string, paramTypes, resultTypes []api.ValueType, sources []string) { + if s.frameCount == MaxFrames { + return + } + s.frameCount++ + sig := signature(funcName, paramTypes, resultTypes) + s.lines = append(s.lines, sig) + for _, source := range sources { + s.lines = append(s.lines, "\t"+source) + } + if s.frameCount == MaxFrames { + s.lines = append(s.lines, "... maybe followed by omitted frames") + } +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/dwarf.go b/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/dwarf.go new file mode 100644 index 000000000..3b0d3a7a6 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasmdebug/dwarf.go @@ -0,0 +1,226 @@ +package wasmdebug + +import ( + "debug/dwarf" + "errors" + "fmt" + "io" + "sort" + "strings" + "sync" +) + +// DWARFLines is used to retrieve source code line information from the DWARF data. +type DWARFLines struct { + // d is created by DWARF custom sections. + d *dwarf.Data + // linesPerEntry maps dwarf.Offset for dwarf.Entry to the list of lines contained by the entry. + // The value is sorted in the increasing order by the address. + linesPerEntry map[dwarf.Offset][]line + mux sync.Mutex +} + +type line struct { + addr uint64 + pos dwarf.LineReaderPos +} + +// NewDWARFLines returns DWARFLines for the given *dwarf.Data. +func NewDWARFLines(d *dwarf.Data) *DWARFLines { + if d == nil { + return nil + } + return &DWARFLines{d: d, linesPerEntry: map[dwarf.Offset][]line{}} +} + +// isTombstoneAddr returns true if the given address is invalid a.k.a tombstone address which was made no longer valid +// by linker. According to the DWARF spec[1], the value is encoded as 0xffffffff for Wasm (as 32-bit target), +// but some tools encode it either in -1, -2 [2] or 1<<32 (This might not be by tools, but by debug/dwarf package's bug). 
+// +// [1] https://dwarfstd.org/issues/200609.1.html +// [2] https://github.com/WebAssembly/binaryen/blob/97178d08d4a20d2a5e3a6be813fc6a7079ef86e1/src/wasm/wasm-debug.cpp#L651-L660 +// [3] https://reviews.llvm.org/D81784 +func isTombstoneAddr(addr uint64) bool { + addr32 := int32(addr) + return addr32 == -1 || addr32 == -2 || + addr32 == 0 // This covers 1 <<32. +} + +// Line returns the line information for the given instructionOffset which is an offset in +// the code section of the original Wasm binary. Returns empty string if the info is not found. +func (d *DWARFLines) Line(instructionOffset uint64) (ret []string) { + if d == nil { + return + } + + // DWARFLines is created per Wasm binary, so there's a possibility that multiple instances + // created from a same binary face runtime error at the same time, and that results in + // concurrent access to this function. + d.mux.Lock() + defer d.mux.Unlock() + + r := d.d.Reader() + + var inlinedRoutines []*dwarf.Entry + var cu *dwarf.Entry + var inlinedDone bool +entry: + for { + ent, err := r.Next() + if err != nil || ent == nil { + break + } + + // If we already found the compilation unit and relevant inlined routines, we can stop searching entries. + if cu != nil && inlinedDone { + break + } + + switch ent.Tag { + case dwarf.TagCompileUnit, dwarf.TagInlinedSubroutine: + default: + // Only CompileUnit and InlinedSubroutines are relevant. + continue + } + + // Check if the entry spans the range which contains the target instruction. + ranges, err := d.d.Ranges(ent) + if err != nil { + continue + } + for _, pcs := range ranges { + start, end := pcs[0], pcs[1] + if isTombstoneAddr(start) || isTombstoneAddr(end) { + continue + } + if start <= instructionOffset && instructionOffset < end { + switch ent.Tag { + case dwarf.TagCompileUnit: + cu = ent + case dwarf.TagInlinedSubroutine: + inlinedRoutines = append(inlinedRoutines, ent) + // Search inlined subroutines until all the children. 
+ inlinedDone = !ent.Children + // Not that "children" in the DWARF spec is defined as the next entry to this entry. + // See "2.3 Relationship of Debugging Information Entries" in https://dwarfstd.org/doc/DWARF4.pdf + } + continue entry + } + } + } + + // If the relevant compilation unit is not found, nothing we can do with this DWARF info. + if cu == nil { + return + } + + lineReader, err := d.d.LineReader(cu) + if err != nil || lineReader == nil { + return + } + var lines []line + var ok bool + var le dwarf.LineEntry + // Get the lines inside the entry. + if lines, ok = d.linesPerEntry[cu.Offset]; !ok { + // If not found, we create the list of lines by reading all the LineEntries in the Entry. + // + // Note that the dwarf.LineEntry.SeekPC API shouldn't be used because the Go's dwarf package assumes that + // all the line entries in an Entry are sorted in increasing order which *might not* be true + // for some languages. Such order requirement is not a part of DWARF specification, + // and in fact Zig language tends to emit interleaved line information. + // + // Thus, here we read all line entries here, and sort them in the increasing order wrt addresses. + for { + pos := lineReader.Tell() + err = lineReader.Next(&le) + if errors.Is(err, io.EOF) { + break + } else if err != nil { + return + } + // TODO: Maybe we should ignore tombstone addresses by using isTombstoneAddr, + // but not sure if that would be an issue in practice. + lines = append(lines, line{addr: le.Address, pos: pos}) + } + sort.Slice(lines, func(i, j int) bool { return lines[i].addr < lines[j].addr }) + d.linesPerEntry[cu.Offset] = lines // Caches for the future inquiries for the same Entry. + } + + // Now we have the lines for this entry. We can find the corresponding source line for instructionOffset + // via binary search on the list. 
+ n := len(lines) + index := sort.Search(n, func(i int) bool { return lines[i].addr >= instructionOffset }) + + if index == n { // This case the address is not found. See the doc sort.Search. + return + } + + ln := lines[index] + if ln.addr != instructionOffset { + // If the address doesn't match exactly, the previous entry is the one that contains the instruction. + // That can happen anytime as the DWARF spec allows it, and other tools can handle it in this way conventionally + // https://github.com/gimli-rs/addr2line/blob/3a2dbaf84551a06a429f26e9c96071bb409b371f/src/lib.rs#L236-L242 + // https://github.com/kateinoigakukun/wasminspect/blob/f29f052f1b03104da9f702508ac0c1bbc3530ae4/crates/debugger/src/dwarf/mod.rs#L453-L459 + if index-1 < 0 { + return + } + ln = lines[index-1] + } + + // Advance the line reader for the found position. + lineReader.Seek(ln.pos) + err = lineReader.Next(&le) + + if err != nil { + // If we reach this block, that means there's a bug in the []line creation logic above. + panic("BUG: stored dwarf.LineReaderPos is invalid") + } + + // In the inlined case, the line info is the innermost inlined function call. + inlined := len(inlinedRoutines) != 0 + prefix := fmt.Sprintf("%#x: ", instructionOffset) + ret = append(ret, formatLine(prefix, le.File.Name, int64(le.Line), int64(le.Column), inlined)) + + if inlined { + prefix = strings.Repeat(" ", len(prefix)) + files := lineReader.Files() + // inlinedRoutines contain the inlined call information in the reverse order (children is higher than parent), + // so we traverse the reverse order and emit the inlined calls. + for i := len(inlinedRoutines) - 1; i >= 0; i-- { + inlined := inlinedRoutines[i] + fileIndex, ok := inlined.Val(dwarf.AttrCallFile).(int64) + if !ok { + return + } else if fileIndex >= int64(len(files)) { + // This in theory shouldn't happen according to the spec, but guard against ill-formed DWARF info. 
+ return + } + fileName := files[fileIndex] + line, _ := inlined.Val(dwarf.AttrCallLine).(int64) + col, _ := inlined.Val(dwarf.AttrCallColumn).(int64) + ret = append(ret, formatLine(prefix, fileName.Name, line, col, + // Last one is the origin of the inlined function calls. + i != 0)) + } + } + return +} + +func formatLine(prefix, fileName string, line, col int64, inlined bool) string { + builder := strings.Builder{} + builder.WriteString(prefix) + builder.WriteString(fileName) + + if line != 0 { + builder.WriteString(fmt.Sprintf(":%d", line)) + if col != 0 { + builder.WriteString(fmt.Sprintf(":%d", col)) + } + } + + if inlined { + builder.WriteString(" (inlined)") + } + return builder.String() +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasmruntime/errors.go b/vendor/github.com/tetratelabs/wazero/internal/wasmruntime/errors.go new file mode 100644 index 000000000..556e5de82 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/internal/wasmruntime/errors.go @@ -0,0 +1,50 @@ +// Package wasmruntime contains internal symbols shared between modules for error handling. +// Note: This is named wasmruntime to avoid conflicts with the normal go module. +// Note: This only imports "api" as importing "wasm" would create a cyclic dependency. +package wasmruntime + +var ( + // ErrRuntimeStackOverflow indicates that there are too many function calls, + // and the Engine terminated the execution. + ErrRuntimeStackOverflow = New("stack overflow") + // ErrRuntimeInvalidConversionToInteger indicates the Wasm function tries to + // convert NaN floating point value to integers during trunc variant instructions. + ErrRuntimeInvalidConversionToInteger = New("invalid conversion to integer") + // ErrRuntimeIntegerOverflow indicates that an integer arithmetic resulted in + // overflow value. For example, when the program tried to truncate a float value + // which doesn't fit in the range of target integer. 
+ ErrRuntimeIntegerOverflow = New("integer overflow") + // ErrRuntimeIntegerDivideByZero indicates that an integer div or rem instructions + // was executed with 0 as the divisor. + ErrRuntimeIntegerDivideByZero = New("integer divide by zero") + // ErrRuntimeUnreachable means "unreachable" instruction was executed by the program. + ErrRuntimeUnreachable = New("unreachable") + // ErrRuntimeOutOfBoundsMemoryAccess indicates that the program tried to access the + // region beyond the linear memory. + ErrRuntimeOutOfBoundsMemoryAccess = New("out of bounds memory access") + // ErrRuntimeInvalidTableAccess means either offset to the table was out of bounds of table, or + // the target element in the table was uninitialized during call_indirect instruction. + ErrRuntimeInvalidTableAccess = New("invalid table access") + // ErrRuntimeIndirectCallTypeMismatch indicates that the type check failed during call_indirect. + ErrRuntimeIndirectCallTypeMismatch = New("indirect call type mismatch") + // ErrRuntimeUnalignedAtomic indicates that an atomic operation was made with incorrect memory alignment. + ErrRuntimeUnalignedAtomic = New("unaligned atomic") + // ErrRuntimeExpectedSharedMemory indicates that an operation was made against unshared memory when not allowed. + ErrRuntimeExpectedSharedMemory = New("expected shared memory") + // ErrRuntimeTooManyWaiters indicates that atomic.wait was called with too many waiters. + ErrRuntimeTooManyWaiters = New("too many waiters") +) + +// Error is returned by a wasm.Engine during the execution of Wasm functions, and they indicate that the Wasm runtime +// state is unrecoverable. 
+type Error struct { + s string +} + +func New(text string) *Error { + return &Error{s: text} +} + +func (e *Error) Error() string { + return e.s +} diff --git a/vendor/github.com/tetratelabs/wazero/netlify.toml b/vendor/github.com/tetratelabs/wazero/netlify.toml new file mode 100644 index 000000000..1ba638bfe --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/netlify.toml @@ -0,0 +1,15 @@ +[build] + base = "site" + publish = "public" + +[build.environment] + HUGO_VERSION = "0.115.2" + +[context.production] + command = "git submodule update --init && hugo --gc --minify" + +[context.deploy-preview] + command = "git submodule update --init && hugo --gc --minify -b $DEPLOY_PRIME_URL" + +[context.branch-deploy] + command = "git submodule update --init && hugo --gc --minify -b $DEPLOY_PRIME_URL" diff --git a/vendor/github.com/tetratelabs/wazero/runtime.go b/vendor/github.com/tetratelabs/wazero/runtime.go new file mode 100644 index 000000000..d1f0a1a31 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/runtime.go @@ -0,0 +1,374 @@ +package wazero + +import ( + "context" + "fmt" + "sync/atomic" + + "github.com/tetratelabs/wazero/api" + experimentalapi "github.com/tetratelabs/wazero/experimental" + "github.com/tetratelabs/wazero/internal/expctxkeys" + internalsock "github.com/tetratelabs/wazero/internal/sock" + internalsys "github.com/tetratelabs/wazero/internal/sys" + "github.com/tetratelabs/wazero/internal/wasm" + binaryformat "github.com/tetratelabs/wazero/internal/wasm/binary" + "github.com/tetratelabs/wazero/sys" +) + +// Runtime allows embedding of WebAssembly modules. +// +// The below is an example of basic initialization: +// +// ctx := context.Background() +// r := wazero.NewRuntime(ctx) +// defer r.Close(ctx) // This closes everything this Runtime created. +// +// mod, _ := r.Instantiate(ctx, wasm) +// +// # Notes +// +// - This is an interface for decoupling, not third-party implementations. +// All implementations are in wazero. 
+// - Closing this closes any CompiledModule or Module it instantiated. +type Runtime interface { + // Instantiate instantiates a module from the WebAssembly binary (%.wasm) + // with default configuration, which notably calls the "_start" function, + // if it exists. + // + // Here's an example: + // ctx := context.Background() + // r := wazero.NewRuntime(ctx) + // defer r.Close(ctx) // This closes everything this Runtime created. + // + // mod, _ := r.Instantiate(ctx, wasm) + // + // # Notes + // + // - See notes on InstantiateModule for error scenarios. + // - See InstantiateWithConfig for configuration overrides. + Instantiate(ctx context.Context, source []byte) (api.Module, error) + + // InstantiateWithConfig instantiates a module from the WebAssembly binary + // (%.wasm) or errs for reasons including exit or validation. + // + // Here's an example: + // ctx := context.Background() + // r := wazero.NewRuntime(ctx) + // defer r.Close(ctx) // This closes everything this Runtime created. + // + // mod, _ := r.InstantiateWithConfig(ctx, wasm, + // wazero.NewModuleConfig().WithName("rotate")) + // + // # Notes + // + // - See notes on InstantiateModule for error scenarios. + // - If you aren't overriding defaults, use Instantiate. + // - This is a convenience utility that chains CompileModule with + // InstantiateModule. To instantiate the same source multiple times, + // use CompileModule as InstantiateModule avoids redundant decoding + // and/or compilation. + InstantiateWithConfig(ctx context.Context, source []byte, config ModuleConfig) (api.Module, error) + + // NewHostModuleBuilder lets you create modules out of functions defined in Go. + // + // Below defines and instantiates a module named "env" with one function: + // + // ctx := context.Background() + // hello := func() { + // fmt.Fprintln(stdout, "hello!") + // } + // _, err := r.NewHostModuleBuilder("env"). + // NewFunctionBuilder().WithFunc(hello).Export("hello"). 
+ // Instantiate(ctx, r) + // + // Note: empty `moduleName` is not allowed. + NewHostModuleBuilder(moduleName string) HostModuleBuilder + + // CompileModule decodes the WebAssembly binary (%.wasm) or errs if invalid. + // Any pre-compilation done after decoding wasm is dependent on RuntimeConfig. + // + // There are two main reasons to use CompileModule instead of Instantiate: + // - Improve performance when the same module is instantiated multiple times under different names + // - Reduce the amount of errors that can occur during InstantiateModule. + // + // # Notes + // + // - The resulting module name defaults to what was binary from the custom name section. + // - Any pre-compilation done after decoding the source is dependent on RuntimeConfig. + // + // See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#name-section%E2%91%A0 + CompileModule(ctx context.Context, binary []byte) (CompiledModule, error) + + // InstantiateModule instantiates the module or errs for reasons including + // exit or validation. + // + // Here's an example: + // mod, _ := n.InstantiateModule(ctx, compiled, wazero.NewModuleConfig(). + // WithName("prod")) + // + // # Errors + // + // While CompiledModule is pre-validated, there are a few situations which + // can cause an error: + // - The module name is already in use. + // - The module has a table element initializer that resolves to an index + // outside the Table minimum size. + // - The module has a start function, and it failed to execute. + // - The module was compiled to WASI and exited with a non-zero exit + // code, you'll receive a sys.ExitError. + // - RuntimeConfig.WithCloseOnContextDone was enabled and a context + // cancellation or deadline triggered before a start function returned. + InstantiateModule(ctx context.Context, compiled CompiledModule, config ModuleConfig) (api.Module, error) + + // CloseWithExitCode closes all the modules that have been initialized in this Runtime with the provided exit code. 
+ // An error is returned if any module returns an error when closed. + // + // Here's an example: + // ctx := context.Background() + // r := wazero.NewRuntime(ctx) + // defer r.CloseWithExitCode(ctx, 2) // This closes everything this Runtime created. + // + // // Everything below here can be closed, but will anyway due to above. + // _, _ = wasi_snapshot_preview1.InstantiateSnapshotPreview1(ctx, r) + // mod, _ := r.Instantiate(ctx, wasm) + CloseWithExitCode(ctx context.Context, exitCode uint32) error + + // Module returns an instantiated module in this runtime or nil if there aren't any. + Module(moduleName string) api.Module + + // Closer closes all compiled code by delegating to CloseWithExitCode with an exit code of zero. + api.Closer +} + +// NewRuntime returns a runtime with a configuration assigned by NewRuntimeConfig. +func NewRuntime(ctx context.Context) Runtime { + return NewRuntimeWithConfig(ctx, NewRuntimeConfig()) +} + +// NewRuntimeWithConfig returns a runtime with the given configuration. +func NewRuntimeWithConfig(ctx context.Context, rConfig RuntimeConfig) Runtime { + config := rConfig.(*runtimeConfig) + var engine wasm.Engine + var cacheImpl *cache + if c := config.cache; c != nil { + // If the Cache is configured, we share the engine. + cacheImpl = c.(*cache) + engine = cacheImpl.initEngine(config.engineKind, config.newEngine, ctx, config.enabledFeatures) + } else { + // Otherwise, we create a new engine. + engine = config.newEngine(ctx, config.enabledFeatures, nil) + } + store := wasm.NewStore(config.enabledFeatures, engine) + return &runtime{ + cache: cacheImpl, + store: store, + enabledFeatures: config.enabledFeatures, + memoryLimitPages: config.memoryLimitPages, + memoryCapacityFromMax: config.memoryCapacityFromMax, + dwarfDisabled: config.dwarfDisabled, + storeCustomSections: config.storeCustomSections, + ensureTermination: config.ensureTermination, + } +} + +// runtime allows decoupling of public interfaces from internal representation. 
+type runtime struct { + store *wasm.Store + cache *cache + enabledFeatures api.CoreFeatures + memoryLimitPages uint32 + memoryCapacityFromMax bool + dwarfDisabled bool + storeCustomSections bool + + // closed is the pointer used both to guard moduleEngine.CloseWithExitCode and to store the exit code. + // + // The update value is 1 + exitCode << 32. This ensures an exit code of zero isn't mistaken for never closed. + // + // Note: Exclusively reading and updating this with atomics guarantees cross-goroutine observations. + // See /RATIONALE.md + closed atomic.Uint64 + + ensureTermination bool +} + +// Module implements Runtime.Module. +func (r *runtime) Module(moduleName string) api.Module { + if len(moduleName) == 0 { + return nil + } + return r.store.Module(moduleName) +} + +// CompileModule implements Runtime.CompileModule +func (r *runtime) CompileModule(ctx context.Context, binary []byte) (CompiledModule, error) { + if err := r.failIfClosed(); err != nil { + return nil, err + } + + internal, err := binaryformat.DecodeModule(binary, r.enabledFeatures, + r.memoryLimitPages, r.memoryCapacityFromMax, !r.dwarfDisabled, r.storeCustomSections) + if err != nil { + return nil, err + } else if err = internal.Validate(r.enabledFeatures); err != nil { + // TODO: decoders should validate before returning, as that allows + // them to err with the correct position in the wasm binary. + return nil, err + } + + // Now that the module is validated, cache the memory definitions. + // TODO: lazy initialization of memory definition. + internal.BuildMemoryDefinitions() + + c := &compiledModule{module: internal, compiledEngine: r.store.Engine} + + // typeIDs are static and compile-time known. 
+ typeIDs, err := r.store.GetFunctionTypeIDs(internal.TypeSection) + if err != nil { + return nil, err + } + c.typeIDs = typeIDs + + listeners, err := buildFunctionListeners(ctx, internal) + if err != nil { + return nil, err + } + internal.AssignModuleID(binary, listeners, r.ensureTermination) + if err = r.store.Engine.CompileModule(ctx, internal, listeners, r.ensureTermination); err != nil { + return nil, err + } + return c, nil +} + +func buildFunctionListeners(ctx context.Context, internal *wasm.Module) ([]experimentalapi.FunctionListener, error) { + // Test to see if internal code are using an experimental feature. + fnlf := ctx.Value(expctxkeys.FunctionListenerFactoryKey{}) + if fnlf == nil { + return nil, nil + } + factory := fnlf.(experimentalapi.FunctionListenerFactory) + importCount := internal.ImportFunctionCount + listeners := make([]experimentalapi.FunctionListener, len(internal.FunctionSection)) + for i := 0; i < len(listeners); i++ { + listeners[i] = factory.NewFunctionListener(internal.FunctionDefinition(uint32(i) + importCount)) + } + return listeners, nil +} + +// failIfClosed returns an error if CloseWithExitCode was called implicitly (by Close) or explicitly. 
+func (r *runtime) failIfClosed() error { + if closed := r.closed.Load(); closed != 0 { + return fmt.Errorf("runtime closed with exit_code(%d)", uint32(closed>>32)) + } + return nil +} + +// Instantiate implements Runtime.Instantiate +func (r *runtime) Instantiate(ctx context.Context, binary []byte) (api.Module, error) { + return r.InstantiateWithConfig(ctx, binary, NewModuleConfig()) +} + +// InstantiateWithConfig implements Runtime.InstantiateWithConfig +func (r *runtime) InstantiateWithConfig(ctx context.Context, binary []byte, config ModuleConfig) (api.Module, error) { + if compiled, err := r.CompileModule(ctx, binary); err != nil { + return nil, err + } else { + compiled.(*compiledModule).closeWithModule = true + return r.InstantiateModule(ctx, compiled, config) + } +} + +// InstantiateModule implements Runtime.InstantiateModule. +func (r *runtime) InstantiateModule( + ctx context.Context, + compiled CompiledModule, + mConfig ModuleConfig, +) (mod api.Module, err error) { + if err = r.failIfClosed(); err != nil { + return nil, err + } + + code := compiled.(*compiledModule) + config := mConfig.(*moduleConfig) + + // Only add guest module configuration to guests. + if !code.module.IsHostModule { + if sockConfig, ok := ctx.Value(internalsock.ConfigKey{}).(*internalsock.Config); ok { + config.sockConfig = sockConfig + } + } + + var sysCtx *internalsys.Context + if sysCtx, err = config.toSysContext(); err != nil { + return + } + + name := config.name + if !config.nameSet && code.module.NameSection != nil && code.module.NameSection.ModuleName != "" { + name = code.module.NameSection.ModuleName + } + + // Instantiate the module. + mod, err = r.store.Instantiate(ctx, code.module, name, sysCtx, code.typeIDs) + if err != nil { + // If there was an error, don't leak the compiled module. 
+ if code.closeWithModule { + _ = code.Close(ctx) // don't overwrite the error + } + return + } + + if closeNotifier, ok := ctx.Value(expctxkeys.CloseNotifierKey{}).(experimentalapi.CloseNotifier); ok { + mod.(*wasm.ModuleInstance).CloseNotifier = closeNotifier + } + + // Attach the code closer so that anything afterward closes the compiled + // code when closing the module. + if code.closeWithModule { + mod.(*wasm.ModuleInstance).CodeCloser = code + } + + // Now, invoke any start functions, failing at first error. + for _, fn := range config.startFunctions { + start := mod.ExportedFunction(fn) + if start == nil { + continue + } + if _, err = start.Call(ctx); err != nil { + _ = mod.Close(ctx) // Don't leak the module on error. + + if se, ok := err.(*sys.ExitError); ok { + if se.ExitCode() == 0 { // Don't err on success. + err = nil + } + return // Don't wrap an exit error + } + err = fmt.Errorf("module[%s] function[%s] failed: %w", name, fn, err) + return + } + } + return +} + +// Close implements api.Closer embedded in Runtime. +func (r *runtime) Close(ctx context.Context) error { + return r.CloseWithExitCode(ctx, 0) +} + +// CloseWithExitCode implements Runtime.CloseWithExitCode +// +// Note: it also marks the internal `closed` field +func (r *runtime) CloseWithExitCode(ctx context.Context, exitCode uint32) error { + closed := uint64(1) + uint64(exitCode)<<32 // Store exitCode as high-order bits. + if !r.closed.CompareAndSwap(0, closed) { + return nil + } + err := r.store.CloseWithExitCode(ctx, exitCode) + if r.cache == nil { + // Close the engine if the cache is not configured, which means that this engine is scoped in this runtime. 
+ if errCloseEngine := r.store.Engine.Close(); errCloseEngine != nil { + return errCloseEngine + } + } + return err +} diff --git a/vendor/github.com/tetratelabs/wazero/sys/clock.go b/vendor/github.com/tetratelabs/wazero/sys/clock.go new file mode 100644 index 000000000..1c91ce246 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/sys/clock.go @@ -0,0 +1,26 @@ +package sys + +// ClockResolution is a positive granularity of clock precision in +// nanoseconds. For example, if the resolution is 1us, this returns 1000. +// +// Note: Some implementations return arbitrary resolution because there's +// no perfect alternative. For example, according to the source in time.go, +// windows monotonic resolution can be 15ms. See /RATIONALE.md. +type ClockResolution uint32 + +// Walltime returns the current unix/epoch time, seconds since midnight UTC +// 1 January 1970, with a nanosecond fraction. +type Walltime func() (sec int64, nsec int32) + +// Nanotime returns nanoseconds since an arbitrary start point, used to measure +// elapsed time. This is sometimes referred to as a tick or monotonic time. +// +// Note: There are no constraints on the value return except that it +// increments. For example, -1 is a valid if the next value is >= 0. +type Nanotime func() int64 + +// Nanosleep puts the current goroutine to sleep for at least ns nanoseconds. +type Nanosleep func(ns int64) + +// Osyield yields the processor, typically to implement spin-wait loops. +type Osyield func() diff --git a/vendor/github.com/tetratelabs/wazero/sys/error.go b/vendor/github.com/tetratelabs/wazero/sys/error.go new file mode 100644 index 000000000..c3efbad96 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/sys/error.go @@ -0,0 +1,83 @@ +// Package sys includes constants and types used by both public and internal APIs. +package sys + +import ( + "context" + "fmt" +) + +// These two special exit codes are reserved by wazero for context Cancel and Timeout integrations. 
+// The assumption here is that well-behaving Wasm programs won't use these two exit codes. +const ( + // ExitCodeContextCanceled corresponds to context.Canceled and returned by ExitError.ExitCode in that case. + ExitCodeContextCanceled uint32 = 0xffffffff + // ExitCodeDeadlineExceeded corresponds to context.DeadlineExceeded and returned by ExitError.ExitCode in that case. + ExitCodeDeadlineExceeded uint32 = 0xefffffff +) + +// ExitError is returned to a caller of api.Function when api.Module CloseWithExitCode was invoked, +// or context.Context passed to api.Function Call was canceled or reached the Timeout. +// +// ExitCode zero value means success while any other value is an error. +// +// Here's an example of how to get the exit code: +// +// main := module.ExportedFunction("main") +// if err := main(ctx); err != nil { +// if exitErr, ok := err.(*sys.ExitError); ok { +// // This means your module exited with non-zero code! +// } +// --snip-- +// +// Note: While possible the reason of this was "proc_exit" from "wasi_snapshot_preview1", it could be from other host +// functions, for example an AssemblyScript's abort handler, or any arbitrary caller of CloseWithExitCode. +// +// See https://github.com/WebAssembly/WASI/blob/main/phases/snapshot/docs.md#proc_exit and +// https://www.assemblyscript.org/concepts.html#special-imports +// +// Note: In the case of context cancellation or timeout, the api.Module from which the api.Function created is closed. +type ExitError struct { + // Note: this is a struct not a uint32 type as it was originally one and + // we don't want to break call-sites that cast into it. + exitCode uint32 +} + +var exitZero = &ExitError{} + +func NewExitError(exitCode uint32) *ExitError { + if exitCode == 0 { + return exitZero + } + return &ExitError{exitCode: exitCode} +} + +// ExitCode returns zero on success, and an arbitrary value otherwise. 
+func (e *ExitError) ExitCode() uint32 { + return e.exitCode +} + +// Error implements the error interface. +func (e *ExitError) Error() string { + switch e.exitCode { + case ExitCodeContextCanceled: + return fmt.Sprintf("module closed with %s", context.Canceled) + case ExitCodeDeadlineExceeded: + return fmt.Sprintf("module closed with %s", context.DeadlineExceeded) + default: + return fmt.Sprintf("module closed with exit_code(%d)", e.exitCode) + } +} + +// Is allows use via errors.Is +func (e *ExitError) Is(err error) bool { + if target, ok := err.(*ExitError); ok { + return e.exitCode == target.exitCode + } + if e.exitCode == ExitCodeContextCanceled && err == context.Canceled { + return true + } + if e.exitCode == ExitCodeDeadlineExceeded && err == context.DeadlineExceeded { + return true + } + return false +} diff --git a/vendor/github.com/tetratelabs/wazero/sys/stat.go b/vendor/github.com/tetratelabs/wazero/sys/stat.go new file mode 100644 index 000000000..bb7b9e5d3 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/sys/stat.go @@ -0,0 +1,107 @@ +package sys + +import "io/fs" + +// Inode is the file serial number, or zero if unknown. +// +// Any constant value will invalidate functions that use this for +// equivalence, such as os.SameFile (Stat_t.Ino). +// +// When zero is returned by a `readdir`, some compilers will attempt to +// get a non-zero value with `lstat`. Those using this for darwin's definition +// of `getdirentries` conflate zero `d_fileno` with a deleted file, so skip the +// entry. See /RATIONALE.md for more on this. +type Inode = uint64 + +// ^-- Inode is a type alias to consolidate documentation and aid in reference +// searches. While only Stat_t is exposed publicly at the moment, this is used +// internally for Dirent and several function return values. + +// EpochNanos is a timestamp in epoch nanoseconds, or zero if unknown. +// +// This defines epoch time the same way as Walltime, except this value is +// packed into an int64. 
Common conversions are detailed in the examples. +type EpochNanos = int64 + +// Stat_t is similar to syscall.Stat_t, except available on all operating +// systems, including Windows. +// +// # Notes +// +// - This is used for WebAssembly ABI emulating the POSIX `stat` system call. +// Fields included are required for WebAssembly ABI including wasip1 +// (a.k.a. wasix) and wasi-filesystem (a.k.a. wasip2). See +// https://pubs.opengroup.org/onlinepubs/9699919799/functions/stat.html +// - Fields here are required for WebAssembly ABI including wasip1 +// (a.k.a. wasix) and wasi-filesystem (a.k.a. wasip2). +// - This isn't the same as syscall.Stat_t because wazero supports Windows, +// which doesn't have that type. runtime.GOOS that has this already also +// have inconsistent field lengths, which complicates wasm binding. +// - Use NewStat_t to create this from an existing fs.FileInfo. +// - For portability, numeric fields are 64-bit when at least one platform +// defines it that large. +type Stat_t struct { + // Dev is the device ID of device containing the file. + Dev uint64 + + // Ino is the file serial number, or zero if not available. See Inode for + // more details including impact returning a zero value. + Ino Inode + + // Mode is the same as Mode on fs.FileInfo containing bits to identify the + // type of the file (fs.ModeType) and its permissions (fs.ModePerm). + Mode fs.FileMode + + // Nlink is the number of hard links to the file. + // + // Note: This value is platform-specific and often at least one. Linux will + // return 1+N for a directory, where BSD (like Darwin) return 2+N, which + // includes the dot entry. + Nlink uint64 + + // Size is the length in bytes for regular files. For symbolic links, this + // is length in bytes of the pathname contained in the symbolic link. + Size int64 + + // Atim is the last data access timestamp in epoch nanoseconds. + Atim EpochNanos + + // Mtim is the last data modification timestamp in epoch nanoseconds. 
+ Mtim EpochNanos + + // Ctim is the last file status change timestamp in epoch nanoseconds. + Ctim EpochNanos +} + +// NewStat_t fills a new Stat_t from `info`, including any runtime.GOOS-specific +// details from fs.FileInfo `Sys`. When `Sys` is already a *Stat_t, it is +// returned as-is. +// +// # Notes +// +// - When already in fs.FileInfo `Sys`, Stat_t must be a pointer. +// - When runtime.GOOS is "windows" Stat_t.Ino will be zero. +// - When fs.FileInfo `Sys` is nil or unknown, some fields not in fs.FileInfo +// are defaulted: Stat_t.Atim and Stat_t.Ctim are set to `ModTime`, and +// are set to ModTime and Stat_t.Nlink is set to 1. +func NewStat_t(info fs.FileInfo) Stat_t { + // Note: Pointer, not val, for parity with Go, which sets *syscall.Stat_t + if st, ok := info.Sys().(*Stat_t); ok { + return *st + } + return statFromFileInfo(info) +} + +func defaultStatFromFileInfo(info fs.FileInfo) Stat_t { + st := Stat_t{} + st.Ino = 0 + st.Dev = 0 + st.Mode = info.Mode() + st.Nlink = 1 + st.Size = info.Size() + mtim := info.ModTime().UnixNano() // Set all times to the mod time + st.Atim = mtim + st.Mtim = mtim + st.Ctim = mtim + return st +} diff --git a/vendor/github.com/tetratelabs/wazero/sys/stat_bsd.go b/vendor/github.com/tetratelabs/wazero/sys/stat_bsd.go new file mode 100644 index 000000000..3bf9b5d14 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/sys/stat_bsd.go @@ -0,0 +1,29 @@ +//go:build (amd64 || arm64) && (darwin || freebsd) + +package sys + +import ( + "io/fs" + "syscall" +) + +const sysParseable = true + +func statFromFileInfo(info fs.FileInfo) Stat_t { + if d, ok := info.Sys().(*syscall.Stat_t); ok { + st := Stat_t{} + st.Dev = uint64(d.Dev) + st.Ino = d.Ino + st.Mode = info.Mode() + st.Nlink = uint64(d.Nlink) + st.Size = d.Size + atime := d.Atimespec + st.Atim = atime.Sec*1e9 + atime.Nsec + mtime := d.Mtimespec + st.Mtim = mtime.Sec*1e9 + mtime.Nsec + ctime := d.Ctimespec + st.Ctim = ctime.Sec*1e9 + ctime.Nsec + return st + } + return 
defaultStatFromFileInfo(info) +} diff --git a/vendor/github.com/tetratelabs/wazero/sys/stat_linux.go b/vendor/github.com/tetratelabs/wazero/sys/stat_linux.go new file mode 100644 index 000000000..9b5e20e8d --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/sys/stat_linux.go @@ -0,0 +1,32 @@ +//go:build (amd64 || arm64 || riscv64) && linux + +// Note: This expression is not the same as compiler support, even if it looks +// similar. Platform functions here are used in interpreter mode as well. + +package sys + +import ( + "io/fs" + "syscall" +) + +const sysParseable = true + +func statFromFileInfo(info fs.FileInfo) Stat_t { + if d, ok := info.Sys().(*syscall.Stat_t); ok { + st := Stat_t{} + st.Dev = uint64(d.Dev) + st.Ino = uint64(d.Ino) + st.Mode = info.Mode() + st.Nlink = uint64(d.Nlink) + st.Size = d.Size + atime := d.Atim + st.Atim = atime.Sec*1e9 + atime.Nsec + mtime := d.Mtim + st.Mtim = mtime.Sec*1e9 + mtime.Nsec + ctime := d.Ctim + st.Ctim = ctime.Sec*1e9 + ctime.Nsec + return st + } + return defaultStatFromFileInfo(info) +} diff --git a/vendor/github.com/tetratelabs/wazero/sys/stat_unsupported.go b/vendor/github.com/tetratelabs/wazero/sys/stat_unsupported.go new file mode 100644 index 000000000..583c2adb0 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/sys/stat_unsupported.go @@ -0,0 +1,17 @@ +//go:build (!((amd64 || arm64 || riscv64) && linux) && !((amd64 || arm64) && (darwin || freebsd)) && !((amd64 || arm64) && windows)) || js + +package sys + +import "io/fs" + +// sysParseable is only used here as we define "supported" as being able to +// parse `info.Sys()`. The above `go:build` constraints exclude 32-bit until +// that's requested. +// +// TODO: When Go 1.21 is out, use the "unix" build constraint (as 1.21 makes +// our floor Go version 1.19. 
+const sysParseable = false + +func statFromFileInfo(info fs.FileInfo) Stat_t { + return defaultStatFromFileInfo(info) +} diff --git a/vendor/github.com/tetratelabs/wazero/sys/stat_windows.go b/vendor/github.com/tetratelabs/wazero/sys/stat_windows.go new file mode 100644 index 000000000..1a7070f48 --- /dev/null +++ b/vendor/github.com/tetratelabs/wazero/sys/stat_windows.go @@ -0,0 +1,26 @@ +//go:build (amd64 || arm64) && windows + +package sys + +import ( + "io/fs" + "syscall" +) + +const sysParseable = true + +func statFromFileInfo(info fs.FileInfo) Stat_t { + if d, ok := info.Sys().(*syscall.Win32FileAttributeData); ok { + st := Stat_t{} + st.Ino = 0 // not in Win32FileAttributeData + st.Dev = 0 // not in Win32FileAttributeData + st.Mode = info.Mode() + st.Nlink = 1 // not in Win32FileAttributeData + st.Size = info.Size() + st.Atim = d.LastAccessTime.Nanoseconds() + st.Mtim = d.LastWriteTime.Nanoseconds() + st.Ctim = d.CreationTime.Nanoseconds() + return st + } + return defaultStatFromFileInfo(info) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 6c766fe69..c9687d03e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -517,9 +517,21 @@ github.com/modern-go/concurrent # github.com/modern-go/reflect2 v1.0.2 ## explicit; go 1.12 github.com/modern-go/reflect2 +# github.com/ncruces/go-sqlite3 v0.16.0 +## explicit; go 1.21 +github.com/ncruces/go-sqlite3 +github.com/ncruces/go-sqlite3/driver +github.com/ncruces/go-sqlite3/embed +github.com/ncruces/go-sqlite3/internal/util +github.com/ncruces/go-sqlite3/util/osutil +github.com/ncruces/go-sqlite3/vfs +github.com/ncruces/go-sqlite3/vfs/memdb # github.com/ncruces/go-strftime v0.1.9 ## explicit; go 1.17 github.com/ncruces/go-strftime +# github.com/ncruces/julianday v1.0.0 +## explicit; go 1.17 +github.com/ncruces/julianday # github.com/oklog/ulid v1.3.1 ## explicit github.com/oklog/ulid @@ -820,6 +832,41 @@ github.com/tdewolff/parse/v2/strconv # github.com/technologize/otel-go-contrib v1.1.1 ## 
explicit; go 1.17 github.com/technologize/otel-go-contrib/otelginmetrics +# github.com/tetratelabs/wazero v1.7.2 +## explicit; go 1.20 +github.com/tetratelabs/wazero +github.com/tetratelabs/wazero/api +github.com/tetratelabs/wazero/experimental +github.com/tetratelabs/wazero/experimental/sys +github.com/tetratelabs/wazero/internal/descriptor +github.com/tetratelabs/wazero/internal/engine/interpreter +github.com/tetratelabs/wazero/internal/engine/wazevo +github.com/tetratelabs/wazero/internal/engine/wazevo/backend +github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64 +github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64 +github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc +github.com/tetratelabs/wazero/internal/engine/wazevo/frontend +github.com/tetratelabs/wazero/internal/engine/wazevo/ssa +github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi +github.com/tetratelabs/wazero/internal/expctxkeys +github.com/tetratelabs/wazero/internal/filecache +github.com/tetratelabs/wazero/internal/fsapi +github.com/tetratelabs/wazero/internal/ieee754 +github.com/tetratelabs/wazero/internal/internalapi +github.com/tetratelabs/wazero/internal/leb128 +github.com/tetratelabs/wazero/internal/moremath +github.com/tetratelabs/wazero/internal/platform +github.com/tetratelabs/wazero/internal/sock +github.com/tetratelabs/wazero/internal/sys +github.com/tetratelabs/wazero/internal/sysfs +github.com/tetratelabs/wazero/internal/u32 +github.com/tetratelabs/wazero/internal/u64 +github.com/tetratelabs/wazero/internal/version +github.com/tetratelabs/wazero/internal/wasm +github.com/tetratelabs/wazero/internal/wasm/binary +github.com/tetratelabs/wazero/internal/wasmdebug +github.com/tetratelabs/wazero/internal/wasmruntime +github.com/tetratelabs/wazero/sys # github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc ## explicit github.com/tmthrgd/go-hex @@ -1270,7 +1317,7 @@ modernc.org/mathutil # modernc.org/memory v1.8.0 ## 
explicit; go 1.18 modernc.org/memory -# modernc.org/sqlite v1.29.8 => gitlab.com/NyaaaWhatsUpDoc/sqlite v1.29.9-concurrency-workaround +# modernc.org/sqlite v0.0.0-00010101000000-000000000000 => gitlab.com/NyaaaWhatsUpDoc/sqlite v1.29.9-concurrency-workaround ## explicit; go 1.20 modernc.org/sqlite modernc.org/sqlite/lib