mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2024-12-31 19:46:31 +00:00
55aacaf4b0
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
1725 lines
45 KiB
Go
1725 lines
45 KiB
Go
// Copyright 2014 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Table-driven decoding of x86 instructions.
|
|
|
|
package x86asm
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"runtime"
|
|
)
|
|
|
|
// Set trace to true to cause the decoder to print the PC sequence
|
|
// of the executed instruction codes. This is typically only useful
|
|
// when you are running a test of a single input case.
|
|
const trace = false
|
|
|
|
// decodeOp is one operation of the decoder bytecode program.
//
// Operations fall into three groups: consuming input bytes and branching
// on them (xCond*), consuming additional instruction fields (xRead*), and
// interpreting the consumed data as instruction arguments (xArg*).
// The xRead and xArg names follow the operand notation of the Intel
// manual; see, for example, Volume 2, Section 3.1.1, page 487 of
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
//
// The decoding program itself is generated by ../x86map.
//
// TODO(rsc): We may be able to merge various of the memory operands
// since we don't care about, say, the distinction between m80dec and m80bcd.
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
type decodeOp uint16

// The numeric values are assigned by iota, so the order of this list must
// not change independently of the generated decoder program.
const (
	// Control.
	xFail  decodeOp = iota // stop: invalid instruction
	xMatch                 // stop: match completed
	xJump                  // continue at the pc given by the next program word

	// Conditional branches.
	xCondByte     // branch on the value of the next instruction byte
	xCondSlashR   // read ModR/M and branch on its /r value
	xCondPrefix   // branch on the presence of an instruction prefix
	xCondIs64     // branch on 64-bit processor mode
	xCondDataSize // branch on the operand size
	xCondAddrSize // branch on the address size
	xCondIsMem    // branch on memory vs. register argument

	// Output.
	xSetOp // record the instruction opcode

	// Input fields.
	xReadSlashR // consume /r
	xReadIb     // consume ib
	xReadIw     // consume iw
	xReadId     // consume id
	xReadIo     // consume io
	xReadCb     // consume cb
	xReadCw     // consume cw
	xReadCd     // consume cd
	xReadCp     // consume cp
	xReadCm     // consume cm

	// Arguments.
	xArg1            // operand: 1
	xArg3            // operand: 3
	xArgAL           // operand: AL
	xArgAX           // operand: AX
	xArgCL           // operand: CL
	xArgCR0dashCR7   // operand: CR0-CR7
	xArgCS           // operand: CS
	xArgDR0dashDR7   // operand: DR0-DR7
	xArgDS           // operand: DS
	xArgDX           // operand: DX
	xArgEAX          // operand: EAX
	xArgEDX          // operand: EDX
	xArgES           // operand: ES
	xArgFS           // operand: FS
	xArgGS           // operand: GS
	xArgImm16        // operand: imm16
	xArgImm32        // operand: imm32
	xArgImm64        // operand: imm64
	xArgImm8         // operand: imm8
	xArgImm8u        // operand: imm8, recorded as unsigned
	xArgImm16u       // operand: imm16, recorded as unsigned
	xArgM            // operand: m
	xArgM128         // operand: m128
	xArgM256         // operand: m256
	xArgM1428byte    // operand: m14/28byte
	xArgM16          // operand: m16
	xArgM16and16     // operand: m16&16
	xArgM16and32     // operand: m16&32
	xArgM16and64     // operand: m16&64
	xArgM16colon16   // operand: m16:16
	xArgM16colon32   // operand: m16:32
	xArgM16colon64   // operand: m16:64
	xArgM16int       // operand: m16int
	xArgM2byte       // operand: m2byte
	xArgM32          // operand: m32
	xArgM32and32     // operand: m32&32
	xArgM32fp        // operand: m32fp
	xArgM32int       // operand: m32int
	xArgM512byte     // operand: m512byte
	xArgM64          // operand: m64
	xArgM64fp        // operand: m64fp
	xArgM64int       // operand: m64int
	xArgM8           // operand: m8
	xArgM80bcd       // operand: m80bcd
	xArgM80dec       // operand: m80dec
	xArgM80fp        // operand: m80fp
	xArgM94108byte   // operand: m94/108byte
	xArgMm           // operand: mm
	xArgMm1          // operand: mm1
	xArgMm2          // operand: mm2
	xArgMm2M64       // operand: mm2/m64
	xArgMmM32        // operand: mm/m32
	xArgMmM64        // operand: mm/m64
	xArgMem          // operand: mem
	xArgMoffs16      // operand: moffs16
	xArgMoffs32      // operand: moffs32
	xArgMoffs64      // operand: moffs64
	xArgMoffs8       // operand: moffs8
	xArgPtr16colon16 // operand: ptr16:16
	xArgPtr16colon32 // operand: ptr16:32
	xArgR16          // operand: r16
	xArgR16op        // operand: r16 with +rw in opcode
	xArgR32          // operand: r32
	xArgR32M16       // operand: r32/m16
	xArgR32M8        // operand: r32/m8
	xArgR32op        // operand: r32 with +rd in opcode
	xArgR64          // operand: r64
	xArgR64M16       // operand: r64/m16
	xArgR64op        // operand: r64 with +rd in opcode
	xArgR8           // operand: r8
	xArgR8op         // operand: r8 with +rb in opcode
	xArgRAX          // operand: RAX
	xArgRDX          // operand: RDX
	xArgRM           // operand: r/m
	xArgRM16         // operand: r/m16
	xArgRM32         // operand: r/m32
	xArgRM64         // operand: r/m64
	xArgRM8          // operand: r/m8
	xArgReg          // operand: reg
	xArgRegM16       // operand: reg/m16
	xArgRegM32       // operand: reg/m32
	xArgRegM8        // operand: reg/m8
	xArgRel16        // operand: rel16
	xArgRel32        // operand: rel32
	xArgRel8         // operand: rel8
	xArgSS           // operand: SS
	xArgST           // operand: ST, aka ST(0)
	xArgSTi          // operand: ST(i) with +i in opcode
	xArgSreg         // operand: Sreg
	xArgTR0dashTR7   // operand: TR0-TR7
	xArgXmm          // operand: xmm
	xArgXMM0         // operand: <XMM0>
	xArgXmm1         // operand: xmm1
	xArgXmm2         // operand: xmm2
	xArgXmm2M128     // operand: xmm2/m128
	xArgYmm2M256     // operand: ymm2/m256
	xArgXmm2M16      // operand: xmm2/m16
	xArgXmm2M32      // operand: xmm2/m32
	xArgXmm2M64      // operand: xmm2/m64
	xArgXmmM128      // operand: xmm/m128
	xArgXmmM32       // operand: xmm/m32
	xArgXmmM64       // operand: xmm/m64
	xArgYmm1         // operand: ymm1
	xArgRmf16        // operand: r/m16 but force mod=3
	xArgRmf32        // operand: r/m32 but force mod=3
	xArgRmf64        // operand: r/m64 but force mod=3
)
|
|
|
|
// instPrefix returns an Inst describing just one prefix byte.
|
|
// It is only used if there is a prefix followed by an unintelligible
|
|
// or invalid instruction byte sequence.
|
|
func instPrefix(b byte, mode int) (Inst, error) {
|
|
// When tracing it is useful to see what called instPrefix to report an error.
|
|
if trace {
|
|
_, file, line, _ := runtime.Caller(1)
|
|
fmt.Printf("%s:%d\n", file, line)
|
|
}
|
|
p := Prefix(b)
|
|
switch p {
|
|
case PrefixDataSize:
|
|
if mode == 16 {
|
|
p = PrefixData32
|
|
} else {
|
|
p = PrefixData16
|
|
}
|
|
case PrefixAddrSize:
|
|
if mode == 32 {
|
|
p = PrefixAddr16
|
|
} else {
|
|
p = PrefixAddr32
|
|
}
|
|
}
|
|
// Note: using composite literal with Prefix key confuses 'bundle' tool.
|
|
inst := Inst{Len: 1}
|
|
inst.Prefix = Prefixes{p}
|
|
return inst, nil
|
|
}
|
|
|
|
// truncated reports a truncated instruction.
|
|
// For now we use instPrefix but perhaps later we will return
|
|
// a specific error here.
|
|
func truncated(src []byte, mode int) (Inst, error) {
|
|
if len(src) == 0 {
|
|
return Inst{}, ErrTruncated
|
|
}
|
|
return instPrefix(src[0], mode) // too long
|
|
}
|
|
|
|
// Errors returned by Decode.
var (
	// ErrInvalidMode is returned when the requested processor mode
	// is not one of 16, 32, or 64.
	ErrInvalidMode = errors.New("invalid x86 mode in Decode")

	// ErrTruncated is returned when the input ends before any byte
	// of an instruction could be reported.
	ErrTruncated = errors.New("truncated instruction")

	// ErrUnrecognized describes an instruction the decoder does not know.
	ErrUnrecognized = errors.New("unrecognized instruction")
)
|
|
|
|
// decoderCover records coverage information for which parts
|
|
// of the byte code have been executed.
|
|
// The slice is indexed by decoder program counter: decode1 sets
// decoderCover[pc] to true for each program word it executes, and only
// when the slice is non-nil. Nothing in view allocates it — presumably
// a test does; confirm before relying on it.
var decoderCover []bool
|
|
|
|
// Decode decodes the leading bytes in src as a single instruction.
|
|
// The mode arguments specifies the assumed processor mode:
|
|
// 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
|
|
func Decode(src []byte, mode int) (inst Inst, err error) {
|
|
return decode1(src, mode, false)
|
|
}
|
|
|
|
// decode1 is the implementation of Decode but takes an extra
|
|
// gnuCompat flag to cause it to change its behavior to mimic
|
|
// bugs (or at least unique features) of GNU libopcodes as used
|
|
// by objdump. We don't believe that logic is the right thing to do
|
|
// in general, but when testing against libopcodes it simplifies the
|
|
// comparison if we adjust a few small pieces of logic.
|
|
// The affected logic is in the conditional branch for "mandatory" prefixes,
|
|
// case xCondPrefix.
|
|
func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
|
|
switch mode {
|
|
case 16, 32, 64:
|
|
// ok
|
|
// TODO(rsc): 64-bit mode not tested, probably not working.
|
|
default:
|
|
return Inst{}, ErrInvalidMode
|
|
}
|
|
|
|
// Maximum instruction size is 15 bytes.
|
|
// If we need to read more, return 'truncated instruction'.
|
|
if len(src) > 15 {
|
|
src = src[:15]
|
|
}
|
|
|
|
var (
|
|
// prefix decoding information
|
|
pos = 0 // position reading src
|
|
nprefix = 0 // number of prefixes
|
|
lockIndex = -1 // index of LOCK prefix in src and inst.Prefix
|
|
repIndex = -1 // index of REP/REPN prefix in src and inst.Prefix
|
|
segIndex = -1 // index of Group 2 prefix in src and inst.Prefix
|
|
dataSizeIndex = -1 // index of Group 3 prefix in src and inst.Prefix
|
|
addrSizeIndex = -1 // index of Group 4 prefix in src and inst.Prefix
|
|
rex Prefix // rex byte if present (or 0)
|
|
rexUsed Prefix // bits used in rex byte
|
|
rexIndex = -1 // index of rex byte
|
|
vex Prefix // use vex encoding
|
|
vexIndex = -1 // index of vex prefix
|
|
|
|
addrMode = mode // address mode (width in bits)
|
|
dataMode = mode // operand mode (width in bits)
|
|
|
|
// decoded ModR/M fields
|
|
haveModrm bool
|
|
modrm int
|
|
mod int
|
|
regop int
|
|
rm int
|
|
|
|
// if ModR/M is memory reference, Mem form
|
|
mem Mem
|
|
haveMem bool
|
|
|
|
// decoded SIB fields
|
|
haveSIB bool
|
|
sib int
|
|
scale int
|
|
index int
|
|
base int
|
|
displen int
|
|
dispoff int
|
|
|
|
// decoded immediate values
|
|
imm int64
|
|
imm8 int8
|
|
immc int64
|
|
immcpos int
|
|
|
|
// output
|
|
opshift int
|
|
inst Inst
|
|
narg int // number of arguments written to inst
|
|
)
|
|
|
|
if mode == 64 {
|
|
dataMode = 32
|
|
}
|
|
|
|
// Prefixes are certainly the most complex and underspecified part of
|
|
// decoding x86 instructions. Although the manuals say things like
|
|
// up to four prefixes, one from each group, nearly everyone seems to
|
|
// agree that in practice as many prefixes as possible, including multiple
|
|
// from a particular group or repetitions of a given prefix, can be used on
|
|
// an instruction, provided the total instruction length including prefixes
|
|
// does not exceed the agreed-upon maximum of 15 bytes.
|
|
// Everyone also agrees that if one of these prefixes is the LOCK prefix
|
|
// and the instruction is not one of the instructions that can be used with
|
|
// the LOCK prefix or if the destination is not a memory operand,
|
|
// then the instruction is invalid and produces the #UD exception.
|
|
// However, that is the end of any semblance of agreement.
|
|
//
|
|
// What happens if prefixes are given that conflict with other prefixes?
|
|
// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
|
|
// conflict with each other: only one segment can be in effect.
|
|
// Disassemblers seem to agree that later prefixes take priority over
|
|
// earlier ones. I have not taken the time to write assembly programs
|
|
// to check to see if the hardware agrees.
|
|
//
|
|
// What happens if prefixes are given that have no meaning for the
|
|
// specific instruction to which they are attached? It depends.
|
|
// If they really have no meaning, they are ignored. However, a future
|
|
// processor may assign a different meaning. As a disassembler, we
|
|
// don't really know whether we're seeing a meaningless prefix or one
|
|
// whose meaning we simply haven't been told yet.
|
|
//
|
|
// Combining the two questions, what happens when conflicting
|
|
// extension prefixes are given? No one seems to know for sure.
|
|
// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
|
|
// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
|
|
// Which prefix wins? See the xCondPrefix case for more.
|
|
//
|
|
// Writing assembly test cases to divine which interpretation the
|
|
// CPU uses might clarify the situation, but more likely it would
|
|
// make the situation even less clear.
|
|
|
|
// Read non-REX prefixes.
|
|
ReadPrefixes:
|
|
for ; pos < len(src); pos++ {
|
|
p := Prefix(src[pos])
|
|
switch p {
|
|
default:
|
|
nprefix = pos
|
|
break ReadPrefixes
|
|
|
|
// Group 1 - lock and repeat prefixes
|
|
// According to Intel, there should only be one from this set,
|
|
// but according to AMD both can be present.
|
|
case 0xF0:
|
|
if lockIndex >= 0 {
|
|
inst.Prefix[lockIndex] |= PrefixIgnored
|
|
}
|
|
lockIndex = pos
|
|
case 0xF2, 0xF3:
|
|
if repIndex >= 0 {
|
|
inst.Prefix[repIndex] |= PrefixIgnored
|
|
}
|
|
repIndex = pos
|
|
|
|
// Group 2 - segment override / branch hints
|
|
case 0x26, 0x2E, 0x36, 0x3E:
|
|
if mode == 64 {
|
|
p |= PrefixIgnored
|
|
break
|
|
}
|
|
fallthrough
|
|
case 0x64, 0x65:
|
|
if segIndex >= 0 {
|
|
inst.Prefix[segIndex] |= PrefixIgnored
|
|
}
|
|
segIndex = pos
|
|
|
|
// Group 3 - operand size override
|
|
case 0x66:
|
|
if mode == 16 {
|
|
dataMode = 32
|
|
p = PrefixData32
|
|
} else {
|
|
dataMode = 16
|
|
p = PrefixData16
|
|
}
|
|
if dataSizeIndex >= 0 {
|
|
inst.Prefix[dataSizeIndex] |= PrefixIgnored
|
|
}
|
|
dataSizeIndex = pos
|
|
|
|
// Group 4 - address size override
|
|
case 0x67:
|
|
if mode == 32 {
|
|
addrMode = 16
|
|
p = PrefixAddr16
|
|
} else {
|
|
addrMode = 32
|
|
p = PrefixAddr32
|
|
}
|
|
if addrSizeIndex >= 0 {
|
|
inst.Prefix[addrSizeIndex] |= PrefixIgnored
|
|
}
|
|
addrSizeIndex = pos
|
|
|
|
//Group 5 - Vex encoding
|
|
case 0xC5:
|
|
if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
|
|
vex = p
|
|
vexIndex = pos
|
|
inst.Prefix[pos] = p
|
|
inst.Prefix[pos+1] = Prefix(src[pos+1])
|
|
pos += 1
|
|
continue
|
|
} else {
|
|
nprefix = pos
|
|
break ReadPrefixes
|
|
}
|
|
case 0xC4:
|
|
if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
|
|
vex = p
|
|
vexIndex = pos
|
|
inst.Prefix[pos] = p
|
|
inst.Prefix[pos+1] = Prefix(src[pos+1])
|
|
inst.Prefix[pos+2] = Prefix(src[pos+2])
|
|
pos += 2
|
|
continue
|
|
} else {
|
|
nprefix = pos
|
|
break ReadPrefixes
|
|
}
|
|
}
|
|
|
|
if pos >= len(inst.Prefix) {
|
|
return instPrefix(src[0], mode) // too long
|
|
}
|
|
|
|
inst.Prefix[pos] = p
|
|
}
|
|
|
|
// Read REX prefix.
|
|
if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 {
|
|
rex = Prefix(src[pos])
|
|
rexIndex = pos
|
|
if pos >= len(inst.Prefix) {
|
|
return instPrefix(src[0], mode) // too long
|
|
}
|
|
inst.Prefix[pos] = rex
|
|
pos++
|
|
if rex&PrefixREXW != 0 {
|
|
dataMode = 64
|
|
if dataSizeIndex >= 0 {
|
|
inst.Prefix[dataSizeIndex] |= PrefixIgnored
|
|
}
|
|
}
|
|
}
|
|
|
|
// Decode instruction stream, interpreting decoding instructions.
|
|
// opshift gives the shift to use when saving the next
|
|
// opcode byte into inst.Opcode.
|
|
opshift = 24
|
|
|
|
// Decode loop, executing decoder program.
|
|
var oldPC, prevPC int
|
|
Decode:
|
|
for pc := 1; ; { // TODO uint
|
|
oldPC = prevPC
|
|
prevPC = pc
|
|
if trace {
|
|
println("run", pc)
|
|
}
|
|
x := decoder[pc]
|
|
if decoderCover != nil {
|
|
decoderCover[pc] = true
|
|
}
|
|
pc++
|
|
|
|
// Read and decode ModR/M if needed by opcode.
|
|
switch decodeOp(x) {
|
|
case xCondSlashR, xReadSlashR:
|
|
if haveModrm {
|
|
return Inst{Len: pos}, errInternal
|
|
}
|
|
haveModrm = true
|
|
if pos >= len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
modrm = int(src[pos])
|
|
pos++
|
|
if opshift >= 0 {
|
|
inst.Opcode |= uint32(modrm) << uint(opshift)
|
|
opshift -= 8
|
|
}
|
|
mod = modrm >> 6
|
|
regop = (modrm >> 3) & 07
|
|
rm = modrm & 07
|
|
if rex&PrefixREXR != 0 {
|
|
rexUsed |= PrefixREXR
|
|
regop |= 8
|
|
}
|
|
if addrMode == 16 {
|
|
// 16-bit modrm form
|
|
if mod != 3 {
|
|
haveMem = true
|
|
mem = addr16[rm]
|
|
if rm == 6 && mod == 0 {
|
|
mem.Base = 0
|
|
}
|
|
|
|
// Consume disp16 if present.
|
|
if mod == 0 && rm == 6 || mod == 2 {
|
|
if pos+2 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
|
|
pos += 2
|
|
}
|
|
|
|
// Consume disp8 if present.
|
|
if mod == 1 {
|
|
if pos >= len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
mem.Disp = int64(int8(src[pos]))
|
|
pos++
|
|
}
|
|
}
|
|
} else {
|
|
haveMem = mod != 3
|
|
|
|
// 32-bit or 64-bit form
|
|
// Consume SIB encoding if present.
|
|
if rm == 4 && mod != 3 {
|
|
haveSIB = true
|
|
if pos >= len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
sib = int(src[pos])
|
|
pos++
|
|
if opshift >= 0 {
|
|
inst.Opcode |= uint32(sib) << uint(opshift)
|
|
opshift -= 8
|
|
}
|
|
scale = sib >> 6
|
|
index = (sib >> 3) & 07
|
|
base = sib & 07
|
|
if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 {
|
|
rexUsed |= PrefixREXB
|
|
base |= 8
|
|
}
|
|
if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 {
|
|
rexUsed |= PrefixREXX
|
|
index |= 8
|
|
}
|
|
|
|
mem.Scale = 1 << uint(scale)
|
|
if index == 4 {
|
|
// no mem.Index
|
|
} else {
|
|
mem.Index = baseRegForBits(addrMode) + Reg(index)
|
|
}
|
|
if base&7 == 5 && mod == 0 {
|
|
// no mem.Base
|
|
} else {
|
|
mem.Base = baseRegForBits(addrMode) + Reg(base)
|
|
}
|
|
} else {
|
|
if rex&PrefixREXB != 0 {
|
|
rexUsed |= PrefixREXB
|
|
rm |= 8
|
|
}
|
|
if mod == 0 && rm&7 == 5 || rm&7 == 4 {
|
|
// base omitted
|
|
} else if mod != 3 {
|
|
mem.Base = baseRegForBits(addrMode) + Reg(rm)
|
|
}
|
|
}
|
|
|
|
// Consume disp32 if present.
|
|
if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
|
|
if pos+4 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
dispoff = pos
|
|
displen = 4
|
|
mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
|
|
pos += 4
|
|
}
|
|
|
|
// Consume disp8 if present.
|
|
if mod == 1 {
|
|
if pos >= len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
dispoff = pos
|
|
displen = 1
|
|
mem.Disp = int64(int8(src[pos]))
|
|
pos++
|
|
}
|
|
|
|
// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
|
|
// See Vol 2A. Table 2-7.
|
|
if mode == 64 && mod == 0 && rm&7 == 5 {
|
|
if addrMode == 32 {
|
|
mem.Base = EIP
|
|
} else {
|
|
mem.Base = RIP
|
|
}
|
|
}
|
|
}
|
|
|
|
if segIndex >= 0 {
|
|
mem.Segment = prefixToSegment(inst.Prefix[segIndex])
|
|
}
|
|
}
|
|
|
|
// Execute single opcode.
|
|
switch decodeOp(x) {
|
|
default:
|
|
println("bad op", x, "at", pc-1, "from", oldPC)
|
|
return Inst{Len: pos}, errInternal
|
|
|
|
case xFail:
|
|
inst.Op = 0
|
|
break Decode
|
|
|
|
case xMatch:
|
|
break Decode
|
|
|
|
case xJump:
|
|
pc = int(decoder[pc])
|
|
|
|
// Conditional branches.
|
|
|
|
case xCondByte:
|
|
if pos >= len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
b := src[pos]
|
|
n := int(decoder[pc])
|
|
pc++
|
|
for i := 0; i < n; i++ {
|
|
xb, xpc := decoder[pc], int(decoder[pc+1])
|
|
pc += 2
|
|
if b == byte(xb) {
|
|
pc = xpc
|
|
pos++
|
|
if opshift >= 0 {
|
|
inst.Opcode |= uint32(b) << uint(opshift)
|
|
opshift -= 8
|
|
}
|
|
continue Decode
|
|
}
|
|
}
|
|
// xCondByte is the only conditional with a fall through,
|
|
// so that it can be used to pick off special cases before
|
|
// an xCondSlash. If the fallthrough instruction is xFail,
|
|
// advance the position so that the decoded instruction
|
|
// size includes the byte we just compared against.
|
|
if decodeOp(decoder[pc]) == xJump {
|
|
pc = int(decoder[pc+1])
|
|
}
|
|
if decodeOp(decoder[pc]) == xFail {
|
|
pos++
|
|
}
|
|
|
|
case xCondIs64:
|
|
if mode == 64 {
|
|
pc = int(decoder[pc+1])
|
|
} else {
|
|
pc = int(decoder[pc])
|
|
}
|
|
|
|
case xCondIsMem:
|
|
mem := haveMem
|
|
if !haveModrm {
|
|
if pos >= len(src) {
|
|
return instPrefix(src[0], mode) // too long
|
|
}
|
|
mem = src[pos]>>6 != 3
|
|
}
|
|
if mem {
|
|
pc = int(decoder[pc+1])
|
|
} else {
|
|
pc = int(decoder[pc])
|
|
}
|
|
|
|
case xCondDataSize:
|
|
switch dataMode {
|
|
case 16:
|
|
if dataSizeIndex >= 0 {
|
|
inst.Prefix[dataSizeIndex] |= PrefixImplicit
|
|
}
|
|
pc = int(decoder[pc])
|
|
case 32:
|
|
if dataSizeIndex >= 0 {
|
|
inst.Prefix[dataSizeIndex] |= PrefixImplicit
|
|
}
|
|
pc = int(decoder[pc+1])
|
|
case 64:
|
|
rexUsed |= PrefixREXW
|
|
pc = int(decoder[pc+2])
|
|
}
|
|
|
|
case xCondAddrSize:
|
|
switch addrMode {
|
|
case 16:
|
|
if addrSizeIndex >= 0 {
|
|
inst.Prefix[addrSizeIndex] |= PrefixImplicit
|
|
}
|
|
pc = int(decoder[pc])
|
|
case 32:
|
|
if addrSizeIndex >= 0 {
|
|
inst.Prefix[addrSizeIndex] |= PrefixImplicit
|
|
}
|
|
pc = int(decoder[pc+1])
|
|
case 64:
|
|
pc = int(decoder[pc+2])
|
|
}
|
|
|
|
case xCondPrefix:
|
|
// Conditional branch based on presence or absence of prefixes.
|
|
// The conflict cases here are completely undocumented and
|
|
// differ significantly between GNU libopcodes and Intel xed.
|
|
// I have not written assembly code to divine what various CPUs
|
|
// do, but it wouldn't surprise me if they are not consistent either.
|
|
//
|
|
// The basic idea is to switch on the presence of a prefix, so that
|
|
// for example:
|
|
//
|
|
// xCondPrefix, 4
|
|
// 0xF3, 123,
|
|
// 0xF2, 234,
|
|
// 0x66, 345,
|
|
// 0, 456
|
|
//
|
|
// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
|
|
// is present, 345 if the 66 prefix is present, and 456 otherwise.
|
|
// The prefixes are given in descending order so that the 0 will be last.
|
|
//
|
|
// It is unclear what should happen if multiple conditions are
|
|
// satisfied: what if F2 and F3 are both present, or if 66 and F2
|
|
// are present, or if all three are present? The one chosen becomes
|
|
// part of the opcode and the others do not. Perhaps the answer
|
|
// depends on the specific opcodes in question.
|
|
//
|
|
// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
|
|
// it comes in 16-bit and 32-bit forms based on the 66 prefix,
|
|
// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
|
|
// with the 66 being only an operand size override, and probably
|
|
// F2 66 0F 38 F1 /r should be treated the same.
|
|
// Perhaps that rule is specific to the case of CRC32, since no
|
|
// 66 0F 38 F1 instruction is defined (today) (that we know of).
|
|
// However, both libopcodes and xed seem to generalize this
|
|
// example and choose F2/F3 in preference to 66, and we
|
|
// do the same.
|
|
//
|
|
// Next, what if both F2 and F3 are present? Which wins?
|
|
// The Intel xed rule, and ours, is that the one that occurs last wins.
|
|
// The GNU libopcodes rule, which we implement only in gnuCompat mode,
|
|
// is that F3 beats F2 unless F3 has no special meaning, in which
|
|
// case F3 can be a modifier on an F2 special meaning.
|
|
//
|
|
// Concretely,
|
|
// 66 0F D6 /r is MOVQ
|
|
// F2 0F D6 /r is MOVDQ2Q
|
|
// F3 0F D6 /r is MOVQ2DQ.
|
|
//
|
|
// F2 66 0F D6 /r is 66 + MOVDQ2Q always.
|
|
// 66 F2 0F D6 /r is 66 + MOVDQ2Q always.
|
|
// F3 66 0F D6 /r is 66 + MOVQ2DQ always.
|
|
// 66 F3 0F D6 /r is 66 + MOVQ2DQ always.
|
|
// F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
|
|
// F3 F2 0F D6 /r is F3 + MOVDQ2Q in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
|
|
// Adding 66 anywhere in the prefix section of the
|
|
// last two cases does not change the outcome.
|
|
//
|
|
// Finally, what if there is a variant in which 66 is a mandatory
|
|
// prefix rather than an operand size override, but we know of
|
|
// no corresponding F2/F3 form, and we see both F2/F3 and 66.
|
|
// Does F2/F3 still take priority, so that the result is an unknown
|
|
// instruction, or does the 66 take priority, so that the extended
|
|
// 66 instruction should be interpreted as having a REP/REPN prefix?
|
|
// Intel xed does the former and GNU libopcodes does the latter.
|
|
// We side with Intel xed, unless we are trying to match libopcodes
|
|
// more closely during the comparison-based test suite.
|
|
//
|
|
// In 64-bit mode REX.W is another valid prefix to test for, but
|
|
// there is less ambiguity about that. When present, REX.W is
|
|
// always the first entry in the table.
|
|
n := int(decoder[pc])
|
|
pc++
|
|
sawF3 := false
|
|
for j := 0; j < n; j++ {
|
|
prefix := Prefix(decoder[pc+2*j])
|
|
if prefix.IsREX() {
|
|
rexUsed |= prefix
|
|
if rex&prefix == prefix {
|
|
pc = int(decoder[pc+2*j+1])
|
|
continue Decode
|
|
}
|
|
continue
|
|
}
|
|
ok := false
|
|
if prefix == 0 {
|
|
ok = true
|
|
} else if prefix.IsREX() {
|
|
rexUsed |= prefix
|
|
if rex&prefix == prefix {
|
|
ok = true
|
|
}
|
|
} else if prefix == 0xC5 || prefix == 0xC4 {
|
|
if vex == prefix {
|
|
ok = true
|
|
}
|
|
} else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A ||
|
|
prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) {
|
|
var vexM, vexP Prefix
|
|
if vex == 0xC5 {
|
|
vexM = 1 // 2 byte vex always implies 0F
|
|
vexP = inst.Prefix[vexIndex+1]
|
|
} else {
|
|
vexM = inst.Prefix[vexIndex+1]
|
|
vexP = inst.Prefix[vexIndex+2]
|
|
}
|
|
switch prefix {
|
|
case 0x66:
|
|
ok = vexP&3 == 1
|
|
case 0xF3:
|
|
ok = vexP&3 == 2
|
|
case 0xF2:
|
|
ok = vexP&3 == 3
|
|
case 0x0F:
|
|
ok = vexM&3 == 1
|
|
case 0x0F38:
|
|
ok = vexM&3 == 2
|
|
case 0x0F3A:
|
|
ok = vexM&3 == 3
|
|
}
|
|
} else {
|
|
if prefix == 0xF3 {
|
|
sawF3 = true
|
|
}
|
|
switch prefix {
|
|
case PrefixLOCK:
|
|
if lockIndex >= 0 {
|
|
inst.Prefix[lockIndex] |= PrefixImplicit
|
|
ok = true
|
|
}
|
|
case PrefixREP, PrefixREPN:
|
|
if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
|
|
inst.Prefix[repIndex] |= PrefixImplicit
|
|
ok = true
|
|
}
|
|
if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
|
|
// Check to see if earlier prefix F3 is present.
|
|
for i := repIndex - 1; i >= 0; i-- {
|
|
if inst.Prefix[i]&0xFF == prefix {
|
|
inst.Prefix[i] |= PrefixImplicit
|
|
ok = true
|
|
}
|
|
}
|
|
}
|
|
if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
|
|
// Check to see if earlier prefix F2 is present.
|
|
for i := repIndex - 1; i >= 0; i-- {
|
|
if inst.Prefix[i]&0xFF == prefix {
|
|
inst.Prefix[i] |= PrefixImplicit
|
|
ok = true
|
|
}
|
|
}
|
|
}
|
|
case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
|
|
if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
|
|
inst.Prefix[segIndex] |= PrefixImplicit
|
|
ok = true
|
|
}
|
|
case PrefixDataSize:
|
|
// Looking for 66 mandatory prefix.
|
|
// The F2/F3 mandatory prefixes take priority when both are present.
|
|
// If we got this far in the xCondPrefix table and an F2/F3 is present,
|
|
// it means the table didn't have any entry for that prefix. But if 66 has
|
|
// special meaning, perhaps F2/F3 have special meaning that we don't know.
|
|
// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
|
|
// GNU libopcodes allows the 66 to match. We do what Intel xed does
|
|
// except in gnuCompat mode.
|
|
if repIndex >= 0 && !gnuCompat {
|
|
inst.Op = 0
|
|
break Decode
|
|
}
|
|
if dataSizeIndex >= 0 {
|
|
inst.Prefix[dataSizeIndex] |= PrefixImplicit
|
|
ok = true
|
|
}
|
|
case PrefixAddrSize:
|
|
if addrSizeIndex >= 0 {
|
|
inst.Prefix[addrSizeIndex] |= PrefixImplicit
|
|
ok = true
|
|
}
|
|
}
|
|
}
|
|
if ok {
|
|
pc = int(decoder[pc+2*j+1])
|
|
continue Decode
|
|
}
|
|
}
|
|
inst.Op = 0
|
|
break Decode
|
|
|
|
case xCondSlashR:
|
|
pc = int(decoder[pc+regop&7])
|
|
|
|
// Input.
|
|
|
|
case xReadSlashR:
|
|
// done above
|
|
|
|
case xReadIb:
|
|
if pos >= len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
imm8 = int8(src[pos])
|
|
pos++
|
|
|
|
case xReadIw:
|
|
if pos+2 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
imm = int64(binary.LittleEndian.Uint16(src[pos:]))
|
|
pos += 2
|
|
|
|
case xReadId:
|
|
if pos+4 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
imm = int64(binary.LittleEndian.Uint32(src[pos:]))
|
|
pos += 4
|
|
|
|
case xReadIo:
|
|
if pos+8 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
imm = int64(binary.LittleEndian.Uint64(src[pos:]))
|
|
pos += 8
|
|
|
|
case xReadCb:
|
|
if pos >= len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
immcpos = pos
|
|
immc = int64(src[pos])
|
|
pos++
|
|
|
|
case xReadCw:
|
|
if pos+2 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
immcpos = pos
|
|
immc = int64(binary.LittleEndian.Uint16(src[pos:]))
|
|
pos += 2
|
|
|
|
case xReadCm:
|
|
immcpos = pos
|
|
if addrMode == 16 {
|
|
if pos+2 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
immc = int64(binary.LittleEndian.Uint16(src[pos:]))
|
|
pos += 2
|
|
} else if addrMode == 32 {
|
|
if pos+4 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
immc = int64(binary.LittleEndian.Uint32(src[pos:]))
|
|
pos += 4
|
|
} else {
|
|
if pos+8 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
immc = int64(binary.LittleEndian.Uint64(src[pos:]))
|
|
pos += 8
|
|
}
|
|
case xReadCd:
|
|
immcpos = pos
|
|
if pos+4 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
immc = int64(binary.LittleEndian.Uint32(src[pos:]))
|
|
pos += 4
|
|
|
|
case xReadCp:
|
|
immcpos = pos
|
|
if pos+6 > len(src) {
|
|
return truncated(src, mode)
|
|
}
|
|
w := binary.LittleEndian.Uint32(src[pos:])
|
|
w2 := binary.LittleEndian.Uint16(src[pos+4:])
|
|
immc = int64(w2)<<32 | int64(w)
|
|
pos += 6
|
|
|
|
// Output.
|
|
|
|
case xSetOp:
|
|
inst.Op = Op(decoder[pc])
|
|
pc++
|
|
|
|
case xArg1,
|
|
xArg3,
|
|
xArgAL,
|
|
xArgAX,
|
|
xArgCL,
|
|
xArgCS,
|
|
xArgDS,
|
|
xArgDX,
|
|
xArgEAX,
|
|
xArgEDX,
|
|
xArgES,
|
|
xArgFS,
|
|
xArgGS,
|
|
xArgRAX,
|
|
xArgRDX,
|
|
xArgSS,
|
|
xArgST,
|
|
xArgXMM0:
|
|
inst.Args[narg] = fixedArg[x]
|
|
narg++
|
|
|
|
case xArgImm8:
|
|
inst.Args[narg] = Imm(imm8)
|
|
narg++
|
|
|
|
case xArgImm8u:
|
|
inst.Args[narg] = Imm(uint8(imm8))
|
|
narg++
|
|
|
|
case xArgImm16:
|
|
inst.Args[narg] = Imm(int16(imm))
|
|
narg++
|
|
|
|
case xArgImm16u:
|
|
inst.Args[narg] = Imm(uint16(imm))
|
|
narg++
|
|
|
|
case xArgImm32:
|
|
inst.Args[narg] = Imm(int32(imm))
|
|
narg++
|
|
|
|
case xArgImm64:
|
|
inst.Args[narg] = Imm(imm)
|
|
narg++
|
|
|
|
case xArgM,
|
|
xArgM128,
|
|
xArgM256,
|
|
xArgM1428byte,
|
|
xArgM16,
|
|
xArgM16and16,
|
|
xArgM16and32,
|
|
xArgM16and64,
|
|
xArgM16colon16,
|
|
xArgM16colon32,
|
|
xArgM16colon64,
|
|
xArgM16int,
|
|
xArgM2byte,
|
|
xArgM32,
|
|
xArgM32and32,
|
|
xArgM32fp,
|
|
xArgM32int,
|
|
xArgM512byte,
|
|
xArgM64,
|
|
xArgM64fp,
|
|
xArgM64int,
|
|
xArgM8,
|
|
xArgM80bcd,
|
|
xArgM80dec,
|
|
xArgM80fp,
|
|
xArgM94108byte,
|
|
xArgMem:
|
|
if !haveMem {
|
|
inst.Op = 0
|
|
break Decode
|
|
}
|
|
inst.Args[narg] = mem
|
|
inst.MemBytes = int(memBytes[decodeOp(x)])
|
|
if mem.Base == RIP {
|
|
inst.PCRel = displen
|
|
inst.PCRelOff = dispoff
|
|
}
|
|
narg++
|
|
|
|
case xArgPtr16colon16:
|
|
inst.Args[narg] = Imm(immc >> 16)
|
|
inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
|
|
narg += 2
|
|
|
|
case xArgPtr16colon32:
|
|
inst.Args[narg] = Imm(immc >> 32)
|
|
inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
|
|
narg += 2
|
|
|
|
case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
|
|
// TODO(rsc): Can address be 64 bits?
|
|
mem = Mem{Disp: int64(immc)}
|
|
if segIndex >= 0 {
|
|
mem.Segment = prefixToSegment(inst.Prefix[segIndex])
|
|
inst.Prefix[segIndex] |= PrefixImplicit
|
|
}
|
|
inst.Args[narg] = mem
|
|
inst.MemBytes = int(memBytes[decodeOp(x)])
|
|
if mem.Base == RIP {
|
|
inst.PCRel = displen
|
|
inst.PCRelOff = dispoff
|
|
}
|
|
narg++
|
|
|
|
case xArgYmm1:
|
|
base := baseReg[x]
|
|
index := Reg(regop)
|
|
if inst.Prefix[vexIndex+1]&0x80 == 0 {
|
|
index += 8
|
|
}
|
|
inst.Args[narg] = base + index
|
|
narg++
|
|
|
|
case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
|
|
base := baseReg[x]
|
|
index := Reg(regop)
|
|
if rex != 0 && base == AL && index >= 4 {
|
|
rexUsed |= PrefixREX
|
|
index -= 4
|
|
base = SPB
|
|
}
|
|
inst.Args[narg] = base + index
|
|
narg++
|
|
|
|
case xArgMm, xArgMm1, xArgTR0dashTR7:
|
|
inst.Args[narg] = baseReg[x] + Reg(regop&7)
|
|
narg++
|
|
|
|
case xArgCR0dashCR7:
|
|
// AMD documents an extension that the LOCK prefix
|
|
// can be used in place of a REX prefix in order to access
|
|
// CR8 from 32-bit mode. The LOCK prefix is allowed in
|
|
// all modes, provided the corresponding CPUID bit is set.
|
|
if lockIndex >= 0 {
|
|
inst.Prefix[lockIndex] |= PrefixImplicit
|
|
regop += 8
|
|
}
|
|
inst.Args[narg] = CR0 + Reg(regop)
|
|
narg++
|
|
|
|
case xArgSreg:
|
|
regop &= 7
|
|
if regop >= 6 {
|
|
inst.Op = 0
|
|
break Decode
|
|
}
|
|
inst.Args[narg] = ES + Reg(regop)
|
|
narg++
|
|
|
|
case xArgRmf16, xArgRmf32, xArgRmf64:
|
|
base := baseReg[x]
|
|
index := Reg(modrm & 07)
|
|
if rex&PrefixREXB != 0 {
|
|
rexUsed |= PrefixREXB
|
|
index += 8
|
|
}
|
|
inst.Args[narg] = base + index
|
|
narg++
|
|
|
|
case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
|
|
n := inst.Opcode >> uint(opshift+8) & 07
|
|
base := baseReg[x]
|
|
index := Reg(n)
|
|
if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
|
|
rexUsed |= PrefixREXB
|
|
index += 8
|
|
}
|
|
if rex != 0 && base == AL && index >= 4 {
|
|
rexUsed |= PrefixREX
|
|
index -= 4
|
|
base = SPB
|
|
}
|
|
inst.Args[narg] = base + index
|
|
narg++
|
|
case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
|
|
xArgMmM32, xArgMmM64, xArgMm2M64,
|
|
xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128,
|
|
xArgYmm2M256:
|
|
if haveMem {
|
|
inst.Args[narg] = mem
|
|
inst.MemBytes = int(memBytes[decodeOp(x)])
|
|
if mem.Base == RIP {
|
|
inst.PCRel = displen
|
|
inst.PCRelOff = dispoff
|
|
}
|
|
} else {
|
|
base := baseReg[x]
|
|
index := Reg(rm)
|
|
switch decodeOp(x) {
|
|
case xArgMmM32, xArgMmM64, xArgMm2M64:
|
|
// There are only 8 MMX registers, so these ignore the REX.X bit.
|
|
index &= 7
|
|
case xArgRM8:
|
|
if rex != 0 && index >= 4 {
|
|
rexUsed |= PrefixREX
|
|
index -= 4
|
|
base = SPB
|
|
}
|
|
case xArgYmm2M256:
|
|
if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 {
|
|
index += 8
|
|
}
|
|
}
|
|
inst.Args[narg] = base + index
|
|
}
|
|
narg++
|
|
|
|
case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
|
|
if haveMem {
|
|
inst.Op = 0
|
|
break Decode
|
|
}
|
|
inst.Args[narg] = baseReg[x] + Reg(rm&7)
|
|
narg++
|
|
|
|
case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
|
|
if haveMem {
|
|
inst.Op = 0
|
|
break Decode
|
|
}
|
|
inst.Args[narg] = baseReg[x] + Reg(rm)
|
|
narg++
|
|
|
|
case xArgRel8:
|
|
inst.PCRelOff = immcpos
|
|
inst.PCRel = 1
|
|
inst.Args[narg] = Rel(int8(immc))
|
|
narg++
|
|
|
|
case xArgRel16:
|
|
inst.PCRelOff = immcpos
|
|
inst.PCRel = 2
|
|
inst.Args[narg] = Rel(int16(immc))
|
|
narg++
|
|
|
|
case xArgRel32:
|
|
inst.PCRelOff = immcpos
|
|
inst.PCRel = 4
|
|
inst.Args[narg] = Rel(int32(immc))
|
|
narg++
|
|
}
|
|
}
|
|
|
|
if inst.Op == 0 {
|
|
// Invalid instruction.
|
|
if nprefix > 0 {
|
|
return instPrefix(src[0], mode) // invalid instruction
|
|
}
|
|
return Inst{Len: pos}, ErrUnrecognized
|
|
}
|
|
|
|
// Matched! Hooray!
|
|
|
|
// 90 decodes as XCHG EAX, EAX but is NOP.
|
|
// 66 90 decodes as XCHG AX, AX and is NOP too.
|
|
// 48 90 decodes as XCHG RAX, RAX and is NOP too.
|
|
// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
|
|
// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
|
|
// It's all too special to handle in the decoding tables, at least for now.
|
|
if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
|
|
if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
|
|
inst.Op = NOP
|
|
if dataSizeIndex >= 0 {
|
|
inst.Prefix[dataSizeIndex] &^= PrefixImplicit
|
|
}
|
|
inst.Args[0] = nil
|
|
inst.Args[1] = nil
|
|
}
|
|
if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
|
|
inst.Prefix[repIndex] |= PrefixImplicit
|
|
inst.Op = PAUSE
|
|
inst.Args[0] = nil
|
|
inst.Args[1] = nil
|
|
} else if gnuCompat {
|
|
for i := nprefix - 1; i >= 0; i-- {
|
|
if inst.Prefix[i]&0xFF == 0xF3 {
|
|
inst.Prefix[i] |= PrefixImplicit
|
|
inst.Op = PAUSE
|
|
inst.Args[0] = nil
|
|
inst.Args[1] = nil
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// defaultSeg returns the default segment for an implicit
|
|
// memory reference: the final override if present, or else DS.
|
|
defaultSeg := func() Reg {
|
|
if segIndex >= 0 {
|
|
inst.Prefix[segIndex] |= PrefixImplicit
|
|
return prefixToSegment(inst.Prefix[segIndex])
|
|
}
|
|
return DS
|
|
}
|
|
|
|
// Add implicit arguments not present in the tables.
|
|
// Normally we shy away from making implicit arguments explicit,
|
|
// following the Intel manuals, but adding the arguments seems
|
|
// the best way to express the effect of the segment override prefixes.
|
|
// TODO(rsc): Perhaps add these to the tables and
|
|
// create bytecode instructions for them.
|
|
usedAddrSize := false
|
|
switch inst.Op {
|
|
case INSB, INSW, INSD:
|
|
inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
|
|
inst.Args[1] = DX
|
|
usedAddrSize = true
|
|
|
|
case OUTSB, OUTSW, OUTSD:
|
|
inst.Args[0] = DX
|
|
inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
|
|
usedAddrSize = true
|
|
|
|
case MOVSB, MOVSW, MOVSD, MOVSQ:
|
|
inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
|
|
inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
|
|
usedAddrSize = true
|
|
|
|
case CMPSB, CMPSW, CMPSD, CMPSQ:
|
|
inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
|
|
inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
|
|
usedAddrSize = true
|
|
|
|
case LODSB, LODSW, LODSD, LODSQ:
|
|
switch inst.Op {
|
|
case LODSB:
|
|
inst.Args[0] = AL
|
|
case LODSW:
|
|
inst.Args[0] = AX
|
|
case LODSD:
|
|
inst.Args[0] = EAX
|
|
case LODSQ:
|
|
inst.Args[0] = RAX
|
|
}
|
|
inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
|
|
usedAddrSize = true
|
|
|
|
case STOSB, STOSW, STOSD, STOSQ:
|
|
inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
|
|
switch inst.Op {
|
|
case STOSB:
|
|
inst.Args[1] = AL
|
|
case STOSW:
|
|
inst.Args[1] = AX
|
|
case STOSD:
|
|
inst.Args[1] = EAX
|
|
case STOSQ:
|
|
inst.Args[1] = RAX
|
|
}
|
|
usedAddrSize = true
|
|
|
|
case SCASB, SCASW, SCASD, SCASQ:
|
|
inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
|
|
switch inst.Op {
|
|
case SCASB:
|
|
inst.Args[0] = AL
|
|
case SCASW:
|
|
inst.Args[0] = AX
|
|
case SCASD:
|
|
inst.Args[0] = EAX
|
|
case SCASQ:
|
|
inst.Args[0] = RAX
|
|
}
|
|
usedAddrSize = true
|
|
|
|
case XLATB:
|
|
inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
|
|
usedAddrSize = true
|
|
}
|
|
|
|
// If we used the address size annotation to construct the
|
|
// argument list, mark that prefix as implicit: it doesn't need
|
|
// to be shown when printing the instruction.
|
|
if haveMem || usedAddrSize {
|
|
if addrSizeIndex >= 0 {
|
|
inst.Prefix[addrSizeIndex] |= PrefixImplicit
|
|
}
|
|
}
|
|
|
|
// Similarly, if there's some memory operand, the segment
|
|
// will be shown there and doesn't need to be shown as an
|
|
// explicit prefix.
|
|
if haveMem {
|
|
if segIndex >= 0 {
|
|
inst.Prefix[segIndex] |= PrefixImplicit
|
|
}
|
|
}
|
|
|
|
// Branch predict prefixes are overloaded segment prefixes,
|
|
// since segment prefixes don't make sense on conditional jumps.
|
|
// Rewrite final instance to prediction prefix.
|
|
// The set of instructions to which the prefixes apply (other than the
|
|
// Jcc conditional jumps) is not 100% clear from the manuals, but
|
|
// the disassemblers seem to agree about the LOOP and JCXZ instructions,
|
|
// so we'll follow along.
|
|
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
|
|
if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
|
|
PredictLoop:
|
|
for i := nprefix - 1; i >= 0; i-- {
|
|
p := inst.Prefix[i]
|
|
switch p & 0xFF {
|
|
case PrefixCS:
|
|
inst.Prefix[i] = PrefixPN
|
|
break PredictLoop
|
|
case PrefixDS:
|
|
inst.Prefix[i] = PrefixPT
|
|
break PredictLoop
|
|
}
|
|
}
|
|
}
|
|
|
|
// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
|
|
// A REPN applied to certain control transfers is a BND prefix to bound
|
|
// the range of possible destinations. There's surprisingly little documentation
|
|
// about this, so we just do what libopcodes and xed agree on.
|
|
// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
|
|
// does not turn into a BND.
|
|
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
|
|
if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
|
|
for i := nprefix - 1; i >= 0; i-- {
|
|
p := inst.Prefix[i]
|
|
if p&^PrefixIgnored == PrefixREPN {
|
|
inst.Prefix[i] = PrefixBND
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// The LOCK prefix only applies to certain instructions, and then only
|
|
// to instances of the instruction with a memory destination.
|
|
// Other uses of LOCK are invalid and cause a processor exception,
|
|
// in contrast to the "just ignore it" spirit applied to all other prefixes.
|
|
// Mark invalid lock prefixes.
|
|
hasLock := false
|
|
if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
|
|
switch inst.Op {
|
|
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
|
|
case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
|
|
if isMem(inst.Args[0]) {
|
|
hasLock = true
|
|
break
|
|
}
|
|
fallthrough
|
|
default:
|
|
inst.Prefix[lockIndex] |= PrefixInvalid
|
|
}
|
|
}
|
|
|
|
// In certain cases, all of which require a memory destination,
|
|
// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
|
|
// from the Intel Transactional Synchronization Extensions (TSX).
|
|
//
|
|
// The specific rules are:
|
|
// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
|
|
// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
|
|
// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
|
|
if isMem(inst.Args[0]) {
|
|
if inst.Op == XCHG {
|
|
hasLock = true
|
|
}
|
|
|
|
for i := len(inst.Prefix) - 1; i >= 0; i-- {
|
|
p := inst.Prefix[i] &^ PrefixIgnored
|
|
switch p {
|
|
case PrefixREPN:
|
|
if hasLock {
|
|
inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
|
|
}
|
|
|
|
case PrefixREP:
|
|
if hasLock {
|
|
inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
|
|
}
|
|
|
|
if inst.Op == MOV {
|
|
op := (inst.Opcode >> 24) &^ 1
|
|
if op == 0x88 || op == 0xC6 {
|
|
inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
|
|
if repIndex >= 0 {
|
|
switch inst.Prefix[repIndex] {
|
|
case PrefixREP, PrefixREPN:
|
|
switch inst.Op {
|
|
// According to the manuals, the REP/REPE prefix applies to all of these,
|
|
// while the REPN applies only to some of them. However, both libopcodes
|
|
// and xed show both prefixes explicitly for all instructions, so we do the same.
|
|
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
|
|
case INSB, INSW, INSD,
|
|
MOVSB, MOVSW, MOVSD, MOVSQ,
|
|
OUTSB, OUTSW, OUTSD,
|
|
LODSB, LODSW, LODSD, LODSQ,
|
|
CMPSB, CMPSW, CMPSD, CMPSQ,
|
|
SCASB, SCASW, SCASD, SCASQ,
|
|
STOSB, STOSW, STOSD, STOSQ:
|
|
// ok
|
|
default:
|
|
inst.Prefix[repIndex] |= PrefixIgnored
|
|
}
|
|
}
|
|
}
|
|
|
|
// If REX was present, mark implicit if all the 1 bits were consumed.
|
|
if rexIndex >= 0 {
|
|
if rexUsed != 0 {
|
|
rexUsed |= PrefixREX
|
|
}
|
|
if rex&^rexUsed == 0 {
|
|
inst.Prefix[rexIndex] |= PrefixImplicit
|
|
}
|
|
}
|
|
|
|
inst.DataSize = dataMode
|
|
inst.AddrSize = addrMode
|
|
inst.Mode = mode
|
|
inst.Len = pos
|
|
return inst, nil
|
|
}
|
|
|
|
// errInternal is a sentinel error carrying the message "internal error".
// NOTE(review): presumably reported when the decoder itself reaches an
// inconsistent state rather than when the input bytes are malformed —
// confirm against its call sites.
var errInternal = errors.New("internal error")
|
|
|
|
// addr16 records the eight 16-bit addressing modes.
|
|
var addr16 = [8]Mem{
|
|
{Base: BX, Scale: 1, Index: SI},
|
|
{Base: BX, Scale: 1, Index: DI},
|
|
{Base: BP, Scale: 1, Index: SI},
|
|
{Base: BP, Scale: 1, Index: DI},
|
|
{Base: SI},
|
|
{Base: DI},
|
|
{Base: BP},
|
|
{Base: BX},
|
|
}
|
|
|
|
// baseRegForBits returns the base register for a given register size in bits.
|
|
func baseRegForBits(bits int) Reg {
|
|
switch bits {
|
|
case 8:
|
|
return AL
|
|
case 16:
|
|
return AX
|
|
case 32:
|
|
return EAX
|
|
case 64:
|
|
return RAX
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// baseReg records the base register for argument types that specify
|
|
// a range of registers indexed by op, regop, or rm.
|
|
var baseReg = [...]Reg{
|
|
xArgDR0dashDR7: DR0,
|
|
xArgMm1: M0,
|
|
xArgMm2: M0,
|
|
xArgMm2M64: M0,
|
|
xArgMm: M0,
|
|
xArgMmM32: M0,
|
|
xArgMmM64: M0,
|
|
xArgR16: AX,
|
|
xArgR16op: AX,
|
|
xArgR32: EAX,
|
|
xArgR32M16: EAX,
|
|
xArgR32M8: EAX,
|
|
xArgR32op: EAX,
|
|
xArgR64: RAX,
|
|
xArgR64M16: RAX,
|
|
xArgR64op: RAX,
|
|
xArgR8: AL,
|
|
xArgR8op: AL,
|
|
xArgRM16: AX,
|
|
xArgRM32: EAX,
|
|
xArgRM64: RAX,
|
|
xArgRM8: AL,
|
|
xArgRmf16: AX,
|
|
xArgRmf32: EAX,
|
|
xArgRmf64: RAX,
|
|
xArgSTi: F0,
|
|
xArgTR0dashTR7: TR0,
|
|
xArgXmm1: X0,
|
|
xArgYmm1: X0,
|
|
xArgXmm2: X0,
|
|
xArgXmm2M128: X0,
|
|
xArgYmm2M256: X0,
|
|
xArgXmm2M16: X0,
|
|
xArgXmm2M32: X0,
|
|
xArgXmm2M64: X0,
|
|
xArgXmm: X0,
|
|
xArgXmmM128: X0,
|
|
xArgXmmM32: X0,
|
|
xArgXmmM64: X0,
|
|
}
|
|
|
|
// prefixToSegment returns the segment register
|
|
// corresponding to a particular segment prefix.
|
|
func prefixToSegment(p Prefix) Reg {
|
|
switch p &^ PrefixImplicit {
|
|
case PrefixCS:
|
|
return CS
|
|
case PrefixDS:
|
|
return DS
|
|
case PrefixES:
|
|
return ES
|
|
case PrefixFS:
|
|
return FS
|
|
case PrefixGS:
|
|
return GS
|
|
case PrefixSS:
|
|
return SS
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// fixedArg records the fixed arguments corresponding to the given bytecodes.
|
|
var fixedArg = [...]Arg{
|
|
xArg1: Imm(1),
|
|
xArg3: Imm(3),
|
|
xArgAL: AL,
|
|
xArgAX: AX,
|
|
xArgDX: DX,
|
|
xArgEAX: EAX,
|
|
xArgEDX: EDX,
|
|
xArgRAX: RAX,
|
|
xArgRDX: RDX,
|
|
xArgCL: CL,
|
|
xArgCS: CS,
|
|
xArgDS: DS,
|
|
xArgES: ES,
|
|
xArgFS: FS,
|
|
xArgGS: GS,
|
|
xArgSS: SS,
|
|
xArgST: F0,
|
|
xArgXMM0: X0,
|
|
}
|
|
|
|
// memBytes records the size of the memory pointed at
|
|
// by a memory argument of the given form.
|
|
var memBytes = [...]int8{
|
|
xArgM128: 128 / 8,
|
|
xArgM256: 256 / 8,
|
|
xArgM16: 16 / 8,
|
|
xArgM16and16: (16 + 16) / 8,
|
|
xArgM16colon16: (16 + 16) / 8,
|
|
xArgM16colon32: (16 + 32) / 8,
|
|
xArgM16int: 16 / 8,
|
|
xArgM2byte: 2,
|
|
xArgM32: 32 / 8,
|
|
xArgM32and32: (32 + 32) / 8,
|
|
xArgM32fp: 32 / 8,
|
|
xArgM32int: 32 / 8,
|
|
xArgM64: 64 / 8,
|
|
xArgM64fp: 64 / 8,
|
|
xArgM64int: 64 / 8,
|
|
xArgMm2M64: 64 / 8,
|
|
xArgMmM32: 32 / 8,
|
|
xArgMmM64: 64 / 8,
|
|
xArgMoffs16: 16 / 8,
|
|
xArgMoffs32: 32 / 8,
|
|
xArgMoffs64: 64 / 8,
|
|
xArgMoffs8: 8 / 8,
|
|
xArgR32M16: 16 / 8,
|
|
xArgR32M8: 8 / 8,
|
|
xArgR64M16: 16 / 8,
|
|
xArgRM16: 16 / 8,
|
|
xArgRM32: 32 / 8,
|
|
xArgRM64: 64 / 8,
|
|
xArgRM8: 8 / 8,
|
|
xArgXmm2M128: 128 / 8,
|
|
xArgYmm2M256: 256 / 8,
|
|
xArgXmm2M16: 16 / 8,
|
|
xArgXmm2M32: 32 / 8,
|
|
xArgXmm2M64: 64 / 8,
|
|
xArgXmm: 128 / 8,
|
|
xArgXmmM128: 128 / 8,
|
|
xArgXmmM32: 32 / 8,
|
|
xArgXmmM64: 64 / 8,
|
|
}
|
|
|
|
// isCondJmp records the conditional jumps.
|
|
var isCondJmp = [maxOp + 1]bool{
|
|
JA: true,
|
|
JAE: true,
|
|
JB: true,
|
|
JBE: true,
|
|
JE: true,
|
|
JG: true,
|
|
JGE: true,
|
|
JL: true,
|
|
JLE: true,
|
|
JNE: true,
|
|
JNO: true,
|
|
JNP: true,
|
|
JNS: true,
|
|
JO: true,
|
|
JP: true,
|
|
JS: true,
|
|
}
|
|
|
|
// isLoop records the loop operators.
|
|
var isLoop = [maxOp + 1]bool{
|
|
LOOP: true,
|
|
LOOPE: true,
|
|
LOOPNE: true,
|
|
JECXZ: true,
|
|
JRCXZ: true,
|
|
}
|